In [1]:
#Librerías para la carga, análisis y preprocesamiento de datos
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import MinMaxScaler

In [2]:
# División de los datos de entrenamiento y prueba
training_files = ['OpportunityUCIDataset/dataset/S1-ADL1.dat',
                 'OpportunityUCIDataset/dataset/S1-ADL2.dat',
                 'OpportunityUCIDataset/dataset/S1-ADL3.dat',
                 'OpportunityUCIDataset/dataset/S1-Drill.dat',
                 'OpportunityUCIDataset/dataset/S2-ADL1.dat',
                 'OpportunityUCIDataset/dataset/S2-ADL2.dat',
                 'OpportunityUCIDataset/dataset/S2-ADL3.dat',
                 'OpportunityUCIDataset/dataset/S2-Drill.dat']
test_files = ['OpportunityUCIDataset/dataset/S3-ADL1.dat',
                'OpportunityUCIDataset/dataset/S3-ADL2.dat',
                'OpportunityUCIDataset/dataset/S3-ADL3.dat',
                'OpportunityUCIDataset/dataset/S3-Drill.dat']

In [3]:
# Leer columnas del dataset que se ecuentran en otro archivo
col_names = []
with open('col_names.txt','r') as f:
    lines = f.read().splitlines()
    for line in lines:
        col_names.append(line)

In [4]:
# Leer datos de entrenamiento
df_training = pd.DataFrame()
for i, file in enumerate(training_files):
    print(file,"se está leyendo...")
    file_data = pd.read_table(file, header=None, sep='\s+')
    file_data.columns = col_names      
    df_training = df_training._append(file_data, ignore_index=True)
df_training.reset_index(drop=True, inplace=True)
print("Lectura hecha!")

# Leer datos de prueba
df_test = pd.DataFrame()
for i, file in enumerate(test_files):
    print(file,"se está leyendo...")
    file_data = pd.read_table(file, header=None, sep='\s+')
    file_data.columns = col_names      
    df_test = df_test._append(file_data, ignore_index=True)
df_test.reset_index(drop=True, inplace=True)
print("Lectura hecha!")

OpportunityUCIDataset/dataset/S1-ADL1.dat se está leyendo...
OpportunityUCIDataset/dataset/S1-ADL2.dat se está leyendo...
OpportunityUCIDataset/dataset/S1-ADL3.dat se está leyendo...
OpportunityUCIDataset/dataset/S1-Drill.dat se está leyendo...
OpportunityUCIDataset/dataset/S2-ADL1.dat se está leyendo...
OpportunityUCIDataset/dataset/S2-ADL2.dat se está leyendo...
OpportunityUCIDataset/dataset/S2-ADL3.dat se está leyendo...
OpportunityUCIDataset/dataset/S2-Drill.dat se está leyendo...
Lectura hecha!
OpportunityUCIDataset/dataset/S3-ADL1.dat se está leyendo...
OpportunityUCIDataset/dataset/S3-ADL2.dat se está leyendo...
OpportunityUCIDataset/dataset/S3-ADL3.dat se está leyendo...
OpportunityUCIDataset/dataset/S3-Drill.dat se está leyendo...
Lectura hecha!


In [5]:
def segmentation_to_simple_activities(df):
    # Segmentación de la base de datos a solo las simples (Parado, caminando, sentado, acostado)
    df = df.drop(['HL_Activity','LL_Left_Arm','LL_Left_Arm_Object','LL_Right_Arm',
                  'LL_Right_Arm_Object', 'ML_Both_Arms'], axis = 1)
    # Se eliminan los ejemplos que no entran en alguna de las 4 actividades
    df = df[df['Locomotion'] != 0]
    return df

In [6]:
df_training = segmentation_to_simple_activities(df_training)
df_test = segmentation_to_simple_activities(df_test)

In [7]:
def mapping_new_labels(df):
    # Se mapean los nuevos labels
    mapping = {1:1, 2:2, 4:3, 5:4}
    df['Locomotion'] = df['Locomotion'].map(mapping)
    return df

In [8]:
df_training = mapping_new_labels(df_training)
df_test = mapping_new_labels(df_test)

In [9]:
def cut_no_body_sensors(df):
    # Se dejan solamente los features relacionados a sensores en el cuerpo humano
    df = df.drop(df.iloc[:,134:243], axis=1)
    df = df.drop(['IMU-L-SHOE-Compass', 'IMU-R-SHOE-Compass'], axis=1)

    columns_to_drop = [col for col in df.columns if 'Quaternion' in col]
    df = df.drop(columns=columns_to_drop)
    return df

In [10]:
df_training = cut_no_body_sensors(df_training)
df_test = cut_no_body_sensors(df_test)

In [11]:
def handle_missing_values(df):
    # Manejo de valores nulos por medio de imputación hacia delante
    limit = df.shape[1]*0.9
    df = df.dropna(axis='rows',thresh = limit)
    df.iloc[0] = df.iloc[0].fillna(0)
    df = df.ffill()
    return df

In [12]:
df_training = handle_missing_values(df_training)
df_test = handle_missing_values(df_test)

In [13]:
# Distribución de clases
def class_distribution(df):
    locomotion = pd.Series([0,0,0,0],index=['Stan', 'Walk', 'Sit', 'Lie'])
    for value in df.loc[:,'Locomotion']:
        if value == 1:
            locomotion['Stan'] += 1
        elif value == 2:
            locomotion['Walk'] += 1
        elif value == 3:
            locomotion['Sit'] += 1
        elif value == 4:
            locomotion['Lie'] += 1
    print(locomotion)

In [17]:
df = df_training.iloc[:900,1:132]

In [20]:
window_size = 90
total_sensors = 37
total_samples = df.shape[0]//window_size

start_rows = 0
end_rows = 90

first_column_from_sensor = 0

resultados_caracteristicas = []

for i in range(total_sensors):
    for j in range(total_samples):
        col_x = df.iloc[start_rows:end_rows, first_column_from_sensor]
        col_y = df.iloc[start_rows:end_rows, first_column_from_sensor+1]
        col_z = df.iloc[start_rows:end_rows, first_column_from_sensor+2]

        magnitud = np.sqrt(col_x**2 + col_y**2 + col_z**2)
        media_magnitud = np.mean(magnitud)
        std_magnitud = np.std(magnitud)
        auc_magnitud = np.sum(magnitud)

        mean_magnitude_name = 'MM ' + df.columns[first_column_from_sensor]
        std_magnitude_name = 'StdM ' + df.columns[first_column_from_sensor]
        AUC_magnitude_name = 'AUCM ' + df.columns[first_column_from_sensor]
        
        caracteristicas = { 
            mean_magnitude_name: media_magnitud,
            std_magnitude_name: std_magnitud,
            AUC_magnitude_name: auc_magnitud
        }

        resultados_caracteristicas.append(caracteristicas)

        start_rows = end_rows
        end_rows += 90
    
    first_column_from_sensor += 3

resultados_df = pd.DataFrame(resultados_caracteristicas)

In [21]:
resultados_df

Unnamed: 0,MM Acc-RKN^-accX,StdM Acc-RKN^-accX,AUCM Acc-RKN^-accX,MM Acc-HIP-accX,StdM Acc-HIP-accX,AUCM Acc-HIP-accX,MM Acc-LUA^-accX,StdM Acc-LUA^-accX,AUCM Acc-LUA^-accX,MM Acc-RUA_-accX,...,AUCM IMU-R-SHOE-Nav_AX,MM IMU-R-SHOE-Body_AX,StdM IMU-R-SHOE-Body_AX,AUCM IMU-R-SHOE-Body_AX,MM IMU-R-SHOE-AngVelBodyFrameX,StdM IMU-R-SHOE-AngVelBodyFrameX,AUCM IMU-R-SHOE-AngVelBodyFrameX,MM IMU-R-SHOE-AngVelNavFrameX,StdM IMU-R-SHOE-AngVelNavFrameX,AUCM IMU-R-SHOE-AngVelNavFrameX
0,1027.437893,12.862027,92469.410363,,,,,,,,...,,,,,,,,,,
1,1030.797048,32.340220,92771.734338,,,,,,,,...,,,,,,,,,,
2,1026.393735,10.819752,92375.436167,,,,,,,,...,,,,,,,,,,
3,1030.919520,22.513172,92782.756816,,,,,,,,...,,,,,,,,,,
4,1030.558733,18.705833,92750.286003,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
365,,,,,,,,,,,...,,,,,,,,,,0.0
366,,,,,,,,,,,...,,,,,,,,,,0.0
367,,,,,,,,,,,...,,,,,,,,,,0.0
368,,,,,,,,,,,...,,,,,,,,,,0.0
