In [1]:
import pandas as pd 
import numpy as np
from sklearn.ensemble import RandomForestClassifier
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report

# Load the two input tables from CSV files located relative to the current
# working directory. `dados` is the table the per-second features are built
# from; `teste` is prepared alongside it (presumably a held-out test set —
# confirm against the data source).
dados = pd.read_csv("df_filt.csv")
teste = pd.read_csv("df_test.csv")

In [None]:
# Per-second windowing: every raw sample is tagged with the second it falls in,
# and each remaining (sensor) column is summarised by its mean and standard
# deviation inside that second.

# Label/metadata columns removed before computing numeric statistics.
METADATA_COLUMNS = ["Type", "Position", "Breed", "Subject", "Timestamp"]


def window_mean_std(grouped, columns) -> pd.DataFrame:
    """Return the per-group mean and standard deviation of each column.

    Parameters
    ----------
    grouped : pandas DataFrameGroupBy
        Samples already grouped by their window key.
    columns : iterable of str
        Names of the columns to summarise.

    Returns
    -------
    pd.DataFrame
        One row per group with columns ``mean_<col>`` and ``std_<col>`` for
        every requested column, in the order the columns were given. The
        standard deviation is the sample one (ddof=1), so it is NaN for a
        group that holds a single sample.
    """
    columns = list(columns)
    if not columns:
        # Nothing to summarise: keep one (empty) row per group.
        return pd.DataFrame(index=grouped.size().index)

    stats = grouped[columns].agg(["mean", "std"])
    # agg() yields (column, statistic) pairs; flatten to "<statistic>_<column>".
    stats.columns = [f"{stat}_{col}" for col, stat in stats.columns]
    return stats


dados["Timestamp"] = pd.to_datetime(dados["Timestamp"])
dados["segundo"] = dados["Timestamp"].dt.floor("s")

teste["Timestamp"] = pd.to_datetime(teste["Timestamp"])
teste["segundo"] = teste["Timestamp"].dt.floor("s")

dados_num = dados.drop(columns=METADATA_COLUMNS)
teste_num = teste.drop(columns=METADATA_COLUMNS)

# NOTE(review): "Subject" is dropped and the grouping key is the second alone,
# so samples from different subjects that share a timestamp end up in the same
# window — confirm the recordings never overlap in time.
grouped = dados_num.groupby("segundo")
test_grouped = teste_num.groupby("segundo")

# Select the sensor columns by name instead of assuming that "segundo" is the
# last column of the frame.
features = window_mean_std(grouped, dados_num.columns.drop("segundo"))
# The test table gets the same treatment; previously `test_features` was
# created but left empty.
test_features = window_mean_std(test_grouped, teste_num.columns.drop("segundo"))

print(features.head())


                     mean_Back.Acc.X  std_Back.Acc.X  mean_Back.Acc.Y  \
segundo                                                                 
2019-05-20 14:46:28         0.028276        0.087154        -0.154170   
2019-05-20 14:46:29         0.016582        0.031741        -0.204888   
2019-05-20 14:46:30         0.080908        0.054575        -0.197319   
2019-05-20 14:46:31         0.038403        0.119151        -0.070952   
2019-05-20 14:46:32        -0.071152        0.130153         0.333833   

                     std_Back.Acc.Y  mean_Back.Acc.Z  std_Back.Acc.Z  \
segundo                                                                
2019-05-20 14:46:28        0.047352         1.004082        0.048497   
2019-05-20 14:46:29        0.025774         1.001001        0.026427   
2019-05-20 14:46:30        0.035986         1.000967        0.056624   
2019-05-20 14:46:31        0.118290         1.001909        0.085840   
2019-05-20 14:46:32        0.135816         0.951445    

In [3]:
def get_axis(ax: str, dataframe: pd.DataFrame) -> pd.DataFrame:
    """Select the columns of ``dataframe`` that belong to one axis.

    A column is kept when its name ends with ``ax`` (for example ``"X"``).
    The selected columns are returned in their original order, in a new
    DataFrame; when nothing matches the result is an empty DataFrame.
    """
    selected = {
        name: dataframe[name]
        for name in dataframe.columns
        if name.endswith(ax)
    }
    return pd.DataFrame(selected)

In [4]:
def get_axis_mean(ax: str, dataframe: pd.DataFrame) -> pd.DataFrame:
    """Average the mean columns and the std columns of one axis, row by row.

    Parameters
    ----------
    ax : str
        Axis suffix; only columns whose name ends with it are considered.
    dataframe : pd.DataFrame
        Feature table whose columns are named ``mean_<...>`` / ``std_<...>``.

    Returns
    -------
    pd.DataFrame
        Indexed like ``dataframe`` with two columns:
        ``mean_media_<ax>`` (row-wise mean of the ``mean*`` columns) and
        ``mean_Desviopadrao_<ax>`` (row-wise mean of the ``std*`` columns).
        A group with no matching column yields NaN.
    """
    axis_columns = [col for col in dataframe.columns if col.endswith(ax)]
    mean_columns = [col for col in axis_columns if col.startswith("mean")]
    # Require the "std" prefix explicitly: any other column that merely ends
    # with the axis letter (e.g. a derived "soma_std_X") must not be averaged
    # in as if it were a standard deviation.
    std_columns = [col for col in axis_columns if col.startswith("std")]

    return pd.DataFrame(
        {
            f"mean_media_{ax}": dataframe[mean_columns].mean(axis=1),
            f"mean_Desviopadrao_{ax}": dataframe[std_columns].mean(axis=1),
        }
    )

In [27]:
def get_axis_sum(ax: str, dataframe: pd.DataFrame) -> pd.DataFrame:
    """Sum, row by row, the standard-deviation columns of one axis.

    Only columns whose name starts with ``"std"`` and ends with ``ax`` take
    part in the sum. The result is a one-column DataFrame named
    ``soma_std_<ax>``.
    """
    std_of_axis = pd.DataFrame(
        {
            name: dataframe[name]
            for name in dataframe
            if name.endswith(ax) and name.startswith("std")
        }
    )
    return pd.DataFrame({f"soma_std_{ax}": std_of_axis.sum(axis=1)})

# Preview: summed standard deviation of the X-axis columns, first 10 rows.
df = get_axis_sum("X", features)
print(df.iloc[:10])


                     soma_std_X
segundo                        
2019-05-20 14:46:28   24.397904
2019-05-20 14:46:29   35.144755
2019-05-20 14:46:30   43.419110
2019-05-20 14:46:31   65.633140
2019-05-20 14:46:32   66.202740
2019-05-20 14:46:33  128.808974
2019-05-20 14:46:34  104.035952
2019-05-20 14:46:35   38.185583
2019-05-20 14:46:36   19.563591
2019-05-20 14:46:37   17.473988


In [6]:
def get_sensor(sensor: str, dataframe: pd.DataFrame) -> pd.DataFrame:
    """Select the columns of ``dataframe`` that belong to one sensor.

    A column is kept when ``sensor`` occurs anywhere in its name (substring
    match), so prefixed feature columns match as well. The selected columns
    are returned in their original order, in a new DataFrame; when nothing
    matches the result is an empty DataFrame.
    """
    matching = [name for name in dataframe.columns if sensor in name]
    return pd.DataFrame({name: dataframe[name] for name in matching})

In [14]:
# Baseline per sensor: the mean reading of every axis over the samples whose
# Position is "standing".
standing = dados[dados["Position"] == "standing"]

# A sensor name is a column name minus its last two characters (the axis
# suffix, e.g. ".X"). Only the column labels are needed, so read them from the
# Index instead of copying the whole frame with DataFrame.drop.
conjunto_sensores = {col[:-2] for col in dados_num.columns.drop("segundo")}

eixos = ("X", "Y", "Z")

dict_sensores = {}

# Iterate in sorted order so the contents of dict_sensores are built in the
# same order on every run (plain set iteration order depends on string hashing).
for sensor in sorted(conjunto_sensores):
    df_sensor = get_sensor(sensor, standing)
    dict_valores = {}

    for ax in eixos:
        col_df = get_axis(ax, df_sensor)

        # Case 1: this sensor has no column for the axis -> record None.
        # (get_axis always returns a DataFrame, so an empty column set is the
        # only "missing" case that can occur.)
        if col_df.shape[1] == 0:
            dict_valores[f"mean_{ax}"] = None
            continue

        # Case 2: use the first matching column as the axis reading.
        dict_valores[f"mean_{ax}"] = float(col_df.iloc[:, 0].mean())

    dict_sensores[sensor] = dict_valores

print(conjunto_sensores)

{'Neck.Gyr', 'Back.Gyr', 'Chest.Gyr', 'Back.Mag', 'Neck.Mag', 'Back.Acc', 'Neck.Acc', 'Chest.Mag', 'Chest.Acc'}


In [3]:

# This cell depends on `features`, `dados`, `dados_num`, `conjunto_sensores`,
# `eixos`, `dict_sensores` and the get_* helpers defined in earlier cells; on a
# fresh kernel those cells must be run first, otherwise it stops with the
# NameError recorded in the output below.


def most_frequent_label(labels: pd.Series):
    """Return the most common non-null value of ``labels`` (NaN if there is none).

    Ties are resolved by taking the first value of ``Series.mode()``, which is
    returned in sorted order.
    """
    modes = labels.mode()
    return modes.iloc[0] if not modes.empty else np.nan


# --- Distance of each sensor from its "standing" baseline --------------------
# For every raw sample: sqrt(sum over axes of (reading - baseline)^2). The
# squared deviations of all axes are collected first and the square root is
# taken once, after the axis loop; taking it inside the loop fed the running
# distance back into the next sum.
for sensor in sorted(conjunto_sensores):
    df_sensor = get_sensor(sensor, dados_num)
    desvios_quadrados = pd.DataFrame(index=dados_num.index)

    for eixo in eixos:
        baseline = dict_sensores[sensor][f"mean_{eixo}"]
        if baseline is None:
            # No baseline exists for this axis; leave it out of the distance.
            continue
        leitura = get_axis(eixo, df_sensor).iloc[:, 0]
        desvios_quadrados[eixo] = (leitura - baseline) ** 2

    dist_por_amostra = np.sqrt(desvios_quadrados.sum(axis=1))

    # `features` has one row per second while the distance has one row per raw
    # sample. Assigning it directly aligns on unrelated index labels and yields
    # only NaN, so it is reduced to the per-second mean first.
    features[f"dist_euc_{sensor}"] = dist_por_amostra.groupby(dados_num["segundo"]).mean()

# --- Sum of the standard deviations per axis ---------------------------------
for eixo in eixos:
    features[f"soma_std_{eixo}"] = get_axis_sum(eixo, features)[f"soma_std_{eixo}"]

# --- Sum of the standard deviations per sensor -------------------------------
for sensor in sorted(conjunto_sensores):
    # Match on the "std_<sensor>" prefix. A substring test would also match the
    # "sum_std_<sensor>" column this loop creates, so re-running the cell would
    # add the previous total to the new one.
    colunas_std = [col for col in features.columns if col.startswith(f"std_{sensor}")]
    # skipna=False keeps the behaviour of the former `+=` accumulation: a NaN
    # in any std column makes the total NaN.
    features[f"sum_std_{sensor}"] = features[colunas_std].sum(axis=1, skipna=False)

# --- Label of each one-second window ------------------------------------------
# `dados["Position"]` is indexed per raw sample, so it is reduced to one label
# per second (the most frequent one) before being attached to `features`.
# NOTE(review): the majority label is an assumption about the intended
# labelling of windows that contain a position change — confirm.
features["Position"] = dados.groupby("segundo")["Position"].agg(most_frequent_label)

print(features.columns)

NameError: name 'conjunto_sensores' is not defined

In [None]:
# Number of columns currently held by `features`.
print(features.shape[1])

NameError: name 'features' is not defined