In [1]:
import pandas as pd 
import numpy as np
from sklearn.ensemble import RandomForestClassifier
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from process import *
from datetime import timedelta
from sklearn.metrics import confusion_matrix


In [None]:
df_filt = pd.read_csv('df_filt.csv')
df_test = pd.read_csv('df_test.csv')   
df_filt['Timestamp'] = pd.to_datetime(df_filt['Timestamp'])
df_test['Timestamp'] = pd.to_datetime(df_test['Timestamp'])

df_filt.columns

Index(['Timestamp', 'Back.Acc.X', 'Back.Acc.Y', 'Back.Acc.Z', 'Back.Gyr.X',
       'Back.Gyr.Y', 'Back.Gyr.Z', 'Back.Mag.X', 'Back.Mag.Y', 'Back.Mag.Z',
       'Chest.Acc.X', 'Chest.Acc.Y', 'Chest.Acc.Z', 'Chest.Gyr.X',
       'Chest.Gyr.Y', 'Chest.Gyr.Z', 'Chest.Mag.X', 'Chest.Mag.Y',
       'Chest.Mag.Z', 'Neck.Acc.X', 'Neck.Acc.Y', 'Neck.Acc.Z', 'Neck.Gyr.X',
       'Neck.Gyr.Y', 'Neck.Gyr.Z', 'Neck.Mag.X', 'Neck.Mag.Y', 'Neck.Mag.Z',
       'Type', 'Position', 'Breed', 'Subject'],
      dtype='object')

In [3]:
features_filt = features_tsfel(df_filt, w_size=100, w_overlap=0, t_time=timedelta(microseconds=10000), n_jobs=6)

In [11]:
# Rename columns ending with '_Mean' by prepending 'mean_'
mapping = {col: 'mean_' + col[:-5] for col in features_filt.columns if col.endswith('_Mean')}
features_filt.rename(columns=mapping, inplace=True)

# Rename columns ending with 'Standard deviation' by prepending 'std_'
mapping_std = {col: 'std_' + col.replace('Standard deviation', '').rstrip(' _.') for col in features_filt.columns if col.endswith('Standard deviation')}
features_filt.rename(columns=mapping_std, inplace=True)
print(features_filt.columns)

Index(['mean_Back.Acc.X', 'std_Back.Acc.X', 'mean_Back.Acc.Y',
       'std_Back.Acc.Y', 'mean_Back.Acc.Z', 'std_Back.Acc.Z',
       'mean_Back.Gyr.X', 'std_Back.Gyr.X', 'mean_Back.Gyr.Y',
       'std_Back.Gyr.Y', 'mean_Back.Gyr.Z', 'std_Back.Gyr.Z',
       'mean_Back.Mag.X', 'std_Back.Mag.X', 'mean_Back.Mag.Y',
       'std_Back.Mag.Y', 'mean_Back.Mag.Z', 'std_Back.Mag.Z',
       'mean_Chest.Acc.X', 'std_Chest.Acc.X', 'mean_Chest.Acc.Y',
       'std_Chest.Acc.Y', 'mean_Chest.Acc.Z', 'std_Chest.Acc.Z',
       'mean_Chest.Gyr.X', 'std_Chest.Gyr.X', 'mean_Chest.Gyr.Y',
       'std_Chest.Gyr.Y', 'mean_Chest.Gyr.Z', 'std_Chest.Gyr.Z',
       'mean_Chest.Mag.X', 'std_Chest.Mag.X', 'mean_Chest.Mag.Y',
       'std_Chest.Mag.Y', 'mean_Chest.Mag.Z', 'std_Chest.Mag.Z',
       'mean_Neck.Acc.X', 'std_Neck.Acc.X', 'mean_Neck.Acc.Y',
       'std_Neck.Acc.Y', 'mean_Neck.Acc.Z', 'std_Neck.Acc.Z',
       'mean_Neck.Gyr.X', 'std_Neck.Gyr.X', 'mean_Neck.Gyr.Y',
       'std_Neck.Gyr.Y', 'mean_Neck.Gyr.Z', 

In [17]:
features_test = features_tsfel(df_test, w_size=100, w_overlap=0, t_time=timedelta(microseconds=10000), n_jobs=6)

KeyboardInterrupt: 

In [None]:
features_filt_50 = features_tsfel(df_filt, w_size=100, w_overlap=0.5, t_time=timedelta(microseconds=5000), n_jobs=6)

In [28]:
features_test_50 = features_tsfel(df_test, w_size=100, w_overlap=0.5, t_time=timedelta(microseconds=5000), n_jobs=6)

In [26]:
def get_axis(ax:str, dataframe:pd.DataFrame) -> pd.DataFrame: 
    """Filtra o Eixo de um dataframe"""
    dataframe_ax = pd.DataFrame()
    for col in dataframe.columns: 
        if col.endswith(ax):
            dataframe_ax[col] = dataframe[col] 
    return dataframe_ax

def get_axis_mean(ax: str, dataframe: pd.DataFrame) -> pd.DataFrame:
    """Retorna um Dataframe com com a mÃ©dia das mÃ©dias e dos desvios padrÃµes de um eixo especÃ­fico"""
    dataframe_ax_mean = pd.DataFrame()
    dataframe_ax_std = pd.DataFrame()

    for col in dataframe: 
        if col.endswith(ax):
            if col.startswith("mean"):
                dataframe_ax_mean[col] = dataframe[col]
            else: 
                dataframe_ax_std[col] = dataframe[col]
    
    dataframe_ax_mean = dataframe_ax_mean.mean(axis = 1 )
    dataframe_ax_std = dataframe_ax_std.mean(axis = 1 )
        
    return pd.DataFrame({f"mean_media_{ax}": dataframe_ax_mean, f"mean_Desviopadrao_{ax}": dataframe_ax_std})

def get_axis_sum(ax: str, dataframe: pd.DataFrame) -> pd.DataFrame: 
    """Faz a soma do desvio padrÃ£o que acontece em um eixo em especÃ­fico"""
    dataframe_ax_std = pd.DataFrame()

    for col in dataframe: 
        if col.endswith(ax):
            if col.startswith("std"):
                dataframe_ax_std[col] = dataframe[col]
            
    dataframe_ax_sum_std = dataframe_ax_std.sum(axis = 1 )

    return pd.DataFrame({f"soma_std_{ax}":dataframe_ax_sum_std})


def get_sensor(sensor: str, dataframe: pd.DataFrame) -> pd.DataFrame:
    dataframe_sensor = pd.DataFrame()
    for col in dataframe.columns: 
        if sensor in col: 
            dataframe_sensor[col] = dataframe[col]

    return dataframe_sensor

def df_num(df) -> pd.DataFrame:
    """Retorna um Dataframe apenas com as colunas numÃ©ricas"""
    df_numerico = df.select_dtypes(include=[np.number])
    return df_numerico

In [24]:
standing = features_filt[features_filt["Position"] == "standing"]
conjunto_sensores = pd.Series(features_filt.columns).str.extract(r'^(.*)\.(?:X|Y|Z)')[0].dropna().unique().tolist()
eixos = ("X", "Y", "Z")

dict_sensores = {}

for sensor in conjunto_sensores:
    df_sensor = get_sensor(sensor, standing)
    dict_valores = {}

    for ax in eixos:
        col_df = get_axis(ax, df_sensor)

        # caso 1: nÃ£o existe esse eixo â col_df vazio
        if col_df is None or len(col_df.columns if isinstance(col_df, pd.DataFrame) else col_df) == 0:
            dict_valores[f"mean_{ax}"] = None
            continue

        # caso 2: veio DataFrame â pegar a primeira coluna
        if isinstance(col_df, pd.DataFrame):
            col_series = col_df.iloc[:, 0]
        else:
            col_series = col_df

        dict_valores[f"mean_{ax}"] = float(col_series.mean())

    dict_sensores[sensor] = dict_valores

print(dict_sensores)

{'mean_Back.Acc': {'mean_X': 0.0618956376699029, 'mean_Y': 0.01379852145631068, 'mean_Z': 0.9869498473786407}, 'std_Back.Acc': {'mean_X': 0.0979174310921173, 'mean_Y': 0.09820910333145062, 'mean_Z': 0.06663172395320706}, 'mean_Back.Gyr': {'mean_X': -1.1458363484466016, 'mean_Y': -0.2317967234951452, 'mean_Z': -3.4489153192233015}, 'std_Back.Gyr': {'mean_X': 16.298904410002837, 'mean_Y': 19.86350777658396, 'mean_Z': 16.936061881382404}, 'mean_Back.Mag': {'mean_X': 19.145834980873786, 'mean_Y': -5.6031028651456305, 'mean_Z': 20.804228862427188}, 'std_Back.Mag': {'mean_X': 1.8688814338028301, 'mean_Y': 1.6277162269854113, 'mean_Z': 1.1976529140977075}, 'mean_Chest.Acc': {'mean_X': -0.2963278394174757, 'mean_Y': 0.06960418611650485, 'mean_Z': -0.9401505459223302}, 'std_Chest.Acc': {'mean_X': 0.10594172786426836, 'mean_Y': 0.12642492604125669, 'mean_Z': 0.09824890567319727}, 'mean_Chest.Gyr': {'mean_X': 1.675557064466019, 'mean_Y': 0.2508280814563108, 'mean_Z': 1.778455706019417}, 'std_Ches

In [None]:

# For each sensor compute Euclidean distance from its mean (using mean_ columns)
for sensor in conjunto_sensores:
    # get all columns for this sensor and keep only mean_ columns
    df_sensor = get_sensor(sensor, features_filt)
    # keep columns that start with 'mean_' and contain the sensor name
    if df_sensor.shape[1] > 0:
        df_sensor = df_sensor.loc[:, df_sensor.columns.str.startswith('mean_') & df_sensor.columns.str.contains(sensor)]

    dist_euc_sq = pd.Series(0.0, index=features_filt.index)
    for eixo in eixos:
        col_df = get_axis(eixo, df_sensor)
        if col_df is None or (isinstance(col_df, pd.DataFrame) and col_df.shape[1] == 0):
            continue
        if isinstance(col_df, pd.DataFrame):
            col_series = col_df.iloc[:, 0]
        else:
            col_series = col_df
        mean_ref = dict_sensores.get(sensor, {}).get(f"mean_{eixo}", 0.0)
        dist_euc_sq += (col_series - mean_ref) ** 2
    features_filt[f"dist_euc_{sensor}"] = np.sqrt(dist_euc_sq)

# Sum stds per axis (uses columns that start with 'std_' and end with axis)
for eixo in eixos:
    df_axis_sum = get_axis_sum(eixo, features_filt)
    features_filt[f"soma_std_{eixo}"] = df_axis_sum[f"soma_std_{eixo}"]

# Sum stds per sensor (columns that start with 'std_{sensor}')
for sensor in conjunto_sensores:
    s = pd.Series(0.0, index=features_filt.index)
    for col in features_filt.columns:
        if col.startswith(f"std_{sensor}"):
            s += features_filt[col]
    features_filt[f"sum_std_{sensor}"] = s

# keep Position from original dataframe if available
if 'Position' not in features_filt.columns and 'Position' in df_filt.columns:
    features_filt['Position'] = df_filt['Position']

print('Total columns:', len(features_filt.columns))
print([c for c in features_filt.columns if c.startswith(('mean_','std_','dist_euc_','sum_std_','soma_std_'))][:100])

n_cols_total = 95
['mean_Back.Acc.X', 'std_Back.Acc.X', 'mean_Back.Acc.Y', 'std_Back.Acc.Y', 'mean_Back.Acc.Z', 'std_Back.Acc.Z', 'mean_Back.Gyr.X', 'std_Back.Gyr.X', 'mean_Back.Gyr.Y', 'std_Back.Gyr.Y', 'mean_Back.Gyr.Z', 'std_Back.Gyr.Z', 'mean_Back.Mag.X', 'std_Back.Mag.X', 'mean_Back.Mag.Y', 'std_Back.Mag.Y', 'mean_Back.Mag.Z', 'std_Back.Mag.Z', 'mean_Chest.Acc.X', 'std_Chest.Acc.X', 'mean_Chest.Acc.Y', 'std_Chest.Acc.Y', 'mean_Chest.Acc.Z', 'std_Chest.Acc.Z', 'mean_Chest.Gyr.X', 'std_Chest.Gyr.X', 'mean_Chest.Gyr.Y', 'std_Chest.Gyr.Y', 'mean_Chest.Gyr.Z', 'std_Chest.Gyr.Z', 'mean_Chest.Mag.X', 'std_Chest.Mag.X', 'mean_Chest.Mag.Y', 'std_Chest.Mag.Y', 'mean_Chest.Mag.Z', 'std_Chest.Mag.Z', 'mean_Neck.Acc.X', 'std_Neck.Acc.X', 'mean_Neck.Acc.Y', 'std_Neck.Acc.Y', 'mean_Neck.Acc.Z', 'std_Neck.Acc.Z', 'mean_Neck.Gyr.X', 'std_Neck.Gyr.X', 'mean_Neck.Gyr.Y', 'std_Neck.Gyr.Y', 'mean_Neck.Gyr.Z', 'std_Neck.Gyr.Z', 'mean_Neck.Mag.X', 'std_Neck.Mag.X']


In [13]:
columns_mean_filt = df_filt.columns[df_filt.columns.str.contains(r'\.(X|Y|Z)_Mean$', regex=True)]
columns_mean_test = df_test.columns[df_test.columns.str.contains(r'\.(X|Y|Z)_Mean$', regex=True)]

dados_mean_filt = df_filt[columns_mean_filt]
dados_mean_test = df_test[columns_mean_test]

x_treino = dados_mean_filt
y_treino = df_filt["Position"]
x_teste = dados_mean_test
y_teste = df_test["Position"]

rf = RandomForestClassifier(
    n_estimators=300,        # nÃºmero de Ã¡rvores
    max_depth=12,          # deixe None inicialmente
    min_samples_split=2,
    min_samples_leaf=1,     # padrÃ£o bom para classificaÃ§Ã£o
    bootstrap=True,
    n_jobs=6,
    random_state=42
)

rf.fit(x_treino, y_treino)
y_pred = rf.predict(x_teste)

acc = accuracy_score(y_teste, y_pred)
print(f"AcurÃ¡cia: {acc:.4f}")

print(classification_report(y_teste, y_pred))

cm = confusion_matrix(y_teste, y_pred)
print(cm)

  columns_mean_filt = df_filt.columns[df_filt.columns.str.contains(r'\.(X|Y|Z)_Mean$', regex=True)]
  columns_mean_test = df_test.columns[df_test.columns.str.contains(r'\.(X|Y|Z)_Mean$', regex=True)]


ValueError: at least one array or dtype is required

In [None]:
columns_std_filt = df_filt.columns[df_filt.columns.str.contains(r'\.(X|Y|Z)_Standard deviation$', regex=True)]
columns_std_test = df_test.columns[df_test.columns.str.contains(r'\.(X|Y|Z)_Standard deviation$', regex=True)]
dados_std_filt = df_filt[columns_std_filt]
dados_std_test = df_test[columns_std_test]
x_treino_std = dados_std_filt
x_teste_std = dados_std_test
y_treino_std = df_filt["Position"]
y_teste_std = df_test["Position"]

rf = RandomForestClassifier(
    n_estimators=300,        # nÃºmero de Ã¡rvores
    max_depth=12,          # deixe None inicialmente
    min_samples_split=2,
    min_samples_leaf=1,     # padrÃ£o bom para classificaÃ§Ã£o
    bootstrap=True,
    n_jobs=6,
    random_state=42
)

rf.fit(x_treino_std, y_treino_std)
y_pred_std = rf.predict(x_teste_std)

acc_std = accuracy_score(y_teste_std, y_pred_std)
print(f"AcurÃ¡cia (std): {acc_std:.4f}")

print(classification_report(y_teste_std, y_pred_std))
cm_std = confusion_matrix(y_teste_std, y_pred_std)
print(cm_std)


  columns_std_filt = df_filt.columns[df_filt.columns.str.contains(r'\.(X|Y|Z)_Standard deviation$', regex=True)]
  columns_std_test = df_test.columns[df_test.columns.str.contains(r'\.(X|Y|Z)_Standard deviation$', regex=True)]


AcurÃ¡cia (std): 0.8578
              precision    recall  f1-score   support

  body shake       0.00      0.00      0.00         4
  lying down       0.00      0.00      0.00         0
     sitting       0.84      0.92      0.88       220
    standing       0.82      0.70      0.75       127
     walking       0.97      0.99      0.98        78

    accuracy                           0.86       429
   macro avg       0.53      0.52      0.52       429
weighted avg       0.85      0.86      0.85       429

[[  0   0   0   2   2]
 [  0   0   0   0   0]
 [  0   1 202  17   0]
 [  0   0  38  89   0]
 [  0   0   0   1  77]]


  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])


In [None]:
# # Definindo os hiperparÃ¢metros para Grid Search (3 valores para cada)
# param_grid = {
#     'n_estimators': [100, 200, 300],           # nÃºmero de Ã¡rvores
#     'max_depth': [10, 15, 20],                # profundidade mÃ¡xima
#     'min_samples_leaf': [10, 20, 30]         # mÃ­nimo de amostras por folha
# }

# # Criando o modelo base
# rf_base = RandomForestClassifier(
#     n_estimators=300,        # nÃºmero de Ã¡rvores
#     max_depth=12,          # deixe None inicialmente
#     min_samples_split=2,
#     min_samples_leaf=1,     # padrÃ£o bom para classificaÃ§Ã£o
#     bootstrap=True,
#     n_jobs=6,
#     random_state=42
# )

# # Grid Search com Cross-Validation de 5 folds
# # print("Iniciando Grid Search com 5-fold Cross-Validation...")
# # print(f"Total de combinaÃ§Ãµes: {len(param_grid['n_estimators']) * len(param_grid['max_depth']) * len(param_grid['min_samples_leaf'])}")
# # print("="*60)

# # grid_search = GridSearchCV(
# #     estimator=rf_base,
# #     param_grid=param_grid,
# #     cv=5,                    # 5 folds
# #     scoring='accuracy',
# #     n_jobs=6,
# #     verbose=2,
# #     return_train_score=True
# # )

