### Objetivo Algoritmo
Desenvolver um modelo para detectar e prever episódios de FOG de Parkinson. Esses episódios serão previstos com base em dados de séries temporais registrados para cada paciente durante a execução de um protocolo específico, além de algumas características fornecidas do paciente.

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import itertools
import numpy as np
import pandas as pd
import warnings
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
import lightgbm as lgb
from lightgbm import LGBMClassifier
import os
import matplotlib.pyplot as plt
import seaborn as sns

# Remover avisos para evitar poluição visual
warnings.filterwarnings(action="ignore", category=DeprecationWarning)
warnings.filterwarnings(action="ignore", category=FutureWarning)

Dask dataframe query planning is disabled because dask-expr is not installed.

You can install it with `pip install dask[dataframe]` or `conda install dask`.
This will raise in a future version.



In [6]:
from joblib import dump, load

# Parâmetros otimizados
WINDOW_SIZE = 200
SAMPLE_FRAC = 1
CALC_TYPE = 'momentos'

# Função para carregar e processar dados
def load_and_preprocess_data(path, sample_frac=None):
    if os.path.isdir(path):
        data_list = []
        for file_name in os.listdir(path):
            if file_name.endswith('.csv'):
                file_path = os.path.join(path, file_name)
                df = pd.read_csv(file_path)
                df['file_name'] = file_name.replace('.csv', '')
                df['IsFOG'] = df[['StartHesitation', 'Walking', 'Turn']].any(axis='columns').astype(int)
                data_list.append(df)
        # Concatenar todos os DataFrames em um único DataFrame
        full_data = pd.concat(data_list, ignore_index=True)
    else:
        # Carregar diretamente se for um único arquivo
        full_data = pd.read_csv(path)
        full_data['file_name'] = os.path.basename(path).replace('.csv', '')
        full_data['IsFOG'] = full_data[['StartHesitation', 'Walking', 'Turn']].any(axis='columns').astype(int)

    if sample_frac:
        # Amostragem sequencial dos dados
        n_samples = int(len(full_data) * sample_frac)
        full_data = full_data.iloc[:n_samples]

    return full_data

# Função para adicionar características de janela móvel com cálculo de momentos
def add_rolling_window_features(data, window_size):
    feature_columns = ['AccV', 'AccML', 'AccAP']
    for axis in feature_columns:
        data[f'{axis}_rolling_kurtosis'] = data[axis].rolling(window=window_size, min_periods=1).kurt()
        data[f'{axis}_rolling_skewness'] = data[axis].rolling(window=window_size, min_periods=1).skew()
        data[f'{axis}_rolling_var'] = data[axis].rolling(window=window_size, min_periods=1).var()
        data[f'{axis}_rolling_mean'] = data[axis].rolling(window=window_size, min_periods=1).mean()

    data.dropna(inplace=True)
    return data

# Função para treinar e usar o modelo com as configurações otimizadas
def run_model():
    # Carregar e processar os dados de treino
    train_data = load_and_preprocess_data('/content/drive/MyDrive/UFOP/trabalhoAnalise/train', sample_frac=SAMPLE_FRAC)
    train_data = add_rolling_window_features(train_data, WINDOW_SIZE)

    # Seleção de Features e Labels
    features = ['Time', 'AccV', 'AccML', 'AccAP',
                'AccV_rolling_kurtosis', 'AccV_rolling_skewness', 'AccV_rolling_var', 'AccV_rolling_mean',
                'AccML_rolling_kurtosis', 'AccML_rolling_skewness', 'AccML_rolling_var', 'AccML_rolling_mean',
                'AccAP_rolling_kurtosis', 'AccAP_rolling_skewness', 'AccAP_rolling_var', 'AccAP_rolling_mean']

    train_features = train_data[features]
    train_labels = train_data['IsFOG']

    # Definindo e treinando o modelo
    model = LGBMClassifier(
        n_estimators=100,
        num_leaves=30,
        max_depth=10,
        learning_rate=0.1,
        is_unbalance=True,
        objective='binary'
    )
    model.fit(train_features, train_labels)
    # Salvando o modelo treinado
    dump(model, '/content/drive/MyDrive/UFOP/trabalhoAnalise/model_fog_detection.joblib')

    # Carregar e processar os dados de teste
    test_data = load_and_preprocess_data('/content/drive/MyDrive/UFOP/trabalhoAnalise/test')
    test_data = add_rolling_window_features(test_data, WINDOW_SIZE)
    test_features = test_data[features]
    test_labels = test_data['IsFOG']

    # Realizar previsões no conjunto de teste
    y_pred_test = model.predict(test_features)

    # Calculando métricas do modelo
    accuracy = accuracy_score(test_labels, y_pred_test)
    precision, recall, f1, _ = precision_recall_fscore_support(test_labels, y_pred_test, average='binary')

    # Exibir resultados
    print(f'Accuracy: {accuracy}')
    print(f'Precision: {precision}')
    print(f'Recall: {recall}')
    print(f'F1 Score: {f1}')

# Executar o modelo com a configuração otimizada
run_model()


[LightGBM] [Info] Number of positive: 1807501, number of negative: 3904206
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.313042 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 4080
[LightGBM] [Info] Number of data points in the train set: 5711707, number of used features: 16
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.316455 -> initscore=-0.770109
[LightGBM] [Info] Start training from score -0.770109
Accuracy: 0.8564593583620969
Precision: 0.7168318874130443
Recall: 0.8146279456850452
F1 Score: 0.7626073831313079


In [8]:
def use_model_with_new_data(new_data_path, output_csv_path):
    # Carregar o modelo salvo
    model = load('/content/drive/MyDrive/UFOP/trabalhoAnalise/model_fog_detection.joblib')

    # Carregar e processar os novos dados
    new_data = load_and_preprocess_data(new_data_path)
    new_data = add_rolling_window_features(new_data, WINDOW_SIZE)

    # Seleção de Features
    features = ['Time', 'AccV', 'AccML', 'AccAP',
                'AccV_rolling_kurtosis', 'AccV_rolling_skewness', 'AccV_rolling_var', 'AccV_rolling_mean',
                'AccML_rolling_kurtosis', 'AccML_rolling_skewness', 'AccML_rolling_var', 'AccML_rolling_mean',
                'AccAP_rolling_kurtosis', 'AccAP_rolling_skewness', 'AccAP_rolling_var', 'AccAP_rolling_mean']

    new_features = new_data[features]

    # Fazer previsões nos novos dados
    y_pred_new = model.predict(new_features)

    # Adicionar as previsões ao DataFrame
    new_data['IsFOG_pred'] = y_pred_new

    # Exibir alguns resultados (opcional)
    print(new_data[['Time', 'AccV', 'AccML', 'AccAP', 'IsFOG_pred']].head())  # Mostrando as primeiras previsões

    # Salvar o DataFrame em um arquivo CSV
    new_data.to_csv(output_csv_path, index=False)  # Salva o arquivo sem o índice

    print(f"Resultados salvos em: {output_csv_path}")

    return new_data  # Retornando os dados com as previsões para uso posterior

# Caminho dos novos dados e do CSV de saída
new_data_path = '/content/drive/MyDrive/UFOP/trabalhoAnalise/test/2d481ad987.csv'
output_csv_path = '/content/drive/MyDrive/UFOP/trabalhoAnalise/predictions_output.csv'

# Fazer previsões nos novos dados e salvar em CSV
result = use_model_with_new_data(new_data_path, output_csv_path)


   Time       AccV     AccML     AccAP  IsFOG_pred
3     3  -9.690020  0.811827 -2.256484           0
4     4  -9.679607  0.875916 -2.343682           0
5     5  -9.729980  0.987272 -2.577944           0
6     6  -9.866057  0.923549 -2.725240           0
7     7 -10.077593  0.940298 -2.692860           0
Resultados salvos em: /content/drive/MyDrive/UFOP/trabalhoAnalise/predictions_output.csv
