# Reading Files

In [181]:
import os

import pandas as pd
import networkx as nx
import numpy as np

from fastdtw import fastdtw
from tqdm import tqdm
from node2vec import Node2Vec
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split, RandomizedSearchCV

In [None]:
def batch_read(path: str) -> list[pd.DataFrame]:
    df_list = []
    for file in tqdm(os.listdir(path)):
        df = pd.read_csv(f'{path}/{file}')
        df_list.append(df)
    return df_list

In [3]:
control_data = batch_read('../data/Controls_columns')

100%|██████████| 66/66 [00:04<00:00, 16.08it/s]


In [4]:
parkinson_data = batch_read('../data/PDs_columns')

100%|██████████| 113/113 [00:07<00:00, 16.02it/s]


## Create Graphs

Demora muito para calcular a rede funcional com DTW

In [219]:
def dtw_distance(time_series1: np.array, time_series2: np.array) -> float:
    distance, _ = fastdtw(time_series1.reshape(-1, 1), time_series2.reshape(-1, 1), dist=euclidean)
    return distance

def compute_functional_network(time_series: np.array, distance_function: callable) -> np.array:
    n = time_series.shape[0]
    functional_network = np.zeros((n, n))
    for i in tqdm(range(1, n), leave=True):
        for j in tqdm(range(i+1, n), leave=False):
            distance = distance_function(time_series[i].reshape(-1, 1), time_series[j].reshape(-1, 1))
            functional_network[i, j] = distance
            functional_network[j, i] = distance
    return functional_network

In [None]:
compute_functional_network(parkinson_data[0].to_numpy(), dtw_distance)

Vamos utilizar Pearson

### Teste com a Triangular Superior

In [156]:
upper_triangular_indices = np.triu_indices(240)

parkinson_correlation_matrix = [time_series.T.corr().to_numpy()[upper_triangular_indices] for time_series in parkinson_data]
control_correlation_matrix = [time_series.T.corr().to_numpy()[upper_triangular_indices] for time_series in control_data]

In [None]:
X = np.concatenate([
    parkinson_correlation_matrix,
    control_correlation_matrix
], axis=0)

In [160]:
y = np.concatenate([
    [1 for _ in range(len(parkinson_data))],
    [0 for _ in range(len(control_data))]
])

#### Train/Test Split

In [163]:
X_train, X_test, y_train, y_test = train_test_split(X, y)

In [202]:
params = {
    'hidden_layer_sizes': (100, 100, 100),
    'alpha': np.arange(1e-4, 1e-2, 1e-3),
    'learning_rate': ['constant', 'adaptive'],
}

#### Training

In [210]:
model = MLPClassifier(early_stopping=True)
hyperparam_optimization = RandomizedSearchCV(model, params, random_state=1)
search = hyperparam_optimization.fit(X_train, y_train)
search.best_params_

{'learning_rate': 'constant', 'hidden_layer_sizes': 100, 'alpha': 0.0001}

In [211]:
optimized_model = MLPClassifier(early_stopping=True, **search.best_params_).fit(X_train, y_train)
optimized_model.score(X_test, y_test)

0.5777777777777777

In [213]:
optimized_model.predict(X_train)

array([1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1,
       1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1,
       1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1,
       1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1,
       1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0,
       0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1,
       1, 1])

### Node2Vec

Não tá funfando por hora. Talvez a matriz de correlação/Grafo precise ser Positiva Semi Definida (não pode ter valores negativos e umas propriedades a mais ai)

In [216]:
graph = nx.from_numpy_array(
    parkinson_data[0].T.corr().to_numpy()
)

node2vec = Node2Vec(graph, dimensions=64, walk_length=30, num_walks=200, workers=4)

n2v_model = node2vec.fit(window=10, min_count=1, batch_words=4)

Computing transition probabilities: 100%|██████████| 240/240 [00:28<00:00,  8.39it/s]
Generating walks (CPU: 1):   0%|          | 0/50 [00:00<?, ?it/s]

ValueError: probabilities are not non-negative

In [224]:
model.wv.vectors # para encontrar o embedding

array([[ 4.10542369e-01, -8.13800544e-02,  1.05252430e-01,
         3.20213556e-01,  2.94262171e-01, -8.20183694e-01,
        -7.89105952e-01, -1.98348071e-02,  6.49442732e-01,
         2.30842590e-01],
       [ 4.11620647e-01,  4.74907979e-02,  1.99726164e-01,
         2.47501627e-01,  2.30991051e-01, -8.57504308e-01,
        -7.93270350e-01, -4.75351214e-02,  6.18805051e-01,
         3.10785681e-01],
       [ 3.62728894e-01, -3.80808376e-02,  1.49629399e-01,
         3.38716775e-01,  2.45367005e-01, -8.47277462e-01,
        -7.47303724e-01,  3.06409388e-03,  6.56970203e-01,
         3.15002412e-01],
       [ 3.93635809e-01, -1.44947495e-03,  1.27001777e-01,
         3.04565161e-01,  3.16155165e-01, -8.29582632e-01,
        -7.38255560e-01, -5.62160928e-03,  6.67009890e-01,
         3.20622265e-01],
       [ 3.99246514e-01,  1.95658579e-02,  1.65241390e-01,
         3.42297286e-01,  3.17477643e-01, -7.76112199e-01,
        -7.18523741e-01, -4.24726196e-02,  7.02263057e-01,
         3.