In [1]:
import time
import random
import numpy as np
import pandas as pd
from typing import TypeVar, Callable

from sklearn.preprocessing import MinMaxScaler
from sktime.classification import BaseClassifier
from sklearn.model_selection import StratifiedKFold
from sktime.classification.kernel_based import RocketClassifier
from sktime.classification.interval_based import TimeSeriesForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, matthews_corrcoef, classification_report, multilabel_confusion_matrix

In [2]:
raw_csv_data: pd.DataFrame = pd.read_csv('time_series.csv') 
df: pd.DataFrame = raw_csv_data.copy() 

In [3]:
df.head()

Unnamed: 0,id,timestamp,date,activity,owner,gender,melanch,inpatient,marriage,work,afftype,binary_class
0,0,2003-05-08 00:00:00,2003-05-08,0,condition_1,male,no melancholia,outpatient,married or cohabiting,unemployed/sick leave/pension,unipolar depressive,depressed
1,0,2003-05-08 00:01:00,2003-05-08,0,condition_1,male,no melancholia,outpatient,married or cohabiting,unemployed/sick leave/pension,unipolar depressive,depressed
2,0,2003-05-08 00:02:00,2003-05-08,0,condition_1,male,no melancholia,outpatient,married or cohabiting,unemployed/sick leave/pension,unipolar depressive,depressed
3,0,2003-05-08 00:03:00,2003-05-08,0,condition_1,male,no melancholia,outpatient,married or cohabiting,unemployed/sick leave/pension,unipolar depressive,depressed
4,0,2003-05-08 00:04:00,2003-05-08,0,condition_1,male,no melancholia,outpatient,married or cohabiting,unemployed/sick leave/pension,unipolar depressive,depressed


In [4]:
def to_2D_array(df: pd.DataFrame, n_days: int, variable: str) -> np.ndarray:
    n_users: int = len(df.id.unique())
    arr: np.ndarray = df[variable].values.reshape(n_users, n_days)
    return arr

In [5]:
X = to_2D_array(df, 1440, 'activity')
X.shape

(1029, 1440)

In [6]:
scaler = MinMaxScaler()

n_samples, n_timesteps = X.shape
X_reshaped = X.reshape(-1, 1)

X = scaler.fit_transform(X_reshaped)
X = X.reshape(n_samples, n_timesteps)

In [7]:
y = df.iloc[[ i*1440 for i in range(1029) ], -1].values
y.shape

(1029,)

In [8]:
Classifier = TypeVar('Classifier', bound=BaseClassifier)
def run_classifier(clf: Classifier, X_train: np.ndarray, X_test: np.ndarray, y_train: np.ndarray) -> dict:
    clf.fit(X_train, y_train) 
    y_pred: np.ndarray = clf.predict(X_test)
    return y_pred

In [50]:
def specificity_score(y_test, y_pred):
    global classes
    mcm = multilabel_confusion_matrix(y_test, y_pred, labels=classes)
    specificity_per_class = []
    weights_per_class = []
    for i in range(len(mcm)):
        tn, fp, fn, tp = mcm[i].ravel()
        specificity = tn / (tn + fp)
        specificity_per_class.append(specificity)
        weights_per_class.append(fn + tp)
    return specificity_per_class, weights_per_class

In [51]:
def evaluate_classifier(y_test, y_pred):
    global classes
    metrics = {}
    
    # Cálculo das métricas para cada classe individualmente
    for cls in classes:
        cls_metrics = {}
        y_test_bin = (y_test == cls).astype(int)
        y_pred_bin = (y_pred == cls).astype(int)

        cls_metrics['Precision'] = precision_score(y_test_bin, y_pred_bin, zero_division=0)
        cls_metrics['Recall/Sensitivity'] = recall_score(y_test_bin, y_pred_bin, zero_division=0)
        cls_metrics['Accuracy'] = accuracy_score(y_test_bin, y_pred_bin)
        cls_metrics['MCC'] = matthews_corrcoef(y_test_bin, y_pred_bin)
        cls_metrics['F1-Score'] = f1_score(y_test_bin, y_pred_bin, zero_division=0)
        
        metrics[cls] = cls_metrics
    
    # Cálculo das métricas com média ponderada (weighted average)
    weighted_metrics = {}
    weighted_metrics['Precision'] = precision_score(y_test, y_pred, average='weighted', zero_division=0)
    weighted_metrics['Recall/Sensitivity'] = recall_score(y_test, y_pred, average='weighted', zero_division=0)
    weighted_metrics['F1-Score'] = f1_score(y_test, y_pred, average='weighted', zero_division=0)
    weighted_metrics['Accuracy'] = accuracy_score(y_test, y_pred)
    weighted_metrics['MCC'] = matthews_corrcoef(y_test, y_pred)
    
    # Cálculo da Especificidade
    specificity_per_class, weights_per_class = specificity_score(y_test, y_pred)
    for i, cls in enumerate(classes):
        metrics[cls]['Specificity'] = specificity_per_class[i]
    weighted_metrics['Specificity'] = np.average(specificity_per_class, weights=weights_per_class)
    
    metrics['weighted average'] = weighted_metrics
    
    # Exibir as métricas
    for cls, cls_metrics in metrics.items():
        print(f"Class {cls}:")
        for metric_name, value in cls_metrics.items():
            print(f"  {metric_name}: {value:.5f}")
        print()

    return metrics

In [11]:
count: int = 0
metrics = dict()
metrics['Time Series Forest'] = []
metrics['ROCKET'] = []
classes = df['binary_class'].unique()
cv = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)
for train_index, test_index in cv.split(X, y):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]
    
    tsf = TimeSeriesForestClassifier(random_state=42, n_jobs=-1)
    rocket = RocketClassifier(random_state=42, n_jobs=-1)
    
    count += 1 
    print(f'Iteração {count}: ')
    print("Time Series Forest")
    y_pred = run_classifier(tsf, X_train, X_test, y_train)
    metrics['Time Series Forest'].append(evaluate_classifier(y_test, y_pred))
    print("\nROCKET")
    y_pred = run_classifier(rocket, X_train, X_test, y_train)
    metrics['ROCKET'].append(evaluate_classifier(y_test, y_pred))
    print('\n')

Iteração 1: 
Time Series Forest
Class depressed:
  Precision: 0.72727
  Recall/Sensitivity: 0.66667
  Accuracy: 0.79612
  MCC: 0.54396
  F1-Score: 0.69565
  Specificity: 0.86567

Class nondepressed:
  Precision: 0.82857
  Recall/Sensitivity: 0.86567
  Accuracy: 0.79612
  MCC: 0.54396
  F1-Score: 0.84672
  Specificity: 0.66667

Class weighted average:
  Precision: 0.79317
  Recall/Sensitivity: 0.79612
  F1-Score: 0.79392
  Accuracy: 0.79612
  MCC: 0.54396
  Specificity: 0.73622


ROCKET
Class depressed:
  Precision: 0.73529
  Recall/Sensitivity: 0.69444
  Accuracy: 0.80583
  MCC: 0.56794
  F1-Score: 0.71429
  Specificity: 0.86567

Class nondepressed:
  Precision: 0.84058
  Recall/Sensitivity: 0.86567
  Accuracy: 0.80583
  MCC: 0.56794
  F1-Score: 0.85294
  Specificity: 0.69444

Class weighted average:
  Precision: 0.80378
  Recall/Sensitivity: 0.80583
  F1-Score: 0.80448
  Accuracy: 0.80583
  MCC: 0.56794
  Specificity: 0.75429



Iteração 2: 
Time Series Forest
Class depressed:
  Preci

In [55]:
def calculate_mean_metrics(metrics_list, classes):
    mean_metrics = {cls: {key: np.mean([fold_metrics[cls][key] for fold_metrics in metrics_list]) for key in metrics_list[0][cls]} for cls in classes}
    return mean_metrics

In [13]:
rows = []
for classifier, metric_list in metrics.items():
    mean_metrics = calculate_mean_metrics(metric_list, ['depressed', 'nondepressed', 'weighted average'])
    for subclass in metric_list[0].keys():
        rows.append((classifier, subclass, mean_metrics[subclass]))

In [14]:
metrics_df = pd.DataFrame([row[2] for row in rows], index=pd.MultiIndex.from_tuples([(row[0], row[1]) for row in rows], names=['Classifier', 'Subclass']))

In [15]:
display(metrics_df)

Unnamed: 0_level_0,Unnamed: 1_level_0,Precision,Recall/Sensitivity,Accuracy,MCC,F1-Score,Specificity
Classifier,Subclass,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Time Series Forest,depressed,0.741585,0.665794,0.801723,0.55588,0.700089,0.874627
Time Series Forest,nondepressed,0.830621,0.874627,0.801723,0.55588,0.851699,0.665794
Time Series Forest,weighted average,0.799608,0.801723,0.801723,0.55588,0.798805,0.738698
ROCKET,depressed,0.714866,0.615556,0.780354,0.503494,0.660512,0.868657
ROCKET,nondepressed,0.808948,0.868657,0.780354,0.503494,0.837518,0.615556
ROCKET,weighted average,0.776145,0.780354,0.780354,0.503494,0.775773,0.703858


In [16]:
depressed_df = df.loc[df['binary_class']=='depressed'].copy()
print(f'Number of days: {depressed_df.shape[0]//1440}')

Number of days: 359


In [17]:
depressed_df.sort_values(by='afftype', inplace=True)

In [18]:
X = to_2D_array(depressed_df, 1440, 'activity')
X.shape

(359, 1440)

In [19]:
scaler = MinMaxScaler()

n_samples, n_timesteps = X.shape
X_reshaped = X.reshape(-1, 1)

X = scaler.fit_transform(X_reshaped)
X = X.reshape(n_samples, n_timesteps)

In [20]:
y = depressed_df.iloc[[ i*1440 for i in range(X.shape[0]) ], -2].values
y.shape

(359,)

In [56]:
classes = ['bipolar', 'unipolar depressive']

In [57]:
count: int = 0
h_metrics = dict()
h_metrics['Time Series Forest'] = []
h_metrics['ROCKET'] = []
cv = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)
for train_index, test_index in cv.split(X, y):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]
    
    tsf = TimeSeriesForestClassifier(random_state=42, n_jobs=-1)
    rocket = RocketClassifier(random_state=42, n_jobs=-1)
    
    count += 1 
    print(f'Iteração {count}: ')
    print("Time Series Forest")
    y_pred = run_classifier(tsf, X_train, X_test, y_train)
    h_metrics['Time Series Forest'].append(evaluate_classifier(y_test, y_pred))
    print("\nROCKET")
    y_pred = run_classifier(rocket, X_train, X_test, y_train)
    h_metrics['ROCKET'].append(evaluate_classifier(y_test, y_pred))
    print('\n')

Iteração 1: 
Time Series Forest
Class bipolar:
  Precision: 0.40000
  Recall/Sensitivity: 0.15385
  Accuracy: 0.61111
  MCC: 0.03252
  F1-Score: 0.22222
  Specificity: 0.86957

Class unipolar depressive:
  Precision: 0.64516
  Recall/Sensitivity: 0.86957
  Accuracy: 0.61111
  MCC: 0.03252
  F1-Score: 0.74074
  Specificity: 0.15385

Class weighted average:
  Precision: 0.55663
  Recall/Sensitivity: 0.61111
  F1-Score: 0.55350
  Accuracy: 0.61111
  MCC: 0.03252
  Specificity: 0.41230


ROCKET
Class bipolar:
  Precision: 0.50000
  Recall/Sensitivity: 0.30769
  Accuracy: 0.63889
  MCC: 0.15456
  F1-Score: 0.38095
  Specificity: 0.82609

Class unipolar depressive:
  Precision: 0.67857
  Recall/Sensitivity: 0.82609
  Accuracy: 0.63889
  MCC: 0.15456
  F1-Score: 0.74510
  Specificity: 0.30769

Class weighted average:
  Precision: 0.61409
  Recall/Sensitivity: 0.63889
  F1-Score: 0.61360
  Accuracy: 0.63889
  MCC: 0.15456
  Specificity: 0.49489



Iteração 2: 
Time Series Forest
Class bipolar:

In [58]:
h_rows = []
for classifier, metric_list in h_metrics.items():
    mean_metrics = calculate_mean_metrics(metric_list, ['bipolar', 'unipolar depressive', 'weighted average'])
    for subclass in metric_list[0].keys():
        h_rows.append((classifier, subclass, mean_metrics[subclass]))

In [59]:
h_metrics_df = pd.DataFrame([row[2] for row in h_rows], index=pd.MultiIndex.from_tuples([(row[0], row[1]) for row in h_rows], names=['Classifier', 'Subclass']))

In [60]:
display(h_metrics_df)

Unnamed: 0_level_0,Unnamed: 1_level_0,Precision,Recall/Sensitivity,Accuracy,MCC,F1-Score,Specificity
Classifier,Subclass,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Time Series Forest,bipolar,0.626944,0.332692,0.696111,0.27469,0.427471,0.89221
Time Series Forest,unipolar depressive,0.713172,0.89221,0.696111,0.27469,0.792081,0.332692
Time Series Forest,weighted average,0.682897,0.696111,0.696111,0.27469,0.664372,0.528791
ROCKET,bipolar,0.616984,0.396154,0.696429,0.293531,0.47238,0.858696
ROCKET,unipolar depressive,0.725654,0.858696,0.696429,0.293531,0.785215,0.396154
ROCKET,weighted average,0.687808,0.696429,0.696429,0.293531,0.675693,0.558421
