In [1]:
import time
import random
import numpy as np
import pandas as pd
from typing import TypeVar, Callable

from sklearn.preprocessing import MinMaxScaler
from sktime.classification import BaseClassifier
from sklearn.model_selection import StratifiedKFold
from sktime.classification.kernel_based import RocketClassifier
from sktime.classification.interval_based import TimeSeriesForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, matthews_corrcoef, classification_report

In [2]:
# Read the recordings from disk once and keep the loaded frame untouched in `raw_csv_data`.
raw_csv_data: pd.DataFrame = pd.read_csv('time_series.csv') 
# The cells below work on this independent copy, so the loaded frame is never modified by them.
df: pd.DataFrame = raw_csv_data.copy() 

In [3]:
# Preview the first rows to check the column layout.
df.head()

Unnamed: 0,id,timestamp,date,activity,owner,class,binary_class
0,0,2003-05-08 00:00:00,2003-05-08,0,condition_1,unipolar depressive,depressed
1,0,2003-05-08 00:01:00,2003-05-08,0,condition_1,unipolar depressive,depressed
2,0,2003-05-08 00:02:00,2003-05-08,0,condition_1,unipolar depressive,depressed
3,0,2003-05-08 00:03:00,2003-05-08,0,condition_1,unipolar depressive,depressed
4,0,2003-05-08 00:04:00,2003-05-08,0,condition_1,unipolar depressive,depressed


In [4]:
def to_2D_array(df: pd.DataFrame, n_days: int, variable: str) -> np.ndarray:
    """Reshape one column of a long-format frame into a 2-D matrix.

    The number of output rows is the number of distinct values in the ``id``
    column; ``n_days`` is used as the number of columns (i.e. the number of
    consecutive values that make up one row). The reshape is purely positional,
    so the rows of ``df`` must already be ordered sample by sample.

    Args:
        df: Frame containing an ``id`` column and the ``variable`` column.
        n_days: Number of values placed in each output row.
        variable: Name of the column whose values are reshaped.

    Returns:
        Array of shape ``(number of distinct ids, n_days)``.

    Raises:
        ValueError: If the column length does not equal rows * ``n_days``
            (raised by the reshape).
    """
    sample_count: int = len(pd.unique(df['id']))
    column_values = df[variable].values
    return column_values.reshape((sample_count, n_days))

In [5]:
# 1440 is the number of consecutive rows that form one sample; the preview above shows
# one row per minute, so this presumably corresponds to one full day (24 * 60) — TODO confirm.
X = to_2D_array(df, 1440, 'activity')
X.shape

(1029, 1440)

In [6]:
# Min-max scale the values with a single global range: the matrix is flattened to one
# column so the scaler treats every value as the same feature.
# NOTE(review): the scaler is fitted on all samples before the cross-validation split
# further down, so the test folds contribute to the min/max — confirm this is intended.
scaler = MinMaxScaler()

n_samples, n_timesteps = X.shape
X_reshaped = X.reshape(-1, 1)

X = scaler.fit_transform(X_reshaped)
# Restore the original (samples, timesteps) layout.
X = X.reshape(n_samples, n_timesteps)

In [7]:
# One label per sample: every sample occupies X.shape[1] consecutive rows of `df`,
# so the label is read from the first row of each block. Deriving the stride from X
# and selecting the column by name avoids hard-coded sizes and positional indexing.
y = df['binary_class'].iloc[::X.shape[1]].values
assert y.shape[0] == X.shape[0], "expected exactly one label per row of X"
y.shape

(1029,)

In [8]:
def format_seconds(time_in_seconds: float) -> str:
    """Render a duration given in seconds as ``HH:MM:SS``.

    Each field is truncated to an integer and zero-padded to at least two
    digits; the hours field grows beyond two digits for long durations.

    Args:
        time_in_seconds: Duration in seconds (fractions are discarded).

    Returns:
        The formatted duration string.
    """
    whole_hours, within_hour = divmod(time_in_seconds, 3600)
    fields = (whole_hours, within_hour // 60, time_in_seconds % 60)
    return ":".join(f"{int(field):02d}" for field in fields)

In [9]:
Classifier = TypeVar('Classifier', bound=BaseClassifier)
def run_classifier(clf: Classifier, X_train: np.ndarray, X_test: np.ndarray, y_train: np.ndarray) -> np.ndarray:
    """Fit ``clf`` on the training data and predict the test data.

    The classifier object passed in is fitted in place.

    Args:
        clf: Classifier exposing ``fit`` and ``predict``.
        X_train: Training samples.
        X_test: Samples to predict.
        y_train: Labels for ``X_train``.

    Returns:
        The predictions returned by ``clf.predict(X_test)``.
    """
    clf.fit(X_train, y_train)
    return clf.predict(X_test)

In [10]:
def evaluate_classifier(y_test, y_pred):
    """Print and return per-class and support-weighted classification metrics.

    For every class present in ``y_test`` the problem is binarised
    (that class vs. the rest) and precision, recall, accuracy, MCC, F1 and
    specificity are computed. A ``'weighted average'`` entry is added with the
    support-weighted precision/recall/F1, the overall accuracy and MCC, and the
    support-weighted mean of the per-class specificities.

    Args:
        y_test: True labels (array-like).
        y_pred: Predicted labels (array-like, same length as ``y_test``).

    Returns:
        dict mapping each class label, plus ``'weighted average'``, to a dict
        of metric name -> value.
    """
    # The element-wise comparisons below need ndarrays; lists would compare as a whole.
    y_test = np.asarray(y_test)
    y_pred = np.asarray(y_pred)

    print(classification_report(y_test, y_pred, digits=5, zero_division=np.nan))
    print()

    metrics = {}

    # Classes present in the ground truth and their support (used as weights).
    classes, weights = np.unique(y_test, return_counts=True)

    # One-vs-rest metrics for each class.
    for cls in classes:
        y_test_bin = (y_test == cls).astype(int)
        y_pred_bin = (y_pred == cls).astype(int)

        cls_metrics = {}
        cls_metrics['Precision'] = precision_score(y_test_bin, y_pred_bin, zero_division=0)
        cls_metrics['Recall/Sensitivity'] = recall_score(y_test_bin, y_pred_bin, zero_division=0)
        cls_metrics['Accuracy'] = accuracy_score(y_test_bin, y_pred_bin)
        cls_metrics['MCC'] = matthews_corrcoef(y_test_bin, y_pred_bin)
        cls_metrics['F1-Score'] = f1_score(y_test_bin, y_pred_bin, zero_division=0)

        # labels=[0, 1] forces a 2x2 matrix even when only one of the two values
        # occurs (e.g. a fold containing a single class); without it the matrix
        # collapses to 1x1 and indexing the false-positive cell fails.
        tn, fp, _, _ = confusion_matrix(y_test_bin, y_pred_bin, labels=[0, 1]).ravel()
        cls_metrics['Specificity'] = tn / (tn + fp) if (tn + fp) > 0 else 0

        metrics[cls] = cls_metrics

    # Support-weighted summary over all classes.
    weighted_metrics = {}
    weighted_metrics['Precision'] = precision_score(y_test, y_pred, average='weighted', zero_division=0)
    weighted_metrics['Recall/Sensitivity'] = recall_score(y_test, y_pred, average='weighted', zero_division=0)
    weighted_metrics['F1-Score'] = f1_score(y_test, y_pred, average='weighted', zero_division=0)
    weighted_metrics['Accuracy'] = accuracy_score(y_test, y_pred)
    weighted_metrics['MCC'] = matthews_corrcoef(y_test, y_pred)

    # Weighted specificity reuses the one-vs-rest values computed above. This keeps
    # every sample in the count, including predictions of a label that never occurs
    # in y_test, which a multi-class matrix restricted to `classes` would drop.
    weighted_metrics['Specificity'] = np.average(
        [metrics[cls]['Specificity'] for cls in classes],
        weights=weights,
    )

    metrics['weighted average'] = weighted_metrics

    # Display the metrics.
    for cls, cls_metrics in metrics.items():
        print(f"Class {cls}:")
        for metric_name, value in cls_metrics.items():
            print(f"  {metric_name}: {value:.5f}")
        print()

    return metrics

In [11]:
# time.process_time() reports CPU time of the current process, so the figure
# printed at the end of this cell is not wall-clock time.
start_time: float = time.process_time()

metrics_tsf = []
metrics_rocket = []
cv = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)
count: int = 0
for count, (train_index, test_index) in enumerate(cv.split(X, y), start=1):
    X_train, y_train = X[train_index], y[train_index]
    X_test, y_test = X[test_index], y[test_index]

    # Fresh, identically seeded classifiers for every fold.
    tsf = TimeSeriesForestClassifier(random_state=42, n_jobs=-1)
    rocket = RocketClassifier(random_state=42, n_jobs=-1)

    print(f'Iteração {count}: ')
    # Same fit / predict / evaluate sequence for both classifiers, each
    # collecting its fold metrics in its own list.
    for heading, clf, fold_metrics in (
        ("Time Series Forest", tsf, metrics_tsf),
        ("\nROCKET", rocket, metrics_rocket),
    ):
        print(heading)
        y_pred = run_classifier(clf, X_train, X_test, y_train)
        fold_metrics.append(evaluate_classifier(y_test, y_pred))
    print('\n')

end_time: float = time.process_time()
cpu_execution_time: float = end_time - start_time
print(f'CPU Execution time: {format_seconds(cpu_execution_time)}')

Iteração 1: 
Time Series Forest
Class depressed:
  Precision: 0.72727
  Recall/Sensitivity: 0.66667
  Accuracy: 0.79612
  MCC: 0.54396
  F1-Score: 0.69565
  Specificity: 0.86567

Class nondepressed:
  Precision: 0.82857
  Recall/Sensitivity: 0.86567
  Accuracy: 0.79612
  MCC: 0.54396
  F1-Score: 0.84672
  Specificity: 0.66667

Class weighted average:
  Precision: 0.79317
  Recall/Sensitivity: 0.79612
  F1-Score: 0.79392
  Accuracy: 0.79612
  MCC: 0.54396
  Specificity: 0.73622


ROCKET
Class depressed:
  Precision: 0.73529
  Recall/Sensitivity: 0.69444
  Accuracy: 0.80583
  MCC: 0.56794
  F1-Score: 0.71429
  Specificity: 0.86567

Class nondepressed:
  Precision: 0.84058
  Recall/Sensitivity: 0.86567
  Accuracy: 0.80583
  MCC: 0.56794
  F1-Score: 0.85294
  Specificity: 0.69444

Class weighted average:
  Precision: 0.80378
  Recall/Sensitivity: 0.80583
  F1-Score: 0.80448
  Accuracy: 0.80583
  MCC: 0.56794
  Specificity: 0.75429



Iteração 2: 
Time Series Forest
Class depressed:
  Preci

In [None]:
def calculate_mean_metrics(metrics_list, keys=None):
    """Average per-fold metric dictionaries.

    Each element of ``metrics_list`` is the result for one fold. Two layouts
    are supported:

    * nested: ``{group: {metric_name: value}}`` (e.g. one group per class plus
      ``'weighted average'``) -> returns ``{group: {metric_name: mean}}``;
    * flat: ``{metric_name: value}`` -> returns ``{metric_name: mean}``.

    Args:
        metrics_list: List of per-fold dictionaries.
        keys: Top-level keys to average. Defaults to the keys of the first
            fold. A key missing from some folds is averaged over the folds
            that contain it; a key missing from every fold is omitted.

    Returns:
        Dictionary of averaged values in the same layout as the input;
        empty when ``metrics_list`` is empty.
    """
    if not metrics_list:
        return {}
    if keys is None:
        keys = list(metrics_list[0])

    mean_metrics = {}
    for key in keys:
        # A class can be absent from a fold's test split, so only use folds that report it.
        per_fold = [fold[key] for fold in metrics_list if key in fold]
        if not per_fold:
            continue
        if isinstance(per_fold[0], dict):
            mean_metrics[key] = {
                name: np.mean([entry[name] for entry in per_fold if name in entry])
                for name in per_fold[0]
            }
        else:
            mean_metrics[key] = np.mean(per_fold)
    return mean_metrics

In [None]:
# Average every class entry plus the 'weighted average' entry across the folds.
classes = np.unique(y)
report_keys = [*classes.tolist(), 'weighted average']
mean_metrics_tsf = calculate_mean_metrics(metrics_tsf, report_keys)
mean_metrics_rocket = calculate_mean_metrics(metrics_rocket, report_keys)

In [None]:
# Print the fold-averaged metrics of both classifiers, one class at a time.
for title, mean_metrics in (
    ("Resultados Médios TSF:", mean_metrics_tsf),
    ("Resultados Médios ROCKET:", mean_metrics_rocket),
):
    print(title)
    for cls, cls_metrics in mean_metrics.items():
        print(f"Class {cls}:")
        for metric_name, value in cls_metrics.items():
            print(f"  {metric_name}: {value:.5f}")
        print()

In [None]:
# Disabled reporting code kept as a bare string literal. It indexes the averaged results
# directly by metric name (e.g. mean_metrics_tsf["Accuracy"]), i.e. it expects a flat
# {metric: value} layout rather than the per-class nested dicts printed in the previous cell.
'''
print(f'Acurácia Média TSF: {mean_metrics_tsf["Accuracy"]:.2f}')
print(f'Precision Média TSF: {mean_metrics_tsf["Precision"]:.2f}')
print(f'Recall/Sensitivity Média TSF: {mean_metrics_tsf["Recall/Sensitivity"]:.2f}')
print(f'Specificity Média TSF: {mean_metrics_tsf["Specificity"]:.2f}')
print(f'MCC Médio TSF: {mean_metrics_tsf["MCC"]:.2f}')
print(f'F1-Score Médio TSF: {mean_metrics_tsf["F1-Score"]:.2f}')
print()
print(f'Acurácia Média ROCKET: {mean_metrics_rocket["Accuracy"]:.2f}')
print(f'Precision Média ROCKET: {mean_metrics_rocket["Precision"]:.2f}')
print(f'Recall/Sensitivity Média ROCKET: {mean_metrics_rocket["Recall/Sensitivity"]:.2f}')
print(f'Specificity Média ROCKET: {mean_metrics_rocket["Specificity"]:.2f}')
print(f'MCC Médio ROCKET: {mean_metrics_rocket["MCC"]:.2f}')
print(f'F1-Score Médio ROCKET: {mean_metrics_rocket["F1-Score"]:.2f}')
'''

Acurácia Média TSF: 0.80
Precision Média TSF: 0.80
Recall/Sensitivity Média TSF: 0.80
Specificity Média TSF: 0.67
MCC Médio TSF: 0.56
F1-Score Médio TSF: 0.80

Acurácia Média ROCKET: 0.78
Precision Média ROCKET: 0.78
Recall/Sensitivity Média ROCKET: 0.78
Specificity Média ROCKET: 0.62
MCC Médio ROCKET: 0.50
F1-Score Médio ROCKET: 0.78


In [None]:
# Keep only the rows whose binary_class is 'depressed'; .copy() detaches the subset from df
# so the cells below can modify it without touching df.
depressed_df = df.loc[df['binary_class']=='depressed'].copy()
# Not displayed: only the last expression of a cell is echoed, and a print() follows.
depressed_df.shape
# 1440 rows form one sample (the same constant passed to to_2D_array).
print(f'Number of days: {depressed_df.shape[0]//1440}')

Number of days: 359


In [None]:
# Group the rows by class while keeping the original row order inside each group.
# A stable sort is required here: the default quicksort may reorder rows that share
# the same key, which would scramble the row sequence that to_2D_array later reshapes
# positionally into one row per sample. Reassigning instead of inplace=True keeps the
# cell free of in-place mutation.
depressed_df = depressed_df.sort_values(by='class', kind='mergesort')

In [None]:
# Rebuild the sample matrix from the depressed-only subset.
# NOTE(review): this overwrites the X used by the earlier experiment; cells above must be
# re-run to get the full-data matrix back.
X = to_2D_array(depressed_df, 1440, 'activity')
X.shape

(359, 1440)

In [None]:
# Min-max scale the values with a single global range: the matrix is flattened to one
# column so the scaler treats every value as the same feature.
# NOTE(review): the scaler is fitted on all samples before the cross-validation split
# further down, so the test folds contribute to the min/max — confirm this is intended.
scaler = MinMaxScaler()

n_samples, n_timesteps = X.shape
X_reshaped = X.reshape(-1, 1)

X = scaler.fit_transform(X_reshaped)
# Restore the original (samples, timesteps) layout.
X = X.reshape(n_samples, n_timesteps)

In [None]:
# One label per sample: every sample occupies X.shape[1] consecutive rows of
# `depressed_df`, so the label is read from the first row of each block. Deriving the
# stride from X and selecting the column by name avoids hard-coded sizes and
# positional indexing.
y = depressed_df['class'].iloc[::X.shape[1]].values
assert y.shape[0] == X.shape[0], "expected exactly one label per row of X"
y.shape

(359,)

In [None]:
metrics_tsf = []
metrics_rocket = []
cv = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)
count = 0
for count, (train_index, test_index) in enumerate(cv.split(X, y), start=1):
    X_train, y_train = X[train_index], y[train_index]
    X_test, y_test = X[test_index], y[test_index]

    # Fresh, identically seeded classifiers for every fold.
    tsf = TimeSeriesForestClassifier(random_state=42, n_jobs=-1)
    rocket = RocketClassifier(random_state=42, n_jobs=-1)

    print(f'Iteração {count}: ')
    # Same fit / predict / evaluate sequence for both classifiers, each
    # collecting its fold metrics in its own list.
    for heading, clf, fold_metrics in (
        ("Time Series Forest", tsf, metrics_tsf),
        ("\nROCKET", rocket, metrics_rocket),
    ):
        print(heading)
        y_pred = run_classifier(clf, X_train, X_test, y_train)
        fold_metrics.append(evaluate_classifier(y_test, y_pred))
    print('\n')

Iteração 1: 
Time Series Forest
Accuracy score: 0.6944444444444444
Precision: 0.6984953703703703
Recall/Sensitivity: 0.6944444444444444
Specificity: 1.0
MCC: 0.31453686265393677
F1-Score: 0.6354497354497356

ROCKET
Accuracy score: 0.75
Precision: 0.7685185185185186
Recall/Sensitivity: 0.75
Specificity: 1.0
MCC: 0.46165138416902646
F1-Score: 0.7202131376659678


Iteração 2: 
Time Series Forest
Accuracy score: 0.7222222222222222
Precision: 0.6722222222222222
Recall/Sensitivity: 0.7222222222222222
Specificity: 0.0
MCC: 0.3744154322244884
F1-Score: 0.6741891725243556

ROCKET
Accuracy score: 0.6944444444444444
Precision: 0.6319444444444444
Recall/Sensitivity: 0.6944444444444444
Specificity: 0.0
MCC: 0.3132584254597078
F1-Score: 0.6547987616099071


Iteração 3: 
Time Series Forest
Accuracy score: 0.6388888888888888
Precision: 0.5647401433691756
Recall/Sensitivity: 0.6388888888888888
Specificity: 0.5
MCC: 0.15271018972385117
F1-Score: 0.5746913580246914

ROCKET
Accuracy score: 0.7777777777777

In [None]:
# Average every class entry plus the 'weighted average' entry across the folds.
classes = np.unique(y)
report_keys = [*classes.tolist(), 'weighted average']
mean_metrics_tsf = calculate_mean_metrics(metrics_tsf, report_keys)
mean_metrics_rocket = calculate_mean_metrics(metrics_rocket, report_keys)

In [None]:
# Print the fold-averaged metrics of both classifiers, one class at a time.
for title, mean_metrics in (
    ("Resultados Médios TSF:", mean_metrics_tsf),
    ("Resultados Médios ROCKET:", mean_metrics_rocket),
):
    print(title)
    for cls, cls_metrics in mean_metrics.items():
        print(f"Class {cls}:")
        for metric_name, value in cls_metrics.items():
            print(f"  {metric_name}: {value:.5f}")
        print()

In [None]:
# Disabled reporting code kept as a bare string literal. It indexes the averaged results
# directly by metric name (e.g. mean_metrics_tsf["Accuracy"]), i.e. it expects a flat
# {metric: value} layout rather than the per-class nested dicts printed in the previous cell.
'''
print(f'Acurácia Média TSF: {mean_metrics_tsf["Accuracy"]:.2f}')
print(f'Precision Média TSF: {mean_metrics_tsf["Precision"]:.2f}')
print(f'Recall/Sensitivity Média TSF: {mean_metrics_tsf["Recall/Sensitivity"]:.2f}')
print(f'Specificity Média TSF: {mean_metrics_tsf["Specificity"]:.2f}')
print(f'MCC Médio TSF: {mean_metrics_tsf["MCC"]:.2f}')
print(f'F1-Score Médio TSF: {mean_metrics_tsf["F1-Score"]:.2f}')
print()
print(f'Acurácia Média ROCKET: {mean_metrics_rocket["Accuracy"]:.2f}')
print(f'Precision Média ROCKET: {mean_metrics_rocket["Precision"]:.2f}')
print(f'Recall/Sensitivity Média ROCKET: {mean_metrics_rocket["Recall/Sensitivity"]:.2f}')
print(f'Specificity Média ROCKET: {mean_metrics_rocket["Specificity"]:.2f}')
print(f'MCC Médio ROCKET: {mean_metrics_rocket["MCC"]:.2f}')
print(f'F1-Score Médio ROCKET: {mean_metrics_rocket["F1-Score"]:.2f}')
'''

Acurácia Média TSF: 0.69
Precision Média TSF: 0.67
Recall/Sensitivity Média TSF: 0.69
Specificity Média TSF: 0.65
MCC Médio TSF: 0.30
F1-Score Médio TSF: 0.65

Acurácia Média ROCKET: 0.72
Precision Média ROCKET: 0.69
Recall/Sensitivity Média ROCKET: 0.72
Specificity Média ROCKET: 0.30
MCC Médio ROCKET: 0.37
F1-Score Médio ROCKET: 0.69
