In [13]:
import time
import random
import numpy as np
import pandas as pd
from typing import TypeVar, Callable

from sklearn.preprocessing import MinMaxScaler
from sktime.classification import BaseClassifier
from sklearn.model_selection import StratifiedKFold
from sktime.classification.kernel_based import RocketClassifier
from sktime.classification.interval_based import TimeSeriesForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, matthews_corrcoef, classification_report

In [2]:
# Keep the freshly parsed CSV untouched in `raw_csv_data`; the cells below
# operate on the independent copy `df`.
raw_csv_data: pd.DataFrame = pd.read_csv(filepath_or_buffer='time_series.csv')
df: pd.DataFrame = raw_csv_data.copy(deep=True)

In [3]:
# Preview the first rows of the working frame to check columns and values.
df.head()

Unnamed: 0,id,timestamp,date,activity,owner,class,binary_class
0,0,2003-05-08 00:00:00,2003-05-08,0,condition_1,unipolar depressive,depressed
1,0,2003-05-08 00:01:00,2003-05-08,0,condition_1,unipolar depressive,depressed
2,0,2003-05-08 00:02:00,2003-05-08,0,condition_1,unipolar depressive,depressed
3,0,2003-05-08 00:03:00,2003-05-08,0,condition_1,unipolar depressive,depressed
4,0,2003-05-08 00:04:00,2003-05-08,0,condition_1,unipolar depressive,depressed


In [4]:
def to_2D_array(df: pd.DataFrame, n_days: int, variable: str) -> np.ndarray:
    """Reshape one long-format column into a 2-D array with one row per ``id``.

    The rows of ``df`` must already be grouped by ``id`` (all rows of one id
    contiguous) with the same number of rows for every id. Values keep their
    existing row order inside each output row.

    Parameters
    ----------
    df : pd.DataFrame
        Long-format frame containing an ``id`` column and ``variable``.
    n_days : int
        Number of consecutive rows that belong to each id, i.e. the width of
        the returned array (the name is kept for backward compatibility).
    variable : str
        Name of the column whose values are reshaped.

    Returns
    -------
    np.ndarray
        Array of shape ``(number of unique ids, n_days)``.

    Raises
    ------
    ValueError
        If the column cannot be split into ``n_days`` values per id, or if the
        rows of an id are not contiguous (a plain reshape would then silently
        mix values of different ids in one output row).
    """
    ids: np.ndarray = df['id'].to_numpy()
    n_users: int = len(pd.unique(ids))
    values: np.ndarray = df[variable].to_numpy()

    try:
        arr: np.ndarray = values.reshape(n_users, n_days)
    except ValueError as err:
        raise ValueError(
            f"cannot arrange {values.size} '{variable}' values into "
            f"{n_users} ids x {n_days} values per id"
        ) from err

    # Each output row must be built from exactly one id: lay the id column out
    # the same way and require every row to be constant.
    ids_2d = ids.reshape(arr.shape)
    if not (ids_2d == ids_2d[:, :1]).all():
        raise ValueError(
            "rows of `df` are not grouped into contiguous blocks of "
            f"{arr.shape[1]} rows per id; sort by id (and time) before reshaping"
        )
    return arr

In [5]:
# One row per `id`, 1440 columns per row. The preview timestamps advance by one
# minute, so 1440 presumably is 24 * 60 samples per day — confirm against the data.
X = to_2D_array(df=df, n_days=1440, variable='activity')
X.shape

(1029, 1440)

In [6]:
# Min-max scale with a single global minimum/maximum: the matrix is flattened
# to one column before fitting, so every time step shares the same scale.
# NOTE(review): the scaler is fitted on all samples before the cross-validation
# split further down — confirm this is acceptable for the reported scores.
n_samples, n_timesteps = X.shape
scaler = MinMaxScaler()

X_reshaped = X.reshape(n_samples * n_timesteps, 1)
X = scaler.fit_transform(X_reshaped).reshape(n_samples, n_timesteps)

In [7]:
# One label per row of X: every sample occupies `X.shape[1]` consecutive rows
# of `df`, so the label is read from the first row of each such group (last
# column, `binary_class` in the preview). The stride comes from X instead of
# the hard-coded 1029 / 1440 so that X and y cannot drift out of sync.
steps_per_sample: int = X.shape[1]
y = df.iloc[::steps_per_sample, -1].values
assert y.shape[0] == X.shape[0], "expected exactly one label per row of X"
y.shape

(1029,)

In [8]:
def format_seconds(time_in_seconds: float) -> str:
    """Render a duration given in seconds as a zero-padded ``HH:MM:SS`` string.

    Fractional seconds are truncated and the hour field is not wrapped at 24,
    so long durations simply show a larger hour value.
    """
    whole_hours, within_hour = divmod(time_in_seconds, 3600)
    parts = (
        int(whole_hours),
        int(within_hour // 60),
        int(time_in_seconds % 60),
    )
    return ":".join(f"{part:02d}" for part in parts)

In [10]:
Classifier = TypeVar('Classifier', bound=BaseClassifier)
def run_classifier(clf: Classifier, X_train: np.ndarray, X_test: np.ndarray, y_train: np.ndarray, y_test: np.ndarray) -> dict:
    """Fit ``clf`` on the training split, evaluate it on the test split, print and return the metrics.

    Parameters
    ----------
    clf : Classifier
        Unfitted classifier exposing ``fit(X, y)`` and ``predict(X)``.
    X_train, X_test : np.ndarray
        Feature arrays for the training and test splits.
    y_train, y_test : np.ndarray
        Class labels for the training and test splits.

    Returns
    -------
    dict
        Keys ``'Accuracy'``, ``'Precision'``, ``'Recall/Sensitivity'``,
        ``'Specificity'``, ``'MCC'`` and ``'F1-Score'``.

    Notes
    -----
    * Precision, recall and F1 are support-weighted averages over all classes.
      A weighted-average recall is numerically equal to the accuracy.
    * Specificity is the recall of the first label in sorted order, i.e. that
      label is treated as the negative class. It is NaN when fewer than two
      labels exist or the test split contains no sample of that label.
    """
    clf.fit(X_train, y_train)
    y_pred: np.ndarray = clf.predict(X_test)

    # Metric computation
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, average='weighted', zero_division=0)
    recall = recall_score(y_test, y_pred, average='weighted', zero_division=0)
    f1 = f1_score(y_test, y_pred, average='weighted', zero_division=0)
    mcc = matthews_corrcoef(y_test, y_pred)

    # Specificity computation.
    # Pin the label set so the confusion matrix keeps one row/column per known
    # class even when a fold's true and predicted labels collapse to a single
    # class; otherwise cm[0, 1] would raise IndexError.
    # NOTE(review): labels are sorted, so with 'depressed'/'nondepressed' the
    # class 'depressed' ends up as the negative class here — confirm that this
    # is the intended convention.
    labels = np.unique(np.concatenate([np.asarray(y_train), np.asarray(y_test)]))
    cm = confusion_matrix(y_test, y_pred, labels=labels)
    if cm.shape[0] < 2:
        specificity = float('nan')
    else:
        tn = cm[0, 0]
        fp = cm[0, 1]
        # No negative samples in the test split: undefined rather than 0/0.
        specificity = tn / (tn + fp) if (tn + fp) > 0 else float('nan')

    metrics = {
        'Accuracy': accuracy,
        'Precision': precision,
        'Recall/Sensitivity': recall,
        'Specificity': specificity,
        'MCC': mcc,
        'F1-Score': f1
    }

    print(f'Accuracy score: {accuracy}')
    print(f'Precision: {precision}')
    print(f'Recall/Sensitivity: {recall}')
    print(f'Specificity: {specificity}')
    print(f'MCC: {mcc}')
    print(f'F1-Score: {f1}')
    print()

    return metrics

In [11]:
# time.process_time() reports CPU time of this process, not wall-clock time.
start_time: float = time.process_time()

# Per-fold metric dicts returned by run_classifier, one list per model.
metrics_tsf = []
metrics_rocket = []
cv = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)

count: int = 0  # remains 0 if the splitter yields no folds
for count, (train_index, test_index) in enumerate(cv.split(X, y), start=1):
    X_train = X[train_index]
    X_test = X[test_index]
    y_train = y[train_index]
    y_test = y[test_index]

    print(f'Iteração {count}: ')

    # A fresh, identically seeded model is built for every fold.
    print("Time Series Forest")
    tsf = TimeSeriesForestClassifier(random_state=42, n_jobs=-1)
    metrics_tsf.append(run_classifier(tsf, X_train, X_test, y_train, y_test))

    print("ROCKET")
    rocket = RocketClassifier(random_state=42, n_jobs=-1)
    metrics_rocket.append(run_classifier(rocket, X_train, X_test, y_train, y_test))
    print()

end_time: float = time.process_time()
cpu_execution_time: float = end_time - start_time
print(f'CPU Execution time: {format_seconds(cpu_execution_time)}')

Accuracy score: 0.7961165048543689
              precision    recall  f1-score   support

   depressed    0.72727   0.66667   0.69565        36
nondepressed    0.82857   0.86567   0.84672        67

    accuracy                        0.79612       103
   macro avg    0.77792   0.76617   0.77118       103
weighted avg    0.79317   0.79612   0.79392       103

Accuracy score: 0.8058252427184466
              precision    recall  f1-score   support

   depressed    0.73529   0.69444   0.71429        36
nondepressed    0.84058   0.86567   0.85294        67

    accuracy                        0.80583       103
   macro avg    0.78794   0.78006   0.78361       103
weighted avg    0.80378   0.80583   0.80448       103


Accuracy score: 0.883495145631068
              precision    recall  f1-score   support

   depressed    0.85294   0.80556   0.82857        36
nondepressed    0.89855   0.92537   0.91176        67

    accuracy                        0.88350       103
   macro avg    0.87575

In [12]:
# Collect the per-fold accuracies from the metric dicts gathered during
# cross-validation; `accuracies_tsf` / `accuracies_rocket` were not defined
# anywhere else, so this cell failed on a fresh kernel.
accuracies_tsf = [fold_metrics['Accuracy'] for fold_metrics in metrics_tsf]
accuracies_rocket = [fold_metrics['Accuracy'] for fold_metrics in metrics_rocket]

print(f'Acurácia Média TSF: {np.mean(accuracies_tsf):.2f}')
print(f'Acurácia Média ROCKET: {np.mean(accuracies_rocket):.2f}')

Acurácia Média TSF: 0.80
Acurácia Média ROCKET: 0.78
