## Importando as dependências

In [1]:
# Python
import numpy as np
import pandas as pd

# Classificadores
from sktime.classification.kernel_based import RocketClassifier
from sktime.classification.interval_based import TimeSeriesForestClassifier

# K-Fold
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import MinMaxScaler

# Métricas de avaliação
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, matthews_corrcoef, multilabel_confusion_matrix

## Importando o dataset

In [2]:
raw_csv_data = pd.read_csv('time_series.csv') 
df = raw_csv_data.copy() 

In [3]:
df.head()

Unnamed: 0,id,timestamp,date,activity,owner,afftype,binary_class
0,0,2003-05-08 00:00:00,2003-05-08,0,condition_1,unipolar depressive,depressed
1,0,2003-05-08 00:01:00,2003-05-08,0,condition_1,unipolar depressive,depressed
2,0,2003-05-08 00:02:00,2003-05-08,0,condition_1,unipolar depressive,depressed
3,0,2003-05-08 00:03:00,2003-05-08,0,condition_1,unipolar depressive,depressed
4,0,2003-05-08 00:04:00,2003-05-08,0,condition_1,unipolar depressive,depressed


## Funções auxiliares

In [4]:
def to_2D_array(df, n_days, variable):
    n_users = len(df.id.unique())
    arr = df[variable].values.reshape(n_users, n_days)
    return arr

In [5]:
def scale(arr):
    scaler = MinMaxScaler()
    n_samples, n_timesteps = arr.shape
    arr_reshaped = arr.reshape(-1, 1)
    
    arr_reshaped = scaler.fit_transform(arr_reshaped)
    arr = arr_reshaped.reshape(n_samples, n_timesteps)
    return arr

In [6]:
def run_classifier(clf, X_train, X_test, y_train):
    clf.fit(X_train, y_train) 
    y_pred = clf.predict(X_test)
    return y_pred

In [7]:
def specificity_score(y_test, y_pred):
    global classes
    mcm = multilabel_confusion_matrix(y_test, y_pred, labels=classes)
    specificity_per_class = []
    weights_per_class = []
    for i in range(len(mcm)):
        tn, fp, fn, tp = mcm[i].ravel()
        specificity = tn / (tn + fp)
        specificity_per_class.append(specificity)
        weights_per_class.append(fn + tp)
    return specificity_per_class, weights_per_class

In [8]:
def evaluate_classifier(y_test, y_pred, classes):
    metrics = {}
    
    # Cálculo das métricas para cada classe individualmente
    for cls in classes:
        cls_metrics = {}
        y_test_bin = (y_test == cls).astype(int)
        y_pred_bin = (y_pred == cls).astype(int)

        cls_metrics['Precision'] = precision_score(y_test_bin, y_pred_bin, zero_division=0)
        cls_metrics['Recall/Sensitivity'] = recall_score(y_test_bin, y_pred_bin, zero_division=0)
        cls_metrics['Accuracy'] = accuracy_score(y_test_bin, y_pred_bin)
        cls_metrics['MCC'] = matthews_corrcoef(y_test_bin, y_pred_bin)
        cls_metrics['F1-Score'] = f1_score(y_test_bin, y_pred_bin, zero_division=0)
        
        metrics[cls] = cls_metrics
    
    # Cálculo das métricas com média ponderada (weighted average)
    weighted_metrics = {}
    weighted_metrics['Precision'] = precision_score(y_test, y_pred, average='weighted', zero_division=0)
    weighted_metrics['Recall/Sensitivity'] = recall_score(y_test, y_pred, average='weighted', zero_division=0)
    weighted_metrics['F1-Score'] = f1_score(y_test, y_pred, average='weighted', zero_division=0)
    weighted_metrics['Accuracy'] = accuracy_score(y_test, y_pred)
    weighted_metrics['MCC'] = matthews_corrcoef(y_test, y_pred)
    
    # Cálculo da Especificidade
    specificity_per_class, weights_per_class = specificity_score(y_test, y_pred)
    for i, cls in enumerate(classes):
        metrics[cls]['Specificity'] = specificity_per_class[i]
    weighted_metrics['Specificity'] = np.average(specificity_per_class, weights=weights_per_class)
    
    metrics['weighted average'] = weighted_metrics
    
    # Exibindo as métricas no console
    for cls, cls_metrics in metrics.items():
        print(f"Class {cls}:")
        for metric_name, value in cls_metrics.items():
            print(f"  {metric_name}: {value:.5f}")
        print()

    return metrics

In [9]:
def calculate_mean_metrics(metrics_list, classes):
    mean_metrics = {cls: {key: np.mean([fold_metrics[cls][key] for fold_metrics in metrics_list]) for key in metrics_list[0][cls]} for cls in classes}
    return mean_metrics

In [10]:
def organize_metrics(metrics, classes):
    rows = []
    for classifier, metric_list in metrics.items():
        mean_metrics = calculate_mean_metrics(metric_list, classes)
        for subclass in metric_list[0].keys():
            rows.append((classifier, subclass, mean_metrics[subclass]))
    metrics_df = pd.DataFrame([row[2] for row in rows], index=pd.MultiIndex.from_tuples([(row[0], row[1]) for row in rows], names=['Classifier', 'Subclass']))
    return metrics_df

In [11]:
from IPython.display import display, HTML

def display_side_by_side(dfs:list, captions:list):
    """Display tables side by side to save vertical space
    Input:
        dfs: list of pandas.DataFrame
        captions: list of table captions
    """
    output = ""
    combined = dict(zip(captions, dfs))
    for caption, df in combined.items():
        output += df.style.set_table_attributes("style='display:inline'").set_caption(caption)._repr_html_()
        output += "\xa0\xa0\xa0"
    display(HTML(output))

# Classificação inicical ('depressed' vs 'nondepressed')

## Seperando o dataset entre X e y (features e classe)

In [12]:
X = to_2D_array(df, 1440, 'activity')
X = scale(X)
y = df.iloc[[ i*1440 for i in range(1029) ], -1].values
print('Formato X:', X.shape)
print('Formato y:', y.shape)

Formato X: (1029, 1440)
Formato y: (1029,)


## Executando os algoritmos

In [13]:
count = 0
metrics = { 'Time Series Forest': [], 'ROCKET': [] }
errors = { 'Time Series Forest': [], 'ROCKET': [] }
classes = ['depressed', 'nondepressed']
cv = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)
for train_index, test_index in cv.split(X, y):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]
    
    tsf = TimeSeriesForestClassifier(random_state=42, n_jobs=-1, min_interval=45, n_estimators=500)
    rocket = RocketClassifier(random_state=42, n_jobs=-1)
    
    count += 1 
    print(f'Iteração {count}: ')
    print("Time Series Forest")
    y_pred = run_classifier(tsf, X_train, X_test, y_train)
    errors['Time Series Forest'].extend(test_index[y_pred != y_test])
    metrics['Time Series Forest'].append(evaluate_classifier(y_test, y_pred, classes))

    print("\nROCKET")
    y_pred = run_classifier(rocket, X_train, X_test, y_train)
    errors['ROCKET'].extend(test_index[y_pred != y_test])
    metrics['ROCKET'].append(evaluate_classifier(y_test, y_pred, classes))
    print('\n')

Iteração 1: 
Time Series Forest
Class depressed:
  Precision: 0.71429
  Recall/Sensitivity: 0.69444
  Accuracy: 0.79612
  MCC: 0.54884
  F1-Score: 0.70423
  Specificity: 0.85075

Class nondepressed:
  Precision: 0.83824
  Recall/Sensitivity: 0.85075
  Accuracy: 0.79612
  MCC: 0.54884
  F1-Score: 0.84444
  Specificity: 0.69444

Class weighted average:
  Precision: 0.79491
  Recall/Sensitivity: 0.79612
  F1-Score: 0.79544
  Accuracy: 0.79612
  MCC: 0.54884
  Specificity: 0.74907


ROCKET
Class depressed:
  Precision: 0.73529
  Recall/Sensitivity: 0.69444
  Accuracy: 0.80583
  MCC: 0.56794
  F1-Score: 0.71429
  Specificity: 0.86567

Class nondepressed:
  Precision: 0.84058
  Recall/Sensitivity: 0.86567
  Accuracy: 0.80583
  MCC: 0.56794
  F1-Score: 0.85294
  Specificity: 0.69444

Class weighted average:
  Precision: 0.80378
  Recall/Sensitivity: 0.80583
  F1-Score: 0.80448
  Accuracy: 0.80583
  MCC: 0.56794
  Specificity: 0.75429



Iteração 2: 
Time Series Forest
Class depressed:
  Preci

## Mostrando os resultados

In [14]:
metrics_df = organize_metrics(metrics, list(classes + ['weighted average']))

In [15]:
display(metrics_df)

Unnamed: 0_level_0,Unnamed: 1_level_0,Precision,Recall/Sensitivity,Accuracy,MCC,F1-Score,Specificity
Classifier,Subclass,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Time Series Forest,depressed,0.756571,0.690873,0.813383,0.583761,0.721203,0.879104
Time Series Forest,nondepressed,0.841528,0.879104,0.813383,0.583761,0.859617,0.690873
Time Series Forest,weighted average,0.811943,0.813383,0.813383,0.583761,0.811328,0.756595
ROCKET,depressed,0.714866,0.615556,0.780354,0.503494,0.660512,0.868657
ROCKET,nondepressed,0.808948,0.868657,0.780354,0.503494,0.837518,0.615556
ROCKET,weighted average,0.776145,0.780354,0.780354,0.503494,0.775773,0.703858


In [16]:
print(f'Number of errors from TSF: {len(errors['Time Series Forest'])}')
print(f'Number of errors from ROCKET: {len(errors['ROCKET'])}')

Number of errors from TSF: 192
Number of errors from ROCKET: 226


In [17]:
just_tsf = [ x for x in errors['Time Series Forest'] if x not in errors['ROCKET'] ]
just_rocket = [ x for x in errors['ROCKET'] if x not in errors['Time Series Forest'] ]
both = [ x for x in errors['Time Series Forest'] if x in errors['ROCKET'] ]
print(f'Number of mutual errors: {len(both)}')
print(f'Number of errors just from TSF: {len(just_tsf)}')
print(f'Number of errors just from ROCKET: {len(just_rocket)}')

Number of mutual errors: 126
Number of errors just from TSF: 66
Number of errors just from ROCKET: 100


In [18]:
both_df = df.iloc[[ i*1440 for i in both ]]
just_tsf_df = df.iloc[[ i*1440 for i in just_tsf ]]
just_rocket_df = df.iloc[[ i*1440 for i in just_rocket ]]

In [19]:
df_list = [df.groupby('binary_class').count().loc[:, ['id']].floordiv(1440), both_df.groupby('binary_class').count().loc[:, ['id']], just_tsf_df.groupby('binary_class').count().loc[:, ['id']], just_rocket_df.groupby('binary_class').count().loc[:, ['id']]]
display_side_by_side(df_list, ['Original', 'Both', 'TSF', 'ROCKET'])

Unnamed: 0_level_0,id
binary_class,Unnamed: 1_level_1
depressed,359
nondepressed,670

Unnamed: 0_level_0,id
binary_class,Unnamed: 1_level_1
depressed,83
nondepressed,43

Unnamed: 0_level_0,id
binary_class,Unnamed: 1_level_1
depressed,28
nondepressed,38

Unnamed: 0_level_0,id
binary_class,Unnamed: 1_level_1
depressed,55
nondepressed,45


In [20]:
df_list = [df.groupby('afftype').count().loc[:, ['id']].floordiv(1440), both_df.groupby('afftype').count().loc[:, ['id']], just_tsf_df.groupby('afftype').count().loc[:, ['id']], just_rocket_df.groupby('afftype').count().loc[:, ['id']]]
display_side_by_side(df_list, ['Original', 'Both', 'TSF', 'ROCKET'])

Unnamed: 0_level_0,id
afftype,Unnamed: 1_level_1
bipolar I,14
bipolar II,112
control,670
unipolar depressive,233

Unnamed: 0_level_0,id
afftype,Unnamed: 1_level_1
bipolar II,19
control,43
unipolar depressive,64

Unnamed: 0_level_0,id
afftype,Unnamed: 1_level_1
bipolar II,11
control,38
unipolar depressive,17

Unnamed: 0_level_0,id
afftype,Unnamed: 1_level_1
bipolar II,13
control,45
unipolar depressive,42


# Classificação hierárquica ('bipolar' vs 'unipolar depressive')

## Filtrando o dataset

In [31]:
depressed_df = df.loc[df['binary_class']=='depressed'].copy()
print(f'Number of days: {depressed_df.shape[0]//1440}')

Number of days: 359


In [32]:
depressed_df.sort_values(by='afftype', inplace=True)

## Seperando o dataset entre X e y (features e classe)

In [33]:
h_X = to_2D_array(depressed_df, 1440, 'activity')
h_y = depressed_df.iloc[[ i*1440 for i in range(h_X.shape[0]) ], -2].values
print('Formato X:', h_X.shape)
print('Formato y:', h_y.shape)

Formato X: (359, 1440)
Formato y: (359,)


## Executando os algoritmos

In [34]:
count = 0
h_metrics = { 'Time Series Forest': [], 'ROCKET': [] }
h_errors = { 'Time Series Forest': [], 'ROCKET': [] }
classes = ['bipolar I', 'bipolar II', 'unipolar depressive']
cv = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)
for train_index, test_index in cv.split(h_X, h_y):
    X_train, X_test = h_X[train_index], h_X[test_index]
    y_train, y_test = h_y[train_index], h_y[test_index]
    
    tsf = TimeSeriesForestClassifier(random_state=42, n_jobs=-1, min_interval=45, n_estimators=500)
    rocket = RocketClassifier(random_state=42, n_jobs=-1)
    
    count += 1 
    print(f'Iteração {count}: ')
    print("Time Series Forest")
    y_pred = run_classifier(tsf, X_train, X_test, y_train)
    h_errors['Time Series Forest'].extend(test_index[y_pred != y_test])
    h_metrics['Time Series Forest'].append(evaluate_classifier(y_test, y_pred, classes))

    print("\nROCKET")
    y_pred = run_classifier(rocket, X_train, X_test, y_train)
    h_errors['ROCKET'].extend(test_index[y_pred != y_test])
    h_metrics['ROCKET'].append(evaluate_classifier(y_test, y_pred, classes))
    print('\n')

Iteração 1: 
Time Series Forest


Class bipolar I:
  Precision: 1.00000
  Recall/Sensitivity: 0.50000
  Accuracy: 0.97222
  MCC: 0.69693
  F1-Score: 0.66667
  Specificity: 1.00000

Class bipolar II:
  Precision: 0.50000
  Recall/Sensitivity: 0.18182
  Accuracy: 0.69444
  MCC: 0.14924
  F1-Score: 0.26667
  Specificity: 0.92000

Class unipolar depressive:
  Precision: 0.67742
  Recall/Sensitivity: 0.91304
  Accuracy: 0.66667
  MCC: 0.19974
  F1-Score: 0.77778
  Specificity: 0.23077

Class weighted average:
  Precision: 0.64113
  Recall/Sensitivity: 0.66667
  F1-Score: 0.61543
  Accuracy: 0.66667
  MCC: 0.23239
  Specificity: 0.48410


ROCKET
Class bipolar I:
  Precision: 1.00000
  Recall/Sensitivity: 0.50000
  Accuracy: 0.97222
  MCC: 0.69693
  F1-Score: 0.66667
  Specificity: 1.00000

Class bipolar II:
  Precision: 0.80000
  Recall/Sensitivity: 0.36364
  Accuracy: 0.77778
  MCC: 0.43108
  F1-Score: 0.50000
  Specificity: 0.96000

Class unipolar depressive:
  Precision: 0.73333
  Recall/Sensitivity: 0.95652
  Accuracy: 0

## Mostrando os resultados

In [35]:
h_metrics_df = organize_metrics(h_metrics, list(classes + ['weighted average']))

In [36]:
display(h_metrics_df)

Unnamed: 0_level_0,Unnamed: 1_level_0,Precision,Recall/Sensitivity,Accuracy,MCC,F1-Score,Specificity
Classifier,Subclass,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Time Series Forest,bipolar I,0.516667,0.55,0.975,0.519826,0.513333,0.994202
Time Series Forest,bipolar II,0.551667,0.214394,0.693492,0.182465,0.302161,0.911
Time Series Forest,unipolar depressive,0.70737,0.922464,0.701825,0.288552,0.80044,0.29359
Time Series Forest,weighted average,0.650728,0.685159,0.685159,0.267916,0.632357,0.513633
ROCKET,bipolar I,0.3,0.25,0.972143,0.269693,0.266667,1.0
ROCKET,bipolar II,0.613276,0.374242,0.726905,0.309896,0.458299,0.8865
ROCKET,unipolar depressive,0.751642,0.918841,0.749206,0.422319,0.826001,0.434615
ROCKET,weighted average,0.693202,0.724127,0.724127,0.370611,0.691086,0.596988


In [37]:
print(f'Number of errors from TSF: {len(h_errors['Time Series Forest'])}')
print(f'Number of errors from ROCKET: {len(h_errors['ROCKET'])}')

Number of errors from TSF: 113
Number of errors from ROCKET: 99


In [38]:
h_just_tsf = [ x for x in h_errors['Time Series Forest'] if x not in h_errors['ROCKET'] ]
h_just_rocket = [ x for x in h_errors['ROCKET'] if x not in h_errors['Time Series Forest'] ]
h_both = [ x for x in h_errors['Time Series Forest'] if x in h_errors['ROCKET'] ]
print(f'Number of mutual errors: {len(h_both)}')
print(f'Number of errors just from TSF: {len(h_just_tsf)}')
print(f'Number of errors just from ROCKET: {len(h_just_rocket)}')

Number of mutual errors: 69
Number of errors just from TSF: 44
Number of errors just from ROCKET: 30


In [39]:
h_both_df = df.iloc[[ i*1440 for i in h_both ]]
h_just_tsf_df = df.iloc[[ i*1440 for i in h_just_tsf ]]
h_just_rocket_df = df.iloc[[ i*1440 for i in h_just_rocket ]]

In [40]:
h_df_list = [depressed_df.groupby('afftype').count().loc[:, ['id']].floordiv(1440), h_both_df.groupby('afftype').count().loc[:, ['id']], h_just_tsf_df.groupby('afftype').count().loc[:, ['id']], h_just_rocket_df.groupby('afftype').count().loc[:, ['id']]]
display_side_by_side(h_df_list, ['Original', 'Both', 'TSF', 'ROCKET'])

Unnamed: 0_level_0,id
afftype,Unnamed: 1_level_1
bipolar I,14
bipolar II,112
unipolar depressive,233

Unnamed: 0_level_0,id
afftype,Unnamed: 1_level_1
bipolar I,1
bipolar II,29
unipolar depressive,39

Unnamed: 0_level_0,id
afftype,Unnamed: 1_level_1
bipolar II,15
unipolar depressive,29

Unnamed: 0_level_0,id
afftype,Unnamed: 1_level_1
bipolar I,2
bipolar II,6
unipolar depressive,22
