## Importando as dependências

In [1]:
# Python
import time
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Classificadores
from sktime.classification.kernel_based import RocketClassifier
from sktime.classification.interval_based import TimeSeriesForestClassifier

# Grid Search
from sktime.classification.model_selection import TSCGridSearchCV

# Auxiliares
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, matthews_corrcoef, classification_report, multilabel_confusion_matrix

## Importando o dataset

In [3]:
# Read the CSV once and leave that frame untouched; later cells work on a separate copy.
raw_csv_data = pd.read_csv(filepath_or_buffer='time_series.csv')
df: pd.DataFrame = raw_csv_data.copy(deep=True)

In [4]:
# Preview the first rows of the working frame.
df.head()

Unnamed: 0,id,timestamp,date,activity,owner,gender,melanch,inpatient,marriage,work,afftype,binary_class
0,0,2003-05-08 00:00:00,2003-05-08,0,condition_1,male,no melancholia,outpatient,married or cohabiting,unemployed/sick leave/pension,unipolar depressive,depressed
1,0,2003-05-08 00:01:00,2003-05-08,0,condition_1,male,no melancholia,outpatient,married or cohabiting,unemployed/sick leave/pension,unipolar depressive,depressed
2,0,2003-05-08 00:02:00,2003-05-08,0,condition_1,male,no melancholia,outpatient,married or cohabiting,unemployed/sick leave/pension,unipolar depressive,depressed
3,0,2003-05-08 00:03:00,2003-05-08,0,condition_1,male,no melancholia,outpatient,married or cohabiting,unemployed/sick leave/pension,unipolar depressive,depressed
4,0,2003-05-08 00:04:00,2003-05-08,0,condition_1,male,no melancholia,outpatient,married or cohabiting,unemployed/sick leave/pension,unipolar depressive,depressed


## Funções auxiliares

In [5]:
def to_2D_array(df, n_days, variable):
    """Reshape one long-format column into a 2-D array with one row per ``id``.

    Parameters
    ----------
    df : pandas.DataFrame
        Long-format frame holding an ``id`` column and the ``variable`` column.
        All rows of one ``id`` must be contiguous, and every ``id`` must own
        the same number of rows.
    n_days : int
        Number of consecutive rows that belong to each ``id``; it becomes the
        second dimension of the result.
        NOTE(review): the notebook passes 1440 here, so despite the name this
        looks like a per-series sample count rather than a number of days.
    variable : str
        Name of the column whose values are reshaped.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(number of unique ids, n_days)``.

    Raises
    ------
    ValueError
        If the column cannot be reshaped to that shape, or if a row of the
        result would contain values coming from more than one ``id``.
    """
    n_users = len(df['id'].unique())
    arr = df[variable].values.reshape(n_users, n_days)

    # A plain reshape trusts the row order blindly. Reshape the ids the same
    # way and require every row to carry a single id, so an unsorted or uneven
    # frame fails loudly instead of silently blending several series together.
    ids = df['id'].to_numpy().reshape(arr.shape)
    if not (ids == ids[:, :1]).all():
        raise ValueError(
            "rows of each id must be contiguous and every id must have "
            "exactly {} rows".format(arr.shape[1])
        )
    return arr

In [6]:
def find_best_hyperparameters(clf, param_grid, X, y, scoring='f1_weighted', cv=10, verbose=5):
    """Run a cross-validated grid search and return its results as a table.

    Parameters
    ----------
    clf : estimator
        Classifier passed to ``TSCGridSearchCV`` as ``estimator``.
    param_grid : dict
        Mapping of parameter name to the list of values to try.
    X, y : array-like
        Training series and their labels, forwarded to ``fit``.
    scoring : str or callable, default 'f1_weighted'
        Metric used to score each candidate.
    cv : int or cross-validation splitter, default 10
        Cross-validation strategy forwarded to the search. Pass a splitter
        object here when the folds need to be shuffled or seeded.
    verbose : int, default 5
        Verbosity level forwarded to the search.

    Returns
    -------
    pandas.DataFrame
        The search's ``cv_results_`` as a frame, one row per candidate.
    """
    grid = TSCGridSearchCV(
        estimator=clf,
        param_grid=param_grid,
        scoring=scoring,
        cv=cv,
        verbose=verbose,
    )
    grid.fit(X, y)
    return pd.DataFrame(grid.cv_results_)

## Separando o dataset entre X e y (features e classe)

In [7]:
# Number of consecutive rows that make up one series in the long frame.
# Kept in one place so X and y cannot drift apart.
SERIES_LENGTH = 1440

X = to_2D_array(df, SERIES_LENGTH, 'activity')
# Take the last column from the first row of every series.
# NOTE(review): assumes the label is constant within a series — confirm.
y = df.iloc[::SERIES_LENGTH, -1].values
assert len(y) == X.shape[0], 'X and y must describe the same number of series'
print('Formato X:', X.shape)
print('Formato y:', y.shape)

Formato X: (1029, 1440)
Formato y: (1029,)


# TSF

In [8]:
# Time Series Forest estimator built with its default arguments.
tsf = TimeSeriesForestClassifier()

In [9]:
# Grid that varies only min_interval; random_state is held at a single value.
parameters = dict(
    random_state=[42],
    min_interval=[3, 5, 10, 15, 20, 30, 45, 60],
)

In [10]:
# Grid search over the min_interval candidates.
tsf_grid_min_interval = find_best_hyperparameters(clf=tsf, param_grid=parameters, X=X, y=y)

Fitting 10 folds for each of 8 candidates, totalling 80 fits
[CV 1/10] END ..min_interval=3, random_state=42;, score=0.650 total time=  41.7s
[CV 2/10] END ..min_interval=3, random_state=42;, score=0.712 total time=  46.2s
[CV 3/10] END ..min_interval=3, random_state=42;, score=0.691 total time=  44.3s
[CV 4/10] END ..min_interval=3, random_state=42;, score=0.704 total time=  44.9s
[CV 5/10] END ..min_interval=3, random_state=42;, score=0.630 total time=  44.1s
[CV 6/10] END ..min_interval=3, random_state=42;, score=0.814 total time=  50.8s
[CV 7/10] END ..min_interval=3, random_state=42;, score=0.642 total time=  45.9s
[CV 8/10] END ..min_interval=3, random_state=42;, score=0.905 total time=  46.8s
[CV 9/10] END ..min_interval=3, random_state=42;, score=0.856 total time=  47.5s
[CV 10/10] END .min_interval=3, random_state=42;, score=0.662 total time=  46.4s
[CV 1/10] END ..min_interval=5, random_state=42;, score=0.630 total time=  45.9s
[CV 2/10] END ..min_interval=5, random_state=42;

  _data = np.array(data, dtype=dtype, copy=copy,


In [11]:
# Grid that varies only inner_series_length; random_state is held at a single value.
parameters = dict(
    random_state=[42],
    inner_series_length=[60, 360, 720, 180, 1440],
)

In [12]:
# Grid search over the inner_series_length candidates.
tsf_grid_inner_series_length = find_best_hyperparameters(clf=tsf, param_grid=parameters, X=X, y=y)

Fitting 10 folds for each of 5 candidates, totalling 50 fits
[CV 1/10] END inner_series_length=60, random_state=42;, score=0.720 total time=  19.9s
[CV 2/10] END inner_series_length=60, random_state=42;, score=0.680 total time=  19.9s
[CV 3/10] END inner_series_length=60, random_state=42;, score=0.737 total time=  19.9s
[CV 4/10] END inner_series_length=60, random_state=42;, score=0.655 total time=  19.4s
[CV 5/10] END inner_series_length=60, random_state=42;, score=0.640 total time=  18.4s
[CV 6/10] END inner_series_length=60, random_state=42;, score=0.803 total time=  19.0s
[CV 7/10] END inner_series_length=60, random_state=42;, score=0.576 total time=  20.3s
[CV 8/10] END inner_series_length=60, random_state=42;, score=0.923 total time=  19.5s
[CV 9/10] END inner_series_length=60, random_state=42;, score=0.902 total time=  20.2s
[CV 10/10] END inner_series_length=60, random_state=42;, score=0.649 total time=  21.1s
[CV 1/10] END inner_series_length=360, random_state=42;, score=0.704

In [13]:
# Grid that varies only n_estimators; random_state is held at a single value.
parameters = dict(
    random_state=[42],
    n_estimators=[100, 200, 500, 750, 1000],
)

In [14]:
# Grid search over the n_estimators candidates.
tsf_grid_n_estimators = find_best_hyperparameters(clf=tsf, param_grid=parameters, X=X, y=y)

Fitting 10 folds for each of 5 candidates, totalling 50 fits
[CV 1/10] END n_estimators=100, random_state=42;, score=0.638 total time=  21.7s
[CV 2/10] END n_estimators=100, random_state=42;, score=0.700 total time=  21.2s
[CV 3/10] END n_estimators=100, random_state=42;, score=0.699 total time=  21.9s
[CV 4/10] END n_estimators=100, random_state=42;, score=0.696 total time=  20.8s
[CV 5/10] END n_estimators=100, random_state=42;, score=0.611 total time=  20.8s
[CV 6/10] END n_estimators=100, random_state=42;, score=0.801 total time=  21.1s
[CV 7/10] END n_estimators=100, random_state=42;, score=0.628 total time=  20.6s
[CV 8/10] END n_estimators=100, random_state=42;, score=0.895 total time=  20.8s
[CV 9/10] END n_estimators=100, random_state=42;, score=0.865 total time=  21.2s
[CV 10/10] END n_estimators=100, random_state=42;, score=0.662 total time=  21.1s
[CV 1/10] END n_estimators=200, random_state=42;, score=0.650 total time=  41.2s
[CV 2/10] END n_estimators=200, random_state=42

In [18]:
# Show the five best-ranked candidates of each single-parameter search, in the order the searches were run.
display(*(
    search_results.sort_values(by='rank_test_score').head()
    for search_results in (
        tsf_grid_min_interval,
        tsf_grid_inner_series_length,
        tsf_grid_n_estimators,
    )
))

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_min_interval,param_random_state,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,split5_test_score,split6_test_score,split7_test_score,split8_test_score,split9_test_score,mean_test_score,std_test_score,rank_test_score
6,44.163174,0.529419,2.164575,0.01812,45,42,"{'min_interval': 45, 'random_state': 42}",0.642445,0.729047,0.761972,0.684685,0.601637,0.802928,0.642077,0.91433,0.855614,0.708374,0.734311,0.09503,1
0,43.75806,2.253319,2.204832,0.112461,3,42,"{'min_interval': 3, 'random_state': 42}",0.650459,0.712143,0.690783,0.704393,0.630137,0.813544,0.642077,0.904685,0.855614,0.661897,0.726573,0.091835,2
5,44.557225,1.324236,2.13991,0.015831,30,42,"{'min_interval': 30, 'random_state': 42}",0.645676,0.716567,0.712108,0.681856,0.630013,0.783221,0.642077,0.895292,0.865563,0.691585,0.726396,0.088053,3
7,45.413552,1.135674,2.201952,0.028246,60,42,"{'min_interval': 60, 'random_state': 42}",0.638486,0.716567,0.741274,0.684685,0.582667,0.772383,0.648132,0.904371,0.874865,0.671024,0.723446,0.097663,4
2,43.485074,0.528298,2.142159,0.032162,10,42,"{'min_interval': 10, 'random_state': 42}",0.634435,0.687361,0.716567,0.717507,0.611169,0.811969,0.648132,0.866885,0.855614,0.683029,0.723267,0.086659,5


Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_inner_series_length,param_random_state,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,split5_test_score,split6_test_score,split7_test_score,split8_test_score,split9_test_score,mean_test_score,std_test_score,rank_test_score
1,23.58166,0.378417,1.105592,0.063473,360,42,"{'inner_series_length': 360, 'random_state': 42}",0.70374,0.707154,0.78496,0.646616,0.630137,0.770218,0.620385,0.903974,0.836363,0.683029,0.728657,0.088634,1
0,19.038085,0.64724,0.8226,0.064889,60,42,"{'inner_series_length': 60, 'random_state': 42}",0.720448,0.679553,0.737311,0.654744,0.639576,0.803368,0.575917,0.923497,0.90224,0.648638,0.728529,0.109252,2
4,39.641299,1.911688,2.083875,0.215875,1440,42,"{'inner_series_length': 1440, 'random_state': 42}",0.650459,0.712143,0.690783,0.704393,0.630137,0.813544,0.642077,0.904685,0.855614,0.661897,0.726573,0.091835,3
2,28.754553,0.782024,1.457465,0.157094,720,42,"{'inner_series_length': 720, 'random_state': 42}",0.681856,0.687361,0.785467,0.678654,0.620355,0.750237,0.620385,0.904918,0.798785,0.679275,0.720729,0.084461,4
3,20.665663,0.375385,0.912276,0.021147,180,42,"{'inner_series_length': 180, 'random_state': 42}",0.712143,0.679553,0.779552,0.646616,0.589809,0.763514,0.577,0.914084,0.816104,0.691585,0.716996,0.098764,5


Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_n_estimators,param_random_state,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,split5_test_score,split6_test_score,split7_test_score,split8_test_score,split9_test_score,mean_test_score,std_test_score,rank_test_score
3,151.463915,2.932755,7.746858,0.242159,750,42,"{'n_estimators': 750, 'random_state': 42}",0.658481,0.695572,0.749889,0.693212,0.630013,0.790199,0.649748,0.895292,0.865563,0.677996,0.730597,0.08747,1
2,105.271997,4.540337,5.325963,0.33113,500,42,"{'n_estimators': 500, 'random_state': 42}",0.654744,0.708204,0.712108,0.715409,0.630013,0.80116,0.649748,0.894992,0.856275,0.674859,0.729751,0.086244,2
4,200.591126,5.425059,10.177572,0.111062,1000,42,"{'n_estimators': 1000, 'random_state': 42}",0.658481,0.695572,0.712108,0.715409,0.620626,0.790199,0.649748,0.895292,0.865563,0.669933,0.727293,0.088371,3
1,40.59381,1.775137,2.035832,0.05193,200,42,"{'n_estimators': 200, 'random_state': 42}",0.650459,0.712143,0.690783,0.704393,0.630137,0.813544,0.642077,0.904685,0.855614,0.661897,0.726573,0.091835,4
0,20.205948,0.386683,1.024237,0.026761,100,42,"{'n_estimators': 100, 'random_state': 42}",0.637783,0.699878,0.698944,0.695782,0.611169,0.80116,0.627858,0.895292,0.864885,0.661897,0.719465,0.094978,5


In [19]:
# Combined grid: two candidate values for each of the three tuned parameters.
parameters = dict(
    random_state=[42],
    min_interval=[3, 45],
    inner_series_length=[60, 360],
    n_estimators=[500, 750],
)

In [20]:
# Grid search over the combined parameter grid.
tsf_grid = find_best_hyperparameters(clf=tsf, param_grid=parameters, X=X, y=y)

Fitting 10 folds for each of 8 candidates, totalling 80 fits
[CV 1/10] END inner_series_length=60, min_interval=3, n_estimators=500, random_state=42;, score=0.720 total time=  47.7s
[CV 2/10] END inner_series_length=60, min_interval=3, n_estimators=500, random_state=42;, score=0.673 total time=  47.9s
[CV 3/10] END inner_series_length=60, min_interval=3, n_estimators=500, random_state=42;, score=0.746 total time=  47.3s
[CV 4/10] END inner_series_length=60, min_interval=3, n_estimators=500, random_state=42;, score=0.647 total time=  46.6s
[CV 5/10] END inner_series_length=60, min_interval=3, n_estimators=500, random_state=42;, score=0.649 total time=  46.2s
[CV 6/10] END inner_series_length=60, min_interval=3, n_estimators=500, random_state=42;, score=0.785 total time=  47.8s
[CV 7/10] END inner_series_length=60, min_interval=3, n_estimators=500, random_state=42;, score=0.569 total time=  48.6s
[CV 8/10] END inner_series_length=60, min_interval=3, n_estimators=500, random_state=42;, sc

In [21]:
# Five best-ranked candidates of the combined search.
display(tsf_grid.sort_values('rank_test_score').head(n=5))

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_inner_series_length,param_min_interval,param_n_estimators,param_random_state,params,split0_test_score,...,split3_test_score,split4_test_score,split5_test_score,split6_test_score,split7_test_score,split8_test_score,split9_test_score,mean_test_score,std_test_score,rank_test_score
2,56.23228,1.778592,2.211766,0.069836,60,45,500,42,"{'inner_series_length': 60, 'min_interval': 45...",0.744604,...,0.678654,0.592202,0.747583,0.620558,0.867102,0.874865,0.703659,0.732173,0.086991,1
6,75.742151,1.300069,3.355748,0.128557,360,45,500,42,"{'inner_series_length': 360, 'min_interval': 4...",0.715671,...,0.670313,0.620607,0.763514,0.605967,0.894992,0.835461,0.716813,0.730183,0.086667,2
5,132.334918,83.737551,4.76006,0.352423,360,3,750,42,"{'inner_series_length': 360, 'min_interval': 3...",0.720576,...,0.65857,0.639576,0.761392,0.584076,0.885246,0.82628,0.70416,0.727641,0.084761,3
0,46.040288,0.876946,1.907623,0.0512,60,3,500,42,"{'inner_series_length': 60, 'min_interval': 3,...",0.720448,...,0.646616,0.649104,0.785467,0.569054,0.904685,0.901464,0.674859,0.727054,0.104374,4
4,58.418357,1.072893,2.670229,0.058716,360,3,500,42,"{'inner_series_length': 360, 'min_interval': 3...",0.724172,...,0.646616,0.601637,0.761392,0.591153,0.894602,0.836363,0.70416,0.726502,0.09209,5


In [22]:
# Single-candidate grid used for one extra check.
parameters = dict(
    random_state=[42],
    min_interval=[45],
    inner_series_length=[1440],
    n_estimators=[500],
)

In [23]:
# Cross-validate the single candidate defined above.
tsf_grid_final_test = find_best_hyperparameters(clf=tsf, param_grid=parameters, X=X, y=y)

Fitting 10 folds for each of 1 candidates, totalling 10 fits
[CV 1/10] END inner_series_length=1440, min_interval=45, n_estimators=500, random_state=42;, score=0.630 total time= 2.5min
[CV 2/10] END inner_series_length=1440, min_interval=45, n_estimators=500, random_state=42;, score=0.704 total time= 2.3min
[CV 3/10] END inner_series_length=1440, min_interval=45, n_estimators=500, random_state=42;, score=0.725 total time= 2.3min
[CV 4/10] END inner_series_length=1440, min_interval=45, n_estimators=500, random_state=42;, score=0.707 total time= 2.2min
[CV 5/10] END inner_series_length=1440, min_interval=45, n_estimators=500, random_state=42;, score=0.640 total time= 2.1min
[CV 6/10] END inner_series_length=1440, min_interval=45, n_estimators=500, random_state=42;, score=0.781 total time= 2.1min
[CV 7/10] END inner_series_length=1440, min_interval=45, n_estimators=500, random_state=42;, score=0.634 total time= 2.1min
[CV 8/10] END inner_series_length=1440, min_interval=45, n_estimators=5

In [24]:
# Result table of the single-candidate check.
display(tsf_grid_final_test.sort_values('rank_test_score').head(n=5))

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_inner_series_length,param_min_interval,param_n_estimators,param_random_state,params,split0_test_score,...,split3_test_score,split4_test_score,split5_test_score,split6_test_score,split7_test_score,split8_test_score,split9_test_score,mean_test_score,std_test_score,rank_test_score
0,148.048741,34.999552,8.566427,2.784058,1440,45,500,42,"{'inner_series_length': 1440, 'min_interval': ...",0.629898,...,0.706719,0.639595,0.781276,0.634432,0.904685,0.874865,0.70416,0.730442,0.09131,1


In [25]:
# Save the combined TSF search results to disk without the index column.
tsf_grid.to_csv(path_or_buf='grid_search_tsf.csv', index=False)

# ROCKET

In [26]:
# ROCKET estimator built with its default arguments, plus the grid of kernel counts to try.
rocket = RocketClassifier()
parameters = dict(
    random_state=[42],
    num_kernels=[1000, 5000, 10000, 20000, 30000, 40000, 50000],
)

In [27]:
# Grid search over the num_kernels candidates.
rocket_grid = find_best_hyperparameters(clf=rocket, param_grid=parameters, X=X, y=y)

Fitting 10 folds for each of 7 candidates, totalling 70 fits
[CV 1/10] END num_kernels=1000, random_state=42;, score=0.658 total time=  42.6s
[CV 2/10] END num_kernels=1000, random_state=42;, score=0.591 total time=  42.8s
[CV 3/10] END num_kernels=1000, random_state=42;, score=0.765 total time=  38.0s
[CV 4/10] END num_kernels=1000, random_state=42;, score=0.726 total time=  36.6s
[CV 5/10] END num_kernels=1000, random_state=42;, score=0.639 total time=  36.8s
[CV 6/10] END num_kernels=1000, random_state=42;, score=0.777 total time=  37.5s
[CV 7/10] END num_kernels=1000, random_state=42;, score=0.591 total time=  37.0s
[CV 8/10] END num_kernels=1000, random_state=42;, score=0.807 total time=  37.9s
[CV 9/10] END num_kernels=1000, random_state=42;, score=0.776 total time=  36.0s
[CV 10/10] END num_kernels=1000, random_state=42;, score=0.635 total time=  22.6s
[CV 1/10] END num_kernels=5000, random_state=42;, score=0.719 total time= 1.3min
[CV 2/10] END num_kernels=5000, random_state=42

In [28]:
# Five best-ranked ROCKET candidates.
display(rocket_grid.sort_values('rank_test_score').head(n=5))

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_num_kernels,param_random_state,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,split5_test_score,split6_test_score,split7_test_score,split8_test_score,split9_test_score,mean_test_score,std_test_score,rank_test_score
1,2812.703027,8222.386977,4.820005,0.548711,5000,42,"{'num_kernels': 5000, 'random_state': 42}",0.718823,0.583149,0.799159,0.699959,0.657704,0.770218,0.642878,0.864078,0.778608,0.632906,0.714748,0.083217,1
2,166.252612,67.861982,10.130954,0.088238,10000,42,"{'num_kernels': 10000, 'random_state': 42}",0.70374,0.629898,0.828356,0.745823,0.666839,0.758991,0.575917,0.835461,0.714753,0.669933,0.712971,0.078274,2
3,305.09317,4.715266,24.69473,0.600827,20000,42,"{'num_kernels': 20000, 'random_state': 42}",0.710275,0.612928,0.819243,0.680602,0.638688,0.75259,0.584076,0.827153,0.723161,0.671024,0.701974,0.077299,3
6,1995.961953,2811.597823,113.303735,32.926572,50000,42,"{'num_kernels': 50000, 'random_state': 42}",0.675053,0.603991,0.830161,0.695782,0.639276,0.735036,0.548416,0.835461,0.760458,0.650463,0.69741,0.088929,4
5,712.404922,142.151333,66.634522,5.571079,40000,42,"{'num_kernels': 40000, 'random_state': 42}",0.678654,0.590597,0.830161,0.673419,0.64895,0.763514,0.555308,0.837112,0.73224,0.658583,0.696854,0.088941,5


In [29]:
# Save the ROCKET search results to disk without the index column.
rocket_grid.to_csv(path_or_buf='grid_search_rocket.csv', index=False)