### Importing required Libraries

In [195]:
!pip install sktime



In [196]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.linear_model import RidgeClassifierCV
from sklearn.pipeline import make_pipeline
from sktime.transformations.panel.rocket import Rocket

### Data Cleaning 
After setting the timestamps to index, we combined the 55 datasets to facilitate the process of feeding it into the model.

In [197]:
condition = {a: pd.read_csv("condition_{x}.csv".format(x = str(a))) for a in range(1, 24)}
control = {b+23: pd.read_csv("control_{x}.csv".format(x = str(b))) for b in range(1, 33)}

In [198]:
for i in range(1, 24):
    condition[i].drop(columns = 'date', inplace = True)
    condition[i]["timestamp"] = pd.to_datetime(condition[i]["timestamp"])
    condition[i] = condition[i].set_index('timestamp')
    
for j in range(24, 56):
    control[j].drop(columns = 'date', inplace = True)
    control[j]["timestamp"] = pd.to_datetime(control[j]["timestamp"])
    control[j] = control[j].set_index('timestamp')    

In [199]:
for i in range(1, 24):
    index = 0
    for j in condition[i]['activity']:
        if(j==0):
            index += 1
        else:
            break       
    print(index)
    condition[i] = condition[i][index:]
#Removing the zeros at the start    

1
0
0
0
25
101
0
0
0
0
1
3
5
24
27
1
22
38
0
0
1
0
0


In [202]:
for i in range(24, 56):
    index = 0
    for j in control[i]['activity']:
        if(j==0):
            index += 1
        else:
            break       
    print(index)
    control[i] = control[i][index:]
#Removing the zeros at the start    

0
0
22
0
1
0
0
0
0
0
0
0
0
0
0
0
0
0
0
40
40
0
67
16
0
0
0
0
0
0
0
8


In [203]:
condition.update(control)

Slicing the arrays so that they can be combined smoothly.

In [204]:
array=[np.array(condition[x]['activity'][:19299]) for x in range(1,56)]
lengths=[len(x) for x in array]
max(lengths)

19299

In [205]:
z=pd.DataFrame()
z['x']=array
print(z.shape)
z.head()

(55, 1)


Unnamed: 0,x
0,"[143, 0, 20, 166, 160, 17, 646, 978, 306, 277,..."
1,"[1468, 1006, 468, 306, 143, 205, 410, 349, 566..."
2,"[7, 8, 7, 8, 7, 8, 8, 7, 8, 8, 8, 8, 8, 8, 8, ..."
3,"[510, 637, 598, 251, 93, 180, 199, 526, 251, 5..."
4,"[2, 2, 127, 161, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,..."


In [206]:
X=z
y=[]
from sklearn.model_selection import train_test_split
for i in range(23):
  y.append(1)
for i in range(32):
  y.append(0)
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.1)

### Training the Model
Finally, the moment we've all been waiting for, we use the ROCKET model on the timeseries to extract the features. The new datasets are fed into a Rigid Classifier to classify them as Depressed (1) or not (0).

In [207]:
rocket = Rocket(num_kernels=100, random_state=111) 
rocket.fit(X_train)
X_train_transform = rocket.transform(X_train)
X_train_transform.shape

(49, 200)

In [208]:
classifier = RidgeClassifierCV(alphas=np.logspace(-3, 3, 10), normalize=True)
classifier.fit(X_train_transform, y_train)

If you wish to scale the data, use Pipeline with a StandardScaler in a preprocessing stage. To reproduce the previous behavior:

from sklearn.pipeline import make_pipeline

model = make_pipeline(StandardScaler(with_mean=False), _RidgeGCV())

If you wish to pass a sample_weight parameter, you need to pass it as a fit parameter to each step of the pipeline as follows:

kwargs = {s[0] + '__sample_weight': sample_weight for s in model.steps}
model.fit(X, y, **kwargs)




RidgeClassifierCV(alphas=array([1.00000000e-03, 4.64158883e-03, 2.15443469e-02, 1.00000000e-01,
       4.64158883e-01, 2.15443469e+00, 1.00000000e+01, 4.64158883e+01,
       2.15443469e+02, 1.00000000e+03]),
                  normalize=True)

In [209]:
X_test_transform = rocket.transform(X_test)
classifier.score(X_test_transform,y_test)

0.8333333333333334

In [210]:
y_test
y_pred=classifier.predict(X_test_transform)

### Testing the model validity with K-fold cross-validation

In [211]:
from sklearn.model_selection import KFold
kfold = KFold(n_splits=11,shuffle=True)

In [212]:
y_2=pd.DataFrame(y)
accuracies=[]
for train_index, test_index in kfold.split(X):
  # splitting
    print("TRAIN:", train_index, "TEST:", test_index)
    X_train, X_test = X.loc[train_index], X.loc[test_index]
    y_train, y_test = y_2.loc[train_index], y_2.loc[test_index]

    # rocket transformation
    rocket = Rocket(num_kernels=100, random_state=111) 
    rocket.fit(X_train)
    X_train_transform = rocket.transform(X_train)
    print(X_train_transform.shape)

    # classifier
    classifier = RidgeClassifierCV(alphas=np.logspace(-3, 3, 10), normalize=True)
    classifier.fit(X_train_transform, y_train[0])
    X_test_transform = rocket.transform(X_test)
    ac=classifier.score(X_test_transform,y_test[0])
    print(ac)
    accuracies.append(ac)


TRAIN: [ 0  1  2  3  4  5  6  7  8  9 12 13 14 15 16 17 18 19 20 22 23 24 25 26
 27 28 29 30 31 32 33 34 35 36 37 38 39 41 42 43 44 46 47 48 49 50 51 52
 53 54] TEST: [10 11 21 40 45]
(50, 200)


If you wish to scale the data, use Pipeline with a StandardScaler in a preprocessing stage. To reproduce the previous behavior:

from sklearn.pipeline import make_pipeline

model = make_pipeline(StandardScaler(with_mean=False), _RidgeGCV())

If you wish to pass a sample_weight parameter, you need to pass it as a fit parameter to each step of the pipeline as follows:

kwargs = {s[0] + '__sample_weight': sample_weight for s in model.steps}
model.fit(X, y, **kwargs)




0.8
TRAIN: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 14 15 16 17 18 19 20 21 22 23 24
 25 26 27 28 29 32 33 34 35 37 38 39 40 41 42 43 45 46 47 48 49 50 51 52
 53 54] TEST: [13 30 31 36 44]
(50, 200)


If you wish to scale the data, use Pipeline with a StandardScaler in a preprocessing stage. To reproduce the previous behavior:

from sklearn.pipeline import make_pipeline

model = make_pipeline(StandardScaler(with_mean=False), _RidgeGCV())

If you wish to pass a sample_weight parameter, you need to pass it as a fit parameter to each step of the pipeline as follows:

kwargs = {s[0] + '__sample_weight': sample_weight for s in model.steps}
model.fit(X, y, **kwargs)




1.0
TRAIN: [ 0  1  2  3  4  5  6  7  9 10 11 12 13 16 17 18 19 20 21 22 23 24 25 26
 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 48 49 51 52
 53 54] TEST: [ 8 14 15 47 50]
(50, 200)


If you wish to scale the data, use Pipeline with a StandardScaler in a preprocessing stage. To reproduce the previous behavior:

from sklearn.pipeline import make_pipeline

model = make_pipeline(StandardScaler(with_mean=False), _RidgeGCV())

If you wish to pass a sample_weight parameter, you need to pass it as a fit parameter to each step of the pipeline as follows:

kwargs = {s[0] + '__sample_weight': sample_weight for s in model.steps}
model.fit(X, y, **kwargs)




0.6
TRAIN: [ 1  2  3  4  5  6  8  9 10 11 12 13 14 15 17 18 20 21 22 23 24 25 26 27
 28 29 30 31 32 33 34 35 36 37 38 39 40 42 43 44 45 46 47 48 49 50 51 52
 53 54] TEST: [ 0  7 16 19 41]
(50, 200)


If you wish to scale the data, use Pipeline with a StandardScaler in a preprocessing stage. To reproduce the previous behavior:

from sklearn.pipeline import make_pipeline

model = make_pipeline(StandardScaler(with_mean=False), _RidgeGCV())

If you wish to pass a sample_weight parameter, you need to pass it as a fit parameter to each step of the pipeline as follows:

kwargs = {s[0] + '__sample_weight': sample_weight for s in model.steps}
model.fit(X, y, **kwargs)




1.0
TRAIN: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 27 28 30 31 32 33 34 35 36 37 38 39 40 41 42 44 45 46 47 48 49 50 51 52
 53 54] TEST: [24 25 26 29 43]
(50, 200)


If you wish to scale the data, use Pipeline with a StandardScaler in a preprocessing stage. To reproduce the previous behavior:

from sklearn.pipeline import make_pipeline

model = make_pipeline(StandardScaler(with_mean=False), _RidgeGCV())

If you wish to pass a sample_weight parameter, you need to pass it as a fit parameter to each step of the pipeline as follows:

kwargs = {s[0] + '__sample_weight': sample_weight for s in model.steps}
model.fit(X, y, **kwargs)




0.8
TRAIN: [ 0  1  2  5  6  7  8  9 10 11 12 13 14 15 16 17 19 21 22 23 24 25 26 27
 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 50 51 52
 53 54] TEST: [ 3  4 18 20 49]
(50, 200)


If you wish to scale the data, use Pipeline with a StandardScaler in a preprocessing stage. To reproduce the previous behavior:

from sklearn.pipeline import make_pipeline

model = make_pipeline(StandardScaler(with_mean=False), _RidgeGCV())

If you wish to pass a sample_weight parameter, you need to pass it as a fit parameter to each step of the pipeline as follows:

kwargs = {s[0] + '__sample_weight': sample_weight for s in model.steps}
model.fit(X, y, **kwargs)




1.0
TRAIN: [ 0  1  2  3  4  6  7  8  9 10 11 13 14 15 16 18 19 20 21 22 23 24 25 26
 27 28 29 30 31 33 34 35 36 37 38 40 41 42 43 44 45 46 47 48 49 50 51 52
 53 54] TEST: [ 5 12 17 32 39]
(50, 200)


If you wish to scale the data, use Pipeline with a StandardScaler in a preprocessing stage. To reproduce the previous behavior:

from sklearn.pipeline import make_pipeline

model = make_pipeline(StandardScaler(with_mean=False), _RidgeGCV())

If you wish to pass a sample_weight parameter, you need to pass it as a fit parameter to each step of the pipeline as follows:

kwargs = {s[0] + '__sample_weight': sample_weight for s in model.steps}
model.fit(X, y, **kwargs)




0.4
TRAIN: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 23 24
 25 26 27 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 49 50
 52 54] TEST: [22 28 48 51 53]
(50, 200)


If you wish to scale the data, use Pipeline with a StandardScaler in a preprocessing stage. To reproduce the previous behavior:

from sklearn.pipeline import make_pipeline

model = make_pipeline(StandardScaler(with_mean=False), _RidgeGCV())

If you wish to pass a sample_weight parameter, you need to pass it as a fit parameter to each step of the pipeline as follows:

kwargs = {s[0] + '__sample_weight': sample_weight for s in model.steps}
model.fit(X, y, **kwargs)




1.0
TRAIN: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27 28 29 30 31 32 34 36 38 39 40 41 43 44 45 46 47 48 49 50 51
 53 54] TEST: [33 35 37 42 52]
(50, 200)


If you wish to scale the data, use Pipeline with a StandardScaler in a preprocessing stage. To reproduce the previous behavior:

from sklearn.pipeline import make_pipeline

model = make_pipeline(StandardScaler(with_mean=False), _RidgeGCV())

If you wish to pass a sample_weight parameter, you need to pass it as a fit parameter to each step of the pipeline as follows:

kwargs = {s[0] + '__sample_weight': sample_weight for s in model.steps}
model.fit(X, y, **kwargs)




0.4
TRAIN: [ 0  2  3  4  5  6  7  8 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25
 26 27 28 29 30 31 32 33 35 36 37 38 39 40 41 42 43 44 45 47 48 49 50 51
 52 53] TEST: [ 1  9 34 46 54]
(50, 200)


If you wish to scale the data, use Pipeline with a StandardScaler in a preprocessing stage. To reproduce the previous behavior:

from sklearn.pipeline import make_pipeline

model = make_pipeline(StandardScaler(with_mean=False), _RidgeGCV())

If you wish to pass a sample_weight parameter, you need to pass it as a fit parameter to each step of the pipeline as follows:

kwargs = {s[0] + '__sample_weight': sample_weight for s in model.steps}
model.fit(X, y, **kwargs)




0.6
TRAIN: [ 0  1  3  4  5  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 24 25 26
 28 29 30 31 32 33 34 35 36 37 39 40 41 42 43 44 45 46 47 48 49 50 51 52
 53 54] TEST: [ 2  6 23 27 38]
(50, 200)


If you wish to scale the data, use Pipeline with a StandardScaler in a preprocessing stage. To reproduce the previous behavior:

from sklearn.pipeline import make_pipeline

model = make_pipeline(StandardScaler(with_mean=False), _RidgeGCV())

If you wish to pass a sample_weight parameter, you need to pass it as a fit parameter to each step of the pipeline as follows:

kwargs = {s[0] + '__sample_weight': sample_weight for s in model.steps}
model.fit(X, y, **kwargs)




0.8


In [217]:
df = pd.DataFrame(accuracies)
df.value_counts()

1.0    4
0.8    3
0.6    2
0.4    2
dtype: int64

In [213]:
df = pd.DataFrame(accuracies)
df.mean()

0    0.763636
dtype: float64

### Hyperparameter Tuning

We have hypertuned the model using GridSearchCV with 5 fold cross-validation.

In [225]:
nums = np.arange(100,1001,100)
scores = []
for i in nums:
  # Create and train a new model instance.
  rocket = Rocket(num_kernels= i, random_state=111) 
  rocket.fit(X_train)

  X_train_transform = rocket.transform(X_train)
  # X_train_transform.shape

  classifier = RidgeClassifierCV(alphas=np.logspace(-3, 3, 10), normalize=True)
  classifier.fit(X_train_transform, y_train)

  X_test_transform = rocket.transform(X_test)
  score = classifier.score(X_test_transform,y_test)
  scores.append(score)
  print(score)


  y = column_or_1d(y, warn=True)
If you wish to scale the data, use Pipeline with a StandardScaler in a preprocessing stage. To reproduce the previous behavior:

from sklearn.pipeline import make_pipeline

model = make_pipeline(StandardScaler(with_mean=False), _RidgeGCV())

If you wish to pass a sample_weight parameter, you need to pass it as a fit parameter to each step of the pipeline as follows:

kwargs = {s[0] + '__sample_weight': sample_weight for s in model.steps}
model.fit(X, y, **kwargs)




0.8


  y = column_or_1d(y, warn=True)
If you wish to scale the data, use Pipeline with a StandardScaler in a preprocessing stage. To reproduce the previous behavior:

from sklearn.pipeline import make_pipeline

model = make_pipeline(StandardScaler(with_mean=False), _RidgeGCV())

If you wish to pass a sample_weight parameter, you need to pass it as a fit parameter to each step of the pipeline as follows:

kwargs = {s[0] + '__sample_weight': sample_weight for s in model.steps}
model.fit(X, y, **kwargs)




0.4


  y = column_or_1d(y, warn=True)
If you wish to scale the data, use Pipeline with a StandardScaler in a preprocessing stage. To reproduce the previous behavior:

from sklearn.pipeline import make_pipeline

model = make_pipeline(StandardScaler(with_mean=False), _RidgeGCV())

If you wish to pass a sample_weight parameter, you need to pass it as a fit parameter to each step of the pipeline as follows:

kwargs = {s[0] + '__sample_weight': sample_weight for s in model.steps}
model.fit(X, y, **kwargs)




0.4


  y = column_or_1d(y, warn=True)
If you wish to scale the data, use Pipeline with a StandardScaler in a preprocessing stage. To reproduce the previous behavior:

from sklearn.pipeline import make_pipeline

model = make_pipeline(StandardScaler(with_mean=False), _RidgeGCV())

If you wish to pass a sample_weight parameter, you need to pass it as a fit parameter to each step of the pipeline as follows:

kwargs = {s[0] + '__sample_weight': sample_weight for s in model.steps}
model.fit(X, y, **kwargs)




0.6


  y = column_or_1d(y, warn=True)
If you wish to scale the data, use Pipeline with a StandardScaler in a preprocessing stage. To reproduce the previous behavior:

from sklearn.pipeline import make_pipeline

model = make_pipeline(StandardScaler(with_mean=False), _RidgeGCV())

If you wish to pass a sample_weight parameter, you need to pass it as a fit parameter to each step of the pipeline as follows:

kwargs = {s[0] + '__sample_weight': sample_weight for s in model.steps}
model.fit(X, y, **kwargs)




0.4


  y = column_or_1d(y, warn=True)
If you wish to scale the data, use Pipeline with a StandardScaler in a preprocessing stage. To reproduce the previous behavior:

from sklearn.pipeline import make_pipeline

model = make_pipeline(StandardScaler(with_mean=False), _RidgeGCV())

If you wish to pass a sample_weight parameter, you need to pass it as a fit parameter to each step of the pipeline as follows:

kwargs = {s[0] + '__sample_weight': sample_weight for s in model.steps}
model.fit(X, y, **kwargs)




0.4


  y = column_or_1d(y, warn=True)
If you wish to scale the data, use Pipeline with a StandardScaler in a preprocessing stage. To reproduce the previous behavior:

from sklearn.pipeline import make_pipeline

model = make_pipeline(StandardScaler(with_mean=False), _RidgeGCV())

If you wish to pass a sample_weight parameter, you need to pass it as a fit parameter to each step of the pipeline as follows:

kwargs = {s[0] + '__sample_weight': sample_weight for s in model.steps}
model.fit(X, y, **kwargs)




0.6


  y = column_or_1d(y, warn=True)
If you wish to scale the data, use Pipeline with a StandardScaler in a preprocessing stage. To reproduce the previous behavior:

from sklearn.pipeline import make_pipeline

model = make_pipeline(StandardScaler(with_mean=False), _RidgeGCV())

If you wish to pass a sample_weight parameter, you need to pass it as a fit parameter to each step of the pipeline as follows:

kwargs = {s[0] + '__sample_weight': sample_weight for s in model.steps}
model.fit(X, y, **kwargs)




0.4


  y = column_or_1d(y, warn=True)
If you wish to scale the data, use Pipeline with a StandardScaler in a preprocessing stage. To reproduce the previous behavior:

from sklearn.pipeline import make_pipeline

model = make_pipeline(StandardScaler(with_mean=False), _RidgeGCV())

If you wish to pass a sample_weight parameter, you need to pass it as a fit parameter to each step of the pipeline as follows:

kwargs = {s[0] + '__sample_weight': sample_weight for s in model.steps}
model.fit(X, y, **kwargs)




0.6


  y = column_or_1d(y, warn=True)
If you wish to scale the data, use Pipeline with a StandardScaler in a preprocessing stage. To reproduce the previous behavior:

from sklearn.pipeline import make_pipeline

model = make_pipeline(StandardScaler(with_mean=False), _RidgeGCV())

If you wish to pass a sample_weight parameter, you need to pass it as a fit parameter to each step of the pipeline as follows:

kwargs = {s[0] + '__sample_weight': sample_weight for s in model.steps}
model.fit(X, y, **kwargs)




0.6


In [226]:
scores

[0.8, 0.4, 0.4, 0.6, 0.4, 0.4, 0.6, 0.4, 0.6, 0.6]

As we can see the optimal parameter is num_kernels = 100, with the an accuracy of 0.8.

### Saving the Model for Deployment


In [227]:
# Create and train a new model instance.
rocket = Rocket(num_kernels=100, random_state=111) 
rocket.fit(X_train)

X_train_transform = rocket.transform(X_train)
X_train_transform.shape

classifier = RidgeClassifierCV(alphas=np.logspace(-3, 3, 10), normalize=True)
classifier.fit(X_train_transform, y_train)

X_test_transform = rocket.transform(X_test)
print(classifier.score(X_test_transform,y_test))

  y = column_or_1d(y, warn=True)
If you wish to scale the data, use Pipeline with a StandardScaler in a preprocessing stage. To reproduce the previous behavior:

from sklearn.pipeline import make_pipeline

model = make_pipeline(StandardScaler(with_mean=False), _RidgeGCV())

If you wish to pass a sample_weight parameter, you need to pass it as a fit parameter to each step of the pipeline as follows:

kwargs = {s[0] + '__sample_weight': sample_weight for s in model.steps}
model.fit(X, y, **kwargs)




0.8


In [228]:
from pickle import dump
# save the model
dump(rocket, open('rocket_new.pkl', 'wb'))
dump(classifier, open('classifier_new.pkl', 'wb'))

### Extra Models

In [229]:
from sktime.classification.dictionary_based import ContractableBOSS

In [231]:
Shape = ContractableBOSS()
Shape.fit(X_train,y_train[0])

Shape.score(X_test,y_test)

0.6

In [233]:
from sktime.classification.interval_based import RandomIntervalSpectralForest

In [236]:
nums = np.arange(100,501,50)
scores = []
for i in nums:
  # Create and train a new model instance.
  Rise = RandomIntervalSpectralForest(n_estimators = i)
  Rise.fit(X_train,y_train[0]) 

  score = Rise.score(X_test,y_test)
  scores.append(score)
  print(score)

0.6
0.6
0.6
0.6
0.6
0.6
0.6
0.6
0.6
