# Research Skills: Spatiotemporal Data Analysis
## Take-home Assignment 3
Sharon Ong, Department of Cognitive Science and Artificial Intelligence, 
Tilburg University
Academic Year 2023-2024 

Your First Name: Christophe

Your Last Name: Friezas Gonçalves

Your Student ID: 2059012

In [None]:
import warnings

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Kept ahead of the sklearn/sktime imports, as in the original ordering, so
# warnings raised while importing those libraries are suppressed as well.
warnings.filterwarnings('ignore')

from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import KFold
from sklearn.model_selection import train_test_split

from sktime.classification.distance_based import KNeighborsTimeSeriesClassifier
from sktime.classification.feature_based import Catch22Classifier
from sktime.classification.kernel_based import RocketClassifier
from sktime.split import temporal_train_test_split

# 3A First part: Data loading and baseline

In [None]:
# Read both arrays from the challenge archive. The context manager closes the
# underlying .npz file handle as soon as the arrays are in memory.
with np.load("ts_challenge.npz") as data:
    print(data.files)
    X = data['arr_0']  # used as the model inputs
    y = data['arr_1']  # used as the targets

# Hold out 20% of the samples for evaluation; the fixed seed makes the split
# reproducible. `train_test_split` is sklearn.model_selection's helper
# (imported with the other sklearn utilities in the notebook's import cell).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.20, random_state=42
)

# Baseline: k-nearest-neighbours time-series classifier with k=5 and every
# other setting left at the library default.
clf = KNeighborsTimeSeriesClassifier(n_neighbors=5)
clf.fit(X_train, y_train)

y_pred = clf.predict(X_test)
print(accuracy_score(y_test, y_pred))

# 2) GridSearchCV KNN

In [None]:
# Candidate neighbourhood sizes for the DTW-based k-NN classifier.
param_grid = dict(n_neighbors=[1, 3, 7])
clf = KNeighborsTimeSeriesClassifier(distance="dtw")

# Exhaustive search over the grid, scored with 5-fold cross-validation on the
# training split.
five_fold = KFold(n_splits=5)
grid_search = GridSearchCV(estimator=clf, param_grid=param_grid, cv=five_fold)
grid_search.fit(X_train, y_train)

print(grid_search.best_estimator_)

# Score the search result on the held-out split.
y_pred = grid_search.predict(X_test)
test_accuracy = accuracy_score(y_test, y_pred)
print(test_accuracy)

# 3) Classification of own choice

# All tested classifiers and parameter searches (kept in a markdown cell so the notebook runs faster)

# Search space for the Rocket family: kernel count, transform variant and
# dilation limit.
param_grid = dict(
    num_kernels=[500, 1000],
    rocket_transform=["rocket", "minirocket", "multirocket"],
    max_dilations_per_kernel=[8, 16],
)
clf = RocketClassifier(random_state=42)

# 5-fold cross-validated grid search on the training split.
five_fold = KFold(n_splits=5)
grid_search = GridSearchCV(estimator=clf, param_grid=param_grid, cv=five_fold)
grid_search.fit(X_train, y_train)
print(grid_search.best_estimator_)

# Score the search result on the held-out split.
y_pred = grid_search.predict(X_test)
test_accuracy = accuracy_score(y_test, y_pred)
print(test_accuracy)

# Compare Catch22 features fed into random forests of increasing size.
for n_trees in [7, 9, 200, 1000]:
    clf = Catch22Classifier(
        # Seeded like the Rocket and gradient-boosting models in this notebook
        # so the printed accuracies can be reproduced between runs.
        estimator=RandomForestClassifier(n_estimators=n_trees, random_state=42)
    )
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    print(f"Accuracy for {n_trees} estimators :{accuracy_score(y_test, y_pred)}")
    
# Sweep learning rate and number of boosting stages for Catch22 features fed
# into gradient boosting. The tree depth is fixed; naming it lets the report
# line print the value actually used (the message previously referenced an
# undefined variable and raised NameError after the first fit).
max_depth = 1
for learning_rate in [0.1, 0.01, 0.001]:
    for n_estimators in [1000, 2000, 5000, 10000]:
        clf = Catch22Classifier(
            estimator=GradientBoostingClassifier(
                random_state=42,
                learning_rate=learning_rate,
                n_estimators=n_estimators,
                max_depth=max_depth,
            )
        )
        clf.fit(X_train, y_train)
        y_pred = clf.predict(X_test)
        print(f"Accuracy for {learning_rate} learning rate,{n_estimators} estimators and {max_depth} depth :{accuracy_score(y_test, y_pred)}")

In [None]:
# Configuration chosen for the final model; the same values are echoed in the
# report line below.
learning_rate, n_estimators, max_depth = 0.1, 900, 1

booster = GradientBoostingClassifier(
    random_state=42,
    learning_rate=learning_rate,
    n_estimators=n_estimators,
    max_depth=max_depth,
)
# `clf` stays bound at notebook level; the hold-out cell predicts with it.
clf = Catch22Classifier(estimator=booster)
clf.fit(X_train, y_train)

y_pred = clf.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy for {learning_rate} learning rate,{n_estimators} estimators and {max_depth} depth :{accuracy}")

# Hold-out set

In [None]:
# Read the hold-out inputs. Only 'arr_0' is taken from the archive, and the
# context manager closes the .npz file handle once the array is in memory.
# Note that this rebinds X_test, replacing the validation split from earlier.
with np.load('ts_test.npz') as test_data:
    X_test = test_data['arr_0']

# Predict with whichever classifier is currently bound to `clf`.
y_pred = clf.predict(X_test)
print(y_pred)

# NOTE(review): np.savetxt uses its default float format here; confirm that
# the expected submission format accepts labels written that way.
np.savetxt("ypred.csv", y_pred, delimiter=",")