In [63]:
import pandas as pd

from sklearn.model_selection import StratifiedKFold
from sktime.classification.interval_based import TimeSeriesForestClassifier
from sklearn.metrics import accuracy_score
import numpy as np

In [54]:
# Read the long-format CSV once and keep that frame untouched;
# all later cells work on an independent copy named `df`.
raw_csv_data: pd.DataFrame = pd.read_csv(filepath_or_buffer='time_series.csv')
df: pd.DataFrame = raw_csv_data.copy(deep=True)

In [55]:
# Preview the first five rows to check the column layout.
df.head(n=5)

Unnamed: 0,id,timestamp,date,activity,owner,class,binary_class
0,0,2003-05-08 00:00:00,2003-05-08,0,condition_1,unipolar depressive,depressed
1,0,2003-05-08 00:01:00,2003-05-08,0,condition_1,unipolar depressive,depressed
2,0,2003-05-08 00:02:00,2003-05-08,0,condition_1,unipolar depressive,depressed
3,0,2003-05-08 00:03:00,2003-05-08,0,condition_1,unipolar depressive,depressed
4,0,2003-05-08 00:04:00,2003-05-08,0,condition_1,unipolar depressive,depressed


In [56]:
def to_2D_array(df: pd.DataFrame, n_days: int, variable: str) -> np.ndarray:
    """Reshape a long-format frame into one row of ``variable`` values per id.

    Rows are grouped by the ``id`` column. Ids keep their order of first
    appearance in ``df`` and, within an id, values keep their original row
    order. For input whose rows are already contiguous per id this returns
    exactly what a plain ``reshape`` of the column would.

    Parameters
    ----------
    df : pd.DataFrame
        Long-format data containing an ``id`` column and the ``variable``
        column.
    n_days : int
        Number of rows every id must contribute, i.e. the width of the
        returned array. Despite its name it is used as the row length, not
        as a count of days.
    variable : str
        Name of the column whose values fill the array.

    Returns
    -------
    np.ndarray
        Array of shape ``(number of distinct ids, n_days)``.

    Raises
    ------
    ValueError
        If ``id`` has missing values or any id does not have exactly
        ``n_days`` rows. A blind reshape would otherwise silently mix values
        from different ids whenever the total row count happened to match.
    """
    # codes[i] is the first-appearance rank of the id in row i (-1 for missing).
    codes, unique_ids = pd.factorize(df['id'])
    if (codes < 0).any():
        raise ValueError("column 'id' contains missing values")

    counts = np.bincount(codes, minlength=len(unique_ids))
    mismatch = counts != n_days
    if mismatch.any():
        raise ValueError(
            f"expected {n_days} rows per id, but {int(mismatch.sum())} id(s) differ "
            f"(e.g. id={unique_ids[mismatch][0]!r} has {int(counts[mismatch][0])} rows)"
        )

    # A stable sort gathers each id's rows together without reordering them,
    # so interleaved input is handled and contiguous input is left as is.
    order = np.argsort(codes, kind='stable')
    arr: np.ndarray = df[variable].to_numpy()[order].reshape(len(unique_ids), n_days)
    return arr

In [57]:
# Feature matrix: one row per id, one column per consecutive 'activity' sample.
# 1440 is presumably one day of 1-minute samples (24 * 60) - confirm against the data.
SAMPLES_PER_ID = 1440
X = to_2D_array(df=df, n_days=SAMPLES_PER_ID, variable='activity')
X.shape

(1029, 1440)

In [58]:
# One label per id, read from the first row of each id in order of first
# appearance, so it lines up with the rows of X. Selecting by column name and
# by id removes the hard-coded row count, series length and column position,
# which would break as soon as the CSV gained rows or columns.
y = df.drop_duplicates(subset='id', keep='first')['binary_class'].to_numpy()
y.shape

(1029,)

In [59]:
# 10 shuffled folds that preserve the class proportions; seeded for reproducibility.
# NOTE(review): folds are stratified by label only. If several ids belong to the
# same `owner`, series from one owner may land in both train and test - confirm
# whether a group-aware splitter is needed.
cv = StratifiedKFold(
    n_splits=10,
    shuffle=True,
    random_state=1,
)

In [60]:
# Fit a fresh Time Series Forest on every fold and record its test accuracy.
accuracies = []
for train_index, test_index in cv.split(X, y):
    # Slice features and labels with this fold's positional indices.
    X_train = X[train_index]
    y_train = y[train_index]
    X_test = X[test_index]
    y_test = y[test_index]

    # Same seed for every fold so the run is reproducible.
    tsf = TimeSeriesForestClassifier(random_state=1)
    tsf.fit(X_train, y_train)
    y_pred = tsf.predict(X_test)

    accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
    print(f'Acurácia: {accuracy:.2f}')
    accuracies.append(accuracy)

Acurácia: 0.87
Acurácia: 0.80
Acurácia: 0.80
Acurácia: 0.79
Acurácia: 0.78
Acurácia: 0.82
Acurácia: 0.80
Acurácia: 0.83
Acurácia: 0.78
Acurácia: 0.79


In [61]:
# Average the per-fold accuracies into a single summary figure.
mean_accuracy = np.mean(accuracies)
print(f'Acurácia Média: {mean_accuracy:.2f}')

Acurácia Média: 0.80
