In [1]:
import time
import random
import numpy as np
import pandas as pd

from sklearn.model_selection import StratifiedKFold
from sktime.classification.interval_based import TimeSeriesForestClassifier
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import accuracy_score

import tensorflow as tf
from keras.optimizers import Adam
from keras.models import Sequential
from keras.layers import LSTM, Dense, Input

In [2]:
# Read the recordings from disk once and leave the parsed frame untouched;
# later cells work on `df`, an independent deep copy of it.
raw_csv_data: pd.DataFrame = pd.read_csv('time_series.csv')
df: pd.DataFrame = raw_csv_data.copy(deep=True)

In [3]:
# Preview the first five rows to check the column layout.
df.head(n=5)

Unnamed: 0,id,timestamp,date,activity,owner,class,binary_class
0,0,2003-05-08 00:00:00,2003-05-08,0,condition_1,unipolar depressive,depressed
1,0,2003-05-08 00:01:00,2003-05-08,0,condition_1,unipolar depressive,depressed
2,0,2003-05-08 00:02:00,2003-05-08,0,condition_1,unipolar depressive,depressed
3,0,2003-05-08 00:03:00,2003-05-08,0,condition_1,unipolar depressive,depressed
4,0,2003-05-08 00:04:00,2003-05-08,0,condition_1,unipolar depressive,depressed


In [4]:
def to_2D_array(df: pd.DataFrame, n_days: int, variable: str) -> np.ndarray:
    """Reshape one long-format column into a 2D array with one row per id.

    The column is cut into consecutive runs of ``n_days`` values, in the
    frame's existing row order; no sorting or grouping is done here, so the
    rows belonging to one id must already be contiguous and in sequence.

    Parameters
    ----------
    df : pd.DataFrame
        Frame containing an ``id`` column and the column named by ``variable``.
    n_days : int
        Number of values per output row. Despite the name, this is simply the
        row length (this notebook passes 1440).
    variable : str
        Name of the column to reshape.

    Returns
    -------
    np.ndarray
        Array of shape ``(number of distinct ids, n_days)``.

    Raises
    ------
    ValueError
        If the number of rows in ``df`` is not exactly
        ``number of distinct ids * n_days``.
    """
    # dropna=False keeps the count identical to len(df['id'].unique()).
    n_users: int = df['id'].nunique(dropna=False)

    # Fail with a message that names the column and the sizes involved,
    # instead of numpy's generic "cannot reshape" error.
    if len(df) != n_users * n_days:
        raise ValueError(
            f"cannot reshape column {variable!r}: {len(df)} rows is not "
            f"{n_users} ids x {n_days} values per id"
        )

    arr: np.ndarray = df[variable].to_numpy().reshape(n_users, n_days)
    return arr

In [5]:
# One row per id, 1440 activity values per row (1440 = 24 * 60; the timestamps
# shown by df.head() are one minute apart).
X = to_2D_array(df, n_days=1440, variable='activity')
X.shape

(1029, 1440)

In [6]:
# Scale every activity value into [0, 1] with a single global min/max: the
# matrix is flattened to one column so the scaler sees one feature, then
# restored to its (samples, timesteps) shape.
# NOTE(review): the scaler is fitted on all samples before the cross-validation
# split, so values from the test folds contribute to the min/max.
n_samples, n_timesteps = X.shape
X_reshaped = X.reshape(-1, 1)

scaler = MinMaxScaler()
X = scaler.fit_transform(X_reshaped).reshape(n_samples, n_timesteps)

In [7]:
# One label per row of X. Each row of X is a run of X.shape[1] consecutive
# frame rows, so the label is read from the first frame row of every run.
# The stride comes from X and the column is selected by name, rather than
# hard-coding the sample count, the run length and the column position.
y = df['binary_class'].to_numpy()[::X.shape[1]]
assert y.shape[0] == X.shape[0], 'expected exactly one label per row of X'
y.shape

(1029,)

In [8]:
def format_seconds(time_in_seconds: float) -> str:
    """Render a duration in seconds as ``HH:MM:SS``.

    Fractional seconds are truncated. Each field is zero-padded to at least
    two digits; the hours field grows beyond two digits for long durations.
    """
    whole_hours, within_hour = divmod(time_in_seconds, 3600)

    hours: int = int(whole_hours)
    minutes: int = int(within_hour // 60)
    seconds: int = int(time_in_seconds % 60)

    return f"{hours:02d}:{minutes:02d}:{seconds:02d}"

In [9]:
# Splitter used by the evaluation loop: 10 shuffled, class-stratified folds
# with a fixed random_state.
cv = StratifiedKFold(
    n_splits=10,
    random_state=42,
    shuffle=True,
)

In [10]:
# Cross-validated comparison of a Time Series Forest and a two-layer LSTM on
# the folds produced by `cv`. Per-fold accuracies are printed and collected in
# `accuracies_tsf` / `accuracies_lstm`.
# time.process_time() reports CPU time of this process, not wall-clock time.
start_time: float = time.process_time()

seed = 42  # identical in every fold, so it is defined once outside the loop

accuracies_tsf = []
accuracies_lstm = []
for train_index, test_index in cv.split(X, y):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]

    # --- Time Series Forest: fitted on the 2D arrays and the original labels.
    tsf = TimeSeriesForestClassifier(random_state=42, n_jobs=-1)
    tsf.fit(X_train, y_train)
    y_pred = tsf.predict(X_test)

    accuracy_tsf = accuracy_score(y_test, y_pred)
    accuracies_tsf.append(accuracy_tsf)
    print(f'Accuracy TSF: {accuracy_tsf:.2f}')

    # --- LSTM ---
    # A new Keras model is built in every fold. Clear Keras' global state
    # first so that state left by the previous fold's model does not pile up.
    tf.keras.backend.clear_session()

    # Re-seed all three random number sources before building the network.
    np.random.seed(seed)
    tf.random.set_seed(seed)
    random.seed(seed)

    model = Sequential()
    model.add(Input(shape=(X_train.shape[1], 1)))
    model.add(LSTM(50, return_sequences=True))
    model.add(LSTM(50))
    model.add(Dense(1, activation='sigmoid'))

    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

    # The LSTM input declared above is (timesteps, 1), so add a trailing
    # feature axis of size 1 to the 2D arrays.
    X_train_reshaped = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
    X_test_reshaped = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))

    # Binary targets for the sigmoid output: 1 for 'depressed', 0 otherwise.
    y_train_binary = (y_train == 'depressed').astype(int)
    y_test_binary = (y_test == 'depressed').astype(int)

    model.fit(X_train_reshaped, y_train_binary, epochs=10, batch_size=32, verbose=0)

    # verbose=0 keeps the progress bar out of the cell output; ravel() turns
    # the (n, 1) predictions into the same 1D shape as y_test_binary.
    y_pred_lstm = (
        (model.predict(X_test_reshaped, verbose=0) > 0.5).astype("int32").ravel()
    )

    accuracy_lstm = accuracy_score(y_test_binary, y_pred_lstm)
    accuracies_lstm.append(accuracy_lstm)
    print(f'Accuracy LSTM: {accuracy_lstm:.2f}')

end_time: float = time.process_time()
cpu_execution_time: float = end_time - start_time
print(f'CPU Execution time: {format_seconds(cpu_execution_time)}')

Accuracy TSF: 0.80
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 294ms/step
Accuracy LSTM: 0.65
Accuracy TSF: 0.88
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 398ms/step
Accuracy LSTM: 0.65
Accuracy TSF: 0.77
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 297ms/step
Accuracy LSTM: 0.65
Accuracy TSF: 0.82
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 312ms/step
Accuracy LSTM: 0.65
Accuracy TSF: 0.78
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 303ms/step
Accuracy LSTM: 0.65
Accuracy TSF: 0.77
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 325ms/step
Accuracy LSTM: 0.65
Accuracy TSF: 0.77
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 308ms/step
Accuracy LSTM: 0.65
Accuracy TSF: 0.83
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 317ms/step
Accuracy LSTM: 0.65
Accuracy TSF: 0.83
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 400ms/step
Accuracy LST

In [11]:
# Average the per-fold accuracies collected by the cross-validation loop.
mean_accuracy_tsf = np.mean(accuracies_tsf)
mean_accuracy_lstm = np.mean(accuracies_lstm)

print(f'Acurácia Média TSF: {mean_accuracy_tsf:.2f}')
print(f'Acurácia Média LSTM: {mean_accuracy_lstm:.2f}')

Acurácia Média TSF: 0.80
Acurácia Média LSTM: 0.65
