In [15]:
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.utils import timeseries_dataset_from_array
import matplotlib.pyplot as plt
import seaborn as sns
from MultiSeriesWindowsGenerator import MultiSeriesWindowsGenerator
import IPython.display

2023-04-16 10:28:11.343218: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [16]:
pd.set_option("display.max_row", 200)

In [17]:
df = pd.read_csv("data/all_data_aggr_nonan.csv", index_col=0)

In [18]:
df.mood = df.mood.round(0)

In [20]:
LABELS = ['mood']
REGRESSORS = ['weekday', 'circumplex.arousal', 'circumplex.valence',
              'activity', 'screen', 'call', 'sms', 'appCat.builtin',
              'appCat.communication', 'appCat.entertainment', 'appCat.finance',
              'appCat.game', 'appCat.office', 'appCat.other', 'appCat.social',
              'appCat.travel', 'appCat.unknown', 'appCat.utilities', 'appCat.weather']

DATE = 'days'  # always correct
IN_STEPS = 7  # use 7 days
OUT_STEPS = 1  # to predict 1 day in the future
GROUPBY = ['subject_id']
BATCH_SIZE = 8

In [21]:
n = len(df)
train_series = df.groupby(GROUPBY, as_index=False, group_keys=False).apply(
    lambda x: x.iloc[:int(len(x) * 0.7)]).reset_index(drop=True)
val_series = df.groupby(GROUPBY, as_index=False, group_keys=False).apply(
    lambda x: x.iloc[int(len(x) * 0.7):int(len(x) * 0.9)]).reset_index(drop=True)
test_series = df.groupby(GROUPBY, as_index=False, group_keys=False).apply(
    lambda x: x.iloc[int(len(x) * 0.9):]).reset_index(drop=True)

test_window = MultiSeriesWindowsGenerator(
    input_width=IN_STEPS, label_width=OUT_STEPS, shift=1, batch_size=BATCH_SIZE, GROUPBY=GROUPBY,
    label_columns=LABELS, regressor_columns=REGRESSORS, DATE=DATE, LABELS=LABELS)

test_window.update_datasets(train_series, val_series, test_series)

2023-04-16 10:28:20.089151: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [22]:
a, b = test_window.example


In [23]:
lstm_model_classifier = tf.keras.models.Sequential([
    tf.keras.layers.LSTM(64, return_sequences=True),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.LSTM(64, return_sequences=False),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(units=11, activation='softmax')
])

MAX_EPOCHS = 5

In [24]:
def compile_and_fit_classifier(model, window, patience=2):
    early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss',
                                                      patience=patience,
                                                      mode='min')

    # model.compile(loss=tf.keras.losses.MeanSquaredError(),
    #               optimizer=tf.keras.optimizers.Adam(learning_rate=0.001, clipnorm=1),
    #               metrics=[tf.keras.metrics.MeanAbsoluteError()])
    model.compile(loss=tf.keras.losses.SparseCategoricalCrossentropy(),
              optimizer=tf.keras.optimizers.Adam(learning_rate=0.001, clipnorm=1),
              metrics=[tf.keras.metrics.SparseCategoricalAccuracy(), tf.keras.losses.MeanSquaredError(),tf.keras.losses.MeanAbsoluteError()])


    history = model.fit(window.train, epochs=MAX_EPOCHS,
                        validation_data=window.val,
                        callbacks=[early_stopping])
    return history

In [25]:
history = compile_and_fit_classifier(lstm_model_classifier, test_window)
val_performance = {}
performance = {}

probs = lstm_model_classifier.predict(test_window.val)
preds = probs.argmax(axis=-1)
val_performance['LSTM'] = lstm_model_classifier.evaluate(test_window.val, verbose=0)

probs = lstm_model_classifier.predict(test_window.test)
preds = probs.argmax(axis=-1)
performance['LSTM'] = lstm_model_classifier.evaluate(test_window.test, verbose=0)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [26]:
print("The accuracy on the test set is:",round(performance["LSTM"][1], 4))

The accuracy on the test set is: 0.7704


Construct CI for performance

In [43]:
n_simulations = 10
accuracies = np.empty(n_simulations)
for i in range(n_simulations):
    history = compile_and_fit_classifier(lstm_model_classifier, test_window)
    probs = lstm_model_classifier.predict(test_window.test)
    preds = probs.argmax(axis=-1)
    print(lstm_model_classifier.evaluate(test_window.test, verbose=0))
    accuracies[i]= lstm_model_classifier.evaluate(test_window.test, verbose=0)[1]

Epoch 1/5
Epoch 2/5
Epoch 3/5
[1.0010699033737183, 0.7462962865829468]
Epoch 1/5
Epoch 2/5
Epoch 3/5
[1.009655475616455, 0.7444444298744202]
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
[1.0373154878616333, 0.7425925731658936]
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
[1.0144188404083252, 0.7407407164573669]
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
[0.986910879611969, 0.7462962865829468]
Epoch 1/5
Epoch 2/5
Epoch 3/5
[1.0532596111297607, 0.729629635810852]
Epoch 1/5
Epoch 2/5
Epoch 3/5
[1.1142174005508423, 0.7370370626449585]
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
[1.1462810039520264, 0.7314814925193787]
Epoch 1/5
Epoch 2/5
Epoch 3/5
[1.1943678855895996, 0.7277777791023254]
Epoch 1/5
Epoch 2/5
Epoch 3/5
[1.1534501314163208, 0.7314814925193787]


In [44]:
accuracies

array([0.74629629, 0.74444443, 0.74259257, 0.74074072, 0.74629629,
       0.72962964, 0.73703706, 0.73148149, 0.72777778, 0.73148149])