In [19]:
import numpy as np
import pandas as pd
import keras
from dataset_generator import TimeWindowDatasetGenerator
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers import LSTM, Dense
from sklearn.preprocessing import LabelEncoder


In [20]:
data_generator = TimeWindowDatasetGenerator()
labeled_data = data_generator.get_labelled_timewindow_dataframe(student_data_filepath='task/student_data.csv',
                                                                time_window=10,
                                                                label_feature='affect',
                                                                exclude_after_notification=False,
                                                                exclude_after_engagement=False)


labeled_data

Unnamed: 0,sessionId,timestamp,x,y,z,ppgValue,hr,hrIbi,hrStatus,ibiStatus,notification,engagement,affect,context,label_id,label
12763,1,493272,884.0,1564.0,3767.0,2103379.0,,,,,,,,,1,RELAXED
12764,1,493311,900.0,1518.0,3782.0,2104145.0,,,,,,,,,1,RELAXED
12765,1,493351,894.0,1553.0,3734.0,2104953.0,,,,,,,,,1,RELAXED
12766,1,493391,907.0,1488.0,3729.0,2105398.0,,,,,,,,,1,RELAXED
12767,1,493431,861.0,1559.0,3731.0,2105114.0,,,,,,,,,1,RELAXED
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8101814,58,6184903,453.0,552.0,4126.0,2126027.0,,,,,,,,,240,HAPPY
8101815,58,6184943,494.0,607.0,4059.0,2126361.0,,,,,,,,,240,HAPPY
8101816,58,6184983,695.0,841.0,4124.0,2127234.0,,,,,,,,,240,HAPPY
8101817,58,6185023,497.0,591.0,4329.0,2128049.0,,,,,,,,,240,HAPPY


In [21]:
df = labeled_data
def replace_nan_hr(row):
    if not pd.isna(row['hr']) and row['hrStatus'] != 1:  
        mean_hr = df[(df['hrStatus'] == 1)]['hr'].median()
        return mean_hr
    return row['hr']
def replace_nan_hrIbi(row):
    if not pd.isna(row['hrIbi']) and row['ibiStatus'] != 0:  
        mean_hr = df[(df['ibiStatus'] == 1)]['hrIbi'].median()
        return mean_hr
    return row['hrIbi']


In [22]:
labeled_data['hr'] = df.apply(replace_nan_hr, axis=1)
labeled_data['hrIbi'] = df.apply(replace_nan_hrIbi, axis=1)
labeled_data

Unnamed: 0,sessionId,timestamp,x,y,z,ppgValue,hr,hrIbi,hrStatus,ibiStatus,notification,engagement,affect,context,label_id,label
12763,1,493272,884.0,1564.0,3767.0,2103379.0,,,,,,,,,1,RELAXED
12764,1,493311,900.0,1518.0,3782.0,2104145.0,,,,,,,,,1,RELAXED
12765,1,493351,894.0,1553.0,3734.0,2104953.0,,,,,,,,,1,RELAXED
12766,1,493391,907.0,1488.0,3729.0,2105398.0,,,,,,,,,1,RELAXED
12767,1,493431,861.0,1559.0,3731.0,2105114.0,,,,,,,,,1,RELAXED
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8101814,58,6184903,453.0,552.0,4126.0,2126027.0,,,,,,,,,240,HAPPY
8101815,58,6184943,494.0,607.0,4059.0,2126361.0,,,,,,,,,240,HAPPY
8101816,58,6184983,695.0,841.0,4124.0,2127234.0,,,,,,,,,240,HAPPY
8101817,58,6185023,497.0,591.0,4329.0,2128049.0,,,,,,,,,240,HAPPY


In [23]:
labeled_data['hrIbi'].fillna(method='ffill', inplace=True)
labeled_data['hr'].fillna(method='ffill', inplace=True)
#labeled_data.groupby('hr').head()
nan_in_hr=labeled_data['hr'].isna().any()
nan_count_in_hr = labeled_data['hr'].isna().sum()
if nan_in_hr:
    print("There are NaN values in the 'hr' column after filling.",nan_count_in_hr)

if nan_count_in_hr > 0:
    fill_value = 649
    labeled_data.loc[labeled_data['hrIbi'].isna(), 'hrIbi'] = fill_value
    labeled_data.loc[labeled_data['hr'].isna(), 'hr'] = 95

print("Number of NaN values in the 'hr' column after filling:", labeled_data['hrIbi'].isna().sum())

labeled_data

There are NaN values in the 'hr' column after filling. 19
Number of NaN values in the 'hr' column after filling: 0


Unnamed: 0,sessionId,timestamp,x,y,z,ppgValue,hr,hrIbi,hrStatus,ibiStatus,notification,engagement,affect,context,label_id,label
12763,1,493272,884.0,1564.0,3767.0,2103379.0,95.0,649.0,,,,,,,1,RELAXED
12764,1,493311,900.0,1518.0,3782.0,2104145.0,95.0,649.0,,,,,,,1,RELAXED
12765,1,493351,894.0,1553.0,3734.0,2104953.0,95.0,649.0,,,,,,,1,RELAXED
12766,1,493391,907.0,1488.0,3729.0,2105398.0,95.0,649.0,,,,,,,1,RELAXED
12767,1,493431,861.0,1559.0,3731.0,2105114.0,95.0,649.0,,,,,,,1,RELAXED
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8101814,58,6184903,453.0,552.0,4126.0,2126027.0,81.0,917.0,,,,,,,240,HAPPY
8101815,58,6184943,494.0,607.0,4059.0,2126361.0,81.0,917.0,,,,,,,240,HAPPY
8101816,58,6184983,695.0,841.0,4124.0,2127234.0,81.0,917.0,,,,,,,240,HAPPY
8101817,58,6185023,497.0,591.0,4329.0,2128049.0,81.0,917.0,,,,,,,240,HAPPY


In [24]:
def preprocess_data(data):
  
    data = data.drop(columns=[ 'notification', 'engagement','context','affect','hrStatus','ibiStatus',])

    scaler = StandardScaler()
    numerical_cols = ['ppgValue','x', 'y', 'z','hr','hrIbi']
    data[numerical_cols] = scaler.fit_transform(data[numerical_cols].ffill())

    label_encoder = LabelEncoder()
    data['label'] = label_encoder.fit_transform(data['label'])

    return data

In [25]:
processed_data = preprocess_data(labeled_data)
processed_data

Unnamed: 0,sessionId,timestamp,x,y,z,ppgValue,hr,hrIbi,label_id,label
12763,1,493272,0.769742,0.827824,0.279626,-0.198057,1.863799,-0.108217,1,2
12764,1,493311,0.783048,0.800829,0.290323,-0.194905,1.863799,-0.108217,1,2
12765,1,493351,0.778058,0.821369,0.256091,-0.191581,1.863799,-0.108217,1,2
12766,1,493391,0.788869,0.783223,0.252526,-0.189750,1.863799,-0.108217,1,2
12767,1,493431,0.750615,0.824890,0.253952,-0.190918,1.863799,-0.108217,1,2
...,...,...,...,...,...,...,...,...,...,...
8101814,58,6184903,0.411319,0.233920,0.535653,-0.104877,-0.056656,0.466064,240,1
8101815,58,6184943,0.445415,0.266198,0.487871,-0.103503,-0.056656,0.466064,240,1
8101816,58,6184983,0.612568,0.403523,0.534227,-0.099911,-0.056656,0.466064,240,1
8101817,58,6185023,0.447910,0.256808,0.680426,-0.096558,-0.056656,0.466064,240,1


In [26]:
grouped = processed_data.groupby('label_id')
grouped_label =  processed_data.groupby('label')

min = len(grouped.get_group(1))
for i in range(len(grouped)-1):
    if len(grouped.get_group(i+2))<min:
        min = len(grouped.get_group(i+2))

X = []
Y = []
Xa = []
Xb = []
for i in range(len(grouped)):
    X.append(grouped.get_group(i+1)[["x", "y", "z","ppgValue"]][:min])
    Y.append(grouped.get_group(i+1).iloc[0][["label"]])
    Xa.append(grouped.get_group(i+1)[["hr"]][:min])
    Xb.append(grouped.get_group(i+1)[["hrIbi"]][:min])
    

X = np.array(X)
Y = np.array(Y)
Xa = np.array(Xa)
Xb = np.array(Xb)
print(X.shape)
print(Y.shape)
print(Xa.shape)
print(Xb.shape)

(240, 259, 4)
(240, 1)
(240, 259, 1)
(240, 259, 1)


In [27]:
X_train, X_test,Xa_train, Xa_test,Xb_train, Xb_test, Y_train, Y_test = train_test_split(X, Xa, Xb, Y, test_size=0.1, random_state=42)
print(X_train.shape)
print(Y_train.shape)
print(X_test.shape)
print(Y_test.shape)
print(Xa_train.shape)
print(Xa_test.shape)

(216, 259, 4)
(216, 1)
(24, 259, 4)
(24, 1)
(216, 259, 1)
(24, 259, 1)


In [28]:
""" model = Sequential()
model.add(LSTM(64,input_shape=(X_train.shape[1], X_train.shape[2]), return_sequences=True))
model.add(LSTM(64))
model.add(Dense(256, activation='sigmoid'))
model.add(Dense(64, activation='sigmoid'))
model.add(Dense(4, activation='softmax'))
model.summary() """

" model = Sequential()\nmodel.add(LSTM(64,input_shape=(X_train.shape[1], X_train.shape[2]), return_sequences=True))\nmodel.add(LSTM(64))\nmodel.add(Dense(256, activation='sigmoid'))\nmodel.add(Dense(64, activation='sigmoid'))\nmodel.add(Dense(4, activation='softmax'))\nmodel.summary() "

In [29]:
""" adam_optimizer = keras.optimizers.AdamW(learning_rate=0.0001)
model.compile(loss='sparse_categorical_crossentropy', optimizer=adam_optimizer, metrics=['accuracy']) """

" adam_optimizer = keras.optimizers.AdamW(learning_rate=0.0001)\nmodel.compile(loss='sparse_categorical_crossentropy', optimizer=adam_optimizer, metrics=['accuracy']) "

In [30]:
""" model.fit(X_train, Y_train, epochs=100, batch_size=32, validation_split=0.1) """

' model.fit(X_train, Y_train, epochs=100, batch_size=32, validation_split=0.1) '

In [31]:
""" Y_pred = model.predict(X_test)

Y_pred = np.argmax(Y_pred, axis=1)

from sklearn.metrics import accuracy_score, classification_report

accuracy = accuracy_score(Y_test, Y_pred)
print("Accuracy:", accuracy)

# You can also print a classification report for more detailed evaluation metrics
print(classification_report(Y_test, Y_pred)) """

' Y_pred = model.predict(X_test)\n\nY_pred = np.argmax(Y_pred, axis=1)\n\nfrom sklearn.metrics import accuracy_score, classification_report\n\naccuracy = accuracy_score(Y_test, Y_pred)\nprint("Accuracy:", accuracy)\n\n# You can also print a classification report for more detailed evaluation metrics\nprint(classification_report(Y_test, Y_pred)) '

### Multi Input Model Definition

In [32]:
# define two sets of inputs
inputA = keras.Input(shape=(X_train.shape[1], X_train.shape[2]))
inputB = keras.Input(shape=(Xa_train.shape[1],))
inputC = keras.Input(shape=(Xb_train.shape[1],))
# the first branch operates on the first input
x = LSTM(64,input_shape=(X_train.shape[1], X_train.shape[2]), return_sequences=True)(inputA)
x = LSTM(64)(x)
x = Dense(8, activation="sigmoid")(x)
x = Dense(4, activation="sigmoid")(x)
x = keras.Model(inputs=inputA, outputs=x)
# the second branch opreates on the second input
y = Dense(8, activation="sigmoid")(inputB)
y = Dense(4, activation="sigmoid")(y)
y = keras.Model(inputs=inputB, outputs=y)

v = Dense(8, activation="sigmoid")(inputC)
v = Dense(4, activation="sigmoid")(v)
v = keras.Model(inputs=inputC, outputs=v)
# combine the output of the two branches
combined = keras.layers.concatenate([x.output, y.output, v.output])
# apply a FC layer and then a regression prediction on the
# combined outputs
z = Dense(8, activation="sigmoid")(combined)
z = Dense(4, activation="softmax")(z)
# our model will accept the inputs of the two branches and
# then output a single value
model = keras.Model(inputs=[x.input, y.input, v.input], outputs=z)
model.summary()

  super().__init__(**kwargs)


In [33]:
adam_optimizer = keras.optimizers.Adam(learning_rate=0.0001)
model.compile(loss='sparse_categorical_crossentropy', optimizer=adam_optimizer, metrics=['accuracy'])

In [34]:
model.fit([X_train, Xa_train, Xb_train], Y_train, epochs=100, batch_size=32, validation_split=0.1)

Epoch 1/100


[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 113ms/step - accuracy: 0.4221 - loss: 1.4084 - val_accuracy: 0.4545 - val_loss: 1.3854
Epoch 2/100
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 78ms/step - accuracy: 0.4449 - loss: 1.4028 - val_accuracy: 0.4545 - val_loss: 1.3837
Epoch 3/100
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 78ms/step - accuracy: 0.5035 - loss: 1.3418 - val_accuracy: 0.4545 - val_loss: 1.3818
Epoch 4/100
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 77ms/step - accuracy: 0.4383 - loss: 1.3969 - val_accuracy: 0.4545 - val_loss: 1.3796
Epoch 5/100
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 76ms/step - accuracy: 0.4619 - loss: 1.3883 - val_accuracy: 0.4545 - val_loss: 1.3774
Epoch 6/100
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 77ms/step - accuracy: 0.4683 - loss: 1.3722 - val_accuracy: 0.4545 - val_loss: 1.3751
Epoch 7/100
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[

<keras.src.callbacks.history.History at 0x7fb21c5ecfa0>

In [35]:
Y_pred = model.predict([X_test,Xa_test,Xb_test])

Y_pred = np.argmax(Y_pred, axis=1)

from sklearn.metrics import accuracy_score, classification_report

accuracy = accuracy_score(Y_test, Y_pred)
print("Accuracy:", accuracy)

# You can also print a classification report for more detailed evaluation metrics
print(classification_report(Y_test, Y_pred))

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 202ms/step
Accuracy: 0.625
              precision    recall  f1-score   support

         1.0       0.62      1.00      0.77        15
         2.0       0.00      0.00      0.00         8
         3.0       0.00      0.00      0.00         1

    accuracy                           0.62        24
   macro avg       0.21      0.33      0.26        24
weighted avg       0.39      0.62      0.48        24



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [36]:
keras.utils.plot_model(model)

AttributeError: module 'pydot' has no attribute 'InvocationException'