In [3]:
import numpy as np
import pandas as pd
import keras
from dataset_generator import TimeWindowDatasetGenerator

from sklearn.calibration import LabelEncoder
from sklearn.discriminant_analysis import StandardScaler
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense
from sklearn.model_selection import train_test_split

from tensorflow.keras.layers import Dropout


In [4]:
data_generator = TimeWindowDatasetGenerator()
labeled_data = data_generator.get_labelled_timewindow_dataframe(student_data_filepath='task/student_data.csv',
                                                                time_window=10,
                                                                label_feature='affect',
                                                                exclude_after_notification=False,
                                                                exclude_after_engagement=False)


labeled_data

Unnamed: 0,sessionId,timestamp,x,y,z,ppgValue,hr,hrIbi,hrStatus,ibiStatus,notification,engagement,affect,context,label_id,label
12763,1,493272,884.0,1564.0,3767.0,2103379.0,,,,,,,,,1,RELAXED
12764,1,493311,900.0,1518.0,3782.0,2104145.0,,,,,,,,,1,RELAXED
12765,1,493351,894.0,1553.0,3734.0,2104953.0,,,,,,,,,1,RELAXED
12766,1,493391,907.0,1488.0,3729.0,2105398.0,,,,,,,,,1,RELAXED
12767,1,493431,861.0,1559.0,3731.0,2105114.0,,,,,,,,,1,RELAXED
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8101814,58,6184903,453.0,552.0,4126.0,2126027.0,,,,,,,,,240,HAPPY
8101815,58,6184943,494.0,607.0,4059.0,2126361.0,,,,,,,,,240,HAPPY
8101816,58,6184983,695.0,841.0,4124.0,2127234.0,,,,,,,,,240,HAPPY
8101817,58,6185023,497.0,591.0,4329.0,2128049.0,,,,,,,,,240,HAPPY


In [5]:
df = labeled_data
def replace_nan_hr(row):
    if not pd.isna(row['hr']) and row['hrStatus'] != 1:  
        mean_hr = df[(df['hrStatus'] == 1)]['hr'].median()
        return mean_hr
    return row['hr']
def replace_nan_hrIbi(row):
    if not pd.isna(row['hrIbi']) and row['ibiStatus'] != 0:  
        mean_hr = df[(df['ibiStatus'] == 1)]['hrIbi'].median()
        return mean_hr
    return row['hrIbi']


In [6]:
labeled_data['hr'] = df.apply(replace_nan_hr, axis=1)
labeled_data['hrIbi'] = df.apply(replace_nan_hrIbi, axis=1)
labeled_data

Unnamed: 0,sessionId,timestamp,x,y,z,ppgValue,hr,hrIbi,hrStatus,ibiStatus,notification,engagement,affect,context,label_id,label
12763,1,493272,884.0,1564.0,3767.0,2103379.0,,,,,,,,,1,RELAXED
12764,1,493311,900.0,1518.0,3782.0,2104145.0,,,,,,,,,1,RELAXED
12765,1,493351,894.0,1553.0,3734.0,2104953.0,,,,,,,,,1,RELAXED
12766,1,493391,907.0,1488.0,3729.0,2105398.0,,,,,,,,,1,RELAXED
12767,1,493431,861.0,1559.0,3731.0,2105114.0,,,,,,,,,1,RELAXED
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8101814,58,6184903,453.0,552.0,4126.0,2126027.0,,,,,,,,,240,HAPPY
8101815,58,6184943,494.0,607.0,4059.0,2126361.0,,,,,,,,,240,HAPPY
8101816,58,6184983,695.0,841.0,4124.0,2127234.0,,,,,,,,,240,HAPPY
8101817,58,6185023,497.0,591.0,4329.0,2128049.0,,,,,,,,,240,HAPPY


In [7]:
labeled_data['hrIbi'].fillna(method='ffill', inplace=True)
labeled_data['hr'].fillna(method='ffill', inplace=True)
#labeled_data.groupby('hr').head()
nan_in_hr=labeled_data['hr'].isna().any()
nan_count_in_hr = labeled_data['hr'].isna().sum()
if nan_in_hr:
    print("There are NaN values in the 'hr' column after filling.",nan_count_in_hr)

if nan_count_in_hr > 0:
    fill_value = 649
    labeled_data.loc[labeled_data['hrIbi'].isna(), 'hrIbi'] = fill_value
    labeled_data.loc[labeled_data['hr'].isna(), 'hr'] = 95

print("Number of NaN values in the 'hr' column after filling:", labeled_data['hrIbi'].isna().sum())

labeled_data

There are NaN values in the 'hr' column after filling. 19
Number of NaN values in the 'hr' column after filling: 0


Unnamed: 0,sessionId,timestamp,x,y,z,ppgValue,hr,hrIbi,hrStatus,ibiStatus,notification,engagement,affect,context,label_id,label
12763,1,493272,884.0,1564.0,3767.0,2103379.0,95.0,649.0,,,,,,,1,RELAXED
12764,1,493311,900.0,1518.0,3782.0,2104145.0,95.0,649.0,,,,,,,1,RELAXED
12765,1,493351,894.0,1553.0,3734.0,2104953.0,95.0,649.0,,,,,,,1,RELAXED
12766,1,493391,907.0,1488.0,3729.0,2105398.0,95.0,649.0,,,,,,,1,RELAXED
12767,1,493431,861.0,1559.0,3731.0,2105114.0,95.0,649.0,,,,,,,1,RELAXED
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8101814,58,6184903,453.0,552.0,4126.0,2126027.0,81.0,917.0,,,,,,,240,HAPPY
8101815,58,6184943,494.0,607.0,4059.0,2126361.0,81.0,917.0,,,,,,,240,HAPPY
8101816,58,6184983,695.0,841.0,4124.0,2127234.0,81.0,917.0,,,,,,,240,HAPPY
8101817,58,6185023,497.0,591.0,4329.0,2128049.0,81.0,917.0,,,,,,,240,HAPPY


In [8]:

def preprocess_data(data):
  
    data = data.drop(columns=[ 'notification', 'engagement','context','affect','hrStatus','ibiStatus',])

    scaler = StandardScaler()
    numerical_cols = ['ppgValue','x', 'y', 'z','hr','hrIbi']
    data[numerical_cols] = scaler.fit_transform(data[numerical_cols].ffill())

    label_encoder = LabelEncoder()
    data['label'] = label_encoder.fit_transform(data['label'])

    return data

In [9]:
processed_data = preprocess_data(labeled_data)
processed_data

Unnamed: 0,sessionId,timestamp,x,y,z,ppgValue,hr,hrIbi,label_id,label
12763,1,493272,0.769742,0.827824,0.279626,-0.198057,1.863799,-0.108217,1,2
12764,1,493311,0.783048,0.800829,0.290323,-0.194905,1.863799,-0.108217,1,2
12765,1,493351,0.778058,0.821369,0.256091,-0.191581,1.863799,-0.108217,1,2
12766,1,493391,0.788869,0.783223,0.252526,-0.189750,1.863799,-0.108217,1,2
12767,1,493431,0.750615,0.824890,0.253952,-0.190918,1.863799,-0.108217,1,2
...,...,...,...,...,...,...,...,...,...,...
8101814,58,6184903,0.411319,0.233920,0.535653,-0.104877,-0.056656,0.466064,240,1
8101815,58,6184943,0.445415,0.266198,0.487871,-0.103503,-0.056656,0.466064,240,1
8101816,58,6184983,0.612568,0.403523,0.534227,-0.099911,-0.056656,0.466064,240,1
8101817,58,6185023,0.447910,0.256808,0.680426,-0.096558,-0.056656,0.466064,240,1


In [14]:
grouped = processed_data.groupby('label_id')
grouped_label =  processed_data.groupby('label')

min = len(grouped.get_group(1))
for i in range(len(grouped)-1):
    if len(grouped.get_group(i+2))<min:
        min = len(grouped.get_group(i+2))

X = []
Y = []
Xa = []
Xb = []
for i in range(len(grouped)):
    X.append(grouped.get_group(i+1)[["x", "y", "z",'ppgValue', 'hr', 'hrIbi']][:min])
    Y.append(grouped.get_group(i+1).iloc[0][["label"]])

X = np.array(X)
Y = np.array(Y)
print(X.shape)
print(Y.shape)

(240, 259, 6)
(240, 1)


In [11]:
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.1, random_state=42)

In [16]:

""" model = Sequential([
    Conv1D(32, kernel_size=3, activation='relu', input_shape=(X.shape[1], X.shape[2])),
    MaxPooling1D(pool_size=2),
    Conv1D(64, kernel_size=3, activation='relu'),
    MaxPooling1D(pool_size=2),
    Flatten(),
    Dense(64, activation='relu'),
    Dense(1, activation='sigmoid') 
]) """

model = Sequential([
    Conv1D(64, kernel_size=3, activation='relu', input_shape=(X.shape[1], X.shape[2])),
    MaxPooling1D(pool_size=2),
    Conv1D(128, kernel_size=3, activation='relu'),
    MaxPooling1D(pool_size=2),
    Conv1D(256, kernel_size=3, activation='relu'),
    MaxPooling1D(pool_size=2),
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),  
    Dense(64, activation='relu'),
    Dense(1, activation='softmax') 
])
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model.fit(X_train, Y_train, epochs=100, batch_size=64, validation_data=(X_test, Y_test))

loss, accuracy = model.evaluate(X_test, Y_test)
print("Test Accuracy:", accuracy)

Epoch 1/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 52ms/step - accuracy: 0.4484 - loss: -1.5396 - val_accuracy: 0.6250 - val_loss: -9.2106
Epoch 2/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step - accuracy: 0.4651 - loss: -24.9521 - val_accuracy: 0.6250 - val_loss: -62.6730
Epoch 3/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step - accuracy: 0.4490 - loss: -148.1980 - val_accuracy: 0.6250 - val_loss: -257.0598
Epoch 4/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step - accuracy: 0.4573 - loss: -531.4123 - val_accuracy: 0.6250 - val_loss: -818.0145
Epoch 5/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step - accuracy: 0.4750 - loss: -1638.1971 - val_accuracy: 0.6250 - val_loss: -2210.9900
Epoch 6/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step - accuracy: 0.4677 - loss: -4331.8662 - val_accuracy: 0.6250 - val_loss: -5345.4878
Epoch 