In [4]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Dropout
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder

# Load the cleaned dataset.
df = pd.read_csv('Datasets/cleaned_data.csv')

# The rendered frame contains an "Unnamed: 0" column, which is the name pandas
# gives to a header-less first CSV column (presumably a row index written by
# an earlier `to_csv` -- TODO confirm). It carries no signal, and because the
# modelling cell keeps every column except date/mood it would otherwise be fed
# to the network as a feature. `errors="ignore"` keeps this cell working if
# the CSV is ever regenerated without that column.
df = df.drop(columns=['Unnamed: 0'], errors='ignore')

# Round the mood column to the nearest integer so it can be treated as a class
df['mood'] = df['mood'].round()
df['date'] = pd.to_datetime(df['date'])

# Extract year, month, and day as separate numeric columns
df["year"] = df["date"].dt.year
df["month"] = df["date"].dt.month
df["day"] = df["date"].dt.day

# Perform one-hot encoding on the person identifiers (one column per id)
one_hot_df = pd.get_dummies(df['id'])

# Append the one-hot columns and drop the original categorical column in a
# single non-mutating step (no `inplace=True`)
df = pd.concat([df, one_hot_df], axis=1).drop(columns=['id'])

df

Unnamed: 0,date,mood,circumplex.arousal,circumplex.valence,activity,screen,call,sms,appCat.builtin,appCat.communication,...,AS14.24,AS14.25,AS14.26,AS14.27,AS14.28,AS14.29,AS14.30,AS14.31,AS14.32,AS14.33
0,2014-03-04,7.0,0.500000,1.000000,0.042303,0.000000,5.0,0.0,0.000,0.000,...,0,0,0,0,0,0,0,0,0,0
1,2014-03-05,6.0,0.000000,0.333333,0.005556,0.000000,0.0,0.0,0.000,0.000,...,0,0,0,0,0,0,0,0,0,0
2,2014-03-05,8.0,0.666667,1.333333,0.042303,0.000000,1.0,1.0,0.000,0.000,...,0,0,0,0,0,0,0,0,0,0
3,2014-03-06,7.0,0.500000,0.750000,0.042303,0.000000,1.0,0.0,0.000,0.000,...,0,0,0,0,0,0,0,0,0,0
4,2014-03-06,7.0,-0.600000,0.600000,0.005556,0.000000,0.0,13.0,0.000,0.000,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1300,2014-06-05,7.0,0.000000,0.800000,0.152343,2371.588999,11.0,0.0,1749.277,958.821,...,1,0,0,0,0,0,0,0,0,0
1301,2014-06-06,8.0,0.500000,0.250000,0.189476,2733.332000,16.0,0.0,945.898,1710.924,...,1,0,0,0,0,0,0,0,0,0
1302,2014-06-07,8.0,-1.000000,0.000000,0.081893,5530.377999,0.0,2.0,5027.498,735.965,...,1,0,0,0,0,0,0,0,0,0
1303,2014-06-08,6.0,-1.500000,1.000000,0.079510,1131.197999,0.0,0.0,351.389,375.515,...,1,0,0,0,0,0,0,0,0,0


In [13]:
# Tunable settings for this cell
SEED = 42                  # shared by the data split and the framework RNGs
TEST_SIZE = 0.2            # fraction of rows held out for the final evaluation
VALIDATION_FRACTION = 0.1  # fraction of the *training* rows used for per-epoch monitoring
EPOCHS = 100
BATCH_SIZE = 32

# Seed NumPy and TensorFlow so weight initialisation and dropout masks are
# repeatable when the notebook is re-run from a fresh kernel.
np.random.seed(SEED)
tf.random.set_seed(SEED)

# Define the input and target variables
X = df.drop(["date", "mood"], axis=1)  # drop the date and target columns
y = df["mood"]

# Encode the target as consecutive class indices 0..num_classes-1.
# The encoder is fitted on the full target *before* splitting: fitting on
# y_train only makes `le.transform(y_test)` raise ValueError whenever a mood
# value happens to occur only in the test split. Learning the label
# vocabulary is not information leakage -- no feature statistics are involved.
le = LabelEncoder()
y_encoded = le.fit_transform(y)
num_classes = len(le.classes_)

# Split the data into training and test sets (same rows as splitting the raw
# target, since the split depends only on the row count and random_state)
X_train, X_test, y_train, y_test = train_test_split(
    X, y_encoded, test_size=TEST_SIZE, random_state=SEED
)

# Scale features using StandardScaler; statistics come from the training rows
# only and are then applied unchanged to the test rows
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

# Reshape input data to the (samples, timesteps, features) layout the LSTM
# layers expect; every row is treated as a sequence of length 1
X_train = np.reshape(X_train, (X_train.shape[0], 1, X_train.shape[1]))
X_test = np.reshape(X_test, (X_test.shape[0], 1, X_test.shape[1]))

# Define RNN model architecture: four stacked LSTM layers, each followed by
# dropout, then a softmax over the mood classes
model = Sequential()
model.add(LSTM(units=100, return_sequences=True, input_shape=(X_train.shape[1], X_train.shape[2])))
model.add(Dropout(0.5))
model.add(LSTM(units=100, return_sequences=True))
model.add(Dropout(0.5))
model.add(LSTM(units=100, return_sequences=True))
model.add(Dropout(0.5))
model.add(LSTM(units=100))  # last recurrent layer returns only the final state
model.add(Dropout(0.5))
# Output width follows the number of classes actually present in the data
# instead of a hard-coded 10
model.add(Dense(units=num_classes, activation='softmax'))

# Compile model; the sparse loss matches the integer-encoded targets
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Train model. Validation metrics are computed on a slice of the training
# data so the test set stays untouched until the single evaluation below.
history = model.fit(
    X_train,
    y_train,
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    validation_split=VALIDATION_FRACTION,
)

# Evaluate model on test data
loss, accuracy = model.evaluate(X_test, y_test)
print('Test loss:', loss)
print('Test accuracy:', accuracy)


Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100