In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, SimpleRNN, Dropout
from tensorflow.keras.utils import to_categorical
import cleaning
import reinstancing


In [None]:
# Load the raw CSV through the project's `cleaning` helper, then rebuild it
# into instances with the project's `reinstancing` helper.
# NOTE(review): what `period` controls is defined inside `reinstancing`;
# presumably the length of the look-back window — confirm against that module.
RAW_CSV = 'dataset_mood_smartphone.csv'
PERIOD = 5

df_raw, users = cleaning.loadData(RAW_CSV)
df_inst = reinstancing.reinstanceDataset(df_raw, users, period=PERIOD)

# Preview the first rows of the reinstanced data.
df_inst.head()


In [None]:
# Feature engineering: derive three extra columns on a copy of `df_inst`
# (the reinstanced frame itself is left untouched), then drop every row that
# still contains a missing value.
df_feat = df_inst.assign(
    # log(1 + x) transforms of the `screen` and `activity` columns
    screen_log=lambda frame: np.log1p(frame['screen']),
    activity_log=lambda frame: np.log1p(frame['activity']),
    # the +1 in the denominator avoids dividing by zero when `sms` is 0
    call_sms_ratio=lambda frame: frame['call'] / (frame['sms'] + 1),
).dropna()

df_feat.head()


In [None]:
# Create 5 mood categories from `nextday` using quantile-based bins; with
# labels=False, qcut returns integer bin codes rather than interval labels.
# NOTE(review): the quantile edges are computed on the whole frame, i.e. before
# the train/test split performed further down — confirm this is acceptable.
mood_codes = pd.qcut(df_feat['nextday'], q=5, labels=False)
df_feat = df_feat.assign(mood_cat=mood_codes)

# Relative frequency of each category.
class_shares = df_feat['mood_cat'].value_counts(normalize=True)
print(class_shares)


In [None]:
# Features are every column except the raw target (`nextday`) and its binned
# form (`mood_cat`); the binned form is the classification target.
NON_FEATURE_COLS = ['nextday', 'mood_cat']
y = df_feat['mood_cat']
X = df_feat.drop(columns=NON_FEATURE_COLS)

# Hold out one third of the rows for testing (2/3 train, 1/3 test).
# The split is stratified on y and uses a fixed seed so it is repeatable.
split_options = {'test_size': 1/3, 'stratify': y, 'random_state': 42}
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)


In [None]:
# Standardise the features. The scaler is fitted on the training rows only and
# then applied, unchanged, to both the training and the test rows.
scaler = StandardScaler().fit(X_train)
X_train_scaled, X_test_scaled = (scaler.transform(part) for part in (X_train, X_test))


In [None]:
# Random Forest: 100 trees with a fixed seed, trained on the scaled training
# features and used to predict the held-out test rows.
rf = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train_scaled, y_train)
y_pred_rf = rf.predict(X_test_scaled)

# Per-class precision / recall / F1 on the test set.
rf_report = classification_report(y_test, y_pred_rf)
print("Random Forest Classification Report:")
print(rf_report)


In [None]:
# Prepare data for the RNN.
# Insert a length-1 axis in the middle so that every row becomes a one-step
# sequence: (rows, features) -> (rows, 1, features).
X_train_rnn = X_train_scaled[:, np.newaxis, :]
X_test_rnn = X_test_scaled[:, np.newaxis, :]

# One-hot encode the class labels into 5 columns.
y_train_cat, y_test_cat = (
    to_categorical(labels, num_classes=5) for labels in (y_train, y_test)
)


In [None]:
# Build RNN model.
# Seed Python's `random`, NumPy and TensorFlow before any layer is created so
# that weight initialisation (and the dropout / shuffling that follow during
# training) is repeatable on "Restart & Run All". 42 matches the random_state
# already used for the train/test split and the Random Forest.
tf.keras.utils.set_random_seed(42)

# Architecture: one SimpleRNN layer (64 units, ReLU) reading sequences of
# length 1, a dropout of 0.5, and a 5-unit softmax output (one unit per mood
# category).
model = Sequential([
    SimpleRNN(64, input_shape=(1, X_train_scaled.shape[1]), activation='relu'),
    Dropout(0.5),
    Dense(5, activation='softmax')
])

# categorical_crossentropy is the loss for one-hot encoded targets.
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.summary()


In [None]:
# Train the RNN model: 20 epochs, mini-batches of 32 samples.
# validation_split=0.2 holds back a fifth of the training arrays for per-epoch
# validation; verbose=2 logs one line per epoch.
fit_options = dict(epochs=20, batch_size=32, validation_split=0.2, verbose=2)
history = model.fit(X_train_rnn, y_train_cat, **fit_options)


In [None]:
# Evaluate the RNN model on the held-out test set.
print("RNN Evaluation:")
model.evaluate(X_test_rnn, y_test_cat, verbose=2)

# Turn the per-class softmax scores into hard labels by taking, for each row,
# the index of the highest score.
class_scores = model.predict(X_test_rnn)
y_pred_rnn = class_scores.argmax(axis=1)

rnn_report = classification_report(y_test, y_pred_rnn)
print("RNN Classification Report:")
print(rnn_report)
