In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import SimpleRNN, Dropout, Dense
import cleaning
import reinstancing


In [None]:
# Load the raw dataset and reinstance it.
# NOTE(review): `cleaning` and `reinstancing` are project modules not shown
# here; the exact schema of what they return should be confirmed there.
DATA_FILE = 'dataset_mood_smartphone.csv'
REINSTANCE_PERIOD = 5  # forwarded as `period`; presumably a window length -- TODO confirm

df_raw, users = cleaning.loadData(DATA_FILE)
df_inst = reinstancing.reinstanceDataset(df_raw, users, period=REINSTANCE_PERIOD)
df_inst.head()


In [None]:
# Feature engineering: add derived columns, drop rows with missing values,
# then bin the `nextday` target into five quantile classes.
df_feat = (
    df_inst
    .assign(
        # log1p(x) == log(1 + x), so a zero input stays at zero
        screen_log=lambda frame: np.log1p(frame['screen']),
        activity_log=lambda frame: np.log1p(frame['activity']),
        # +1 in the denominator avoids dividing by zero when `sms` is 0
        call_sms_ratio=lambda frame: frame['call'] / (frame['sms'] + 1),
    )
    # rows with NaN in any column (original or derived) are removed here
    .dropna()
    # labels=False returns integer bin codes instead of Interval labels
    .assign(mood_cat=lambda frame: pd.qcut(frame['nextday'], 5, labels=False))
)
df_feat.head()


In [None]:
# Build the feature matrix / label vector and hold out one third for testing.
# Both the continuous target and its binned version are excluded from X so the
# label cannot leak into the features.
TARGET_COLUMNS = ['nextday', 'mood_cat']
X = df_feat.drop(TARGET_COLUMNS, axis=1)
y = df_feat['mood_cat']

# stratify=y keeps the class proportions the same in both partitions
split_options = dict(test_size=1/3, stratify=y, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)


In [None]:
# Standardise features. The scaler's statistics come from the training
# partition only; the fitted scaler is then applied to both partitions.
scaler = StandardScaler().fit(X_train)
X_train_s = scaler.transform(X_train)
X_test_s = scaler.transform(X_test)


In [None]:
# Random Forest hyperparameter tuning via randomized search with 3-fold CV.
rf = RandomForestClassifier(random_state=42)
param_dist = {
    'n_estimators': [50, 100, 200, 300],
    'max_depth': [None, 10, 20, 30],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4],
    # 'auto' was only an alias of 'sqrt' for classifiers; it was deprecated in
    # scikit-learn 1.1 and removed in 1.3, where every candidate using it fails
    # to fit and is scored NaN. 'log2' gives a genuinely distinct second option.
    'max_features': ['sqrt', 'log2'],
    'bootstrap': [True, False]
}
# 20 candidates are sampled from the grid above; random_state fixes which ones.
rf_random = RandomizedSearchCV(
    rf, param_distributions=param_dist,
    n_iter=20, cv=3, verbose=2, n_jobs=-1,
    scoring='accuracy', random_state=42
)
rf_random.fit(X_train_s, y_train)
print('Best RF params:', rf_random.best_params_)
print('Best RF CV accuracy:', rf_random.best_score_)
# best_estimator_ is available because refit defaults to True, i.e. the best
# configuration has been refit on the whole training partition.
y_pred_rf = rf_random.best_estimator_.predict(X_test_s)
print(classification_report(y_test, y_pred_rf))


In [None]:
# Prepare data for RNN: insert a length-1 axis in the middle so each 2-D
# (samples, features) array becomes 3-D (samples, 1, features).
X_train_rnn = X_train_s[:, np.newaxis, :]
X_test_rnn = X_test_s[:, np.newaxis, :]


In [None]:
# RNN hyperparameter tuning: model factory.
# Feature count is read once from the scaled training matrix; the factory
# below picks it up from module scope.
n_features = X_train_s.shape[1]


def create_model(units=64, dropout_rate=0.5, optimizer='adam'):
    """Build and compile a single-layer SimpleRNN classifier.

    Args:
        units: Number of units in the SimpleRNN layer.
        dropout_rate: Rate passed to the Dropout layer after the RNN.
        optimizer: Optimizer passed to ``compile``.

    Returns:
        A compiled ``Sequential`` model with a 5-way softmax output, trained
        with sparse categorical cross-entropy and reporting accuracy.
    """
    network = Sequential([
        # input is (1, n_features): one step carrying the full feature vector
        SimpleRNN(units, activation='relu', input_shape=(1, n_features)),
        Dropout(dropout_rate),
        # 5 outputs, matching the 5 quantile bins requested for the label
        Dense(5, activation='softmax'),
    ])
    network.compile(
        optimizer=optimizer,
        # "sparse" variant: labels are integer class ids, not one-hot vectors
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )
    return network
# Wrap the model factory as a scikit-learn estimator so it can be searched.
rnn = KerasClassifier(build_fn=create_model, verbose=0)

# units / dropout_rate / optimizer match create_model's parameters;
# batch_size / epochs are the training-time settings being searched.
param_dist_rnn = dict(
    units=[32, 64],
    dropout_rate=[0.2, 0.5],
    optimizer=['adam', 'rmsprop'],
    batch_size=[16, 32],
    epochs=[10, 20],
)

# No `scoring` is given, so the search ranks candidates with the estimator's
# own score method.
search_options = dict(n_iter=10, cv=3, verbose=2, n_jobs=1, random_state=42)
rnn_random = RandomizedSearchCV(
    rnn, param_distributions=param_dist_rnn, **search_options
)
rnn_random.fit(X_train_rnn, y_train)

print('Best RNN params:', rnn_random.best_params_)
print('Best RNN CV accuracy:', rnn_random.best_score_)

# Evaluate the refit best configuration on the held-out partition.
best_rnn = rnn_random.best_estimator_
y_pred_rnn = best_rnn.predict(X_test_rnn)
print(classification_report(y_test, y_pred_rnn))
