In [11]:
import pandas as pd
from sklearn.model_selection import train_test_split, KFold, cross_val_score
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Load the dataset; the path is resolved relative to the notebook's working directory.
data = pd.read_csv('HR_data.csv')

# Turn the textual 'Round' / 'Phase' labels into integers with vectorized
# string operations: 'Round' keeps the token after the first underscore,
# 'Phase' keeps its final character.
data['Round'] = data['Round'].str.split('_').str[1].astype(int)
data['Phase'] = data['Phase'].str[-1].astype(int)

# Integer-encode the remaining categorical columns. Each column gets its own
# encoder so both fitted mappings stay available afterwards (refitting a single
# encoder on 'Cohort' would discard the 'Puzzler' mapping). `le` keeps its
# original role: the encoder that ends up fitted on 'Cohort'.
puzzler_encoder = LabelEncoder()
data['Puzzler'] = puzzler_encoder.fit_transform(data['Puzzler'])
le = LabelEncoder()
data['Cohort'] = le.fit_transform(data['Cohort'])

# HR summary statistics plus the encoded design columns are the features;
# the 'Frustrated' column is the regression target.
X = data[['HR_Mean', 'HR_Median', 'HR_std', 'HR_Min', 'HR_Max', 'HR_AUC', 'Round', 'Phase', 'Puzzler', 'Cohort']]
y = data['Frustrated']

# Standardize the features.
# NOTE(review): the scaler is fitted on every row, so any later train/test split
# or cross-validation on X_scaled has already seen statistics of the held-out
# rows. Consider fitting the scaler inside each training fold (e.g. a Pipeline).
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

X_scaled[:5], y[:5]  # Quick look at the first scaled rows and targets

(array([[ 0.26152899,  0.39351481, -0.63365935,  0.82713357, -0.44725716,
         -0.13233303,  0.4472136 ,  1.22474487,  1.        , -0.8660254 ],
        [-0.67230486, -0.52634573, -0.82813559, -0.04311034, -0.85192419,
         -0.41022421,  0.4472136 ,  0.        ,  1.        , -0.8660254 ],
        [-0.35262561, -0.18093376, -0.65380752,  0.06513604, -0.69477195,
         -0.48938678,  0.4472136 , -1.22474487,  1.        , -0.8660254 ],
        [ 0.38877105,  0.37865838, -0.46694313,  0.69752278, -0.32546417,
          0.51981131, -0.4472136 ,  1.22474487,  1.        , -0.8660254 ],
        [ 0.17555954, -0.03360751,  0.00149711,  0.44114977,  0.08548895,
          0.13747606, -0.4472136 ,  0.        ,  1.        , -0.8660254 ]]),
 0    1
 1    5
 2    0
 3    1
 4    5
 Name: Frustrated, dtype: int64)

In [15]:
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasRegressor

def create_ann():
    """Build and compile the small feed-forward network used as the ANN regressor.

    The network has two ReLU hidden layers (64 units, then 32 units) followed
    by a single-unit output layer with no activation specified. The input
    width is read from the notebook-level ``X_scaled`` array, so that array
    must exist before this function is called.

    Returns:
        The compiled ``Sequential`` model (mean-squared-error loss, Adam optimizer).
    """
    n_features = X_scaled.shape[1]
    network = Sequential([
        Dense(64, input_dim=n_features, activation='relu'),
        Dense(32, activation='relu'),
        Dense(1),
    ])
    network.compile(optimizer='adam', loss='mean_squared_error')
    return network

# Wrap the Keras network as a scikit-learn style regressor and fit it on the
# full scaled dataset.
ann_training_options = {'epochs': 100, 'batch_size': 10, 'verbose': 0}
ann_model = KerasRegressor(build_fn=create_ann, **ann_training_options)
ann_model.fit(X_scaled, y)

# Predict the same rows the model was fitted on (in-sample predictions).
y_pred_ann = ann_model.predict(X_scaled)

  ann_model = KerasRegressor(build_fn=create_ann, epochs=100, batch_size=10, verbose=0)


In [14]:
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor

# Create each baseline regressor and fit it on the full scaled dataset
# (``fit`` returns the estimator itself, so construction and fitting chain).
lr_model = LinearRegression().fit(X_scaled, y)
rf_model = RandomForestRegressor(random_state=42).fit(X_scaled, y)

# In-sample predictions from every fitted model, in the order LR, RF, ANN.
y_pred_lr, y_pred_rf, y_pred_ann = (
    fitted.predict(X_scaled) for fitted in (lr_model, rf_model, ann_model)
)

In [8]:
# Shuffled 5-fold splitter with a fixed seed, used for the cross-validated
# comparison of the models in this cell.
kf = KFold(random_state=42, shuffle=True, n_splits=5)

# Fresh, unfitted estimators for the cross-validation runs.
rf_model = RandomForestRegressor(random_state=42)
lr_model = LinearRegression()

# Evaluate models
def evaluate_model(model, X, y, kf):
    """Cross-validate ``model`` and return its mean MAE, MSE and R².

    All three metrics are computed from the same fitted model in each fold
    through a single ``cross_validate`` call. Scoring each metric with its own
    ``cross_val_score`` run would refit the estimator three times per fold,
    tripling the cost and, for estimators without a fixed seed, reporting
    metrics that come from different fits.

    Args:
        model: Unfitted scikit-learn compatible regressor.
        X: Feature matrix.
        y: Target values aligned with ``X``.
        kf: Cross-validation splitter, passed through as ``cv``.

    Returns:
        Tuple ``(mae, mse, r2)`` of fold-averaged scores. MAE and MSE are
        returned as positive errors (the sign of the ``neg_*`` scorers is flipped).
    """
    # Imported locally so the function works even though the notebook's import
    # cell only brings in ``cross_val_score``.
    from sklearn.model_selection import cross_validate

    scoring = ('neg_mean_absolute_error', 'neg_mean_squared_error', 'r2')
    fold_scores = cross_validate(model, X, y, cv=kf, scoring=scoring)
    return (
        -fold_scores['test_neg_mean_absolute_error'].mean(),
        -fold_scores['test_neg_mean_squared_error'].mean(),
        fold_scores['test_r2'].mean(),
    )

# Cross-validated (MAE, MSE, R²) triple for each baseline model.
lr_scores = evaluate_model(lr_model, X_scaled, y, kf)
rf_scores = evaluate_model(rf_model, X_scaled, y, kf)
lr_mae, lr_mse, lr_r2 = lr_scores
rf_mae, rf_mse, rf_r2 = rf_scores

# One summary line per model, rounded to two decimals.
print(f"Linear Regression: MAE = {lr_mae:.2f}, MSE = {lr_mse:.2f}, R² = {lr_r2:.2f}")
print(f"Random Forest Regressor: MAE = {rf_mae:.2f}, MSE = {rf_mse:.2f}, R² = {rf_r2:.2f}")

Linear Regression: MAE = 1.47, MSE = 3.52, R² = 0.03
Random Forest Regressor: MAE = 1.30, MSE = 2.65, R² = 0.26


In [9]:
# Hold out 20% of the rows (fixed seed) for a single train/test evaluation.
holdout_split = train_test_split(X_scaled, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = holdout_split

# Linear Regression: fit on the training part, score the held-out part.
y_pred_lr = lr_model.fit(X_train, y_train).predict(X_test)
lr_holdout = [
    metric(y_test, y_pred_lr)
    for metric in (mean_absolute_error, mean_squared_error, r2_score)
]
print("Linear Regression MAE:", lr_holdout[0])
print("Linear Regression MSE:", lr_holdout[1])
print("Linear Regression R²:", lr_holdout[2])

# Random Forest Regressor: same procedure on the same split.
y_pred_rf = rf_model.fit(X_train, y_train).predict(X_test)
rf_holdout = [
    metric(y_test, y_pred_rf)
    for metric in (mean_absolute_error, mean_squared_error, r2_score)
]
print("Random Forest Regressor MAE:", rf_holdout[0])
print("Random Forest Regressor MSE:", rf_holdout[1])
print("Random Forest Regressor R²:", rf_holdout[2])


Linear Regression MAE: 1.5381390697848445
Linear Regression MSE: 3.17156912490291
Linear Regression R²: 0.15929055070218656
Random Forest Regressor MAE: 1.4005882352941175
Random Forest Regressor MSE: 2.8818647058823537
Random Forest Regressor R²: 0.23608447603760585


In [5]:
from sklearn.tree import DecisionTreeRegressor
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasRegressor

# Decision-tree regressor with a fixed random_state, evaluated further down in this cell.
tree_options = {'random_state': 42}
dt_model = DecisionTreeRegressor(**tree_options)

def create_ann():
    """Build and compile the small feed-forward network used as the ANN regressor.

    The network has two ReLU hidden layers (64 units, then 32 units) followed
    by a single-unit output layer with no activation specified. The input
    width is read from the notebook-level ``X_scaled`` array, so that array
    must exist before this function is called.

    Returns:
        The compiled ``Sequential`` model (mean-squared-error loss, Adam optimizer).
    """
    n_features = X_scaled.shape[1]
    network = Sequential([
        Dense(64, input_dim=n_features, activation='relu'),
        Dense(32, activation='relu'),
        Dense(1),
    ])
    network.compile(optimizer='adam', loss='mean_squared_error')
    return network

# Shuffled 5-fold splitter with a fixed seed for the evaluations in this cell.
kf = KFold(random_state=42, shuffle=True, n_splits=5)

# Unfitted scikit-learn style wrapper around the Keras network.
ann_training_options = {'epochs': 100, 'batch_size': 10, 'verbose': 0}
ann_model = KerasRegressor(build_fn=create_ann, **ann_training_options)

# Evaluate models
def evaluate_model(model, X, y, kf):
    """Cross-validate ``model`` and return its mean MAE, MSE and R².

    All three metrics are computed from the same fitted model in each fold
    through a single ``cross_validate`` call. Scoring each metric with its own
    ``cross_val_score`` run would refit the estimator three times per fold,
    tripling the cost and, for estimators without a fixed seed (such as the
    Keras network evaluated in this cell), reporting metrics that come from
    different fits.

    Args:
        model: Unfitted scikit-learn compatible regressor.
        X: Feature matrix.
        y: Target values aligned with ``X``.
        kf: Cross-validation splitter, passed through as ``cv``.

    Returns:
        Tuple ``(mae, mse, r2)`` of fold-averaged scores. MAE and MSE are
        returned as positive errors (the sign of the ``neg_*`` scorers is flipped).
    """
    # Imported locally so the function works even though the notebook's import
    # cell only brings in ``cross_val_score``.
    from sklearn.model_selection import cross_validate

    scoring = ('neg_mean_absolute_error', 'neg_mean_squared_error', 'r2')
    fold_scores = cross_validate(model, X, y, cv=kf, scoring=scoring)
    return (
        -fold_scores['test_neg_mean_absolute_error'].mean(),
        -fold_scores['test_neg_mean_squared_error'].mean(),
        fold_scores['test_r2'].mean(),
    )

# Cross-validated (MAE, MSE, R²) triple for the decision tree and the network.
dt_scores = evaluate_model(dt_model, X_scaled, y, kf)
ann_scores = evaluate_model(ann_model, X_scaled, y, kf)
dt_mae, dt_mse, dt_r2 = dt_scores
ann_mae, ann_mse, ann_r2 = ann_scores

# One summary line per model, rounded to two decimals.
print(f"Decision Tree Regressor: MAE = {dt_mae:.2f}, MSE = {dt_mse:.2f}, R² = {dt_r2:.2f}")
print(f"Artificial Neural Network: MAE = {ann_mae:.2f}, MSE = {ann_mse:.2f}, R² = {ann_r2:.2f}")

  ann_model = KerasRegressor(build_fn=create_ann, epochs=100, batch_size=10, verbose=0)


Decision Tree Regressor: MAE = 1.70, MSE = 5.35, R² = -0.46
Artificial Neural Network: MAE = 1.48, MSE = 3.47, R² = 0.05
