# Model training

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
from sklearn.metrics import accuracy_score
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from tabulate import tabulate
from sklearn.metrics import mean_squared_error
import math

In [None]:
#debugging stuff
def evaluate_stress_predictions(predicted_values, true_values):
    """Tally exact, too-high and too-low stress-level predictions.

    Stress levels are ordered ``low < medium < high``. A prediction ranked
    above the true level counts as overestimated and one ranked below it as
    underestimated, matching the convention of ``pred > true`` meaning
    "overestimated" used for numeric targets.

    Args:
        predicted_values: Iterable of predicted labels.
        true_values: Iterable of ground-truth labels, aligned with
            ``predicted_values``.

    Returns:
        Tuple ``(correct_predictions, overestimated, underestimated)``.
        Pairs that differ and contain a label outside the three known
        levels are not counted in any bucket.
    """
    # Ordinal position of each label; replaces the hand-enumerated pair chain.
    rank = {'low': 0, 'medium': 1, 'high': 2}

    correct_predictions = 0
    overestimated = 0
    underestimated = 0

    for true, pred in zip(true_values, predicted_values):
        if true == pred:
            correct_predictions += 1
            continue

        true_rank = rank.get(true)
        pred_rank = rank.get(pred)
        if true_rank is None or pred_rank is None:
            # Unknown label: leave it out of the over/under tallies.
            continue

        if pred_rank > true_rank:
            overestimated += 1
        else:
            underestimated += 1

    return correct_predictions, overestimated, underestimated
    

def evaluate_age_predictions(predicted_values, true_values):
    """Count exact, too-high and too-low numeric predictions.

    Args:
        predicted_values: Iterable of predicted numbers.
        true_values: Iterable of ground-truth numbers, aligned with
            ``predicted_values``.

    Returns:
        Tuple ``(correct_predictions, overestimated, underestimated)``.
        Any pair that is neither equal nor ``pred > true`` lands in the
        underestimated bucket.
    """
    # Slots: 0 = exact match, 1 = prediction above truth, 2 = everything else.
    tally = [0, 0, 0]

    for actual, guess in zip(true_values, predicted_values):
        if guess == actual:
            slot = 0
        elif guess > actual:
            slot = 1
        else:
            slot = 2
        tally[slot] += 1

    exact, above, below = tally
    return exact, above, below

def load_dataset():
    """Read the train and test CSV files and encode two of their columns.

    In both frames ``bedtime`` is parsed as a datetime and reduced to its
    hour, and ``activity_intensity`` is mapped from ``low``/``moderate``/
    ``high`` to ``0``/``1``/``2``.

    Returns:
        Tuple ``(train_data, test_data)`` of preprocessed DataFrames.
    """
    intensity_codes = {'low': 0, 'moderate': 1, 'high': 2}

    train_frame = pd.read_csv('./dataset/train_dataset.csv')
    test_frame = pd.read_csv('./dataset/test_dataset.csv')

    # Same preprocessing for both splits, training split first.
    for frame in (train_frame, test_frame):
        frame['bedtime'] = pd.to_datetime(frame['bedtime']).dt.hour
        frame['activity_intensity'] = frame['activity_intensity'].map(intensity_codes)

    return train_frame, test_frame


# Load and preprocess both splits once; get_vars() reads these module-level
# frames rather than re-reading the CSV files on every call.
train_data, test_data = load_dataset()

def get_vars(c="stress"):
    """Pick feature and target columns from the module-level train/test frames.

    Args:
        c: ``"stress"`` selects bedtime, activity intensity and birth year as
            features with ``stress_level`` as target. Any other value selects
            stress, activity intensity and birth year as features with
            ``mental_age`` as target.

    Returns:
        Tuple ``(train_features, train_target, test_features, test_target)``.
    """
    if c == "stress":
        feature_cols = ['bedtime', 'activity_intensity', 'byear']
        target_col = 'stress_level'
    else:
        # NOTE(review): this branch reads a 'stress' column while the other
        # branch targets 'stress_level' - confirm both exist in the CSVs.
        feature_cols = ['stress', 'activity_intensity', 'byear']
        target_col = 'mental_age'

    train_features = train_data[feature_cols]
    test_features = test_data[feature_cols]

    train_target = train_data[target_col]
    test_target = test_data[target_col]

    return train_features, train_target, test_features, test_target

def print_stats(pred, test, banner, c="stress"):
    """Print a summary table for one model's predictions.

    The first row is the accuracy when ``accuracy_score`` accepts the
    targets; otherwise it is the mean squared error followed by its square
    root in parentheses. The remaining rows give the correct /
    overestimated / underestimated counts out of ``len(test)``.

    Args:
        pred: Predicted target values.
        test: Ground-truth target values, aligned with ``pred``.
        banner: Title used as the table's header.
        c: ``"stress"`` tallies with ``evaluate_stress_predictions``; any
            other value tallies with ``evaluate_age_predictions``.
    """
    try:
        head = ['Accuracy', accuracy_score(test, pred)]
    except ValueError:
        # accuracy_score rejects non-classification (e.g. continuous) targets
        # with ValueError; report regression error instead. Other exceptions
        # are left to propagate instead of being silently swallowed.
        mse = mean_squared_error(test, pred)
        # Take the root of the unrounded MSE so rounding does not compound.
        rmse = math.sqrt(mse)
        head = ['Mean Squared Error', str(round(mse, 2)) + " (" + str(round(rmse, 2)) + ")"]

    if c == "stress":
        correct, over, under = evaluate_stress_predictions(pred, test)
    else:
        correct, over, under = evaluate_age_predictions(pred, test)

    total = len(test)
    data = [
        head,
        ["Correct Predictions:", f"{correct}/{total}"],
        ["Overestimated:", f"{over}/{total}"],
        ["Underestimated:", f"{under}/{total}"],
    ]
    print("\n\n" + tabulate(data, headers=[banner, ""]))

# Decision tree

### Stress

In [None]:
# Default get_vars() mode selects the stress-level split.
train_features, train_target, test_features, test_target = get_vars()

# fit() returns the estimator itself, so construction and training are chained.
clf = DecisionTreeClassifier(random_state=42).fit(train_features, train_target)
pred_target = clf.predict(test_features)

# Report the metric and the correct/over/under counts
# (predictions first, ground truth second).
print_stats(pred_target, test_target, "Decision tree stress")

### Mental_age

In [None]:
train_features, train_target, test_features, test_target = get_vars(c="age")

clf = DecisionTreeRegressor(random_state=42)
clf.fit(train_features, train_target)  # Train the regressor
pred_target = clf.predict(test_features)  # Make predictions

# print_stats takes predictions first and ground truth second; passing them
# the other way round swaps the overestimated/underestimated counts.
print_stats(pred_target, test_target, "Decision tree mental age", "age")

In [None]:
#clf.get_params()#hyperparam

# Random forest

### Stress

In [None]:
train_features, train_target, test_features, test_target = get_vars()

rf_clf = RandomForestClassifier(random_state=42)
rf_clf.fit(train_features, train_target)  # Train the classifier
pred_target = rf_clf.predict(test_features)  # Make predictions

# print_stats takes predictions first and ground truth second, matching its
# (pred, test, banner, c) signature.
print_stats(pred_target, test_target, "Random forest stress")

### Mental_age

In [None]:
train_features, train_target, test_features, test_target = get_vars(c="age")

# NOTE(review): a classifier is fitted on mental_age here, whereas the
# decision-tree cell uses a regressor for the same target - confirm whether
# a regression forest was intended.
rf_clf = RandomForestClassifier(random_state=42)
rf_clf.fit(train_features, train_target)  # Train the model
pred_target = rf_clf.predict(test_features)  # Make predictions

# print_stats takes predictions first and ground truth second; passing them
# the other way round swaps the overestimated/underestimated counts.
print_stats(pred_target, test_target, "Random forest age", "age")

# Support vector

### Stress

In [None]:
train_features, train_target, test_features, test_target = get_vars()

svm_clf = SVC(kernel='linear', random_state=42)  # Other kernels can be chosen as well
svm_clf.fit(train_features, train_target)  # Train the classifier
pred_target = svm_clf.predict(test_features)  # Make predictions

# print_stats takes predictions first and ground truth second, matching its
# (pred, test, banner, c) signature.
print_stats(pred_target, test_target, "Support Vector stress")

### Mental_age

In [None]:
train_features, train_target, test_features, test_target = get_vars(c="age")

# NOTE(review): a classifier is fitted on mental_age here, whereas the
# decision-tree cell uses a regressor for the same target - confirm intent.
svm_clf = SVC(kernel='linear', random_state=42)
svm_clf.fit(train_features, train_target)  # Train the model
pred_target = svm_clf.predict(test_features)  # Make predictions

# Predictions go first and ground truth second. The "age" mode is required so
# the numeric over/under tally is used instead of the stress-label one, which
# would only ever count exact matches for numeric ages.
print_stats(pred_target, test_target, "Support Vector mental age", "age")