# Model training

In [218]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from tabulate import tabulate

In [219]:
#debugging stuff
def evaluate_stress_predictions(predicted_values,true_values):
    # Initialize counters
    correct_predictions = 0
    overestimated = 0
    underestimated = 0

    # Compare true and predicted values
    for true, pred in zip(true_values, predicted_values):
        if true == pred:
            correct_predictions += 1
        elif true == 'low' and pred == 'medium':
            underestimated += 1
        elif true == 'medium' and pred == 'low':
            overestimated += 1
        elif true == 'medium' and pred == 'high':
            underestimated += 1
        elif true == 'high' and pred == 'medium':
            overestimated += 1
        elif true == 'high' and pred == 'low':
            overestimated += 1
        elif true == 'low' and pred == 'high':
            underestimated += 1

    # Return counts
    return correct_predictions, overestimated, underestimated
    

def evaluate_age_predictions(predicted_values,true_values ):
    # Initialize counters
    correct_predictions = 0
    overestimated = 0
    underestimated = 0
    
    # Compare true and predicted values
    for true, pred in zip(true_values, predicted_values):
        if pred == true:
            correct_predictions += 1
        elif pred > true:
            overestimated += 1
        else:
            underestimated += 1
    
    return correct_predictions, overestimated, underestimated

def load_dataset():
    train_data = pd.read_csv('./dataset/train_dataset.csv')
    test_data = pd.read_csv('./dataset/test_dataset.csv')
    # Preprocess training and testing dataset
    train_data['bedtime'] = pd.to_datetime(train_data['bedtime']).dt.hour
    train_data['activity_intensity'] = train_data['activity_intensity'].map({'low': 0, 'moderate': 1, 'high': 2})
    
    
    
    test_data['bedtime'] = pd.to_datetime(test_data['bedtime']).dt.hour
    test_data['activity_intensity'] = test_data['activity_intensity'].map({'low': 0, 'moderate': 1, 'high': 2})
    return train_data, test_data

def print_stats(pred,test,banner):
    svm_accuracy = accuracy_score(svm_y_pred,y_test)
    correct, over, under = evaluate_age_predictions(svm_y_pred, y_test)
    data = [["Accuracy:", accuracy],
        ["Correct Predictions:", correct],
        ["Overestimated:", over],
        ["Underestimated:", under]]
    print(tabulate(data, headers=[banner, ""]))

### Decision tree

In [220]:
# Load the testing dataset from CSV
train_data, test_data = load_dataset()

# Separate features and target variable in training and testing dataset
X_train = train_data[['bedtime', 'activity_intensity', 'byear']]
y_train = train_data['stress_level']

X_test = test_data[['bedtime', 'activity_intensity', 'byear']]
y_test = test_data['stress_level']

# Train the decision tree classifier
clf = DecisionTreeClassifier(random_state=42)
clf.fit(X_train, y_train)

# Make predictions
y_pred = clf.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
correct, over, under = evaluate_age_predictions(y_pred,y_test )
data = [["Accuracy:", accuracy],
        ["Correct Predictions:", correct],
        ["Overestimated:", over],
        ["Underestimated:", under]]

# Print as table
print(tabulate(data, headers=["Decision tree stress", ""]))

#---

# Separate features and target variable in training and testing dataset
X_train = train_data[['stress', 'activity_intensity', 'byear']]
y_train = train_data['mental_age']

X_test = test_data[['stress', 'activity_intensity', 'byear']]
y_test = test_data['mental_age']

# Train the decision tree classifier
clf = DecisionTreeClassifier(random_state=42)
clf.fit(X_train, y_train)

# Make predictions
y_pred = clf.predict(X_test)

# Evaluate the model

accuracy = accuracy_score(y_test, rf_y_pred)
accuracy = accuracy_score(y_test, y_pred)
correct, over, under = evaluate_age_predictions(y_pred,y_test )

data = [["Accuracy:", accuracy],
        ["Correct Predictions:", correct],
        ["Overestimated:", over],
        ["Underestimated:", under]]

# Print as table
print("\n\n",tabulate(data, headers=["Decision tree mental age", "Value"]))

Decision tree stress
----------------------  ---------
Accuracy:                0.428571
Correct Predictions:    21
Overestimated:          14
Underestimated:         14


 Decision tree mental age        Value
--------------------------  ---------
Accuracy:                    0.387755
Correct Predictions:        19
Overestimated:              13
Underestimated:             17


### Random forest

In [221]:
train_data, test_data = load_dataset()
# Separate features and target variable in training and testing dataset
X_train = train_data[['bedtime', 'activity_intensity', 'byear']]
y_train = train_data['stress_level']

X_test = test_data[['bedtime', 'activity_intensity', 'byear']]
y_test = test_data['stress_level']


rf_clf = RandomForestClassifier(random_state=42)

# Train the classifier
rf_clf.fit(X_train, y_train)

# Make predictions
rf_y_pred = rf_clf.predict(X_test)

# Evaluate the model
# Evaluate the model

accuracy = accuracy_score(y_test, rf_y_pred)
accuracy = accuracy_score(y_test, rf_y_pred)
correct, over, under = evaluate_stress_predictions(rf_y_pred,y_test )
data = [["Accuracy:", accuracy],
        ["Correct Predictions:", correct],
        ["Overestimated:", over],
        ["Underestimated:", under]]

# Print as table
print(tabulate(data, headers=["Random forest stress", ""]))

#---

# Separate features and target variable in training and testing dataset
X_train = train_data[['stress', 'activity_intensity', 'byear']]
y_train = train_data['mental_age']

X_test = test_data[['stress', 'activity_intensity', 'byear']]
y_test = test_data['mental_age']

rf_clf = RandomForestClassifier(random_state=42)

# Train the classifier
rf_clf.fit(X_train, y_train)

# Make predictions
rf_y_pred = rf_clf.predict(X_test)

# Evaluate the model
# Evaluate the model
print_stats(y_test, rf_y_pred,"Random forest mental age")
accuracy = accuracy_score(y_test, rf_y_pred)




Random forest stress
----------------------  ---------
Accuracy:                0.428571
Correct Predictions:    21
Overestimated:          22
Underestimated:          6
Random forest mental age
--------------------------  ---------
Accuracy:                    0.428571
Correct Predictions:        29
Overestimated:               4
Underestimated:             16


### Support vector

In [222]:
# Separate features and target variable in training and testing dataset
X_train = train_data[['bedtime', 'activity_intensity', 'byear']]
y_train = train_data['stress_level']

X_test = test_data[['bedtime', 'activity_intensity', 'byear']]
y_test = test_data['stress_level']

svm_clf = SVC(kernel='linear', random_state=42)  # You can choose different kernels as well

# Train the classifier
svm_clf.fit(X_train, y_train)

# Make predictions
svm_y_pred = svm_clf.predict(X_test)

# Evaluate the model
print_stats(svm_y_pred,y_test,"Support Vector stress")


#----
# Separate features and target variable in training and testing dataset
X_train = train_data[['stress', 'activity_intensity', 'byear']]
y_train = train_data['mental_age']

X_test = test_data[['stress', 'activity_intensity', 'byear']]
y_test = test_data['mental_age']

svm_clf = SVC(kernel='linear', random_state=42)  # You can choose different kernels as well

# Train the classifier
svm_clf.fit(X_train, y_train)

# Make predictions
svm_y_pred = svm_clf.predict(X_test)

# Evaluate the model
print_stats(svm_y_pred,y_test,"Support Vector mental age")


Support Vector stress
-----------------------  ---------
Accuracy:                 0.428571
Correct Predictions:     22
Overestimated:           11
Underestimated:          16
Support Vector mental age
---------------------------  ---------
Accuracy:                     0.428571
Correct Predictions:         29
Overestimated:                4
Underestimated:              16
