# Machine Learning Group Project
"The task is to predict a momentary self-reported well being score that was measured while people were playing a video game designed to lower stress and improve mental health."

## Packages and Data

In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime
from sklearn.model_selection import train_test_split
import xgboost as xgb

In [4]:
# Read the three CSV inputs; `val` is bound to the supplemental file.
train, test, val = (
    pd.read_csv(csv_path)
    for csv_path in (
        "data/train_data.csv",
        "data/test_data.csv",
        "data/supplimental_data.csv",
    )
)

## Functions

In [6]:
def make_categorical(dataFrame, featureList):
    """Cast each listed column of ``dataFrame`` to the pandas ``category`` dtype.

    The frame is modified in place and also returned, so the function can be
    called as a bare statement or used in an assignment.

    Parameters
    ----------
    dataFrame : pandas.DataFrame
        Frame whose columns are converted.
    featureList : iterable
        Labels of the columns to convert.

    Returns
    -------
    pandas.DataFrame
        The same object that was passed in.
    """
    for column in featureList:
        as_category = dataFrame[column].astype("category")
        dataFrame[column] = as_category
    return dataFrame

def make_datetime(dataFrame, feature):
    """Replace a timestamp column with an ``int64`` encoding of its values.

    The column is parsed with ``pandas.to_datetime`` and the parsed values are
    then cast to ``int64``. The frame is modified in place and also returned.

    Parameters
    ----------
    dataFrame : pandas.DataFrame
        Frame holding the timestamp column.
    feature : label
        Name of the column to convert.

    Returns
    -------
    pandas.DataFrame
        The same object that was passed in.
    """
    # For datetime64[ns] data the integers are nanoseconds since the Unix epoch.
    parsed = pd.to_datetime(dataFrame[feature])
    dataFrame[feature] = parsed.astype("int64")
    return dataFrame

## Features and splits

In [8]:
# Columns to be cast to the pandas "category" dtype.
featureList = [
    "UserID",
    "QuestionTiming",
    "CurrentGameMode",
    "CurrentTask",
    "LastTaskCompleted",
]

# Full predictor set.
FEATURES = [
    "UserID",
    "QuestionTiming",
    "TimeUtc",
    "CurrentGameMode",
    "CurrentTask",
    "CurrentSessionLength",
    "LastTaskCompleted",
    "LevelProgressionAmount",
]

# FEATURES without "QuestionTiming" and "LevelProgressionAmount".
RESTRICTED = [
    "UserID",
    "TimeUtc",
    "CurrentGameMode",
    "CurrentTask",
    "CurrentSessionLength",
    "LastTaskCompleted",
]

# Kept as a one-element list, so train[TARGET] is a single-column DataFrame.
TARGET = ["ResponseValue"]

# Both helpers modify `train` in place and return that same object, so rebinding
# the name is equivalent to calling them as bare statements.
train = make_categorical(train, featureList)
train = make_datetime(train, "TimeUtc")

In [9]:
# One shared set of split settings keeps test_size and random_state identical
# for the full-feature and restricted-feature splits.
split_options = {"test_size": 0.25, "random_state": 333}

X_train, X_test, y_train, y_test = train_test_split(
    train[FEATURES],
    train[TARGET],
    **split_options,
)
X1_train, X1_test, y1_train, y1_test = train_test_split(
    train[RESTRICTED],
    train[TARGET],
    **split_options,
)

In [10]:
# Hyper-parameters shared by both regressors; the models differ only in the
# columns they are trained on.
xgb_settings = {
    "tree_method": "hist",
    "enable_categorical": True,
    "eval_metric": "mae",
    "n_estimators": 300,
}

# Model on the FEATURES split. MAE on the training and held-out parts is
# printed every 100 boosting rounds.
reg = xgb.XGBRegressor(**xgb_settings).fit(
    X_train,
    y_train,
    eval_set=[(X_train, y_train), (X_test, y_test)],
    verbose=100,
)

# Same configuration on the RESTRICTED split.
reg1 = xgb.XGBRegressor(**xgb_settings).fit(
    X1_train,
    y1_train,
    eval_set=[(X1_train, y1_train), (X1_test, y1_test)],
    verbose=100,
)

[0]	validation_0-mae:165.18414	validation_1-mae:165.48160
[100]	validation_0-mae:75.51161	validation_1-mae:95.77026
[200]	validation_0-mae:58.71803	validation_1-mae:90.77887
[299]	validation_0-mae:46.94526	validation_1-mae:88.87992
[0]	validation_0-mae:165.18558	validation_1-mae:165.48216
[100]	validation_0-mae:74.72011	validation_1-mae:95.54131
[200]	validation_0-mae:60.63264	validation_1-mae:90.78637
[299]	validation_0-mae:48.55861	validation_1-mae:88.66431


# Experiment: per-user 80/20 split on the first 30 rows

In [20]:
# Prototype of a per-user split on the first 30 rows of `train`: for each user,
# the leading 80% of their rows (in existing row order) go to trainSmall and the
# remaining rows go to testSmall.
# NOTE(review): this is only a chronological split if rows are already ordered
# by time within each user; nothing here sorts by TimeUtc. Confirm.
small = train[:30]
train_parts = []
test_parts = []

# UserID is cast to a categorical earlier in the notebook. Grouping on a
# categorical key without `observed=` makes recent pandas emit a FutureWarning
# and may visit categories that have no rows in `small`. observed=True limits
# the loop to users actually present and leaves the selected rows unchanged.
for _, user_rows in small.groupby("UserID", observed=True):
    # int() rounds down, so a user with a single row lands entirely in test.
    n_train = int(len(user_rows) * 0.8)
    train_parts.append(user_rows[:n_train])
    test_parts.append(user_rows[n_train:])

# Concatenate once per output instead of re-copying a growing frame on every
# iteration. pd.concat rejects an empty list, so fall back to an empty frame.
trainSmall = pd.concat(train_parts, axis=0) if train_parts else pd.DataFrame()
testSmall = pd.concat(test_parts, axis=0) if test_parts else pd.DataFrame()

display(trainSmall)
display(testSmall)
    

  for x, y in small.groupby("UserID"):


Unnamed: 0,UserID,QuestionTiming,TimeUtc,CurrentGameMode,CurrentTask,CurrentSessionLength,LastTaskCompleted,LevelProgressionAmount,QuestionType,ResponseValue
0,p1,User Initiated,1660863327000000000,,,2,,,Wellbeing,509.0
1,p1,System Initiated,1660865911000000000,,,0,,,Wellbeing,653.0
2,p1,User Initiated,1660865964000000000,Career,HOME_VAN,1,WASH_PWVan,1.0,Wellbeing,705.0
3,p1,System Initiated,1660866301000000000,Career,RESIDENTIALSMALL_BACKYARD,6,WASH_PWVan,0.168267,Wellbeing,817.0
4,p1,System Initiated,1660866682000000000,Career,RESIDENTIALSMALL_BACKYARD,13,WASH_PWVan,0.429364,Wellbeing,810.0
5,p1,System Initiated,1660868244000000000,Career,RESIDENTIALSMALL_BACKYARD,22,,0.999766,Wellbeing,829.0
6,p1,System Initiated,1660868924000000000,Career,RESIDENTIALSMALL_BUNGALOW,33,WASH_DirtBike,0.315248,Wellbeing,779.0
7,p1,User Initiated,1660869089000000000,Career,RESIDENTIALSMALL_BUNGALOW,36,WASH_DirtBike,0.389675,Wellbeing,786.0
8,p1,User Initiated,1660877659000000000,Career,RESIDENTIALSMALL_BUNGALOW,0,,0.390541,Wellbeing,565.0
9,p1,User Initiated,1660932885000000000,,,0,,,Wellbeing,647.0


Unnamed: 0,UserID,QuestionTiming,TimeUtc,CurrentGameMode,CurrentTask,CurrentSessionLength,LastTaskCompleted,LevelProgressionAmount,QuestionType,ResponseValue
17,p1,User Initiated,1660953844000000000,Career,RECREATIONGROUND_PLAYGROUND,43,WASH_ClimbingFrame,1.0,Wellbeing,844.0
18,p1,System Initiated,1661112308000000000,,,0,,,Wellbeing,703.0
19,p1,User Initiated,1661142970000000000,Career,SUBURBIA_DETACHEDHOUSE,13,,0.707143,Wellbeing,818.0
20,p1,User Initiated,1661194042000000000,,,0,,,Wellbeing,615.0
21,p1,User Initiated,1661474983000000000,Career,RECREATIONGROUND_SKATEPARK,1,,0.279622,Wellbeing,486.0
22,p100,User Initiated,1662566876000000000,,,1,,,Wellbeing,390.0
28,p10004,System Initiated,1661178749000000000,Career,SUBURBIA_DETACHEDHOUSE,44,,0.567957,Wellbeing,464.0
29,p10004,System Initiated,1661179351000000000,Career,SUBURBIA_DETACHEDHOUSE,54,,0.738142,Wellbeing,564.0
