# Machine Learning Group Project
"The task is to predict a momentary self-reported well being score that was measured while people were playing a video game designed to lower stress and improve mental health."

## Packages and Data

In [3]:
import pandas as pd
import numpy as np
from datetime import datetime
df = pd.read_csv("data/train_data.csv")
test = pd.read_csv("data/test_data.csv")
val = pd.read_csv("data/supplimental_data.csv")

## Functions

In [5]:
def make_categorical(dataFrame, featureList):
    for feature in featureList:
        dataFrame[feature] = dataFrame[feature].astype("category")
    return dataFrame

def make_datetime(dataFrame, feature):
    dataFrame[feature] = pd.to_datetime(dataFrame[feature])
    dataFrame[feature] = dataFrame[feature].astype("int64") 
    return dataFrame

## Features and splits

In [7]:
print(df[0:1]["CurrentTask"].values)
df.head()

[nan]


Unnamed: 0,UserID,QuestionTiming,TimeUtc,CurrentGameMode,CurrentTask,CurrentSessionLength,LastTaskCompleted,LevelProgressionAmount,QuestionType,ResponseValue
0,p1,User Initiated,2022-08-18 22:55:27,,,2,,,Wellbeing,509.0
1,p1,System Initiated,2022-08-18 23:38:31,,,0,,,Wellbeing,653.0
2,p1,User Initiated,2022-08-18 23:39:24,Career,HOME_VAN,1,WASH_PWVan,1.0,Wellbeing,705.0
3,p1,System Initiated,2022-08-18 23:45:01,Career,RESIDENTIALSMALL_BACKYARD,6,WASH_PWVan,0.168267,Wellbeing,817.0
4,p1,System Initiated,2022-08-18 23:51:22,Career,RESIDENTIALSMALL_BACKYARD,13,WASH_PWVan,0.429364,Wellbeing,810.0


In [15]:
from sklearn.impute import SimpleImputer

imp = SimpleImputer(strategy = "most_frequent")
imp.fit_transform(df)
df1 = imp.transform(df)
df1 = pd.DataFrame(df1)
df1.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,p1,User Initiated,2022-08-18 22:55:27,Career,RECREATIONGROUND_PLAYGROUND,2,WASH_PWVan,1.0,Wellbeing,509.0
1,p1,System Initiated,2022-08-18 23:38:31,Career,RECREATIONGROUND_PLAYGROUND,0,WASH_PWVan,1.0,Wellbeing,653.0
2,p1,User Initiated,2022-08-18 23:39:24,Career,HOME_VAN,1,WASH_PWVan,1.0,Wellbeing,705.0
3,p1,System Initiated,2022-08-18 23:45:01,Career,RESIDENTIALSMALL_BACKYARD,6,WASH_PWVan,0.168267,Wellbeing,817.0
4,p1,System Initiated,2022-08-18 23:51:22,Career,RESIDENTIALSMALL_BACKYARD,13,WASH_PWVan,0.429364,Wellbeing,810.0


In [9]:
featureList = ["UserID", "QuestionTiming", "CurrentGameMode", "CurrentTask", "LastTaskCompleted"]
df1 = make_categorical(df1, featureList)
df1 = make_datetime(df1, "TimeUtc")

FEATURES =  ["UserID", "QuestionTiming", "TimeUtc", "CurrentGameMode", "CurrentTask", "CurrentSessionLength", "LastTaskCompleted", "LevelProgressionAmount"]
RESTRICTED = ["UserID", "TimeUtc", "CurrentGameMode", "CurrentTask", "CurrentSessionLength", "LastTaskCompleted"]
TARGET = ["ResponseValue"]

KeyError: 'UserID'

In [None]:
train = pd.DataFrame()
test = pd.DataFrame()


for userID, entry in df1.groupby("UserID", observed = True):
    numEntries = len(entry)
    trainSize = int(numEntries * 0.5)
    trainCon = entry[:trainSize]
    testCon = entry[trainSize:]
    train = pd.concat([train, trainCon])
    test = pd.concat([test, testCon])

In [None]:
X_train = train[RESTRICTED]
y_train = train[TARGET]
X_test = test[RESTRICTED]
y_test = test[TARGET]


In [None]:
import xgboost as xgb
reg = xgb.XGBRegressor(tree_method="hist", enable_categorical=True, eval_metric = "mae", n_estimators = 500)

reg = reg.fit(X_train, y_train, 
        eval_set = [(X_train, y_train), (X_test, y_test)],
        verbose = 100)

In [None]:
from sklearn.neighbors import KNeighborsRegressor
neigh = KNeighborsRegressor(n_neighbors = 2)
neigh.fit(X_train, y_train)
neigh.predict(X_test)