## Decision Tree

In [33]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error

# Read the training table from disk.
data_path = "train_data_knn_imputed.csv"
data = pd.read_csv(data_path)

# Predictor columns fed to the tree; the target is the 'ResponseValue' column.
features = [
    "CurrentSessionLength",
    "CurrentGameMode_LabelEncoded",
    "CurrentTask_TargetEncoded",
    "LastTaskCompleted_TargetEncoded",
    "LevelProgressionAmount",
    "Month",
    "WeekendFlag",
    "PeriodOfDay_Night",
    "QuestionTiming_System Initiated",
    "UserAvgResponse",
]
X = data[features]
y = data["ResponseValue"]

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Fit a decision tree regressor with default hyperparameters (only the seed
# is set). `fit` returns the estimator itself, so the call can be chained.
# NOTE(review): no depth/leaf-size limit is passed here; confirm that an
# unconstrained tree is intended.
model = DecisionTreeRegressor(random_state=42).fit(X_train, y_train)

# Score the held-out rows with MSE, R^2 and MAE.
y_pred = model.predict(X_test)
mse, r2, mae = (
    mean_squared_error(y_test, y_pred),
    r2_score(y_test, y_pred),
    mean_absolute_error(y_test, y_pred),
)

# Report the three metrics, one per line.
print(
    f"Mean Squared Error: {mse}",
    f"R-squared Score: {r2}",
    f"Mean Absolute Error: {mae}",
    sep="\n",
)

Mean Squared Error: 32867.44296122022
R-squared Score: 0.27173542404721596
Mean Absolute Error: 121.60847840103158
