## Dataset 1
Source file: `MASTER_PhonesmartdataAll_CCI_AdvStats.csv`

### 1.0 Data Preparation

In [9]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

In [10]:
# Load the dataset; the 'Device' column is dropped when present
# (errors='ignore' makes the drop a no-op if the column is missing).
df = pd.read_csv("MASTER_PhonesmartdataAll_CCI_AdvStats.csv")
df = df.drop('Device', axis=1, errors='ignore')

# Cells that hold a single space are treated as missing values.
df = df.replace(" ", np.nan)

# Coerce the numeric columns BEFORE dropping incomplete rows: errors='coerce'
# turns unparseable entries into NaN, and a NaN that survives until
# astype(int) would raise. Coercing first lets dropna() remove those rows too.
# NOTE: despite its name, this list holds the columns that end up as int.
float_columns = ['Age', 'GenderNum', 'AutismQuotient', 'STAI', 'BRIEF_Total', 'DailyAvgMins', 'DailyAvePickups']
df[float_columns] = df[float_columns].apply(pd.to_numeric, errors='coerce')

# Drop every row with a missing value, then cast the numeric columns to int
# (same final dtype as before; astype(int) truncates any fractional part).
df = df.dropna().astype({column: int for column in float_columns})
df.shape

(124, 10)

In [11]:
# Separate the regression target 'VS_RT_correct_Single' from the predictors
# (every remaining column of df is used as a feature).
y = df['VS_RT_correct_Single']
X = df.drop(columns=['VS_RT_correct_Single'])

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

### 1.1 Decision Tree without pruning  

In [17]:
# Fit a regression tree with default hyperparameters (only the seed is fixed),
# i.e. no pruning-related limit is set here.
tree_10 = DecisionTreeRegressor(random_state=42).fit(X_train, y_train)
tree_10

In [19]:
# Predict the held-out test rows with the unpruned tree;
# the bare name on the last line shows the array as the cell output.
y_pred_10 = tree_10.predict(X=X_test)
y_pred_10

array([ 723.4588889,  769.8311111,  723.4588889,  829.0229167,
        846.4154762,  678.7708333,  858.6645833,  858.6645833,
        769.8311111,  858.6645833,  662.8854167,  739.3095238,
        799.4433333,  828.5055556,  712.7944444,  884.825    ,
        828.6472222,  718.3888889,  749.4416667,  739.3095238,
        710.6888889, 1039.3      ,  828.5055556,  819.1222222,
        678.7708333])

In [21]:
# Score the unpruned tree on the test set: MSE, MAE and R-squared, in that order
mse_10, mae_10, r2_10 = (
    score(y_test, y_pred_10)
    for score in (mean_squared_error, mean_absolute_error, r2_score)
)

print(f"Mean Squared Error: {mse_10}")
print(f"Mean Absolute Error: {mae_10}")
print(f"R-squared: {r2_10}")

Mean Squared Error: 23440.018577607145
Mean Absolute Error: 77.17420237200001
R-squared: 0.1481671578354623


### 1.2 Decision Tree with pruning (pre-pruning via `max_depth=3`)

In [23]:
# Fit a regression tree whose growth is stopped early at depth 3
# (same seed as the unpruned tree so the two runs are comparable).
tree_11 = DecisionTreeRegressor(max_depth=3, random_state=42).fit(X_train, y_train)
tree_11

In [24]:
# Predict the held-out test rows with the depth-limited tree;
# the bare name on the last line shows the array as the cell output.
y_pred_11 = tree_11.predict(X=X_test)
y_pred_11

array([759.59720708, 759.59720708, 759.59720708, 840.22404237,
       840.22404237, 667.78189236, 840.22404237, 840.22404237,
       759.59720708, 840.22404237, 667.78189236, 840.22404237,
       759.59720708, 840.22404237, 759.59720708, 840.22404237,
       759.59720708, 840.22404237, 759.59720708, 840.22404237,
       759.59720708, 943.16719705, 840.22404237, 759.59720708,
       667.78189236])

In [25]:
# Score the pruned (max_depth=3) tree on the test set: MSE, MAE and R-squared, in that order
mse_11, mae_11, r2_11 = (
    score(y_test, y_pred_11)
    for score in (mean_squared_error, mean_absolute_error, r2_score)
)

print(f"Mean Squared Error: {mse_11}")
print(f"Mean Absolute Error: {mae_11}")
print(f"R-squared: {r2_11}")

Mean Squared Error: 21656.493764068913
Mean Absolute Error: 67.03524314400853
R-squared: 0.2129821666613786


## Dataset 2
Wine dataset (loaded with `sklearn.datasets.load_wine`)

### 2.0 Data Preparation

In [44]:
from sklearn import datasets
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report

In [51]:
# Load the wine dataset and unpack its feature matrix (XX) and class labels (yy);
# the bare name on the last line shows the feature array as the cell output.
wine = datasets.load_wine()
XX, yy = wine.data, wine.target
XX

array([[1.423e+01, 1.710e+00, 2.430e+00, ..., 1.040e+00, 3.920e+00,
        1.065e+03],
       [1.320e+01, 1.780e+00, 2.140e+00, ..., 1.050e+00, 3.400e+00,
        1.050e+03],
       [1.316e+01, 2.360e+00, 2.670e+00, ..., 1.030e+00, 3.170e+00,
        1.185e+03],
       ...,
       [1.327e+01, 4.280e+00, 2.260e+00, ..., 5.900e-01, 1.560e+00,
        8.350e+02],
       [1.317e+01, 2.590e+00, 2.370e+00, ..., 6.000e-01, 1.620e+00,
        8.400e+02],
       [1.413e+01, 4.100e+00, 2.740e+00, ..., 6.100e-01, 1.600e+00,
        5.600e+02]])

In [46]:
# Hold out 20% of the wine samples for testing; the fixed seed keeps the split repeatable
XX_train, XX_test, yy_train, yy_test = train_test_split(
    XX,
    yy,
    test_size=0.2,
    random_state=42,
)

### 2.1 Decision Tree without pruning

In [47]:
# Fit a classification tree with default hyperparameters (only the seed is fixed),
# i.e. no pruning-related limit is set here.
tree_20 = DecisionTreeClassifier(random_state=42).fit(XX_train, yy_train)
tree_20

In [48]:
# Predict class labels for the held-out wine samples with the unpruned tree;
# the bare name on the last line shows the array as the cell output.
y_pred_20 = tree_20.predict(X=XX_test)
y_pred_20

array([0, 0, 2, 0, 1, 0, 1, 2, 1, 2, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1,
       1, 2, 2, 2, 1, 1, 1, 0, 0, 1, 2, 0, 0, 0])

In [50]:
# Score the unpruned classifier on the test set: overall accuracy plus the
# per-class precision/recall/F1 text report.
accuracy_20, classification_20 = (
    accuracy_score(yy_test, y_pred_20),
    classification_report(yy_test, y_pred_20),
)

print(f"Accuracy: {accuracy_20}")
print(f"Classification:\n {classification_20}")

Accuracy: 0.9444444444444444
Classification:
               precision    recall  f1-score   support

           0       0.93      0.93      0.93        14
           1       0.93      1.00      0.97        14
           2       1.00      0.88      0.93         8

    accuracy                           0.94        36
   macro avg       0.95      0.93      0.94        36
weighted avg       0.95      0.94      0.94        36



### 2.2 Decision Tree with pruning (pre-pruning via `max_depth=3`)

In [52]:
# Fit a classification tree whose growth is stopped early at depth 3
# (same seed as the unpruned tree so the two runs are comparable).
tree_21 = DecisionTreeClassifier(max_depth=3, random_state=42).fit(XX_train, yy_train)
tree_21

In [53]:
# Predict class labels for the held-out wine samples with the depth-limited tree;
# the bare name on the last line shows the array as the cell output.
y_pred_21 = tree_21.predict(X=XX_test)
y_pred_21

array([0, 0, 2, 0, 1, 0, 1, 2, 1, 2, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1,
       1, 2, 2, 2, 1, 1, 1, 0, 0, 1, 2, 0, 0, 0])

In [54]:
# Score the pruned (max_depth=3) classifier on the test set: overall accuracy
# plus the per-class precision/recall/F1 text report.
accuracy_21, classification_21 = (
    accuracy_score(yy_test, y_pred_21),
    classification_report(yy_test, y_pred_21),
)

print(f"Accuracy: {accuracy_21}")
print(f"Classification:\n {classification_21}")

Accuracy: 0.9444444444444444
Classification:
               precision    recall  f1-score   support

           0       1.00      0.93      0.96        14
           1       0.88      1.00      0.93        14
           2       1.00      0.88      0.93         8

    accuracy                           0.94        36
   macro avg       0.96      0.93      0.94        36
weighted avg       0.95      0.94      0.94        36

