# Decision Tree Regression

In [39]:
import pandas as pd
from sklearn.model_selection import KFold, GridSearchCV
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import r2_score, mean_squared_error
from prettytable import PrettyTable

In [40]:
# Load the steel dataset. The path is relative, so the CSV is looked up in
# the kernel's current working directory.
STEEL_CSV_PATH = "steel.csv"
steel_data = pd.read_csv(STEEL_CSV_PATH)

In [41]:
# Convert the frame to a single NumPy array once, then slice it:
#   - every column except the last is a feature,
#   - the last column holds the ground-truth value to predict.
steel_values = steel_data.values
features, ground_truths = steel_values[:, :-1], steel_values[:, -1]

# 10-fold cross-validation splitter. With shuffle=False the folds are
# consecutive blocks of rows in file order, so the split is deterministic.
# NOTE(review): if the rows of steel.csv are sorted or grouped in some way,
# unshuffled folds can give very uneven per-fold scores - confirm the row order.
kf = KFold(n_splits=10, shuffle=False)

In [42]:
# Baseline regressor: only random_state is set (for reproducible trees),
# every other hyperparameter is left at scikit-learn's default.
model = DecisionTreeRegressor(random_state=4)

# Per-fold metrics are stored as pre-formatted strings, one row per fold,
# in the same column order as scores_headers.
scores_headers = ["Fold", "R2 Score", "Mean Squared Error"]
scores_list = []
for fold_number, (train_rows, test_rows) in enumerate(kf.split(features), start=1):
    # Refit the model on this fold's training rows only.
    model.fit(features[train_rows], ground_truths[train_rows])

    # Score it on the rows that were held out from training.
    held_out_truths = ground_truths[test_rows]
    held_out_predictions = model.predict(features[test_rows])

    r2 = r2_score(held_out_truths, held_out_predictions)
    print(f"R2 Score: {r2}")

    mse = mean_squared_error(held_out_truths, held_out_predictions)
    print(f"Mean Squared Error: {mse}")

    # Full precision is printed above; the table rows are rounded to 2 d.p.
    scores_list.append([str(fold_number), f"{r2:.2f}", f"{mse:.2f}"])

# Hyperparameter grid: tree depth and minimum samples per leaf, each 3..8
# inclusive (36 combinations in total).
param_grid = {
    "max_depth": list(range(3, 9)),
    "min_samples_leaf": list(range(3, 9)),
}

# Exhaustive search over the grid, reusing the same folds as the baseline
# loop and ranking candidates by R2. n_jobs=-1 uses all available processors.
grid_search = GridSearchCV(
    estimator=model,
    param_grid=param_grid,
    cv=kf,
    scoring="r2",
    n_jobs=-1,
)

grid_search.fit(features, ground_truths)

print(grid_search.best_params_)

R2 Score: 0.44960274424132096
Mean Squared Error: 2307.9395212180257
R2 Score: -0.236365905073864
Mean Squared Error: 5851.158630070643
R2 Score: 0.5958985898959019
Mean Squared Error: 2747.8411156805864
R2 Score: 0.7377646274260763
Mean Squared Error: 1587.7106136995649
R2 Score: 0.6406829018426705
Mean Squared Error: 1807.8740823017777
R2 Score: 0.70372234387163
Mean Squared Error: 2304.3123582998746
R2 Score: 0.5017404975697688
Mean Squared Error: 2074.9348088182965
R2 Score: 0.38001865387776135
Mean Squared Error: 3719.8592003374024
R2 Score: 0.32652633099421635
Mean Squared Error: 7637.251104851183
R2 Score: 0.3270044819868978
Mean Squared Error: 5687.030084735499
{'max_depth': 7, 'min_samples_leaf': 6}


In [43]:
# Render the per-fold baseline scores collected earlier as an ASCII table.
table = PrettyTable()
table.field_names = scores_headers
table.title = "Error Scores with Default Params"

# One table row per cross-validation fold.
for fold_row in scores_list:
    table.add_row(fold_row)

print(table)

+--------------------------------------+
|   Error Scores with Default Params   |
+------+----------+--------------------+
| Fold | R2 Score | Mean Squared Error |
+------+----------+--------------------+
|  1   |   0.45   |      2307.94       |
|  2   |  -0.24   |      5851.16       |
|  3   |   0.60   |      2747.84       |
|  4   |   0.74   |      1587.71       |
|  5   |   0.64   |      1807.87       |
|  6   |   0.70   |      2304.31       |
|  7   |   0.50   |      2074.93       |
|  8   |   0.38   |      3719.86       |
|  9   |   0.33   |      7637.25       |
|  10  |   0.33   |      5687.03       |
+------+----------+--------------------+
