# Decision Tree Regression

In [147]:
import pandas as pd
from sklearn.model_selection import train_test_split, KFold, GridSearchCV
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import r2_score, mean_squared_error
from sklearn.model_selection import cross_val_score

In [148]:
# Read the steel dataset (a CSV expected in the current working directory)
# into a DataFrame; every later cell derives its inputs from this frame.
steel_data = pd.read_csv(filepath_or_buffer="steel.csv")

In [149]:
# Take the frame's NumPy representation once and slice it column-wise:
# every column except the last is a model input, the last column is the target.
data_matrix = steel_data.values
features, ground_truths = data_matrix[:, :-1], data_matrix[:, -1]

# 10-fold splitter shared by the manual evaluation loop and the grid search.
# shuffle=False means the folds follow the row order of the file.
kf = KFold(shuffle=False, n_splits=10)

In [150]:
# Baseline: an unconstrained decision tree with a fixed seed for repeatability.
model = DecisionTreeRegressor(random_state=4)

# Manual cross-validation: refit the tree on each training split and report
# R2 and MSE on the matching held-out split.
for fold_number, (train_rows, test_rows) in enumerate(kf.split(features), start=1):
    print(f"Fold {fold_number}")

    # Inputs and targets for this fold, paired by split
    X_train, y_train = features[train_rows], ground_truths[train_rows]
    X_test, y_test = features[test_rows], ground_truths[test_rows]

    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    print(f"R2 Score: {r2_score(y_test, y_pred)}")
    print(f"Mean Squared Error: {mean_squared_error(y_test, y_pred)}")

# Hyperparameter search over tree depth and minimum leaf size (3..8 for both),
# scored by R2 on the same folds used above.
param_grid = {
    "max_depth": list(range(3, 9)),
    "min_samples_leaf": list(range(3, 9)),
}

grid_search = GridSearchCV(model, param_grid, scoring="r2", cv=kf, n_jobs=-1)
grid_search.fit(features, ground_truths)

# Best (max_depth, min_samples_leaf) combination found by the search
print(grid_search.best_params_)

Fold 1
R2 Score: 0.44960274424132096
Mean Squared Error: 2307.9395212180257
Fold 2
R2 Score: -0.236365905073864
Mean Squared Error: 5851.158630070643
Fold 3
R2 Score: 0.5958985898959019
Mean Squared Error: 2747.8411156805864
Fold 4
R2 Score: 0.7377646274260763
Mean Squared Error: 1587.7106136995649
Fold 5
R2 Score: 0.6406829018426705
Mean Squared Error: 1807.8740823017777
Fold 6
R2 Score: 0.70372234387163
Mean Squared Error: 2304.3123582998746
Fold 7
R2 Score: 0.5017404975697688
Mean Squared Error: 2074.9348088182965
Fold 8
R2 Score: 0.38001865387776135
Mean Squared Error: 3719.8592003374024
Fold 9
R2 Score: 0.32652633099421635
Mean Squared Error: 7637.251104851183
Fold 10
R2 Score: 0.3270044819868978
Mean Squared Error: 5687.030084735499
{'max_depth': 7, 'min_samples_leaf': 6}
