In [14]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from tree.base import DecisionTree
from metrics import *

np.random.seed(42)

# Reading the data
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/auto-mpg/auto-mpg.data'
data = pd.read_csv(url, sep = '\s+', header = None,
                 names = ["mpg", "cylinders", "displacement", "horsepower", "weight",
                        "acceleration", "model year", "origin", "car name"])

In [2]:
data.head()

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model year,origin,car name
0,18.0,8,307.0,130.0,3504.0,12.0,70,1,chevrolet chevelle malibu
1,15.0,8,350.0,165.0,3693.0,11.5,70,1,buick skylark 320
2,18.0,8,318.0,150.0,3436.0,11.0,70,1,plymouth satellite
3,16.0,8,304.0,150.0,3433.0,12.0,70,1,amc rebel sst
4,17.0,8,302.0,140.0,3449.0,10.5,70,1,ford torino


In [15]:
# cleaning the data
data.loc[data['horsepower'] == '?', 'horsepower'] = 0
data['horsepower'] = pd.to_numeric(data['horsepower'])
data.drop(columns=["car name"], inplace=True)
y = data['mpg']
data.drop(columns=['mpg'],inplace=True)

data.head()

Unnamed: 0,cylinders,displacement,horsepower,weight,acceleration,model year,origin
0,8,307.0,130.0,3504.0,12.0,70,1
1,8,350.0,165.0,3693.0,11.5,70,1
2,8,318.0,150.0,3436.0,11.0,70,1
3,8,304.0,150.0,3433.0,12.0,70,1
4,8,302.0,140.0,3449.0,10.5,70,1


In [5]:
y.head()

0    18.0
1    15.0
2    18.0
3    16.0
4    17.0
Name: mpg, dtype: float64

In [16]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(data, y, test_size = 0.3, random_state = 42)

In [18]:
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error, r2_score

sklearn_tree = DecisionTreeRegressor(max_depth=5, random_state=42)
sklearn_tree.fit(X_train, y_train)

y_pred_sklearn = sklearn_tree.predict(X_test)

mae_sklearn = mae(y_test, y_pred_sklearn)
rmse_sklearn = rmse(y_test, y_pred_sklearn)

print(f"Scikit-learn Decision Tree MAE: {mae_sklearn:.4f}")
print(f"Scikit-learn Decision Tree RMSE: {rmse_sklearn:.4f}")

Scikit-learn Decision Tree MAE: 2.3640
Scikit-learn Decision Tree RMSE: 3.4111


In [19]:
tree = DecisionTree(criterion = "mse", max_depth = 5)
tree.fit(X_train, y_train)

y_pred = tree.predict(X_test)

mae_self = mae(y_test, y_pred)
rmse_self = rmse(y_test, y_pred)

print(f"Our Decision Tree MAE: {mae_self:.4f}")
print(f"Our Decision Tree RMSE: {rmse_self:.4f}")

Our Decision Tree MAE: 2.6503
Our Decision Tree RMSE: 4.1563
