# Importing packages 

In [None]:
# Data wrangling 
import pandas as pd 

# Plotting
import matplotlib.pyplot as plt

# Weak base learner 
from tree import Tree

# Reading data 

In [None]:
# Load the auto-mpg CSV into a DataFrame; the path is relative to the
# notebook's working directory.
d = pd.read_csv('data/auto-mpg.csv')

In [None]:
# Preview the first 10 rows of the loaded data
d.head(10)

# Plotting the data

The $\mathbb{Y}$ variable is mpg - miles per gallon.

The $\mathbb{X}$ variable is the car weight.

In [None]:
# Column names used throughout the notebook: target (Y) and the single feature (X)
y = 'mpg'
x = 'weight'

# Scatter plot of the raw observations. The title and axis labels are built
# from the column names above so they cannot drift from what is plotted.
fig, ax = plt.subplots(figsize=(12, 8))
ax.plot(d[x], d[y], 'o')
ax.set_title('{} vs {}'.format(y, x))
ax.set_xlabel(x)
ax.set_ylabel(y)
plt.show()

# Fitting a regression tree 

In [None]:
# Build the regression tree from the project-local `Tree` class.
# Positional arguments: the DataFrame, the target column name and the list of
# feature column names (presumably in that order -- confirm against tree.py).
# Growth is capped with max_depth=2.
reg = Tree(d, y, [x], max_depth=2)

# Fit the tree on the data handed to the constructor
reg.fit()

# Display the fitted tree
reg.print_tree()

In [None]:
# Predicting
# Build one single-feature record per row, i.e. {x: <value of column x>}.
# Selecting the feature column first avoids converting every column of the
# frame to dicts, and avoids a loop variable named `y` that would shadow the
# notebook-level target column name.
_input = d[[x]].to_dict('records')
yhat = [reg.predict(row) for row in _input]

# Saving the predictions to the training set
d['yhat'] = yhat

# Plotting the observed and the predicted points on the same axes; the title
# and axis labels are derived from the column names held in `x` and `y`.
fig, ax = plt.subplots(figsize=(12, 8))
ax.plot(d[x], d[y], 'o', label='original')
ax.plot(d[x], d['yhat'], 'o', label='predicted')
ax.set_title('{} vs {}'.format(y, x))
ax.set_xlabel(x)
ax.set_ylabel(y)
ax.legend()
plt.show()