# Importing packages 

In [None]:
# OS traversal
import os
import shutil

# System functionalities
import sys

# Making the parent directory (the project root relative to this notebook)
# importable, without adding a duplicate entry on repeated runs
module_path = os.path.abspath(os.pardir)
if module_path not in sys.path:
    sys.path.append(module_path)

# Data wrangling 
import pandas as pd 

# Plotting
import matplotlib.pyplot as plt

# Regression boosting
from regression.boosting import RegressionGB

# GIF building 
import imageio

# Reading data 

In [None]:
# Loading the auto-mpg csv into a dataframe; the path is relative to the
# current working directory
d = pd.read_csv('data/auto-mpg.csv')

# Printing the (rows, columns) shape of the loaded frame
print(f"Shape of data: {d.shape}")

In [None]:
# Previewing the first 10 rows of the data
d.head(10)

# Gradient boosting 

The $\mathbb{Y}$ variable is mpg - miles per gallon. 

The $\mathbb{X}$ variable is the car weight.

In [None]:
# Target and single feature used in this cell
y = 'mpg'
x = 'weight'

# Plotting all the original points
plt.figure(figsize=(12, 8))
plt.plot(d[x], d[y], 'o', label='original')

# Numbers of boosting iterations to compare on the same figure
_m_iterations = [
    1,
    20,
    40,
]

for _m in _m_iterations:
    # Initiating a fresh model for every iteration count
    rgb = RegressionGB(
        d,
        y,
        [x],
        max_depth=3,
        min_sample_leaf=10,
        learning_rate=0.1,
    )

    # Fitting on data
    rgb.fit(m=_m)

    # Predicting row by row: each record is a {feature: value} dict.
    # Only the feature column is converted, and the loop variable no longer
    # reuses the name `y`, which holds the target column name.
    _input = d[[x]].to_dict('records')
    yhat = [rgb.predict(_row) for _row in _input]

    # Plotting straight from the predictions instead of writing a 'yhat'
    # column into d: d is the training frame handed to every later model,
    # so it is kept free of derived columns
    plt.plot(d[x], yhat, 'o', label=f'{_m} iterations')

# Figure-level decorations are set once, after all series are drawn
plt.title(f'{y} vs {x}')
plt.xlabel(x)
plt.ylabel(y)
plt.legend()
plt.show()

# Multi input gradient boosting animation 



In [None]:
# Defining the y_var
_y_var = 'mpg'

# Defining the features; one subplot is drawn per feature
_x_vars = ['weight', 'acceleration']

# Creating a tmp directory for the gif frames
_tmp_dir = os.path.join('tmp')

# Deleting all the previous runs so stale frames never end up in the gif
if os.path.exists(_tmp_dir):
    shutil.rmtree(_tmp_dir)

# Creating
os.mkdir(_tmp_dir)

# Initiating the object
_reg_gb = RegressionGB(
    d=d,
    y_var=_y_var,
    x_vars=_x_vars,
    max_depth=2,
    min_sample_leaf=2,
    learning_rate=0.3,
)

# Number of frames
_n = 30

# Frame paths in drawing order. Keeping this list avoids recovering the order
# from os.listdir() + modification times, which is unreliable when several
# frames share the same timestamp.
_files = []

for i in range(_n):
    _filename = os.path.join(_tmp_dir, f"frame_{i}.png")

    # Frame 0 shows the predictions held by the object before any fit call;
    # every later frame follows one more fit(m=1) call
    # NOTE(review): presumably each fit(m=1) adds one boosting round on top of
    # the previous ones - confirm against RegressionGB.fit
    if i > 0:
        _reg_gb.fit(m=1)

    # Plotting the original points and the current predictions
    _fig = plt.figure(figsize=(12, 8))

    for _col, _x_var in enumerate(_x_vars, start=1):
        plt.subplot(1, len(_x_vars), _col)
        # Same alpha on every panel (the original used 0.5 and 0.55)
        plt.plot(d[_x_var], d[_y_var].values, 'o', label='original', alpha=0.5)
        plt.scatter(
            d[_x_var],
            _reg_gb._predictions,
            edgecolors='black',
            label=f'predictions - iteration {i}',
            color='orange',
        )
        plt.xlabel(_x_var)
        plt.ylabel(_y_var)
        plt.title('Gradient boosting results')
        plt.legend()

    plt.savefig(_filename)

    # Closing the figure once it is on disk; otherwise all _n figures stay
    # open in memory until the end of the session
    plt.close(_fig)

    _files.append(_filename)

# Saving the gif
_gif_dir = os.path.join('gif')
os.makedirs(_gif_dir, exist_ok=True)

with imageio.get_writer(os.path.join(_gif_dir, 'RGB.gif'), mode='I', duration=0.4) as writer:
    for filename in _files:
        image = imageio.imread(filename)
        writer.append_data(image)