# Animated Visualizations - Homework Results

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.animation import FuncAnimation
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction import DictVectorizer
from sklearn.ensemble import RandomForestRegressor
import xgboost as xgb
from sklearn.metrics import mean_squared_error
from IPython.display import HTML

In [None]:
# Read the raw dataset and replace every missing value with 0 in one step.
df = pd.read_csv('car_fuel_efficiency.csv').fillna(0)

# Separate the feature columns from the regression target.
X = df.drop(columns='fuel_efficiency_mpg')
y = df['fuel_efficiency_mpg'].values

# Two-stage split with a fixed seed: hold out 20% of the rows for testing, then
# take 25% of the remaining 80% for validation (roughly 60/20/20 overall).
X_full_train, X_test, y_full_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=1
)
X_train, X_val, y_train, y_val = train_test_split(
    X_full_train, y_full_train, test_size=0.25, random_state=1
)


def _as_records(frame):
    """Return the rows of *frame* as a list of ``{column: value}`` dicts."""
    return frame.to_dict(orient='records')


# Fit the vectorizer on the training rows only, then reuse the fitted mapping
# for the validation and test rows. From here on X_train / X_val / X_test hold
# the vectorized (sparse) matrices instead of DataFrames.
dv = DictVectorizer(sparse=True)
X_train = dv.fit_transform(_as_records(X_train))
X_val = dv.transform(_as_records(X_val))
X_test = dv.transform(_as_records(X_test))

## Animation 1: RMSE vs n_estimators

In [None]:
# Forest sizes to evaluate: 10, 20, ..., 200 trees.
n_values = list(range(10, 210, 10))


def _validation_rmse(n_trees):
    """Fit a random forest with *n_trees* trees and return its validation RMSE."""
    forest = RandomForestRegressor(n_estimators=n_trees, random_state=1, n_jobs=-1)
    forest.fit(X_train, y_train)
    return np.sqrt(mean_squared_error(y_val, forest.predict(X_val)))


rmse_values = [_validation_rmse(n_trees) for n_trees in n_values]

fig, ax = plt.subplots(figsize=(10, 6))

# Two initially empty artists: the curve drawn so far and a marker on its
# most recent point. Both are updated in place by the frame callback.
line, = ax.plot([], [], 'b-', linewidth=2)
point, = ax.plot([], [], 'ro', markersize=10)

ax.set_title('RMSE vs n_estimators', fontsize=14)
ax.set_xlabel('n_estimators', fontsize=12)
ax.set_ylabel('RMSE', fontsize=12)
ax.set_xlim(0, 210)
# Fix the y-range up front, with a 5% margin below the minimum and above the
# maximum, so the axes do not rescale while the curve grows.
ax.set_ylim(min(rmse_values)*0.95, max(rmse_values)*1.05)
ax.grid(True, alpha=0.3)


def init():
    """Empty both artists and return them (required because blit=True)."""
    for artist in (line, point):
        artist.set_data([], [])
    return line, point


def animate(i):
    """Draw frame *i*: the curve through point *i* plus a marker on point *i*."""
    shown = i + 1
    line.set_data(n_values[:shown], rmse_values[:shown])
    point.set_data([n_values[i]], [rmse_values[i]])
    return line, point


# One frame per evaluated forest size, 200 ms apart.
anim = FuncAnimation(
    fig,
    animate,
    init_func=init,
    frames=len(n_values),
    interval=200,
    blit=True,
)
# Close the current figure; presumably this keeps the notebook from also
# rendering a static copy next to the HTML animation.
plt.close()
HTML(anim.to_jshtml())

## Animation 2: Mean RMSE vs max_depth

In [None]:
depths = [10, 15, 20, 25]
mean_rmse_per_depth = []

# For every depth limit, average the validation RMSE over forests of
# 10, 20, ..., 200 trees.
for depth in depths:
    rmse_scores = []
    for n in range(10, 210, 10):
        model = RandomForestRegressor(n_estimators=n, max_depth=depth, random_state=1, n_jobs=-1)
        model.fit(X_train, y_train)
        y_pred = model.predict(X_val)
        rmse = np.sqrt(mean_squared_error(y_val, y_pred))
        rmse_scores.append(rmse)
    mean_rmse_per_depth.append(np.mean(rmse_scores))

fig, ax = plt.subplots(figsize=(10, 6))


def _style_depth_axes():
    """Apply the fixed limits, labels, ticks and grid of the max_depth chart.

    Shared by the initial setup and by every frame, because ``ax.clear()``
    in ``animate`` resets all of these settings.
    """
    ax.set_xlim(-0.5, len(depths)-0.5)
    ax.set_ylim(0, max(mean_rmse_per_depth)*1.1)
    ax.set_xlabel('max_depth', fontsize=12)
    ax.set_ylabel('Mean RMSE', fontsize=12)
    ax.set_title('Mean RMSE vs max_depth', fontsize=14)
    # Bars sit at integer positions 0..len(depths)-1; label them with the depths.
    ax.set_xticks(range(len(depths)))
    ax.set_xticklabels(depths)
    ax.grid(True, alpha=0.3, axis='y')


_style_depth_axes()


def init():
    """Leave the styled, still empty axes as the starting state.

    Nothing needs resetting here: every frame clears and redraws the axes.
    """
    return ax,


def animate(i):
    """Redraw the chart with the bars for the first ``i + 1`` depths.

    Each visible bar is annotated with its mean RMSE (three decimals).
    """
    visible = mean_rmse_per_depth[:i+1]
    ax.clear()
    ax.bar(range(i+1), visible, color='steelblue', alpha=0.7)
    _style_depth_axes()
    for j, v in enumerate(visible):
        # Place the value label slightly above the top of its bar.
        ax.text(j, v + 0.01, f'{v:.3f}', ha='center', va='bottom')
    return ax,


# blit=False because each frame redraws the whole axes, not individual artists.
anim = FuncAnimation(fig, animate, init_func=init, frames=len(depths), interval=800, blit=False)
# Close the current figure; presumably this keeps the notebook from also
# rendering a static copy next to the HTML animation.
plt.close()
HTML(anim.to_jshtml())

## Animation 3: Feature Importance

In [None]:
# Single forest (10 trees, depth limit 20) whose feature importances are plotted.
model = RandomForestRegressor(
    n_estimators=10, max_depth=20, random_state=1, n_jobs=-1
).fit(X_train, y_train)

features = ['vehicle_weight', 'horsepower', 'acceleration', 'engine_displacement']

# Look up each feature's column position in the fitted DictVectorizer and read
# the importance the forest assigned to that column.
forest_importances = model.feature_importances_
importances = []
for feature_name in features:
    column = dv.feature_names_.index(feature_name)
    importances.append(forest_importances[column])

fig, ax = plt.subplots(figsize=(10, 6))


def _style_importance_axes():
    """Apply the fixed limits, labels, ticks and grid of the importance chart."""
    ax.set_xlim(0, max(importances)*1.1)
    ax.set_ylim(-0.5, len(features)-0.5)
    ax.set_xlabel('Importance', fontsize=12)
    ax.set_title('Feature Importance', fontsize=14)
    # Horizontal bars sit at integer rows 0..len(features)-1, labelled by name.
    ax.set_yticks(range(len(features)))
    ax.set_yticklabels(features)
    ax.grid(True, alpha=0.3, axis='x')


_style_importance_axes()


def init():
    """Keep the styled, empty axes as the starting state of the animation."""
    return ax,


def animate(i):
    """Redraw the chart with horizontal bars for the first ``i + 1`` features.

    Each visible bar is annotated with its importance (three decimals).
    """
    shown = importances[:i+1]
    ax.clear()
    ax.barh(range(i+1), shown, color='forestgreen', alpha=0.7)
    # ax.clear() dropped all axis settings, so they are applied again.
    _style_importance_axes()
    for row, value in enumerate(shown):
        # Put the value label just to the right of the end of its bar.
        ax.text(value + 0.005, row, f'{value:.3f}', va='center')
    return ax,


# One frame per feature; blit=False because each frame redraws the whole axes.
anim = FuncAnimation(
    fig,
    animate,
    init_func=init,
    frames=len(features),
    interval=800,
    blit=False,
)
# Close the current figure; presumably this keeps the notebook from also
# rendering a static copy next to the HTML animation.
plt.close()
HTML(anim.to_jshtml())

## Animation 4: XGBoost eta Comparison

In [None]:
dtrain = xgb.DMatrix(X_train, label=y_train)
dval = xgb.DMatrix(X_val, label=y_val)

eta_values = [0.3, 0.1]
eta_rmse = []

# Train one booster per learning rate (100 rounds each, all other parameters
# identical) and record its RMSE on the validation set.
for eta in eta_values:
    xgb_params = {
        'eta': eta,
        'max_depth': 6,
        'min_child_weight': 1,
        'objective': 'reg:squarederror',
        'nthread': 8,
        'seed': 1,
        'verbosity': 0,
    }
    model = xgb.train(xgb_params, dtrain, num_boost_round=100)
    y_pred = model.predict(dval)
    rmse = np.sqrt(mean_squared_error(y_val, y_pred))
    eta_rmse.append(rmse)

# Position of the lowest validation RMSE; computed once here rather than on
# every frame, since the results no longer change.
best_idx = int(np.argmin(eta_rmse))

fig, ax = plt.subplots(figsize=(10, 6))
colors = ['coral', 'skyblue']


def _style_eta_axes():
    """Apply the fixed limits, labels, ticks and grid of the eta chart.

    Shared by the initial setup and by every frame, because ``ax.clear()``
    in ``animate`` resets all of these settings.
    """
    ax.set_xlim(-0.5, len(eta_values)-0.5)
    ax.set_ylim(0, max(eta_rmse)*1.1)
    ax.set_xlabel('eta', fontsize=12)
    ax.set_ylabel('RMSE', fontsize=12)
    ax.set_title('XGBoost: RMSE vs eta', fontsize=14)
    # Bars sit at integer positions 0..len(eta_values)-1; label them with the etas.
    ax.set_xticks(range(len(eta_values)))
    ax.set_xticklabels(eta_values)
    ax.grid(True, alpha=0.3, axis='y')


_style_eta_axes()


def init():
    """Keep the styled, empty axes as the starting state of the animation."""
    return ax,


def animate(i):
    """Redraw the chart with the bars for the first ``i + 1`` eta values.

    Each visible bar is annotated with its RMSE (four decimals). On the last
    frame, once every bar is visible, the bar with the lowest RMSE is also
    marked "BEST".
    """
    shown = eta_rmse[:i+1]
    # Cycle through the palette so adding eta values never leaves a bar
    # without a colour.
    bar_colors = [colors[k % len(colors)] for k in range(i+1)]

    ax.clear()
    ax.bar(range(i+1), shown, color=bar_colors, alpha=0.7)
    _style_eta_axes()
    for j, v in enumerate(shown):
        # Place the value label slightly above the top of its bar.
        ax.text(j, v + 0.01, f'{v:.4f}', ha='center', va='bottom', fontweight='bold')
    # Only the final frame shows every bar, so only then can the winner be
    # labelled; derive that frame from the data instead of hard-coding it.
    if i == len(eta_values) - 1:
        ax.text(best_idx, eta_rmse[best_idx]/2, 'BEST', ha='center', fontsize=16, fontweight='bold', color='white')
    return ax,


# blit=False because each frame redraws the whole axes, not individual artists.
anim = FuncAnimation(fig, animate, init_func=init, frames=len(eta_values), interval=1000, blit=False)
# Close the current figure; presumably this keeps the notebook from also
# rendering a static copy next to the HTML animation.
plt.close()
HTML(anim.to_jshtml())