In [3]:
import pandas as pd
import numpy as np
from math import sqrt
from sklearn.metrics import mean_squared_error
from statsmodels.tsa.statespace.sarimax import SARIMAX
from statsmodels.tsa.holtwinters import ExponentialSmoothing
import matplotlib.pyplot as plt

# Cargar datos
df_births = pd.read_csv('daily-total-female-births.csv')
df_shampoo = pd.read_csv('shampoo.csv')
df_temp = pd.read_csv('monthly-mean-temp.csv') 
df_sales = pd.read_csv('monthly-car-sales.csv')

# Funciones útiles
def train_test_split(data, test_size):
    split_idx = int(len(data)*(1-test_size))
    train, test = data.iloc[:split_idx], data.iloc[split_idx:]
    return train, test

def walk_forward_validation(data, test_size, model_func, cfg):
    train, test = train_test_split(data, test_size)
    history = [x for x in train]
    pred = []
    for i in range(len(test)):
        yhat = model_func(history, cfg)
        pred.append(yhat)
        history.append(test[i])
    error = sqrt(mean_squared_error(test, pred))
    return error, pred

def plot_results(train, test, pred, name):
    ax = train.plot(label='Train', figsize=(20, 8))
    test.plot(ax=ax, label='Test')
    pd.Series(pred, index=test.index).plot(ax=ax, label='Prediction')
    ax.legend()
    plt.title(name)
    plt.savefig(f'{name}.png')
    
# Promedio simple    
def predict_mean(history, cfg):
    window, avg_type = cfg
    if avg_type=='mean':
        return np.mean(history[-window:])
    return np.median(history[-window:])
    
# SARIMA
def predict_sarima(history, cfg):
    order, seasonal_order, trend = cfg
    model = SARIMAX(history, order=order, seasonal_order=seasonal_order,
                    trend=trend, enforce_stationarity=False,
                    enforce_invertibility=False)
    fitted = model.fit(disp=False)  
    return fitted.predict(len(history), len(history))[0]

# Holt-Winters  
def predict_holtwinters(history, cfg):
    t,d,s,p,b,r = cfg
    model = ExponentialSmoothing(np.asarray(history), trend=t, damped=d, seasonal=s, 
                                 seasonal_periods=p)
    fitted = model.fit(optimized=True, use_boxcox=b, remove_bias=r)
    return fitted.predict(len(history), len(history))[0]

# Evaluación y visualización para cada conjunto de datos
for name, df in [('Births', df_births), ('Shampoo', df_shampoo), 
                 ('Temperature', df_temp), ('Sales', df_sales)]:
    
    print(f'Evaluando {name}')
    
    # Dividir datos
    test_size = 0.2
    test = test.fillna(method='ffill') 
    train, test = train_test_split(df, test_size)
    
    # Promedio simple
    cfg = [30, 'mean'] 
    error, pred = walk_forward_validation(df, test_size, predict_mean, cfg)
    print(f'Promedio RMSE: {error:.2f}')
    plot_results(train, test, pred, f'{name}_promedio')
    
    # SARIMA
    cfg = [(1,0,1), (1,0,1,12), 'ct']
    error, pred = walk_forward_validation(df, test_size, predict_sarima, cfg)
    print(f'SARIMA RMSE: {error:.2f}')
    plot_results(train, test, pred, f'{name}_sarima')
    
    # Holt-Winters
    cfg = ['add', False, 'add', 12, True, False]
    error, pred = walk_forward_validation(df, test_size, predict_holtwinters, cfg)
    print(f'Holt-Winters RMSE: {error:.2f}') 
    plot_results(train, test, pred, f'{name}_holtwinters')
    
    print()

Evaluando Births


UFuncTypeError: ufunc 'add' did not contain a loop with signature matching types (dtype('<U6'), dtype('<U6')) -> None