In [None]:
import numpy as np
import pandas as pd
%matplotlib inline

In [None]:
# Toy example: the integers 0 through 19 as a pandas Series.
n_points = 20
dummy_series = pd.Series(range(n_points))
dummy_series

In [None]:
from src.tde import UnivariateTDE

# Series -> matrix: embed the toy series with k=5 and a horizon of 1.
# (presumably k is the number of lagged columns — confirm in src/tde.py)
UnivariateTDE(
    data=dummy_series,
    horizon=1,
    k=5,
)

In [None]:
# Several points into the future: same embedding as before, but with horizon=5.
UnivariateTDE(data=dummy_series, k=5, horizon=5)

In [None]:
from pmdarima.datasets import load_airpassengers

# Load the air-passengers data as a pandas Series and leave out its final observation.
air_passengers = load_airpassengers(as_series=True)
series = air_passengers.iloc[:-1]

series.plot()

In [None]:
# Embed the real series (k=5, horizon=1) and preview the first rows of the result.
series_df = UnivariateTDE(
    data=series,
    horizon=1,
    k=5,
)
series_df.head()

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor

# Separate the explanatory variables (every column except 't+1') from the target.
target_col = 't+1'
y = series_df[target_col]
X = series_df.drop(columns=target_col)

# Train/test split. shuffle=False keeps the rows in their original order, so the
# final 30% of the rows form the test set.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    shuffle=False,
    test_size=0.3,
)

In [None]:
# Train a Random Forest on the training split.
# A fixed random_state pins the forest's internal randomness (bootstrap samples),
# so the forecasts and error computed from this model are reproducible on re-run.
model = RandomForestRegressor(random_state=42)
model.fit(X_train, y_train)

In [None]:
# Forecasts: predict the target for every row of the test split,
# then display only the first ten values.
forecasts = model.predict(X_test)
forecasts[:10]

In [None]:
# Computing the error of the Random Forest forecasts.
# MASE is a project helper; presumably the mean absolute scaled error — confirm in src/error.py.
from src.error import MASE

# NOTE(review): arguments are passed as (training target, test target, forecasts);
# verify this matches the order expected by src.error.MASE.
MASE(y_train, y_test, forecasts)

In [None]:
# Give the actual values a readable label and wrap the raw predictions in a Series
# that shares the test index, so both line up on the time axis.
y_test = y_test.rename('Actual')
forecasts = pd.Series(forecasts, index=y_test.index, name='Forecasts')

# Training history, actual test values and forecasts, all on the same axes.
ax = y_train.plot()
ax = y_test.plot(
    ax=ax,
    figsize=(12, 6),
    title='Forecasting with RF',
    color='red',
    legend=True,
)
forecasts.plot(ax=ax, color='green', legend=True)

In [None]:
from sklearn.linear_model import Ridge, Lasso, LinearRegression
from sklearn.tree import DecisionTreeRegressor
from lightgbm import LGBMRegressor

# Candidate regressors to compare on the same train/test split.
# The linear models are left at their defaults.
ridge = Ridge()
lasso = Lasso()
linear_reg = LinearRegression()

# The tree-based learners accept a random_state; fixing it makes their fitted
# models — and the comparison between them — reproducible across re-runs.
rf = RandomForestRegressor(random_state=42)
tree = DecisionTreeRegressor(random_state=42)
lgbm = LGBMRegressor(random_state=42)

In [None]:
# Fit every candidate on the same training split, in the order they were created.
for estimator in (ridge, lasso, linear_reg, rf, tree):
    estimator.fit(X_train, y_train)

# The last fit stays as the cell's final expression so the cell output is unchanged.
lgbm.fit(X_train, y_train)

In [None]:
models = [ridge, lasso, linear_reg, rf, tree, lgbm]
# One plotting colour per model, in the same order as `models`.
colors = ['yellow','orange','green','grey','black','purple']

# One forecast Series per model, aligned with the test index and labelled by the
# model's position in `models` (the label is what shows up in plot legends).
forecasts = [
    pd.Series(m.predict(X_test), index=y_test.index, name=f'Forecast {j}')
    for j, m in enumerate(models)
]


In [None]:
# Overlay every model's forecast, in its assigned colour, on the train/test series.
ax = y_train.plot()
ax = y_test.plot(ax=ax, figsize=(12, 6), color='red', legend=True)
for pred, line_color in zip(forecasts, colors):
    pred.plot(ax=ax, color=line_color, legend=True)

In [None]:
# Print the MASE of each model's forecast, one value per line, in model order.
scores = (MASE(y_train, y_test, pred) for pred in forecasts)
for score in scores:
    print(score)

In [None]:
# Refit every model with the first-differenced target. diff() leaves a NaN in the
# first position, so the first row is dropped from both the features and the target.
# NOTE: this refits the same estimator objects in place, replacing the fits made
# on the undifferenced target.
X_train_tail = X_train[1:]
y_train_diff = y_train.diff()[1:]

for estimator in (ridge, lasso, linear_reg, rf, tree):
    estimator.fit(X_train_tail, y_train_diff)

# The last fit stays as the cell's final expression so the cell output is unchanged.
lgbm.fit(X_train_tail, y_train_diff)

In [None]:
# Score the refitted models against the differenced test target. The first test
# difference is NaN, so it is dropped along with the first prediction.
models = [ridge, lasso, linear_reg, rf, tree, lgbm]
train_diff = y_train.diff()[1:]
test_diff = y_test.diff()[1:]
for m in models:
    pred = m.predict(X_test)
    print(MASE(train_diff, test_diff, pred[1:]))

# Transformations

##### About the importance of transformations

In [None]:
# Plot the toy series (the integers 0..19 built at the top of the notebook).
dummy_series.plot()

In [None]:
# Hold out a single observation as the test set; shuffle=False keeps the original
# order, so the held-out point is the last one of the series.
train, test = train_test_split(
    dummy_series,
    shuffle=False,
    test_size=1,
)

train.tail()

In [None]:
# Show the held-out part of the split (a single observation, since test_size=1).
test.head()

In [None]:
# Embed the training part of the toy series with k=3 and a one-step horizon,
# then display the resulting frame.
train_df = UnivariateTDE(
    data=train,
    horizon=1,
    k=3,
)

train_df

In [None]:
# Features are every column except 't+1'; the target is the 't+1' column.
X_train = train_df.drop(columns='t+1')
y_train = train_df['t+1']

In [None]:
from sklearn.ensemble import RandomForestRegressor

# Fixed random_state so the forecast shown below is the same on every re-run.
model = RandomForestRegressor(random_state=42)
# Fit on the bare values (no column names), matching the plain list passed to predict.
model.fit(X_train.values, y_train)

# Forecast the held-out point from the most recent observations of the training
# series (16, 17, 18 for this toy data), read from `train` rather than hard-coded.
n_lags = X_train.shape[1]
last_lags = train.values[-n_lags:]
forecast = model.predict([last_lags])

forecast

In [None]:
# same process but with differenced data
# NOTE(review): train.diff() starts with a NaN; presumably UnivariateTDE drops the
# incomplete rows — check the printed frame below to confirm.
train_df = UnivariateTDE(data=train.diff(), k=3, horizon=1)
print(train_df)

X_train, y_train = train_df.drop('t+1', axis=1), train_df['t+1']

from sklearn.ensemble import RandomForestRegressor

# Fixed random_state so the fitted forest, and the forecast made from it, are
# reproducible across re-runs.
model = RandomForestRegressor(random_state=42)
model.fit(X_train.values, y_train)

In [None]:
# The model was trained on first differences, so it must be queried with the most
# recent differences of the training series, not with the raw levels 16, 17, 18.
n_lags = X_train.shape[1]
last_diffs = train.diff().values[-n_lags:]
forecast = model.predict([last_diffs])
print(forecast)

# adding last known value to the forecast to revert the differencing operation
forecast + train.values[-1]