In [None]:
import numpy as np
import pandas as pd
%matplotlib inline

In [None]:
# Passenger data
from pmdarima.datasets import load_airpassengers

# Load the data as a Series and keep everything except its final observation.
series = load_airpassengers(as_series=True).iloc[:-1]

series.plot()

In [None]:
from sklearn.model_selection import train_test_split

# Ordered split (no shuffling): the final 12 observations become the test set.
train, test = train_test_split(
    series,
    test_size=12,
    shuffle=False,
)

train.tail()

In [None]:
# Display the 12 held-out observations produced by the split.
test

In [None]:
import pmdarima as pm

# Automatic ARIMA fit on the training data with a seasonal period of m=12.
# trace=True requests progress output from the search; warnings are suppressed.
model = pm.auto_arima(
    train,
    m=12,
    suppress_warnings=True,
    trace=True,
)

In [None]:
# Forecast one step per test observation and also request the interval
# bounds at alpha=0.05.
forecasts, pred_interval = model.predict(
    n_periods=len(test),
    alpha=0.05,
    return_conf_int=True,
)
forecasts

In [None]:
# Interval bounds returned alongside the forecasts (return_conf_int=True);
# columns 0 and 1 are used as the edges of the shaded band when plotting.
pred_interval

In [None]:
# Plot training data, held-out actuals, forecasts and the interval band from
# `pred_interval` on a single set of axes.
test = test.rename('Actual')
# np.asarray drops any index the forecasts may already carry, so the values are
# aligned to the test period by position instead of being reindexed by label.
forecasts = pd.Series(np.asarray(forecasts), index=test.index, name='Forecasts')

ax = train.plot(label='Train', figsize=(12, 6), title='Forecasting with Auto Arima')
test.plot(ax=ax, color='red')
forecasts.plot(ax=ax, color='green')
ax.fill_between(test.index,
                pred_interval[:, 0],
                pred_interval[:, 1],
                alpha=0.9, color='orange',
                label="Prediction Intervals")
# Build the legend last so the shaded band and the training line get entries;
# a legend created before fill_between would not include the band's label.
ax.legend();

In [None]:
# Same forecast, but with a narrower interval: alpha=0.5 instead of 0.05.
forecasts, pred_interval = model.predict(n_periods=len(test), return_conf_int=True, alpha=0.5)

test = test.rename('Actual')
# np.asarray drops any index the forecasts may already carry, so the values are
# aligned to the test period by position instead of being reindexed by label.
forecasts = pd.Series(np.asarray(forecasts), index=test.index, name='Forecasts')

ax = train.plot(label='Train', figsize=(12, 6), title='Forecasting with Auto Arima')
test.plot(ax=ax, color='red')
forecasts.plot(ax=ax, color='green')
ax.fill_between(test.index,
                pred_interval[:, 0],
                pred_interval[:, 1],
                alpha=0.9, color='orange',
                label="Prediction Intervals")
# Build the legend last so the shaded band and the training line get entries;
# a legend created before fill_between would not include the band's label.
ax.legend();

# Previsão de Excedência

In [None]:
# Load the wave-height records (hourly, going by the file name) from a path
# relative to the notebook's working directory.
data = pd.read_csv('assets/wave_height_hourly.csv')

In [None]:
# Preview the first rows to check which columns were read.
data.head()

In [None]:
# Wave-height values as a Series indexed by the parsed 'time' column.
wave = pd.Series(
    data=data['SignificantWaveHeight'].values,
    index=pd.to_datetime(data['time']),
)
wave.plot()

In [None]:
from src.tde import UnivariateTDE

# Project helper applied to the wave series with k=6 and horizon=12.
# NOTE(review): presumably a time-delay embedding (k lagged values plus
# `horizon` future steps per row) — confirm against src/tde.py.
wave_tde = UnivariateTDE(
    wave,
    k=6,
    horizon=12,
)
wave_tde.head()

In [None]:
# Columns with a literal '+' in their name (such as 't+12') are treated as
# future values. Match the '+' as plain text: a backslash-plus pattern in a
# non-raw string is an invalid escape sequence and triggers a warning.
is_future = wave_tde.columns.str.contains('+', regex=False)
# Features: every column that is not flagged as a future value.
X = wave_tde.iloc[:, ~is_future]
# Target: the 't+12' column.
y = wave_tde['t+12']

X.head()

In [None]:
# Display the target series (the 't+12' column).
y

In [None]:
# Ordered split of features and target (no shuffling); the last 30% of the
# rows form the test set.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    shuffle=False,
    test_size=0.3,
)

In [None]:
# Summary statistics of the training target.
y_train.describe()

In [None]:
# Exceedance threshold: the 0.95 quantile of the training target only.
threshold = y_train.quantile(q=0.95)
threshold

In [None]:
# Binary labels: 1 where the training target is strictly above the threshold,
# 0 otherwise.
y_train_clf = y_train.gt(threshold).astype(int)
y_train_clf

In [None]:
# Relative frequency of each class in the training labels.
y_train_clf.value_counts(normalize=True)

In [None]:
# Label the test target with the threshold computed on the training data,
# then show the relative class frequencies.
y_test_clf = y_test.gt(threshold).astype(int)
y_test_clf.value_counts(normalize=True)

In [None]:
from sklearn.ensemble import RandomForestClassifier

# Exceedance classifier trained on X_train and the binary labels. Note that this
# rebinds `model`, which earlier in the notebook held the ARIMA model.
# random_state is fixed so that a fresh Restart & Run All rebuilds the same
# forest and therefore reproduces the same probabilities and AUC.
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train_clf)

In [None]:
# Predicted class probabilities for the test features; column 1 is read as the
# exceedance probability in the next step.
forecasts_prob = model.predict_proba(X_test)
forecasts_prob

In [None]:
# Take column 1 as the probability of the positive (exceedance) class.
# NOTE(review): assumes model.classes_ is [0, 1] — true as long as both labels
# occur in the training data.
exceedance_prob = forecasts_prob[:,1]
# Peek at the first six probabilities.
exceedance_prob[:6]

In [None]:
# Wrap the probabilities in a Series aligned to the test labels, then overlay
# the true labels (red) and the predicted probabilities (green).
exceedance_prob = pd.Series(
    exceedance_prob,
    index=y_test_clf.index,
    name='Exceedance Prob.',
)
ax = y_test_clf.plot(figsize=(12, 6), color='red', alpha=0.5, legend=True)
exceedance_prob.plot(color='green', alpha=0.5, legend=True)

In [None]:
from sklearn.metrics import RocCurveDisplay, roc_auc_score, roc_curve

# ROC curve of the fitted classifier on the test set.
# plot_roc_curve was deprecated in scikit-learn 1.0 and removed in 1.2;
# RocCurveDisplay.from_estimator (available since 1.0) is its replacement.
RocCurveDisplay.from_estimator(model, X_test, y_test_clf)

In [None]:

# ROC AUC of the exceedance probabilities against the true test labels.
roc_auc_score(y_test_clf, exceedance_prob)

In [None]:
from imblearn.ensemble import BalancedRandomForestClassifier

In [None]:
# Imbalance-aware forest from imbalanced-learn, fitted on the same training
# data; this rebinds `model` again.
# random_state is fixed so repeated runs give the same probabilities and AUC.
model = BalancedRandomForestClassifier(random_state=42)
model.fit(X_train, y_train_clf)

In [None]:
# Predicted class probabilities on the test features from the current `model`;
# overwrites the earlier `forecasts_prob`.
forecasts_prob = model.predict_proba(X_test)
forecasts_prob

In [None]:
# ROC AUC using column 1 of the probabilities as the exceedance score.
roc_auc_score(y_test_clf, forecasts_prob[:,1])

In [None]:
from imblearn.over_sampling import SMOTE
from imblearn.under_sampling import TomekLinks

In [None]:
# Resample only the training data with SMOTE; the test set is left untouched.
# random_state is fixed so the generated samples are the same on every run.
smote = SMOTE(random_state=42)
X_train_r, y_train_clf_r = smote.fit_resample(X_train, y_train_clf)

In [None]:
# Random forest trained on the SMOTE-resampled training data.
# random_state is fixed so repeated runs give the same probabilities and AUC.
smote_model = RandomForestClassifier(random_state=42)
smote_model.fit(X_train_r, y_train_clf_r)

In [None]:
# Predicted class probabilities on the original (non-resampled) test features.
forecasts_prob_sm = smote_model.predict_proba(X_test)
forecasts_prob_sm

In [None]:
# ROC AUC of the SMOTE-trained model, using column 1 as the exceedance score.
roc_auc_score(y_test_clf, forecasts_prob_sm[:,1])