In [None]:
import tensorflow as tf
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import TimeSeriesSplit
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, SimpleRNN, InputLayer
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import importlib as imp
import datacleaner
import datetime
imp.reload(datacleaner)

from datacleaner import *

In [None]:
# Seed every random number generator used by the notebook. Seeding NumPy
# alone does not seed TensorFlow, so weight initialisation and batch
# shuffling would otherwise differ from run to run.
np.random.seed(1)
tf.keras.backend.clear_session()
tf.random.set_seed(1)

# na_filter=False turns off pandas' missing-value detection, so empty cells
# are read as-is instead of being converted to NaN.
bd = pd.read_csv(r"training_data.csv", na_filter=False)

In [None]:
# Preview the first rows of the frame exactly as read from the CSV.
bd.head()

In [None]:
def tratar_dados(bd):
    """Run the preprocessing pipeline on the raw frame.

    Each cleaning helper receives the result of the previous one. The helpers
    are not defined in this notebook (presumably they come from the
    ``datacleaner`` star-import - confirm). The train/validation indices are
    computed on the cleaned frame before it is normalised with
    ``norm_range=(-1, 1)``.

    Args:
        bd: Frame to clean.

    Returns:
        tuple: ``(bd, indice_treino, indice_val, escala)`` - the normalised
        frame, the two values returned by ``split_data`` and the scaler
        returned by ``data_normalization``.
    """
    etapas = (
        ordernar,
        RoadsCleaner,
        # data,  # date step currently disabled
        valores_em_falta,
        eliminar,
        incidentsNumbers,
        luminosidade,
        rainNumbers,
        delayNumbers,
        removeOutlier,
    )
    for etapa in etapas:
        bd = etapa(bd)

    # Split first, normalise afterwards (same order as the cleaning steps above).
    indice_treino, indice_val = split_data(bd, perc=10)
    escala, bd = data_normalization(bd, norm_range=(-1, 1))
    return bd, indice_treino, indice_val, escala
    
# Rebinds bd to the processed frame, so re-running this cell would feed the
# already-processed frame through the pipeline a second time.
bd,indice_treino,indice_val,escala=tratar_dados(bd)
bd.head()

In [None]:
# Index the frame by timestamp, parsed from the record_date column.
bd.index = pd.to_datetime(bd["record_date"], format="%Y-%m-%d %H:%M:00")
# serie is not defined in this notebook (presumably from datacleaner) - TODO confirm what it draws.
serie(bd,"incidents")
# Keep a two-column selection before record_date is removed from bd.
newBd = bd[["incidents","record_date"]]
# pop removes the column from bd in place and returns it, so the cell displays
# the removed column; re-running this cell fails because the column is gone.
bd.pop("record_date")

In [None]:
# Put the data on a regular hourly grid, fill the new rows by time-based
# interpolation, then drop every row whose timestamp falls in August.
temp = bd.resample("H").interpolate(method="time")
temp = temp.loc[temp.index.month != 8]
print(temp)

# Plot the incidents series. `right` is a fraction of the figure width, so a
# value of 10 pushes the right edge of the axes far past the figure itself.
fig = plt.figure()
plt.plot(temp["incidents"])
spacing = 10
fig.subplots_adjust(right=spacing)
plt.show()
print(temp["incidents"])

In [None]:
def df_to_X_y(df, window_size=5, target_col=8):
    """Turn a frame into overlapping input windows and next-step targets.

    Window ``i`` holds rows ``i .. i + window_size - 1`` (all columns); its
    target is the value of the target column in row ``i + window_size``.

    Args:
        df: Frame whose rows are consecutive observations.
        window_size: Number of rows in each input window.
        target_col: Column to predict. An ``int`` is a column position, a
            ``str`` is looked up by name in ``df.columns``. The default of 8
            keeps the position that was previously hard-coded.

    Returns:
        tuple: ``(X, y)`` as NumPy arrays. ``X`` has shape
        ``(n, window_size, n_columns)`` and ``y`` has shape ``(n,)`` with
        ``n = len(df) - window_size``. Both are empty when the frame has no
        more than ``window_size`` rows.
    """
    values = df.to_numpy()
    if isinstance(target_col, str):
        target_idx = df.columns.get_loc(target_col)
    else:
        target_idx = target_col

    n_windows = len(values) - window_size  # range() of a negative count is empty
    X = [values[start:start + window_size] for start in range(n_windows)]
    # The label is the target column of the row right after the window.
    y = [values[start + window_size][target_idx] for start in range(n_windows)]
    return np.array(X), np.array(y)

In [None]:
# Number of consecutive rows of `temp` fed to the model for each prediction.
WINDOW_SIZE = 48
X1, y1 = df_to_X_y(temp, WINDOW_SIZE)
# Display both shapes as a sanity check.
X1.shape, y1.shape

In [None]:
# Contiguous split in row order: the first block trains, the next one
# validates and whatever is left is the test set.
TRAIN_END, VAL_END = 6000, 7000

X_train1, X_val1, X_test1 = X1[:TRAIN_END], X1[TRAIN_END:VAL_END], X1[VAL_END:]
y_train1, y_val1, y_test1 = y1[:TRAIN_END], y1[TRAIN_END:VAL_END], y1[VAL_END:]

(
    X_train1.shape, y_train1.shape,
    X_val1.shape, y_val1.shape,
    X_test1.shape, y_test1.shape,
)

In [None]:
from tensorflow.keras.layers import *
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.losses import MeanSquaredError
from tensorflow.keras.metrics import RootMeanSquaredError
from tensorflow.keras.optimizers import Adam

In [None]:
def build_model(timesteps, n_features=13):
    """Build the (uncompiled) SimpleRNN regression model.

    The input shape now follows the arguments instead of a hard-coded
    ``(48, 13)``, so changing the window size no longer requires editing this
    function. The contradictory ``input_shape=(timesteps, 1)`` that used to be
    passed to the recurrent layer has been removed; the ``InputLayer`` alone
    defines the input.

    Args:
        timesteps: Number of rows in each input window.
        n_features: Number of columns in each row. Defaults to 13, the value
            that was previously hard-coded.

    Returns:
        A ``Sequential`` model: SimpleRNN(64, relu) -> Dense(8, relu) -> Dense(1).
    """
    model = Sequential()
    model.add(InputLayer((timesteps, n_features)))
    model.add(SimpleRNN(units=64, activation='relu'))
    model.add(Dense(8, 'relu'))
    model.add(Dense(1))  # single regression output

    # Optional: save the architecture as a PNG.
    # tf.keras.utils.plot_model(model, 'Kagglernn.png', show_shapes=True)
    return model

In [None]:
# Build the network for windows of WINDOW_SIZE rows and print its layer summary.
model = build_model(timesteps=WINDOW_SIZE)
model.summary()

In [None]:
# Checkpoint callback: with save_best_only=True the file is only overwritten
# when the callback's monitored metric improves.
cp1 = ModelCheckpoint(filepath='model_KaggleRNN/model.h5', save_best_only=True)

# Train on mean squared error and report RMSE alongside it.
model.compile(
    optimizer=Adam(learning_rate=0.001),
    loss=MeanSquaredError(),
    metrics=[RootMeanSquaredError()],
)

In [None]:
# Train for 100 epochs, evaluating on the validation windows after each epoch;
# cp1 is the checkpoint callback that writes the model file.
model.fit(X_train1, y_train1, validation_data=(X_val1, y_val1), epochs=100, callbacks=[cp1])

In [None]:
from tensorflow.keras.models import load_model

# Reload the checkpoint written during training. The path must be the one
# given to the ModelCheckpoint callback ('model_KaggleRNN/model.h5'); the
# previous 'modelKaggleRNN/modelo.h5' pointed to a file this notebook never
# writes.
model = load_model('model_KaggleRNN/model.h5')

In [None]:
# Predict on the training windows and flatten the output to 1-D.
train_predictions = model.predict(X_train1).flatten()
# NOTE(review): each scaler below is fitted on the very array it then
# inverse-transforms, and predictions and actuals get separate scalers. With
# MinMaxScaler's default (0, 1) range, inverse_transform maps v to
# v * (max - min) + min of that array, so this does not undo the
# normalisation applied in tratar_dados and the two columns end up on
# different scales. Presumably the scaler returned by data_normalization
# (`escala`) was meant to be used here - confirm.
escala_predictions = MinMaxScaler()
escala_actuals = MinMaxScaler()

escala_predictions.fit(train_predictions.reshape(-1, 1))
escala_actuals.fit(y_train1.reshape(-1, 1))

train_predictions_unscaled = escala_predictions.inverse_transform(train_predictions.reshape(-1, 1))
y_train1_unscaled = escala_actuals.inverse_transform(y_train1.reshape(-1, 1))
# Constant offset of 3 added to both series - origin not visible here, TODO document.
train_predictions_unscaled += 3
y_train1_unscaled += 3
# Side-by-side table of predictions and targets, displayed as the cell output.
train_results = pd.DataFrame(data={'Train Predictions': train_predictions_unscaled.flatten(), 'Actuals': y_train1_unscaled.flatten()})
train_results

In [None]:
import matplotlib.pyplot as plt

# Overlay the first 500 training predictions on the corresponding targets.
plt.title('Resultados do Treino')
plt.xlabel('Índice')
plt.ylabel('Valores')

for coluna, cor, traco, legenda in (
    ('Train Predictions', 'blue', '--', 'Previsões de Treino'),
    ('Actuals', 'red', '-', 'Valores Reais'),
):
    plt.plot(train_results[coluna][:500], color=cor, linestyle=traco, label=legenda)

plt.legend()
plt.tight_layout()
plt.show()

In [None]:
# Predict on the validation windows and flatten the output to 1-D.
val_predictions = model.predict(X_val1).flatten()
# NOTE(review): each scaler below is fitted on the very array it then
# inverse-transforms, and predictions and actuals get separate scalers. With
# MinMaxScaler's default (0, 1) range, inverse_transform maps v to
# v * (max - min) + min of that array, so this does not undo the
# normalisation applied in tratar_dados and the two columns end up on
# different scales. Presumably the scaler returned by data_normalization
# (`escala`) was meant to be used here - confirm.
escala_predictions = MinMaxScaler()
escala_actuals = MinMaxScaler()

escala_predictions.fit(val_predictions.reshape(-1, 1))
escala_actuals.fit(y_val1.reshape(-1, 1))

val_predictions_unscaled = escala_predictions.inverse_transform(val_predictions.reshape(-1, 1))
y_val1_unscaled = escala_actuals.inverse_transform(y_val1.reshape(-1, 1))
# Constant offset of 3 added to both series - origin not visible here, TODO document.
val_predictions_unscaled += 3
y_val1_unscaled += 3
# Side-by-side table of predictions and targets, displayed as the cell output.
val_results = pd.DataFrame(data={'Val Predictions': val_predictions_unscaled.flatten(), 'Actuals': y_val1_unscaled.flatten()})
val_results

In [None]:
import matplotlib.pyplot as plt

# Overlay the first 100 validation predictions on the corresponding targets.
plt.title('Resultados da Validação')
plt.xlabel('Índice')
plt.ylabel('Valores')

for coluna, cor, traco, legenda in (
    ('Val Predictions', 'blue', '--', 'Previsões da Validação'),
    ('Actuals', 'red', '-', 'Valores Reais'),
):
    plt.plot(val_results[coluna][:100], color=cor, linestyle=traco, label=legenda)

plt.legend()
plt.tight_layout()
plt.show()

In [None]:
# Predict on the test windows and flatten the output to 1-D.
test_predictions = model.predict(X_test1).flatten()
# NOTE(review): each scaler below is fitted on the very array it then
# inverse-transforms, and predictions and actuals get separate scalers. With
# MinMaxScaler's default (0, 1) range, inverse_transform maps v to
# v * (max - min) + min of that array, so this does not undo the
# normalisation applied in tratar_dados and the two columns end up on
# different scales. Presumably the scaler returned by data_normalization
# (`escala`) was meant to be used here - confirm.
escala_predictions = MinMaxScaler()
escala_actuals = MinMaxScaler()

escala_predictions.fit(test_predictions.reshape(-1, 1))
escala_actuals.fit(y_test1.reshape(-1, 1))

test_predictions_unscaled = escala_predictions.inverse_transform(test_predictions.reshape(-1, 1))
y_test1_unscaled = escala_actuals.inverse_transform(y_test1.reshape(-1, 1))
# Constant offset of 3 added to both series - origin not visible here, TODO document.
test_predictions_unscaled += 3
y_test1_unscaled += 3
# Side-by-side table of predictions and targets, displayed as the cell output.
test_results = pd.DataFrame(data={'Test Predictions': test_predictions_unscaled.flatten(), 'Actuals': y_test1_unscaled.flatten()})
test_results

In [None]:
import matplotlib.pyplot as plt

# Overlay the first 300 test predictions on the corresponding targets.
plt.title('Resultados de Teste')
plt.xlabel('índice')
plt.ylabel('Valores')

for coluna, cor, traco, legenda in (
    ('Test Predictions', 'blue', '--', 'Previsões do Teste'),
    ('Actuals', 'red', '-', 'Valores Reais'),
):
    plt.plot(test_results[coluna][:300], color=cor, linestyle=traco, label=legenda)

plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()


In [None]:
def forecast(model, df, timesteps, multisteps, scaler):
    """Recursively forecast ``multisteps`` values past the end of ``df``.

    The last ``timesteps`` values of ``df`` seed a sliding window. At each
    step the window is reshaped to ``(1, timesteps, 1)`` and given to
    ``model.predict``; the raw prediction is appended to the window (the
    oldest value falls off) and its ``scaler.inverse_transform`` is recorded.

    Because of the reshape, ``df[-timesteps:]`` must contain exactly
    ``timesteps`` values, i.e. a single feature.

    Args:
        model: Object exposing ``predict``.
        df: Series/frame holding the known history.
        timesteps: Length of the input window.
        multisteps: Number of future steps to predict.
        scaler: Object exposing ``inverse_transform``.

    Returns:
        list: The inverse-transformed predictions, one per step.
    """
    window = df[-timesteps:].values  # most recent known sequence
    predictions = []
    for _ in range(multisteps):
        window = window.reshape(1, timesteps, 1)
        yhat = model.predict(window)
        predictions.append(scaler.inverse_transform(yhat)[0][0])
        # Slide the window: add the new prediction, keep the newest `timesteps` values.
        window = np.append(window[0], yhat)[-timesteps:]
    return predictions

def plot_forecast(data, forecasts):
    """Plot the known data followed by the forecast values.

    The forecast curve starts at the x-position of the last known point, so
    the two curves meet.

    Args:
        data: Known observations, plotted in green.
        forecasts: Predicted values, plotted in red.
    """
    n_known = len(data)
    known_x = range(n_known)
    forecast_x = range(n_known - 1, n_known + len(forecasts) - 1)

    plt.figure(figsize=(8,6))
    plt.plot(known_x, data, color='green', label='Confirmed')
    plt.plot(forecast_x, forecasts, color='red', label='Forecasts')
    plt.title('Number of incidents')
    plt.ylabel('Incidents')
    plt.xlabel('Days')
    plt.legend()
    plt.show()

In [None]:
# Forecast WINDOW_SIZE steps beyond the end of bd.
# NOTE(review): forecast reshapes its window to (1, timesteps, 1), while bd
# appears to still hold several feature columns (the model input is 13 wide)
# and is not the resampled `temp` the model was trained on - confirm which
# series is meant to be passed here.
forecasts = forecast(model, bd, WINDOW_SIZE, multisteps=WINDOW_SIZE, scaler=escala)

In [None]:
# NOTE(review): bd is passed whole, so plot_forecast draws whatever columns it
# contains as 'Confirmed' - presumably only the incidents column is wanted; confirm.
plot_forecast(bd, forecasts)