# GRU

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
import random
from scipy.stats import pearsonr
from collections import Counter

from tensorflow import keras
from tensorflow.keras.layers import Dense, Dropout, LSTM, Conv1D, MaxPooling1D, Flatten, GRU, Bidirectional
from tensorflow.keras.models import Sequential
from tensorflow.random import set_seed

sns.set_palette("Paired")


from sklearn.model_selection import train_test_split

from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler, PowerTransformer
from sklearn.preprocessing import SplineTransformer

from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_absolute_percentage_error
from sklearn.metrics import r2_score

from sklearn.pipeline import Pipeline

import warnings
warnings.filterwarnings("ignore")

from keras.layers import BatchNormalization

from tensorflow import compat
compat.v1.logging.set_verbosity(compat.v1.logging.ERROR)

# Funções e constantes

In [2]:
# Base URL of the input data files.
PATH = "https://raw.githubusercontent.com/LeiteJu/TCC/main/dados/input/"

# Two-letter codes of the 27 Brazilian federative units.
SIGLAS = ['AC', 'AL', 'AM', 'AP', 'BA', 'CE',
  'DF', 'ES', 'GO', 'MA',
  'MG', 'MS', 'MT', 'PA', 'PB',
  'PE', 'PI', 'PR', 'RJ',
  'RN', 'RO', 'RR', 'RS',
  'SC', 'SE', 'SP', 'TO']

# Labels for relative-error bins, ordered from strongest under-estimation to
# strongest over-estimation. The under-estimation ranges carry a minus sign on
# both bounds so that consecutive bins are contiguous (-90/-60/-30/-10/10/...).
LABELS = ["subestima: -90%", "subestima entre -90% e -60%", "subestima entre -60% e -30%",
          "subestima entre -30% e -10%", "entre -10% e 10%", "superestima entre 10% e 30%",
          "superestima entre 30% e 60%", "superestima entre 60% e 90%", "superestima mais de 90%"]

# Region names used as values of REGIOES.
N = 'NORTE'
NE = "NORDESTE"
# NOTE(review): the trailing '?' looks unintentional, but the value is kept
# as-is because other code may compare against this exact string - confirm.
CO = 'CENTRO OESTE?'
SE = 'SUDESTE'
S = 'SUL'

# Maps each state code to its region.
REGIOES = {
    'AC': N, 'AL': NE, 'AM' : N, 'AP' : N, 'BA' : NE, 'CE' : NE,
    'DF' : CO, 'ES' : SE, 'GO' : CO, 'MA' : NE,
    'MG' : SE, 'MS' : CO, 'MT' : CO, 'PA' : N, 'PB' : NE,
    'PE' : NE, 'PI' : NE, 'PR' : S, 'RJ' : SE,
    'RN' : NE, 'RO' : N, 'RR' : N, 'RS' : S,
    'SC' : S, 'SE' : NE, 'SP' : SE, 'TO' : N}

In [3]:
# computes regression metrics
def score_regression_metrics(y_test, y_test_pred):
    """Compute RMSE, MAE, MAPE and R2 for a set of predictions.

    Parameters
    ----------
    y_test : array-like
        Ground-truth target values.
    y_test_pred : array-like
        Predicted target values, same number of samples as ``y_test``.

    Returns
    -------
    dict
        Metric values keyed by ``"neg_root_mean_squared_error"``,
        ``"neg_mean_absolute_error"``, ``"neg_mean_absolute_percentage_error"``
        and ``"r2"``. Despite the ``neg_`` prefix in the key names, the error
        values are the plain (non-negated) metrics.
    """
    # RMSE is taken as sqrt(MSE) rather than via `squared=False`: that keyword
    # is deprecated in scikit-learn 1.4 and removed in 1.6. The two forms are
    # equivalent for a single-output target, which is how this notebook uses it.
    rmse = float(np.sqrt(mean_squared_error(y_true=y_test, y_pred=y_test_pred)))
    mae = mean_absolute_error(y_true=y_test, y_pred=y_test_pred)
    mape = mean_absolute_percentage_error(y_true=y_test, y_pred=y_test_pred)
    r2 = r2_score(y_true=y_test, y_pred=y_test_pred)

    scores = {
        "neg_root_mean_squared_error": rmse,
        "neg_mean_absolute_error": mae,
        "neg_mean_absolute_percentage_error": mape,
        # Previously computed but discarded; exposed as an extra key.
        "r2": r2,
    }

    return scores

def show_scores(scores):
    """Print the RMSE, MAE and MAPE entries of a score dictionary.

    ``scores`` must provide the keys ``"neg_root_mean_squared_error"``,
    ``"neg_mean_absolute_error"`` and ``"neg_mean_absolute_percentage_error"``.
    A header line is printed first, then one line per metric.
    """
    print("Scores obtidos:")

    # (display name, dictionary key) pairs, in the order they are printed.
    report_rows = (
        ("RMSE", "neg_root_mean_squared_error"),
        ("MAE", "neg_mean_absolute_error"),
        ("MAPE", "neg_mean_absolute_percentage_error"),
    )
    for display_name, key in report_rows:
        print(f"{display_name}: {scores[key]}")

In [4]:
# n_steps -> number of consecutive rows per window (the temporal context)
def split_sequences(sequences, n_steps):
    """Cut a 2-D array into overlapping windows of ``n_steps`` rows.

    For every window, the inputs are all columns except the last one, and the
    target is the last column of the window's final row.

    Parameters
    ----------
    sequences : 2-D array supporting numpy-style ``[rows, cols]`` indexing
        Feature columns followed by the target in the last column.
    n_steps : int
        Number of rows in each window.

    Returns
    -------
    tuple of numpy.ndarray
        ``(X, y)`` where ``X`` stacks the input windows and ``y`` the targets.
        Both are empty arrays when no complete window fits.
    """
    total_rows = len(sequences)

    # A window starting at `start` is kept only if it ends inside the data.
    starts = [start for start in range(total_rows) if start + n_steps <= total_rows]

    windows = [sequences[start:start + n_steps, :-1] for start in starts]
    targets = [sequences[start + n_steps - 1, -1] for start in starts]

    return np.array(windows), np.array(targets)

In [5]:
class Scaler3DShape:
    """Apply a 2-D scaler to arrays with extra leading dimensions.

    The input is flattened to ``(-1, n_features)`` using its last axis as the
    feature axis, passed through the wrapped scaler, and reshaped back to the
    original shape.
    """

    def __init__(self, scaler=StandardScaler):
        # `scaler` is a class (or zero-argument factory); it is instantiated here.
        self.scaler = scaler()

    def _apply_flat(self, operation, x):
        """Run ``operation`` on ``x`` collapsed to 2-D and restore ``x``'s shape."""
        flat = x.reshape(-1, x.shape[-1])
        return operation(flat).reshape(x.shape)

    def fit_transform(self, x):
        """Fit the wrapped scaler on ``x`` and return the scaled array."""
        return self._apply_flat(self.scaler.fit_transform, x)

    def transform(self, x):
        """Scale ``x`` with the already-fitted wrapped scaler."""
        return self._apply_flat(self.scaler.transform, x)

In [6]:
# Default seed shared by the notebook.
SEED = 41


def set_seeds(SEED=41):
    """Seed the random number generators used in this notebook.

    Seeds Python's ``random`` module, NumPy's global generator and TensorFlow
    (through ``tensorflow.random.set_seed``), and exports ``PYTHONHASHSEED``.

    Parameters
    ----------
    SEED : int, default 41
        Seed value applied to every generator.
    """
    random.seed(SEED)
    np.random.seed(SEED)
    set_seed(SEED)
    # Exported for completeness; changing this variable at runtime does not
    # alter hash randomization of the already-running interpreter.
    os.environ['PYTHONHASHSEED'] = str(SEED)


set_seeds(SEED)

In [7]:
def load_data():
    """Load the processed dataset and split it into features and target.

    Reads ``processado.csv`` from ``PATH``, orders the rows by ``data`` and
    then ``estados``, and one-hot encodes the ``estados`` column (dropping the
    first category).

    Returns
    -------
    x : pandas.DataFrame
        Feature columns (everything except ``consumo`` and ``data``), with
        ``estados`` replaced by its dummy columns.
    y : pandas.Series
        The ``consumo`` column, in the same row order and index as ``x``.
    """
    df = pd.read_csv(f"{PATH}processado.csv")

    # Sort once; features and target are both taken from this ordered frame so
    # they share the same row order and index.
    ordered = df.sort_values(["data", "estados"])

    x = ordered.drop(["consumo", "data"], axis=1)

    # One-hot encoding. The dtype is pinned to uint8 (the default before
    # pandas 2.0): since 2.0 the default is bool, and a frame mixing bool and
    # float columns yields an object-dtype array from `.values`, which breaks
    # the numeric windows built downstream.
    x = pd.get_dummies(data=x, columns=["estados"], drop_first=True, dtype=np.uint8)

    y = ordered["consumo"]

    return x, y

# Experimentos

## 1 GRU(64)

Scores obtidos (conjunto de teste):
- RMSE: 58983.791888066524
- MAE: 35264.30576231061
- MAPE: 0.48861234325849734

In [8]:
# Experiment: one GRU layer (64 units), 100 epochs.
set_seeds(41)  # reseed so the run does not depend on previously executed cells

x, y = load_data()

timestep = 3  # rows per input window

# split_sequences expects the target in the last column.
df = x.assign(consumo=y)

# Ordered split (no shuffling): the last 15% of the rows form the test set.
df_train, df_test = train_test_split(df, test_size=0.15, shuffle=False)

# NOTE(review): load_data orders rows by (data, estados), so the rows inside a
# window may belong to different states rather than consecutive dates - confirm.
x_train, y_train = split_sequences(df_train.values, timestep)
x_test, y_test = split_sequences(df_test.values, timestep)

# The scaler is fitted on the training windows only and reused for the test set.
scaler = Scaler3DShape(MinMaxScaler)
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)

model = Sequential([
    GRU(units=64, activation="relu"),
    Dropout(rate=0.10),
    Dense(units=1),
])
model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=0.001),
    loss="mse",
    metrics=[keras.metrics.RootMeanSquaredError(name="RMSE")],
)

history = model.fit(x_train, y_train, epochs=100, batch_size=32, verbose=0)

# First report: training windows. Second report: test windows.
show_scores(score_regression_metrics(y_train, model.predict(x_train)))

y_pred = model.predict(x_test)
scores = score_regression_metrics(y_test, y_pred)
show_scores(scores)

Scores obtidos:
RMSE: 30774.423673776306
MAE: 17263.053975238017
MAPE: 0.22779397858574799
Scores obtidos:
RMSE: 58983.791888066524
MAE: 35264.30576231061
MAPE: 0.48861234325849734


## 1 GRU(128)

Scores obtidos (conjunto de teste):
- RMSE: 51373.11238077078
- MAE: 30178.43626065341
- MAPE: 0.39352192925125856

In [9]:
# Experiment: one GRU layer (128 units), 100 epochs.
set_seeds(41)  # reseed so the run does not depend on previously executed cells

x, y = load_data()

timestep = 3  # rows per input window

# split_sequences expects the target in the last column.
df = x.assign(consumo=y)

# Ordered split (no shuffling): the last 15% of the rows form the test set.
df_train, df_test = train_test_split(df, test_size=0.15, shuffle=False)

# NOTE(review): load_data orders rows by (data, estados), so the rows inside a
# window may belong to different states rather than consecutive dates - confirm.
x_train, y_train = split_sequences(df_train.values, timestep)
x_test, y_test = split_sequences(df_test.values, timestep)

# The scaler is fitted on the training windows only and reused for the test set.
scaler = Scaler3DShape(MinMaxScaler)
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)

model = Sequential([
    GRU(units=128, activation="relu"),
    Dropout(rate=0.10),
    Dense(units=1),
])
model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=0.001),
    loss="mse",
    metrics=[keras.metrics.RootMeanSquaredError(name="RMSE")],
)

history = model.fit(x_train, y_train, epochs=100, batch_size=32, verbose=0)

# First report: training windows. Second report: test windows.
show_scores(score_regression_metrics(y_train, model.predict(x_train)))

y_pred = model.predict(x_test)
scores = score_regression_metrics(y_test, y_pred)
show_scores(scores)

Scores obtidos:
RMSE: 29565.23096609332
MAE: 16405.185092392123
MAPE: 0.1882507583119234
Scores obtidos:
RMSE: 51373.11238077078
MAE: 30178.43626065341
MAPE: 0.39352192925125856


## 1 GRU(256)

Scores obtidos (conjunto de teste):
- RMSE: 67520.3262247469
- MAE: 37927.66290719697
- MAPE: 0.5414366820119797

In [10]:
# Experiment: one GRU layer (256 units), 100 epochs.
set_seeds(41)  # reseed so the run does not depend on previously executed cells

x, y = load_data()

timestep = 3  # rows per input window

# split_sequences expects the target in the last column.
df = x.assign(consumo=y)

# Ordered split (no shuffling): the last 15% of the rows form the test set.
df_train, df_test = train_test_split(df, test_size=0.15, shuffle=False)

# NOTE(review): load_data orders rows by (data, estados), so the rows inside a
# window may belong to different states rather than consecutive dates - confirm.
x_train, y_train = split_sequences(df_train.values, timestep)
x_test, y_test = split_sequences(df_test.values, timestep)

# The scaler is fitted on the training windows only and reused for the test set.
scaler = Scaler3DShape(MinMaxScaler)
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)

model = Sequential([
    GRU(units=256, activation="relu"),
    Dropout(rate=0.10),
    Dense(units=1),
])
model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=0.001),
    loss="mse",
    metrics=[keras.metrics.RootMeanSquaredError(name="RMSE")],
)

history = model.fit(x_train, y_train, epochs=100, batch_size=32, verbose=0)

# First report: training windows. Second report: test windows.
show_scores(score_regression_metrics(y_train, model.predict(x_train)))

y_pred = model.predict(x_test)
scores = score_regression_metrics(y_test, y_pred)
show_scores(scores)

Scores obtidos:
RMSE: 34423.31173979458
MAE: 19020.808449113974
MAPE: 0.23169670790803484
Scores obtidos:
RMSE: 67520.3262247469
MAE: 37927.66290719697
MAPE: 0.5414366820119797


## GRU(128,64)

In [11]:
# Experiment: two stacked GRU layers (128 -> 64 units), 100 epochs.
set_seeds(41)  # reseed so the run does not depend on previously executed cells

x, y = load_data()

timestep = 3  # rows per input window

# split_sequences expects the target in the last column.
df = x.assign(consumo=y)

# Ordered split (no shuffling): the last 15% of the rows form the test set.
df_train, df_test = train_test_split(df, test_size=0.15, shuffle=False)

# NOTE(review): load_data orders rows by (data, estados), so the rows inside a
# window may belong to different states rather than consecutive dates - confirm.
x_train, y_train = split_sequences(df_train.values, timestep)
x_test, y_test = split_sequences(df_test.values, timestep)

# The scaler is fitted on the training windows only and reused for the test set.
scaler = Scaler3DShape(MinMaxScaler)
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)

model = Sequential([
    # return_sequences=True so the second GRU receives the full sequence.
    GRU(units=128, return_sequences=True, activation="relu"),
    GRU(units=64, activation="relu"),
    Dropout(rate=0.10),
    Dense(units=1),
])
model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=0.001),
    loss="mse",
    metrics=[keras.metrics.RootMeanSquaredError(name="RMSE")],
)

history = model.fit(x_train, y_train, epochs=100, batch_size=32, verbose=0)

# First report: training windows. Second report: test windows.
show_scores(score_regression_metrics(y_train, model.predict(x_train)))

y_pred = model.predict(x_test)
scores = score_regression_metrics(y_test, y_pred)
show_scores(scores)

Scores obtidos:
RMSE: 32666.12218254618
MAE: 18252.347183437632
MAPE: 0.2177141680598484
Scores obtidos:
RMSE: 47945.9335943859
MAE: 30441.39976089015
MAPE: 0.6379432308546132


In [12]:
# Experiment: two stacked GRU layers (128 -> 64) plus one Dense(64) layer, 100 epochs.
set_seeds(41)  # reseed so the run does not depend on previously executed cells

x, y = load_data()

timestep = 3  # rows per input window

# split_sequences expects the target in the last column.
df = x.assign(consumo=y)

# Ordered split (no shuffling): the last 15% of the rows form the test set.
df_train, df_test = train_test_split(df, test_size=0.15, shuffle=False)

# NOTE(review): load_data orders rows by (data, estados), so the rows inside a
# window may belong to different states rather than consecutive dates - confirm.
x_train, y_train = split_sequences(df_train.values, timestep)
x_test, y_test = split_sequences(df_test.values, timestep)

# The scaler is fitted on the training windows only and reused for the test set.
scaler = Scaler3DShape(MinMaxScaler)
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)

model = Sequential([
    # return_sequences=True so the second GRU receives the full sequence.
    GRU(units=128, return_sequences=True, activation="relu"),
    GRU(units=64, activation="relu"),
    Dense(units=64, activation="relu"),
    Dropout(rate=0.10),
    Dense(units=1),
])
model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=0.001),
    loss="mse",
    metrics=[keras.metrics.RootMeanSquaredError(name="RMSE")],
)

history = model.fit(x_train, y_train, epochs=100, batch_size=32, verbose=0)

# First report: training windows. Second report: test windows.
show_scores(score_regression_metrics(y_train, model.predict(x_train)))

y_pred = model.predict(x_test)
scores = score_regression_metrics(y_test, y_pred)
show_scores(scores)

Scores obtidos:
RMSE: 31450.818751332274
MAE: 17458.324777509886
MAPE: 0.18904875748654473
Scores obtidos:
RMSE: 44126.249323583594
MAE: 28122.84014322917
MAPE: 0.4425326521589296


In [13]:
# Experiment: two stacked GRU layers (128 -> 64) plus Dense 64 -> 32 -> 64, 100 epochs.
set_seeds(41)  # reseed so the run does not depend on previously executed cells

x, y = load_data()

timestep = 3  # rows per input window

# split_sequences expects the target in the last column.
df = x.assign(consumo=y)

# Ordered split (no shuffling): the last 15% of the rows form the test set.
df_train, df_test = train_test_split(df, test_size=0.15, shuffle=False)

# NOTE(review): load_data orders rows by (data, estados), so the rows inside a
# window may belong to different states rather than consecutive dates - confirm.
x_train, y_train = split_sequences(df_train.values, timestep)
x_test, y_test = split_sequences(df_test.values, timestep)

# The scaler is fitted on the training windows only and reused for the test set.
scaler = Scaler3DShape(MinMaxScaler)
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)

model = Sequential([
    # return_sequences=True so the second GRU receives the full sequence.
    GRU(units=128, return_sequences=True, activation="relu"),
    GRU(units=64, activation="relu"),
    Dense(units=64, activation="relu"),
    Dense(units=32, activation="relu"),
    Dense(units=64, activation="relu"),
    Dropout(rate=0.10),
    Dense(units=1),
])
model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=0.001),
    loss="mse",
    metrics=[keras.metrics.RootMeanSquaredError(name="RMSE")],
)

history = model.fit(x_train, y_train, epochs=100, batch_size=32, verbose=0)

# First report: training windows. Second report: test windows.
show_scores(score_regression_metrics(y_train, model.predict(x_train)))

y_pred = model.predict(x_test)
scores = score_regression_metrics(y_test, y_pred)
show_scores(scores)

Scores obtidos:
RMSE: 29750.573861662706
MAE: 16224.536114586534
MAPE: 0.1742494612999861
Scores obtidos:
RMSE: 41248.433823474246
MAE: 24676.46122750947
MAPE: 0.298679462742127


In [14]:
# Experiment: same architecture as the previous cell (GRU 128 -> 64, Dense
# 64 -> 32 -> 64), trained for 200 epochs instead of 100.
set_seeds(41)  # reseed so the run does not depend on previously executed cells

x, y = load_data()

timestep = 3  # rows per input window

# split_sequences expects the target in the last column.
df = x.assign(consumo=y)

# Ordered split (no shuffling): the last 15% of the rows form the test set.
df_train, df_test = train_test_split(df, test_size=0.15, shuffle=False)

# NOTE(review): load_data orders rows by (data, estados), so the rows inside a
# window may belong to different states rather than consecutive dates - confirm.
x_train, y_train = split_sequences(df_train.values, timestep)
x_test, y_test = split_sequences(df_test.values, timestep)

# The scaler is fitted on the training windows only and reused for the test set.
scaler = Scaler3DShape(MinMaxScaler)
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)

model = Sequential([
    # return_sequences=True so the second GRU receives the full sequence.
    GRU(units=128, return_sequences=True, activation="relu"),
    GRU(units=64, activation="relu"),
    Dense(units=64, activation="relu"),
    Dense(units=32, activation="relu"),
    Dense(units=64, activation="relu"),
    Dropout(rate=0.10),
    Dense(units=1),
])
model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=0.001),
    loss="mse",
    metrics=[keras.metrics.RootMeanSquaredError(name="RMSE")],
)

history = model.fit(x_train, y_train, epochs=200, batch_size=32, verbose=0)

# First report: training windows. Second report: test windows.
show_scores(score_regression_metrics(y_train, model.predict(x_train)))

y_pred = model.predict(x_test)
scores = score_regression_metrics(y_test, y_pred)
show_scores(scores)

Scores obtidos:
RMSE: 26040.105201641607
MAE: 13207.951564256102
MAPE: 0.12641521475090325
Scores obtidos:
RMSE: 41768.96367257219
MAE: 23745.983612689393
MAPE: 0.2614653100316766
