# Prevendo o nível de poluição do Ar da China (2010 - 2014)
### Para isso iremos utilizar uma Rede Neural Recorrente LSTM

In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

In [2]:
base = pd.read_csv("poluicao.csv")

In [3]:
base.shape

(43824, 13)

In [4]:
base = base.dropna()

In [5]:
base.shape

(41757, 13)

In [6]:
base.head()

Unnamed: 0,No,year,month,day,hour,pm2.5,DEWP,TEMP,PRES,cbwd,Iws,Is,Ir
24,25,2010,1,2,0,129.0,-16,-4.0,1020.0,SE,1.79,0,0
25,26,2010,1,2,1,148.0,-15,-4.0,1020.0,SE,2.68,0,0
26,27,2010,1,2,2,159.0,-11,-5.0,1021.0,SE,3.57,0,0
27,28,2010,1,2,3,181.0,-7,-5.0,1022.0,SE,5.36,1,0
28,29,2010,1,2,4,138.0,-7,-5.0,1022.0,SE,6.25,2,0


In [7]:
base = base.drop(columns=["No", "year", "day", "month", "hour", "cbwd"])

In [8]:
base.head()

Unnamed: 0,pm2.5,DEWP,TEMP,PRES,Iws,Is,Ir
24,129.0,-16,-4.0,1020.0,1.79,0,0
25,148.0,-15,-4.0,1020.0,2.68,0,0
26,159.0,-11,-5.0,1021.0,3.57,0,0
27,181.0,-7,-5.0,1022.0,5.36,1,0
28,138.0,-7,-5.0,1022.0,6.25,2,0


In [9]:
base.shape

(41757, 7)

# Separando a base de traino e a base de teste

In [10]:
base_train = base.iloc[:27977, :]

### Bse de dados de treinamento

In [11]:
base_train.shape

(27977, 7)

### Bse de dados de teste

# Pré-Processamento dos dados de traino
Nessa etapa, vamos colocar os dados de trainamento no formato adquado para ser processado por uma Rede Neural Recorrente

In [13]:
base_train = base_train.iloc[:, :].values
base_train.shape

(27977, 7)

In [14]:
# Normalizando os atributos previsores
normalizador = MinMaxScaler(feature_range=(0, 1))
base_train_norm = normalizador.fit_transform(base_train)

## Colocando os dados no formato correto para uma RNN

In [15]:
previsores = []
pm2_5_real = []
for i in range(50, 27977):
    previsores.append(base_train_norm[i-50:i, 0:8])
    pm2_5_real.append(base_train_norm[i, 0])
previsores, pm2_5_real = np.array(previsores), np.array(pm2_5_real)

In [16]:
previsores.shape

(27927, 50, 7)

In [17]:
pm2_5_real.shape

(27927,)

# Criando o modelo LSTM utilizando o keras

In [18]:
from keras.models import Sequential
from keras.layers import Dense, Dropout, LSTM
from keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint

In [19]:
regressor = Sequential()

regressor.add(LSTM(units=100, return_sequences = True, input_shape = (previsores.shape[1], 7)))
regressor.add(Dropout(0.3))

regressor.add(LSTM(units = 50, return_sequences = True))
regressor.add(Dropout(0.3))

regressor.add(LSTM(units = 50, return_sequences = True))
regressor.add(Dropout(0.3))

regressor.add(LSTM(units = 50))
regressor.add(Dropout(0.3))

regressor.add(Dense(units = 1, activation = "linear"))

regressor.compile(optimizer = "rmsprop", loss = "mean_squared_error",
                 metrics = ["mean_absolute_error"])

In [20]:
# Essa classe do keras monitora uma métrica do modelo, caso essa métrica pare de melhorar por uma quantidade
# de etapas pré definidas, o modelo para o treinamento

es = EarlyStopping(monitor="loss", min_delta=1e-10, patience=10, verbose=1)

# -------
rlr = ReduceLROnPlateau(monitor="loss", factor=0.2, patience=5, verbose=1)

mcp = ModelCheckpoint(filepath="pesos_poluicao.h5", monitor="loss", save_best_only=True)


In [21]:
regressor.fit(previsores, pm2_5_real, epochs=100, batch_size=32,
             callbacks=[es, rlr, mcp])

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100

Epoch 00013: ReduceLROnPlateau reducing learning rate to 0.00020000000949949026.
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100

Epoch 00019: ReduceLROnPlateau reducing learning rate to 4.0000001899898055e-05.
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100

Epoch 00024: ReduceLROnPlateau reducing learning rate to 8.000000525498762e-06.
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100

Epoch 00029: ReduceLROnPlateau reducing learning rate to 1.6000001778593287e-06.
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100

Epoch 00034: ReduceLROnPlateau reducing learning rate to 3.200000264769187e-07.
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100

Epoch 00039: ReduceLROnPlateau reducing learning rate to 6.400000529538374e-08.
Epoch 40/100


<tensorflow.python.keras.callbacks.History at 0x7fe06efcb190>

# Pré processamento da base de teste

In [22]:
entradas = base[27927:].values

In [23]:
len(entradas)

13830

In [24]:
entradas = normalizador.transform(entradas)

In [26]:
x_teste = []
for i in range(50, 13830):
    x_teste.append(entradas[i-50:i, 0:8])
x_teste = np.array(x_teste)


In [27]:
x_teste.shape

(13780, 50, 7)

# Fazendo as previsões

In [28]:
previsoes = regressor.predict(x_teste)
previsoes = normalizador.inverse_transform(previsoes)

ValueError: non-broadcastable output operand with shape (13780,1) doesn't match the broadcast shape (13780,7)