In [32]:
import math
import sys 
import numpy as np # linear algebra
from scipy.stats import randint
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv), data manipulation as in SQL
import matplotlib.pyplot as plt # this is used for the plot the graph 
import matplotlib.dates as mdates
import seaborn as sns # used for plot interactive graph. 
from sklearn.model_selection import train_test_split # to split the data into two parts
#from sklearn.cross_validation import KFold # use for cross validation
from sklearn.preprocessing import StandardScaler # for normalization
from sklearn.preprocessing import MinMaxScaler
from sklearn.pipeline import Pipeline # pipeline making
from sklearn.model_selection import cross_val_score
from sklearn.feature_selection import SelectFromModel
from sklearn import metrics # for the check the error and accuracy of the model
from sklearn.metrics import mean_squared_error,r2_score
from datetime import timedelta

## for Deep-learing:
import keras
from keras.layers import Dense
from keras.models import Sequential
from keras.utils import to_categorical
from keras.optimizers import SGD 
from keras.callbacks import EarlyStopping
#from keras.utils import np_utils
import itertools
from keras.layers import LSTM
from keras.layers import Conv1D
from keras.layers import MaxPooling1D
from keras.layers import Dropout
from custom_transforms.transforms import *
from models.LSTM1 import LSTM1

In [33]:
df = pd.read_csv('../input/teresopolis-2016-2023-precos-medios.csv', sep=',', 
                 parse_dates={'dt' : ['Ano', 'Mês', 'Dia']},
                 low_memory=False, na_values=['nan','?'], index_col='dt')

filtered_alface_crespa_roca = df[df['Produto'] == 'Alface Crespa - Roça'][['Preco_unitario']]
filtered_alface_crespa_roca = filtered_alface_crespa_roca.resample('W').mean()
filtered_alface_crespa_roca = filtered_alface_crespa_roca.interpolate(method='linear')
filtered_alface_crespa_roca.index = filtered_alface_crespa_roca.index.tz_localize("UTC")
filtered_alface_crespa_roca.sort_values(by=['dt'], inplace=True)
filtered_alface_crespa_roca["first_day_week"] = filtered_alface_crespa_roca.index - filtered_alface_crespa_roca.index.weekday * timedelta(days=1)

In [34]:
weather_df = pd.read_csv('../processed_data/weather_2016_2023.csv', index_col=0)
weather_df.index = pd.to_datetime(weather_df.index, utc=True)
for col in weather_df.columns:
    weather_df[col] = weather_df[col].replace(-9999, None)
weather_df = weather_df[(weather_df.index >= '2016-04-24') & (weather_df.index <= '2023-09-24')]
weather_weekly_df = pd.DataFrame(weather_df[[ 'PRECIPITAÇÃO TOTAL, HORÁRIO (mm)', 'RADIACAO GLOBAL (Kj/m²)', 'TEMPERATURA DO PONTO DE ORVALHO (°C)']].resample('W').mean())



weather_weekly_df["first_day_week"] = (weather_weekly_df.index - weather_weekly_df.index.weekday * timedelta(days=1))

In [35]:
#weather_weekly_df['RADIAÇÃO GLOBAL (Kj/m²)'] = weather_df['RADIACAO GLOBAL (Kj/m²)'].resample('W').max()
weather_weekly_df['RADIACAO GLOBAL (Kj/m²)']=weather_df['RADIACAO GLOBAL (Kj/m²)'].resample('W').max()

In [36]:
price_weather_df = pd.merge(filtered_alface_crespa_roca, weather_weekly_df, on='first_day_week', how='left')
price_weather_df.index = filtered_alface_crespa_roca.index

In [37]:
model = LSTM1()
model.run(price_weather_df, ['Preco_unitario', 'TEMPERATURA DO PONTO DE ORVALHO (°C)'], 8, 8, 8)

train_size: 272
validation_size: 38
test_size: 78
values_train: (272, 5)
values_validation: (38, 5)
values_test: (78, 5)
train_percentage: 0.7010309278350515
validation_percentage: 0.0979381443298969
test_percentage: 0.20103092783505155
(257, 1, 16) (257, 8) (63, 1, 16) (63, 8)
Epoch 1/100
4/4 - 4s - loss: 0.0965 - val_loss: 0.4940 - 4s/epoch - 998ms/step
Epoch 2/100
4/4 - 0s - loss: 0.0685 - val_loss: 0.4216 - 74ms/epoch - 18ms/step
Epoch 3/100
4/4 - 0s - loss: 0.0463 - val_loss: 0.3525 - 68ms/epoch - 17ms/step
Epoch 4/100
4/4 - 0s - loss: 0.0314 - val_loss: 0.2908 - 68ms/epoch - 17ms/step
Epoch 5/100
4/4 - 0s - loss: 0.0255 - val_loss: 0.2452 - 64ms/epoch - 16ms/step
Epoch 6/100
4/4 - 0s - loss: 0.0267 - val_loss: 0.2236 - 71ms/epoch - 18ms/step
Epoch 7/100
4/4 - 0s - loss: 0.0275 - val_loss: 0.2234 - 70ms/epoch - 17ms/step
Epoch 8/100
4/4 - 0s - loss: 0.0263 - val_loss: 0.2360 - 67ms/epoch - 17ms/step
Epoch 9/100
4/4 - 0s - loss: 0.0251 - val_loss: 0.2532 - 71ms/epoch - 18ms/step
Ep

In [38]:
model.rmse_by_timestep

Unnamed: 0,RMSE
1,0.157622
2,0.262465
3,0.364574
4,0.419857
5,0.460678
6,0.50188
7,0.526113
8,0.548025


In [39]:
all_rmses = []
rmses_by_in_size = []
for i in range(1, 8):
    model.run(price_weather_df, ['Preco_unitario', 'TEMPERATURA DO PONTO DE ORVALHO (°C)'], i, 8, 1)
    rmses_by_in_size.append(model.rmse)
    all_rmses.append(model.rmse_by_timestep)



train_size: 272
validation_size: 38
test_size: 78
values_train: (272, 5)
values_validation: (38, 5)
values_test: (78, 5)
train_percentage: 0.7010309278350515
validation_percentage: 0.0979381443298969
test_percentage: 0.20103092783505155
(264, 1, 9) (264, 1) (70, 1, 9) (70, 1)
Epoch 1/100
4/4 - 4s - loss: 0.0852 - val_loss: 0.3897 - 4s/epoch - 956ms/step
Epoch 2/100
4/4 - 0s - loss: 0.0493 - val_loss: 0.2762 - 64ms/epoch - 16ms/step
Epoch 3/100
4/4 - 0s - loss: 0.0278 - val_loss: 0.2008 - 65ms/epoch - 16ms/step
Epoch 4/100
4/4 - 0s - loss: 0.0186 - val_loss: 0.1611 - 68ms/epoch - 17ms/step
Epoch 5/100
4/4 - 0s - loss: 0.0201 - val_loss: 0.1459 - 74ms/epoch - 19ms/step
Epoch 6/100
4/4 - 0s - loss: 0.0219 - val_loss: 0.1411 - 73ms/epoch - 18ms/step
Epoch 7/100
4/4 - 0s - loss: 0.0210 - val_loss: 0.1409 - 66ms/epoch - 16ms/step
Epoch 8/100
4/4 - 0s - loss: 0.0190 - val_loss: 0.1447 - 64ms/epoch - 16ms/step
Epoch 9/100
4/4 - 0s - loss: 0.0171 - val_loss: 0.1507 - 66ms/epoch - 17ms/step
Epoc

In [40]:
all_rmses

[       RMSE
 1  0.472371,
        RMSE
 1  0.483312,
        RMSE
 1  0.483308,
        RMSE
 1  0.485603,
       RMSE
 1  0.48516,
        RMSE
 1  0.480003,
      RMSE
 1  0.4815]

In [41]:
rmses_by_in_size

[0.13189621919065767,
 0.13630291478285889,
 0.1444327155707161,
 0.1465708298718512,
 0.13870261270701553,
 0.13903656925504201,
 0.14226822771545]