In [2]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense
from sklearn.preprocessing import StandardScaler




In [3]:
amostra_df = pd.read_csv(f"Tabelas/amostra_series_df.csv")
amostra_df = amostra_df.set_index("Data da Coleta")
amostra_df.head()

Unnamed: 0_level_0,Regiao - Sigla,Estado - Sigla,Municipio,Produto,Valor de Venda,Unidade de Medida,Bandeira,Ano,Mes,Dia
Data da Coleta,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2012-01-02,S,RS,TRAMANDAI,ETANOL,2.499,Litro,IPIRANGA,2012,1,2
2012-01-02,S,RS,NOVO HAMBURGO,DIESEL,2.049,Litro,PETROBRAS DISTRIBUIDORA S.A.,2012,1,2
2012-01-02,SE,SP,TUPA,GASOLINA,2.79,Litro,RAIZEN,2012,1,2
2012-01-02,SE,SP,ARARAQUARA,GASOLINA,2.529,Litro,BRANCA,2012,1,2
2012-01-02,NE,BA,CAMACARI,DIESEL,1.98,Litro,RAIZEN,2012,1,2


In [4]:
machine_learning_df = pd.DataFrame()
machine_learning_df[['Ano', 'Mes', 'Dia']] = amostra_df[['Ano', 'Mes', 'Dia']]

In [5]:
media_valor = amostra_df['Valor de Venda'].mean()
desvio_padrao_valor = amostra_df['Valor de Venda'].std()

def padronizar_valor(valor):
    return (valor - media_valor) / desvio_padrao_valor

machine_learning_df['Valor de Venda'] = amostra_df['Valor de Venda'].apply(padronizar_valor)

colunas_para_padronizar = amostra_df[['Regiao - Sigla', 'Estado - Sigla', 'Produto']]

dummy_columns = pd.get_dummies(colunas_para_padronizar)

machine_learning_df = pd.concat([machine_learning_df, dummy_columns], axis=1)

column_order = ['Valor de Venda'] + [col for col in machine_learning_df.columns if col != 'Valor de Venda']
machine_learning_df = machine_learning_df[column_order]

In [6]:
machine_learning_df.head()

Unnamed: 0_level_0,Valor de Venda,Ano,Mes,Dia,Regiao - Sigla_CO,Regiao - Sigla_N,Regiao - Sigla_NE,Regiao - Sigla_S,Regiao - Sigla_SE,Estado - Sigla_AC,...,Estado - Sigla_SE,Estado - Sigla_SP,Estado - Sigla_TO,Produto_DIESEL,Produto_DIESEL S10,Produto_DIESEL S50,Produto_ETANOL,Produto_GASOLINA,Produto_GASOLINA ADITIVADA,Produto_GNV
Data da Coleta,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2012-01-02,-0.730595,2012,1,2,False,False,False,True,False,False,...,False,False,False,False,False,False,True,False,False,False
2012-01-02,-1.088245,2012,1,2,False,False,False,True,False,False,...,False,False,False,True,False,False,False,False,False,False
2012-01-02,-0.499314,2012,1,2,False,False,False,False,True,False,...,False,True,False,False,False,False,False,True,False,False
2012-01-02,-0.706751,2012,1,2,False,False,False,False,True,False,...,False,True,False,False,False,False,False,True,False,False
2012-01-02,-1.143084,2012,1,2,False,False,True,False,False,False,...,False,False,False,True,False,False,False,False,False,False


In [7]:
X = machine_learning_df.drop('Valor de Venda', axis=1)
y = machine_learning_df['Valor de Venda']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [8]:
X_train.head()

Unnamed: 0_level_0,Ano,Mes,Dia,Regiao - Sigla_CO,Regiao - Sigla_N,Regiao - Sigla_NE,Regiao - Sigla_S,Regiao - Sigla_SE,Estado - Sigla_AC,Estado - Sigla_AL,...,Estado - Sigla_SE,Estado - Sigla_SP,Estado - Sigla_TO,Produto_DIESEL,Produto_DIESEL S10,Produto_DIESEL S50,Produto_ETANOL,Produto_GASOLINA,Produto_GASOLINA ADITIVADA,Produto_GNV
Data da Coleta,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2018-01-09,2018,1,9,False,False,False,False,True,False,False,...,False,True,False,False,False,False,True,False,False,False
2022-07-25,2022,7,25,False,False,True,False,False,False,True,...,False,False,False,False,False,False,False,False,True,False
2015-03-12,2015,3,12,False,False,False,True,False,False,False,...,False,False,False,False,False,False,False,True,False,False
2014-11-18,2014,11,18,True,False,False,False,False,False,False,...,False,False,False,False,False,False,True,False,False,False
2019-06-03,2019,6,3,False,False,False,True,False,False,False,...,False,False,False,False,True,False,False,False,False,False


In [9]:
X_train = X_train.values.reshape(X_train.shape[0], X_train.shape[1], 1)
X_test = X_test.values.reshape(X_test.shape[0], X_test.shape[1], 1)

In [10]:
model = Sequential([
    Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=(X_train.shape[1], 1)),
    MaxPooling1D(pool_size=2),
    Flatten(),
    Dense(50, activation='relu'),
    Dense(1)  # Saída única para previsão
])

In [11]:
model.compile(optimizer='adam', loss='mean_squared_error')

In [12]:
X_train_tensor = tf.convert_to_tensor(X_train, dtype=tf.float32)
y_train_tensor = tf.convert_to_tensor(y_train, dtype=tf.float32)
X_test_tensor = tf.convert_to_tensor(X_test, dtype=tf.float32)
y_test_tensor = tf.convert_to_tensor(y_test, dtype=tf.float32)

model.fit(X_train_tensor, y_train_tensor, epochs=2, batch_size=32, validation_data=(X_test_tensor, y_test_tensor))

Epoch 1/2
Epoch 2/2


<keras.src.callbacks.History at 0x1aa6b57f790>

In [13]:
model.evaluate(X_test_tensor, y_test_tensor)



0.6960790753364563

In [14]:
prediction = model.predict(X_test_tensor)
valor_original = (prediction / desvio_padrao_valor) + media_valor
print("Predicted value:", valor_original)

Predicted value: [[3.340769 ]
 [3.6464357]
 [3.1592622]
 ...
 [3.6491153]
 [3.8868346]
 [3.3970468]]


In [15]:
dados_para_previsao = machine_learning_df.copy()

dados_para_previsao['Ano'] = machine_learning_df['Ano'] + 10

dados_para_previsao.head()

Unnamed: 0_level_0,Valor de Venda,Ano,Mes,Dia,Regiao - Sigla_CO,Regiao - Sigla_N,Regiao - Sigla_NE,Regiao - Sigla_S,Regiao - Sigla_SE,Estado - Sigla_AC,...,Estado - Sigla_SE,Estado - Sigla_SP,Estado - Sigla_TO,Produto_DIESEL,Produto_DIESEL S10,Produto_DIESEL S50,Produto_ETANOL,Produto_GASOLINA,Produto_GASOLINA ADITIVADA,Produto_GNV
Data da Coleta,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2012-01-02,-0.730595,2022,1,2,False,False,False,True,False,False,...,False,False,False,False,False,False,True,False,False,False
2012-01-02,-1.088245,2022,1,2,False,False,False,True,False,False,...,False,False,False,True,False,False,False,False,False,False
2012-01-02,-0.499314,2022,1,2,False,False,False,False,True,False,...,False,True,False,False,False,False,False,True,False,False
2012-01-02,-0.706751,2022,1,2,False,False,False,False,True,False,...,False,True,False,False,False,False,False,True,False,False
2012-01-02,-1.143084,2022,1,2,False,False,True,False,False,False,...,False,False,False,True,False,False,False,False,False,False


In [16]:
from sklearn.preprocessing import StandardScaler


new_data_selected = dados_para_previsao.drop('Valor de Venda', axis=1)

scaler = StandardScaler()
input_features_scaled = scaler.fit_transform(new_data_selected)

input_tensor = tf.convert_to_tensor(input_features_scaled, dtype=tf.float32)


In [17]:
print(scaler)

StandardScaler()


In [18]:
from keras.models import Model
from keras.layers import Input, Conv1D, MaxPooling1D, Flatten, Dense

input_shape = (X_train.shape[1], 1)  # Shape da entrada (timesteps, input_dim)
inputs = Input(shape=input_shape)
x = Conv1D(filters=64, kernel_size=3, activation='relu')(inputs)
x = MaxPooling1D(pool_size=2)(x)
x = Flatten()(x)
x = Dense(50, activation='relu')(x)
outputs = Dense(1)(x)

model = Model(inputs=inputs, outputs=outputs)
model.compile(optimizer='adam', loss='mean_squared_error')



In [None]:
import contextlib
from IPython.display import clear_output
import os
import sys

# Suprimir a saída padrão durante o loop de previsão
@contextlib.contextmanager
def suppress_stdout():
    with open(os.devnull, 'w') as fnull:
        old_stdout = sys.stdout
        sys.stdout = fnull
        try:
            yield
        finally:
            sys.stdout = old_stdout

valores_da_previsao = []

for index, row in dados_para_previsao.iterrows():
    input_row = scaler.transform(row.drop('Valor de Venda').values.reshape(1, -1))  # Padronizar a entrada
    input_row_reshaped = input_row.reshape(1, -1, 1)  # Adicionar dimensão de tempo
    prediction = model.predict(input_row_reshaped)  # Fazer a previsão

    # Desfazer a padronização manualmente
    previsao_despadronizada = prediction[0][0] * desvio_padrao_valor + media_valor
    valores_da_previsao.append(previsao_despadronizada)
    
    # Limpar a saída anterior
    clear_output(wait=True)
    
    # Imprimir a previsão atual
    print(f"Previsão para a linha {index}: {previsao_despadronizada:.2f}")

dados_para_previsao['Previsao'] = valores_da_previsao
