In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [30]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
import numpy as np
import torch
import torch.nn as nn
import matplotlib.pyplot as plt

In [5]:
csv_path = '/content/drive/MyDrive/Modulo11/avocado.csv'


In [6]:
df = pd.read_csv(csv_path)
df.head()

Unnamed: 0.1,Unnamed: 0,Date,AveragePrice,Total Volume,4046,4225,4770,Total Bags,Small Bags,Large Bags,XLarge Bags,type,year,region
0,0,2015-12-27,1.33,64236.62,1036.74,54454.85,48.16,8696.87,8603.62,93.25,0.0,conventional,2015,Albany
1,1,2015-12-20,1.35,54876.98,674.28,44638.81,58.33,9505.56,9408.07,97.49,0.0,conventional,2015,Albany
2,2,2015-12-13,0.93,118220.22,794.7,109149.67,130.5,8145.35,8042.21,103.14,0.0,conventional,2015,Albany
3,3,2015-12-06,1.08,78992.15,1132.0,71976.41,72.58,5811.16,5677.4,133.76,0.0,conventional,2015,Albany
4,4,2015-11-29,1.28,51039.6,941.48,43838.39,75.78,6183.95,5986.26,197.69,0.0,conventional,2015,Albany


In [11]:
# Selecionar a coluna de preços
prices = df['AveragePrice'].values.reshape(-1, 1)

# Normalizar os preços
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_prices = scaler.fit_transform(prices)


In [24]:
def create_sequences(data, seq_length):
    X, y = [], []
    for i in range(len(data) - seq_length):
        X.append(data[i:i+seq_length])
        y.append(data[i+seq_length])
    return np.array(X), np.array(y)

# Ex: usar os últimos 10 preços pra prever o próximo
seq_length = 10
X, y = create_sequences(scaled_prices, seq_length)

# Transformar em tensores
X = torch.tensor(X, dtype=torch.float32)  # [amostras, seq_len, 1]
y = torch.tensor(y, dtype=torch.float32)


In [25]:
class AvocadoPriceRNN(nn.Module):
    def __init__(self):
        super(AvocadoPriceRNN, self).__init__()
        self.rnn = nn.RNN(input_size=1, hidden_size=50, num_layers=2, batch_first=True)
        self.fc = nn.Linear(50, 1)

    def forward(self, x):
        h0 = torch.zeros(2, x.size(0), 50).to(x.device)  # inicializa hidden state
        out, _ = self.rnn(x, h0)
        out = self.fc(out[:, -1, :])  # pega apenas o último tempo
        return out


In [26]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [27]:
model = AvocadoPriceRNN().to(device)
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# Mover dados para o mesmo device
X = X.to(device)
y = y.to(device)


In [28]:
num_epochs = 100
for epoch in range(num_epochs):
    model.train()
    outputs = model(X)
    loss = criterion(outputs, y)

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if (epoch+1) % 10 == 0:
        print(f'Época [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')


Época [10/100], Loss: 0.0416
Época [20/100], Loss: 0.0230
Época [30/100], Loss: 0.0170
Época [40/100], Loss: 0.0152
Época [50/100], Loss: 0.0133
Época [60/100], Loss: 0.0101
Época [70/100], Loss: 0.0054
Época [80/100], Loss: 0.0050
Época [90/100], Loss: 0.0045
Época [100/100], Loss: 0.0042


In [34]:
model.eval()
with torch.no_grad():
    predicted = model(X).cpu().numpy()
    real = y.cpu().numpy()

# Reverter a normalização
real_prices = scaler.inverse_transform(real)
predicted_prices = scaler.inverse_transform(predicted)

# Ajustar datas (pulamos os primeiros `seq_length` dias na criação das janelas)
datas_reais = df['Date'].iloc[seq_length:].reset_index(drop=True)

# Criar DataFrame
tabela_resultado = pd.DataFrame({
    'Data': datas_reais,
    'Preço Real': real_prices.flatten(),
    'Preço Previsto': predicted_prices.flatten()
})

# Mostrar as primeiras linhas
tabela_resultado.head(20)

Unnamed: 0,Data,Preço Real,Preço Previsto
0,2015-10-18,1.12,1.073683
1,2015-10-11,1.28,1.093516
2,2015-10-04,1.31,1.150388
3,2015-09-27,0.99,1.216649
4,2015-09-20,1.33,1.186153
5,2015-09-13,1.28,1.18357
6,2015-09-06,1.11,1.232997
7,2015-08-30,1.07,1.219779
8,2015-08-23,1.34,1.17491
9,2015-08-16,1.33,1.202599
