# RNN Model Evaluation
This notebook evaluates the performance of the RNN model in predicting the time an item will be on sale, using sample data as input.

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import pandas as pd
from sklearn.metrics import mean_absolute_error
from tqdm import tqdm
from datetime import datetime
import os

pd.options.display.max_columns = None
pd.options.display.width = None
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

Using device: cpu


## Load and Preprocess Data

In [2]:

items = pd.read_csv('../data/items.csv')
print("Items shape:", items.shape)
n_items = len(items)
item_to_index = {item_id: i + 2 for i, item_id in enumerate(items['item_id'])}
item_to_index[0] = 0 
item_to_index[1] = 1  
print(f"Number of unique items: {n_items}")

Items shape: (5569, 13)
Number of unique items: 5569


## Data Preprocessing

In [3]:
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence

class Encoder(nn.Module):
    def __init__(self, input_size=5, item_index=3, embedding_size=16, hidden_size=16, dropout_p=0.1, bidirectional=True):
        super(Encoder, self).__init__()

        self.hidden_size = hidden_size
        self.item_index = item_index
        n_items = len(item_to_index)

        self.embedding = nn.Embedding(n_items, embedding_size)
        self.rnn = nn.LSTM(input_size + embedding_size, hidden_size, batch_first=True, num_layers=2, bidirectional=bidirectional)
        self.dropout = nn.Dropout(dropout_p)

    def forward(self, X):
        item_ids = X[:, :, self.item_index].long()

        X = torch.cat([X[:, :, :self.item_index], X[:, :, self.item_index + 1:]], dim=2)

        item_embeddings = self.dropout(self.embedding(item_ids))

        X = torch.cat([X, item_embeddings], dim=2)

        output, (hidden, cell) = self.rnn(X)

        return output, (hidden, cell)

class Decoder(nn.Module):
    def __init__(self, input_size, hidden_size, bidirectional=True):
        super(Decoder, self).__init__()
        output_size = hidden_size * 2 if bidirectional else hidden_size
        self.rnn = nn.LSTM(input_size, hidden_size, batch_first=True, num_layers=2, bidirectional=bidirectional)
        self.projection = nn.Sequential(
            nn.Linear(output_size, 64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, 1)
        )

    def forward(self, encoder_outputs, encoder_hidden):
        output, _ = self.rnn(encoder_outputs, encoder_hidden)
        output = self.projection(output)
        return output

class AuctionPredictor(nn.Module):
    def __init__(self, input_size=5, encoder_hidden_size=16, decoder_hidden_size=16, item_index=3, embedding_size=16, dropout_p=0.1, bidirectional=True):
        super(AuctionPredictor, self).__init__()
        decoder_input_size = encoder_hidden_size * 2 if bidirectional else encoder_hidden_size
        self.encoder = Encoder(input_size, item_index, embedding_size, encoder_hidden_size, dropout_p, bidirectional=bidirectional)
        self.decoder = Decoder(decoder_input_size, decoder_hidden_size, bidirectional=bidirectional)

    def forward(self, X):
        encoder_outputs, encoder_hidden = self.encoder(X)
        decoder_outputs = self.decoder(encoder_outputs, encoder_hidden)
        return decoder_outputs

In [4]:

historical_prices_path = '../data/historical_prices.csv'
if not os.path.exists(historical_prices_path):
    historical_prices_path = 'historical_prices.csv'

try:

    weekly_historical_prices = pd.read_csv(historical_prices_path)
    weekly_historical_prices['datetime'] = weekly_historical_prices['datetime'].astype(str)
    weekly_historical_prices.set_index(['item_id', 'datetime'], inplace=True)
    
    print('Historical prices loaded successfully.')
except FileNotFoundError:
    print(f'Error: The historical prices file {historical_prices_path} was not found.')
    weekly_historical_prices = pd.DataFrame(columns=['item_id', 'datetime', 'price'])

Historical prices loaded successfully.


In [5]:
class AuctionDataset(torch.utils.data.Dataset):
    def __init__(self, sample_data_path, item_to_index, weekly_historical_prices):
        self.sample_data_path = sample_data_path
        self.item_to_index = item_to_index
        self.weekly_historical_prices = weekly_historical_prices
        self.sample_dates = os.listdir(sample_data_path)
        self.sample_hours = []
        for date in self.sample_dates:
            self.sample_hours.extend([(date, hour) for hour in os.listdir(os.path.join(sample_data_path, date))])
        self.column_map = {
            'bid': 0,
            'buyout': 1,
            'quantity': 2,
            'item_id': 3,
            'time_left': 4,
            'hours_since_first_appearance': 5,
            'historical_price': 6
        }
        print(f"Dataset size: {len(self)}")
    def __len__(self):
        return len(self.sample_hours)
    def __getitem__(self, idx):
        date, hour = self.sample_hours[idx]
        file_path = os.path.join(self.sample_data_path, date, hour)
        data = torch.load(file_path)
        X = []
        y = []
        for item_id, item_data in data.items():
            item_X = item_data[:, :-1]
            item_y = item_data[:, -1]
            datetime_str = f"{date} 00:00:00"
            if (item_id, datetime_str) in self.weekly_historical_prices.index:
                historical_price = self.weekly_historical_prices.loc[item_id, datetime_str]['price']
            else:
                historical_price = item_X[:, self.column_map['buyout']].median()
            item_X = torch.cat([item_X, torch.ones(item_X.shape[0], 1) * historical_price], dim=1)
            item_X[:, self.column_map['bid']] = item_X[:, self.column_map['bid']] * 10000
            item_X[:, self.column_map['buyout']] = item_X[:, self.column_map['buyout']] * 10000
            item_X[:, self.column_map['item_id']] = torch.tensor([self.item_to_index.get(int(item), 1) for item in item_X[:, self.column_map['item_id']]], dtype=torch.long)
            item_X[:, self.column_map['time_left']] = item_X[:, self.column_map['time_left']] / 48.0
            item_X[:, self.column_map['hours_since_first_appearance']] = item_X[:, self.column_map['hours_since_first_appearance']] / 48.0
            item_X[:, self.column_map['bid']] = item_X[:, self.column_map['bid']] / 1000
            item_X[:, self.column_map['buyout']] = item_X[:, self.column_map['buyout']] / 1000
            item_X[:, self.column_map['historical_price']] = item_X[:, self.column_map['historical_price']] / 1000
            item_X[:, self.column_map['quantity']] = item_X[:, self.column_map['quantity']] / 200.0
            X.append(item_X)
            y.append(item_y)
        return X, y

In [6]:
def collate_auctions(batch):
    all_X = []
    all_y = []
    for X, y in batch:
        all_X.extend(X)
        all_y.extend(y)
    lengths = [x.size(0) for x in all_X]
    max_length = max(lengths)
    padded_X = [F.pad(x, (0, 0, 0, max_length - x.size(0))) for x in all_X]
    padded_y = [F.pad(y, (0, max_length - y.size(0))) for y in all_y]
    X = torch.stack(padded_X)
    y = torch.stack(padded_y)
    return X, y

In [7]:

dataset = AuctionDataset('sample', item_to_index, weekly_historical_prices)
dataloader = torch.utils.data.DataLoader(dataset, batch_size=32, shuffle=False, collate_fn=collate_auctions)

Dataset size: 48


In [8]:

embedding_size = 64
encoder_hidden_size = 128
decoder_hidden_size = 128
model = AuctionPredictor(input_size=7,
                         encoder_hidden_size=encoder_hidden_size,
                         decoder_hidden_size=decoder_hidden_size,
                         item_index=3,
                         embedding_size=embedding_size,
                         dropout_p=0.1,
                         bidirectional=False).to(device)
model_path = 'models/rnn_model.pt'
if not os.path.exists(model_path):
    model_path = '../eval/models/rnn_model.pt'

try:
    checkpoint = torch.load(model_path, map_location=device)
    model.load_state_dict(checkpoint['model_state_dict'])
    model.eval()
    print('Pre-trained RNN model loaded successfully.')
except FileNotFoundError:
    print(f'Error: The model file {model_path} was not found.')
except Exception as e:
    print(f'An error occurred while loading the model: {str(e)}')

An error occurred while loading the model: Error(s) in loading state_dict for AuctionPredictor:
	size mismatch for encoder.embedding.weight: copying a param with shape torch.Size([10398, 64]) from checkpoint, the shape in current model is torch.Size([5571, 64]).
	size mismatch for encoder.rnn.weight_ih_l0: copying a param with shape torch.Size([512, 70]) from checkpoint, the shape in current model is torch.Size([512, 71]).


  checkpoint = torch.load(model_path, map_location=device)


In [9]:
from sklearn.metrics import mean_absolute_error
from tqdm import tqdm

def evaluate_rnn(model, dataloader):
    model.eval()
    predictions = []
    actual_values = []

    with torch.no_grad():
        for X, y in tqdm(dataloader):
            X = X.to(device)
            y = y.to(device)
            output = model(X)
            predictions.extend(output.squeeze().cpu().numpy().flatten())
            actual_values.extend(y.cpu().numpy().flatten())

    predictions = np.array(predictions)
    actual_values = np.array(actual_values)
    mask = actual_values != 0
    mae = mean_absolute_error(actual_values[mask], predictions[mask])
    return mae, predictions[mask], actual_values[mask]

rnn_mae, rnn_predictions, actual_values = evaluate_rnn(model, dataloader)
print(f'RNN Model MAE: {rnn_mae}')

  data = torch.load(file_path)
  0%|          | 0/2 [00:00<?, ?it/s]


UnpicklingError: invalid load key, '{'.