RNN Model Evaluation

In [18]:
import torch
import torch.nn.functional as F
import pandas as pd
import numpy as np
import os
from sklearn.metrics import mean_absolute_error
from tqdm import tqdm
from datetime import datetime
from auction_predictor import AuctionPredictor
from auction_dataset import AuctionDataset

pd.options.display.max_columns = None
pd.options.display.width = None

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

Using device: cpu



Load and Preprocess Data

In [19]:
items = pd.read_csv('../data/items.csv')
print("Items shape:", items.shape)
n_items = len(items)
item_to_index = {item_id: i + 2 for i, item_id in enumerate(items['item_id'])}
item_to_index[0] = 0 
item_to_index[1] = 1  
print(f"Number of unique items: {n_items}")
historical_prices_path = '../data/historical_prices.csv'
if not os.path.exists(historical_prices_path):
    historical_prices_path = 'historical_prices.csv'

try:
    weekly_historical_prices = pd.read_csv(historical_prices_path)
    weekly_historical_prices['datetime'] = weekly_historical_prices['datetime'].astype(str)
    weekly_historical_prices.set_index(['item_id', 'datetime'], inplace=True)
    print('Historical prices loaded successfully.')
except FileNotFoundError:
    print(f'Error: The historical prices file {historical_prices_path} was not found.')
    weekly_historical_prices = pd.DataFrame(columns=['item_id', 'datetime', 'price'])

Items shape: (10396, 13)
Number of unique items: 10396
Historical prices loaded successfully.


In [20]:
import os
import json
import numpy as np
from tqdm import tqdm
from datetime import datetime

time_left_mapping = {
    'VERY_LONG': 48,
    'LONG': 12,
    'MEDIUM': 2,
    'SHORT': 0.5
}

def process_auction_data(auctions):
    processed_data = []
    
    for auction in auctions:
        if not isinstance(auction, dict) or 'item' not in auction or 'id' not in auction['item']:
            print(f"Unexpected structure in auction: {auction}")
            continue

        item_id = auction['item']['id']
        time_left_numeric = time_left_mapping.get(auction['time_left'], 0)
        
        processed_auction = [
            auction['bid'], 
            auction['buyout'], 
            auction['quantity'], 
            item_id,  
            time_left_numeric, 
            auction['quantity'],  
            auction['buyout']  
        ]
        processed_data.append(processed_auction)
    return np.array(processed_data)

def load_auctions_from_sample(data_dir='sample/'):
    file_info = {}

    for root, dirs, files in os.walk(data_dir):
        for filename in tqdm(files):
            filepath = os.path.join(root, filename)
            date = datetime.strptime(filename.split('.')[0], '%Y%m%dT%H')
            file_info[filepath] = date

    file_info = {k: v for k, v in sorted(file_info.items(), key=lambda item: item[1])}
    
    all_auctions = []
    
    for filepath in list(file_info.keys()):
        with open(filepath, 'r') as f:
            try:
                json_data = json.load(f)
                
                if 'auctions' not in json_data:
                    print(f"File {filepath} does not contain 'auctions' key, skipping.")
                    continue
                
                auction_data = json_data['auctions']
                
                if not auction_data:
                    print(f"File {filepath} is empty, skipping.")
                    continue
                
                all_auctions.extend(auction_data)
            except json.JSONDecodeError as e:
                print(f"Error loading file {filepath}: {e}")
                continue
            except Exception as e:
                print(f"Unexpected error loading file {filepath}: {e}")
                continue

    return all_auctions

data_dir = 'sample/'
auction_data = load_auctions_from_sample(data_dir)
data = process_auction_data(auction_data)

print(f"Processed {len(data)} auctions.")
print(f"Example of processed auction: {data[0]}")

0it [00:00, ?it/s]
100%|██████████| 24/24 [00:00<00:00, 30030.82it/s]
100%|██████████| 24/24 [00:00<00:00, 51834.86it/s]


Error loading file sample/26-08-2024/20240826T22.json: Expecting value: line 1 column 1 (char 0)
Processed 354499 auctions.
Example of processed auction: [2.384481e+06 2.509979e+06 1.000000e+00 5.274800e+04 5.000000e-01
 1.000000e+00 2.509979e+06]


In [21]:
file_path = '../data/items.csv'

try:
    items = pd.read_csv(file_path)
    n_items = len(items)
    
    item_to_index = {item_id: i + 2 for i, item_id in enumerate(items['item_id'])}
    item_to_index[0] = 0
    item_to_index[1] = 1
    
    print(f"Number of unique items: {n_items}")
except FileNotFoundError:
    print(f"Error: File 'items.csv' not found at the specified path: {file_path}")

Number of unique items: 10396


In [25]:
embedding_size = 64
encoder_hidden_size = 128
decoder_hidden_size = 128
epochs = 10
n_items = 3

model = AuctionPredictor(
    n_items=n_items,             
    input_size=6,                   
    encoder_hidden_size=encoder_hidden_size,
    decoder_hidden_size=decoder_hidden_size,
    item_index=3,                   
    embedding_size=embedding_size,
    dropout_p=0.1,
    bidirectional=False
).to(device)

print(f'Number of model parameters: {sum(p.numel() for p in model.parameters())}')


Number of model parameters: 509249


In [None]:
model_path = 'models/rnn_model.pt'
if not os.path.exists(model_path):
    model_path = '../eval/models/rnn_model.pt'  
try:
    checkpoint = torch.load(model_path, map_location=device)
    model.load_state_dict(checkpoint['model_state_dict'])
    model.eval()  
    print('Pre-trained RNN model loaded successfully.')
except FileNotFoundError:
    print(f'Error: The model file {model_path} was not found.')
except Exception as e:
    print(f'An error occurred while loading the model: {str(e)}')

def evaluate_rnn(model, data):
    predictions = []
    actual_values = []

    for X, y in data:
        prediction = model.predict(X)  
        predictions.append(prediction)
        actual_values.append(y)
    predictions = np.array(predictions)
    actual_values = np.array(actual_values)

    mae = mean_absolute_error(actual_values, predictions)
    return mae
rnn_mae = evaluate_rnn(model, data)
print(f'RNN Model MAE: {rnn_mae}')