In [17]:
import torch
import torch.nn.functional as F
import pandas as pd
import numpy as np
import os
import json
from sklearn.metrics import mean_absolute_error
from tqdm import tqdm
from datetime import datetime
from auction_predictor import AuctionPredictor
from auction_dataset import AuctionDataset

pd.options.display.max_columns = None
pd.options.display.width = None

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

Using device: cpu


In [18]:
items = pd.read_csv('../data/items.csv')
print("Items shape:", items.shape)
n_items = len(items)
item_to_index = {item_id: i + 2 for i, item_id in enumerate(items['item_id'])}
item_to_index[0] = 0 
item_to_index[1] = 1  
print(f"Number of unique items: {n_items}")

historical_prices_path = '../data/historical_prices.csv'
if not os.path.exists(historical_prices_path):
    historical_prices_path = 'historical_prices.csv'

try:
    weekly_historical_prices = pd.read_csv(historical_prices_path)
    weekly_historical_prices['datetime'] = weekly_historical_prices['datetime'].astype(str)
    weekly_historical_prices.set_index(['item_id', 'datetime'], inplace=True)
    print('Historical prices loaded successfully.')
except FileNotFoundError:
    print(f'Error: The historical prices file {historical_prices_path} was not found.')
    weekly_historical_prices = pd.DataFrame(columns=['item_id', 'datetime', 'price'])

time_left_mapping = {
    'VERY_LONG': 48,
    'LONG': 12,
    'MEDIUM': 2,
    'SHORT': 0.5
}

Items shape: (10396, 13)
Number of unique items: 10396
Historical prices loaded successfully.


In [19]:
def process_auction_data(auctions, max_auctions_per_item=1000):
    auctions_by_item = {}
    
    for auction in auctions:
        if not isinstance(auction, dict) or 'item' not in auction or 'id' not in auction['item']:
            print(f"Unexpected structure in auction: {auction}")
            continue

        item_id = auction['item']['id']
        time_left_numeric = time_left_mapping.get(auction['time_left'], 0)
        bid = auction['bid'] * 10000 / 1000
        buyout = auction['buyout'] * 10000 / 1000
        quantity = auction['quantity'] / 200
        time_left = time_left_numeric / 48
        item_index = item_to_index.get(item_id, 1)
        
    
        hours_since_first_appearance = 0  
        
        
        datetime_str = "2023-08-25 00:00:00" 
        if (item_id, datetime_str) in weekly_historical_prices.index:
            historical_price = weekly_historical_prices.loc[item_id, datetime_str]['price']
        else:
            historical_price = buyout  
        
        processed_auction = [
            bid, 
            buyout,  
            quantity, 
            item_index,
            time_left, 
            hours_since_first_appearance,
            historical_price  
        ]
        
        if item_index not in auctions_by_item:
            auctions_by_item[item_index] = []
        
        if len(auctions_by_item[item_index]) < max_auctions_per_item:
            auctions_by_item[item_index].append(processed_auction)
    
    return auctions_by_item

def load_auctions_from_sample(data_dir='sample/'):
    file_info = {}

    for root, dirs, files in os.walk(data_dir):
        for filename in tqdm(files):
            filepath = os.path.join(root, filename)
            date = datetime.strptime(filename.split('.')[0], '%Y%m%dT%H')
            file_info[filepath] = date

    file_info = {k: v for k, v in sorted(file_info.items(), key=lambda item: item[1])}
    
    all_auctions = []
    
    for filepath in list(file_info.keys()):
        with open(filepath, 'r') as f:
            try:
                json_data = json.load(f)
                
                if 'auctions' not in json_data:
                    print(f"File {filepath} does not contain 'auctions' key, skipping.")
                    continue
                
                auction_data = json_data['auctions']
                
                if not auction_data:
                    print(f"File {filepath} is empty, skipping.")
                    continue
                
                all_auctions.extend(auction_data)
            except json.JSONDecodeError as e:
                print(f"Error loading file {filepath}: {e}")
                continue
            except Exception as e:
                print(f"Unexpected error loading file {filepath}: {e}")
                continue

    return all_auctions

data_dir = 'sample/'
auction_data = load_auctions_from_sample(data_dir)
auctions_by_item = process_auction_data(auction_data)

print(f"Processed auctions for {len(auctions_by_item)} different items.")
print(f"Example of processed auctions for an item: {auctions_by_item[list(auctions_by_item.keys())[0]][0]}")

0it [00:00, ?it/s]
100%|██████████| 24/24 [00:00<00:00, 41171.08it/s]
100%|██████████| 24/24 [00:00<00:00, 69663.18it/s]


Error loading file sample/26-08-2024/20240826T22.json: Expecting value: line 1 column 1 (char 0)
Processed auctions for 1817 different items.
Example of processed auctions for an item: [23844810.0, 25099790.0, 0.005, 1, 0.010416666666666666, 0, 25099790.0]


In [20]:
embedding_size = 64
encoder_hidden_size = 128
decoder_hidden_size = 128
epochs = 10

model = AuctionPredictor(
    n_items=n_items,             
    input_size=7,                   
    encoder_hidden_size=encoder_hidden_size,
    decoder_hidden_size=decoder_hidden_size,
    item_index=3,                   
    embedding_size=embedding_size,
    dropout_p=0.1,
    bidirectional=False
).to(device)

print(f'Number of model parameters: {sum(p.numel() for p in model.parameters())}')

model_path = 'models/rnn_model.pt'
if not os.path.exists(model_path):
    model_path = '../eval/models/rnn_model.pt'  

try:
    checkpoint = torch.load(model_path, map_location=device)
    model.load_state_dict(checkpoint['model_state_dict'])
    model.eval()  
    print('Pre-trained RNN model loaded successfully.')
except FileNotFoundError:
    print(f'Error: The model file {model_path} was not found.')
except Exception as e:
    print(f'An error occurred while loading the model: {str(e)}')

Number of model parameters: 1173889
An error occurred while loading the model: Error(s) in loading state_dict for AuctionPredictor:
	size mismatch for encoder.embedding.weight: copying a param with shape torch.Size([10398, 64]) from checkpoint, the shape in current model is torch.Size([10396, 64]).
	size mismatch for encoder.rnn.weight_ih_l0: copying a param with shape torch.Size([512, 70]) from checkpoint, the shape in current model is torch.Size([512, 69]).


  checkpoint = torch.load(model_path, map_location=device)


In [21]:
def evaluate_rnn(model, auctions_by_item):
    all_predictions = []
    all_actual_values = []

    for item_idx, auctions in auctions_by_item.items():
        if not auctions: 
            continue
        auctions_np = np.array(auctions)
        data = auctions_np[:, :-1]  
        y = auctions_np[:, -1]  
        X = torch.tensor(data, dtype=torch.float32).to(device)
        X = X.unsqueeze(0)
        print(f"Item {item_idx}:")
        print(f"Input shape after unsqueeze: {X.shape}")
        with torch.no_grad():
         predictions = model(X)
        print(f"Predictions shape: {predictions.shape}")
        all_predictions.extend(predictions.squeeze(0).cpu().numpy())
        all_actual_values.extend(y)
    if not all_predictions:
        print("No valid auctions were processed. Check your data.")
        return None

    return all_predictions, all_actual_values

In [22]:
def calculate_mae(all_predictions, all_actual_values):
    if len(all_predictions) == 0:
        print("No valid data for MAE calculation.")
        return None
    all_predictions = np.array(all_predictions)
    all_actual_values = np.array(all_actual_values)
    mae = mean_absolute_error(all_actual_values, all_predictions)
    return mae
all_predictions, all_actual_values = evaluate_rnn(model, auctions_by_item)
if all_predictions and all_actual_values:
    rnn_mae = calculate_mae(all_predictions, all_actual_values)
    if rnn_mae is not None:
        print(f'RNN Model MAE: {rnn_mae}')
    else:
        print('Evaluation failed due to lack of valid data.')
else:
    print('No predictions were made.')

Item 1:
Input shape after unsqueeze: torch.Size([1, 1000, 6])
Predictions shape: torch.Size([1, 1000, 1])
Item 9482:
Input shape after unsqueeze: torch.Size([1, 358, 6])
Predictions shape: torch.Size([1, 358, 1])
Item 9788:
Input shape after unsqueeze: torch.Size([1, 323, 6])
Predictions shape: torch.Size([1, 323, 1])
Item 10202:
Input shape after unsqueeze: torch.Size([1, 240, 6])
Predictions shape: torch.Size([1, 240, 1])
Item 9481:
Input shape after unsqueeze: torch.Size([1, 401, 6])
Predictions shape: torch.Size([1, 401, 1])
Item 428:
Input shape after unsqueeze: torch.Size([1, 9, 6])
Predictions shape: torch.Size([1, 9, 1])
Item 1078:
Input shape after unsqueeze: torch.Size([1, 15, 6])
Predictions shape: torch.Size([1, 15, 1])
Item 427:
Input shape after unsqueeze: torch.Size([1, 9, 6])
Predictions shape: torch.Size([1, 9, 1])
Item 1081:
Input shape after unsqueeze: torch.Size([1, 3, 6])
Predictions shape: torch.Size([1, 3, 1])
Item 1928:
Input shape after unsqueeze: torch.Size([1