In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import pandas as pd
from torch_geometric.data import Data
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import os
import gc

# Define the NeighborBasedLSTM model
class NeighborBasedLSTM(nn.Module):
    """Price regressor fusing property, temporal and neighbour-history inputs.

    Three branches are computed and concatenated:

    * property features  -> two Linear/ReLU/BatchNorm layers with a residual add
    * temporal features  -> two Linear/ReLU/BatchNorm layers with a residual add
    * neighbour histories -> a shared bidirectional LSTM per neighbour, followed
      by masked attention pooling over the neighbours

    The fused vector goes through two more dense layers and a final linear
    layer that emits one value per sample.

    Args:
        input_dim: Accepted for interface compatibility; it is not used. The
            LSTM input size is fixed to 1 (one price per timestep).
        temporal_features_dim: Width of ``data.temporal_features``.
        property_features_dim: Width of ``data.property_features``.
        max_neighbors: Nominal number of neighbour slots per sample (stored on
            the module; the actual count is read from the input tensor).
        lstm_hidden_dim: Hidden size of each LSTM direction.
        hidden_dim: Width of the dense layers.
        dropout: Dropout probability shared by all dropout applications.
    """

    def __init__(self, input_dim, temporal_features_dim, property_features_dim,
                 max_neighbors=5, lstm_hidden_dim=16, hidden_dim=64, dropout=0.3):
        super().__init__()

        self.max_neighbors = max_neighbors
        self.lstm_hidden_dim = lstm_hidden_dim

        # LSTM for processing neighbor price histories
        self.neighbor_lstm = nn.LSTM(
            input_size=1,  # Single feature per timestep (price)
            hidden_size=lstm_hidden_dim,
            num_layers=1,
            batch_first=True,
            bidirectional=True
        )

        # Attention mechanism to weight neighbor contributions
        self.attention = nn.Linear(lstm_hidden_dim * 2, 1)

        # Processing for property features
        self.property_layer1 = nn.Linear(property_features_dim, hidden_dim)
        self.property_bn1 = nn.BatchNorm1d(hidden_dim)
        self.property_layer2 = nn.Linear(hidden_dim, hidden_dim)
        self.property_bn2 = nn.BatchNorm1d(hidden_dim)

        # Processing for temporal features
        self.temporal_layer1 = nn.Linear(temporal_features_dim, hidden_dim)
        self.temporal_bn1 = nn.BatchNorm1d(hidden_dim)
        self.temporal_layer2 = nn.Linear(hidden_dim, hidden_dim)
        self.temporal_bn2 = nn.BatchNorm1d(hidden_dim)

        # Feature fusion layer: two dense branches plus both LSTM directions
        combined_dim = hidden_dim * 2 + lstm_hidden_dim * 2
        self.fusion_layer = nn.Linear(combined_dim, hidden_dim)
        self.fusion_bn = nn.BatchNorm1d(hidden_dim)

        # Output layers
        self.fc1 = nn.Linear(hidden_dim, hidden_dim)
        self.fc1_bn = nn.BatchNorm1d(hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, 1)

        self.dropout = nn.Dropout(dropout)

    def forward(self, data):
        """Predict one value per sample.

        Args:
            data: Object exposing ``property_features`` [batch, P],
                ``temporal_features`` [batch, T], ``neighbor_histories``
                [batch, neighbors, seq_len] and a boolean ``neighbor_mask``
                [batch, neighbors] that is True for real neighbours.

        Returns:
            Tensor of shape [batch, 1].
        """
        property_features = data.property_features
        temporal_features = data.temporal_features
        neighbor_histories = data.neighbor_histories
        neighbor_mask = data.neighbor_mask

        batch_size = property_features.shape[0]

        # Process property features with residual connection
        prop_out = F.relu(self.property_layer1(property_features))
        prop_out = self.property_bn1(prop_out)
        prop_out = self.dropout(prop_out)
        prop_out_res = prop_out
        prop_out = F.relu(self.property_layer2(prop_out))
        prop_out = self.property_bn2(prop_out)
        prop_out = prop_out + prop_out_res  # Residual connection

        # Process temporal features with residual connection
        temp_out = F.relu(self.temporal_layer1(temporal_features))
        temp_out = self.temporal_bn1(temp_out)
        temp_out = self.dropout(temp_out)
        temp_out_res = temp_out
        temp_out = F.relu(self.temporal_layer2(temp_out))
        temp_out = self.temporal_bn2(temp_out)
        temp_out = temp_out + temp_out_res  # Residual connection

        # Flatten neighbours into the batch axis so one LSTM call handles all:
        # [batch, neighbors, seq_len] -> [batch*neighbors, seq_len, 1].
        # The neighbour count is read from the tensor, and reshape (not view)
        # is used so non-contiguous inputs are accepted as well.
        num_neighbors = neighbor_histories.size(1)
        seq_len = neighbor_histories.size(2)
        reshaped_histories = neighbor_histories.reshape(batch_size * num_neighbors, seq_len, 1)

        _, (h_n, _) = self.neighbor_lstm(reshaped_histories)

        # Final hidden state of the forward (index 0) and backward (index 1) pass
        h_combined = torch.cat([h_n[0], h_n[1]], dim=1)  # [batch*neighbors, lstm_hidden_dim*2]
        h_combined = h_combined.view(batch_size, num_neighbors, -1)

        # One attention score per neighbour
        attention_scores = self.attention(h_combined).squeeze(-1)  # [batch, neighbors]

        # Push padded slots to a very negative score before the softmax ...
        attention_scores = attention_scores.masked_fill(~neighbor_mask, -1e9)
        attention_weights = F.softmax(attention_scores, dim=1)
        # ... and zero them afterwards. Without this, a sample whose mask is
        # entirely False gets uniform weights over its padding slots, so the
        # padding content leaks into the prediction.
        attention_weights = attention_weights * neighbor_mask.to(attention_weights.dtype)
        attention_weights = attention_weights.unsqueeze(-1)  # [batch, neighbors, 1]

        # Attention-weighted sum over neighbours
        neighbor_context = (h_combined * attention_weights).sum(dim=1)  # [batch, lstm_hidden_dim*2]

        # Combine all features
        combined = torch.cat([prop_out, temp_out, neighbor_context], dim=1)
        combined = F.relu(self.fusion_layer(combined))
        combined = self.fusion_bn(combined)
        combined = self.dropout(combined)

        # Final prediction layers
        out = F.relu(self.fc1(combined))
        out = self.fc1_bn(out)
        out = self.dropout(out)
        price_prediction = self.fc2(out)

        return price_prediction

def load_neighbor_data(neighbor_csv_path):
    """
    Load neighbor relationships from a CSV file.

    The CSV must contain the columns ``test_listing_id``,
    ``neighbor_listing_id``, ``rank`` and ``distance``.

    Args:
        neighbor_csv_path: Path of the CSV file to read.

    Returns:
        dict mapping each ``test_listing_id`` to a list of
        ``{'neighbor_id', 'rank', 'distance'}`` dicts sorted by ascending rank
        (rows with equal rank keep their file order).
    """
    print(f"Loading neighbor data from {neighbor_csv_path}")
    neighbor_df = pd.read_csv(neighbor_csv_path)

    # Iterate column-wise rather than with DataFrame.iterrows(): iterrows()
    # builds one Series per row, which upcasts the integer ID columns to
    # float64 whenever `distance` is a float column. Large listing IDs are
    # not exactly representable as float64, so they would be silently altered
    # and never match during later lookups.
    columns = [
        neighbor_df[name].tolist()
        for name in ('test_listing_id', 'neighbor_listing_id', 'rank', 'distance')
    ]

    # Create a dictionary mapping test_listing_id to its neighbors
    neighbor_dict = {}
    for test_id, neighbor_id, rank, distance in zip(*columns):
        neighbor_dict.setdefault(test_id, []).append({
            'neighbor_id': neighbor_id,
            'rank': rank,
            'distance': distance
        })

    # Sort neighbors by rank for each test listing (list.sort is stable)
    for neighbors in neighbor_dict.values():
        neighbors.sort(key=lambda entry: entry['rank'])

    print(f"Loaded neighbor data for {len(neighbor_dict)} test listings")
    return neighbor_dict

def extract_price_history(listing_data, date, seq_length=30):
    """
    Return the last ``seq_length`` prices strictly before ``date``.

    The result is in chronological order (oldest first). When fewer than
    ``seq_length`` earlier rows exist, the oldest available price is repeated
    at the start of the list; with no earlier rows at all the list is zeros.
    """
    earlier_rows = listing_data[listing_data['date'] < date]
    newest_first = earlier_rows.sort_values('date', ascending=False)

    # Most recent prices first, capped at the requested length
    prices = newest_first['price'].head(seq_length).tolist()

    # Extend on the "old" end with the oldest known price (or 0 if none)
    shortfall = seq_length - len(prices)
    if shortfall > 0:
        filler = prices[-1] if prices else 0
        prices += [filler] * shortfall

    # Trim to the window and flip into chronological order
    return prices[:seq_length][::-1]

def prepare_neighbor_data_batch(test_data, train_data, neighbor_dict, max_neighbors=5, seq_length=30):
    """
    Prepare batched neighbor data for all test instances.

    For every row of ``test_data`` the first ``max_neighbors`` entries of
    ``neighbor_dict[listing_id]`` are looked up in ``train_data`` and their
    price history before the row's date is extracted.

    Args:
        test_data: DataFrame with ``listing_id`` and ``date`` columns; one
            output row is produced per input row, in the same order.
        train_data: DataFrame with ``listing_id``, ``date`` and ``price``
            columns that supplies the neighbor histories.
        neighbor_dict: Mapping of listing id to a list of dicts that each
            carry a ``'neighbor_id'`` key.
        max_neighbors: Number of neighbor slots per test row.
        seq_length: Number of timesteps per neighbor history.

    Returns:
        Tuple ``(neighbor_histories, neighbor_masks)`` where histories is a
        float32 array of shape (len(test_data), max_neighbors, seq_length) and
        masks is a bool array of shape (len(test_data), max_neighbors). A slot
        is True only when that neighbor exists in ``train_data``; all other
        slots stay zero / False.
    """
    print("Preparing neighbor data batch...")

    # One pass over train_data instead of one boolean filter per listing.
    # Note: groupby drops rows whose listing_id is NaN.
    train_listings_dict = {
        listing_id: listing_rows.sort_values('date')
        for listing_id, listing_rows in train_data.groupby('listing_id')
    }

    # Initialize arrays to store neighbor histories and masks
    test_size = len(test_data)
    neighbor_histories = np.zeros((test_size, max_neighbors, seq_length), dtype=np.float32)
    neighbor_masks = np.zeros((test_size, max_neighbors), dtype=bool)

    # Only two columns are needed, so iterate them directly rather than
    # materialising a Series per row with iterrows().
    rows = zip(test_data['listing_id'], test_data['date'])
    for idx, (test_id, test_date) in enumerate(rows):
        neighbors = neighbor_dict.get(test_id)
        if not neighbors:
            continue  # Skip if no neighbors found

        # Slot index follows the neighbor's position in the ranked list, so a
        # neighbor missing from the training set leaves a masked gap.
        for n_idx, neighbor in enumerate(neighbors[:max_neighbors]):
            neighbor_data = train_listings_dict.get(neighbor['neighbor_id'])
            if neighbor_data is None:
                continue  # Only use neighbors from the training set

            neighbor_histories[idx, n_idx] = extract_price_history(
                neighbor_data, test_date, seq_length
            )
            neighbor_masks[idx, n_idx] = True

    print(f"Prepared neighbor data for {test_size} test instances")
    return neighbor_histories, neighbor_masks

def prepare_data_for_neighbor_lstm(train_data, test_data, neighbor_dict, property_features, 
                                 temporal_features, property_scaler=None, temporal_scaler=None, 
                                 target_scaler=None, max_neighbors=5, seq_length=30):
    """
    Build the model inputs for the NeighborBasedLSTM.

    Any scaler passed as None is created and fitted on ``train_data``;
    supplied scalers are used as-is. Neighbor histories and masks are built
    for ``test_data`` only, so the returned train object carries just the
    scaled property/temporal features and target.

    Returns:
        (train_data_obj, test_data_obj, property_scaler, temporal_scaler,
        target_scaler)
    """
    print("Preparing data for NeighborBasedLSTM...")

    def price_column(frame):
        # StandardScaler works on 2-D input: one column of prices
        return frame['price'].values.reshape(-1, 1)

    def scale(scaler, frame, columns):
        return scaler.transform(frame[columns]).astype(np.float32)

    # Fit whichever scalers the caller did not provide
    if property_scaler is None:
        property_scaler = StandardScaler().fit(train_data[property_features])
    if temporal_scaler is None:
        temporal_scaler = StandardScaler().fit(train_data[temporal_features])
    if target_scaler is None:
        target_scaler = StandardScaler().fit(price_column(train_data))

    # Scaled feature matrices
    train_property = scale(property_scaler, train_data, property_features)
    test_property = scale(property_scaler, test_data, property_features)
    train_temporal = scale(temporal_scaler, train_data, temporal_features)
    test_temporal = scale(temporal_scaler, test_data, temporal_features)

    # Neighbor histories are only assembled for the test rows
    histories, masks = prepare_neighbor_data_batch(
        test_data, train_data, neighbor_dict, max_neighbors, seq_length
    )

    # Scaled targets, kept as [n, 1] columns
    train_target = target_scaler.transform(price_column(train_data)).astype(np.float32)
    test_target = target_scaler.transform(price_column(test_data)).astype(np.float32)

    train_data_obj = Data(
        property_features=torch.FloatTensor(train_property),
        temporal_features=torch.FloatTensor(train_temporal),
        y=torch.FloatTensor(train_target)
    )

    test_data_obj = Data(
        property_features=torch.FloatTensor(test_property),
        temporal_features=torch.FloatTensor(test_temporal),
        neighbor_histories=torch.FloatTensor(histories),
        neighbor_mask=torch.BoolTensor(masks),
        y=torch.FloatTensor(test_target)
    )

    return train_data_obj, test_data_obj, property_scaler, temporal_scaler, target_scaler

def train_neighbor_lstm_model(train_data, val_data, neighbor_dict, property_features, temporal_features,
                            max_neighbors=5, seq_length=30, lstm_hidden_dim=16, hidden_dim=64,
                            epochs=50, lr=0.001, batch_size=64, device='cuda'):
    """
    Train the NeighborBasedLSTM model.

    Args:
        train_data: DataFrame used to fit the scalers and to supply neighbor
            price histories.
        val_data: DataFrame whose rows become the model inputs (see note below).
        neighbor_dict: Mapping of listing id to ranked neighbor dicts.
        property_features: Property feature column names.
        temporal_features: Temporal feature column names.
        max_neighbors: Neighbor slots per sample.
        seq_length: Timesteps per neighbor history.
        lstm_hidden_dim: Hidden size of each LSTM direction.
        hidden_dim: Width of the dense layers.
        epochs: Maximum number of full-batch epochs.
        lr: Adam learning rate.
        batch_size: Currently unused; every epoch is one full-batch step.
        device: Device name or ``torch.device``.

    Returns:
        (model, property_scaler, temporal_scaler, target_scaler, history),
        where the model carries the weights of the epoch with the lowest
        validation loss and ``history`` holds per-epoch lists for
        'train_loss', 'val_loss', 'val_rmse', 'val_mae' and 'lr'.

    Metrics are reported after ``expm1``, i.e. the 'price' column is assumed
    to have been log1p-transformed beforehand.
    """
    print("\n===== Training NeighborBasedLSTM Model =====")
    print(f"LSTM hidden dimension: {lstm_hidden_dim}, Max neighbors: {max_neighbors}")

    # Accept both strings and torch.device: the default is the string 'cuda',
    # and the `device.type` check in the loop below needs a torch.device.
    device = torch.device(device)

    # Prepare data
    _, val_data_obj, property_scaler, temporal_scaler, target_scaler = prepare_data_for_neighbor_lstm(
        train_data, val_data, neighbor_dict, property_features, temporal_features,
        max_neighbors=max_neighbors, seq_length=seq_length
    )

    # Move validation data to device
    val_data_obj = val_data_obj.to(device)

    # Initialize model
    model = NeighborBasedLSTM(
        input_dim=1,  # Single price feature
        temporal_features_dim=len(temporal_features),
        property_features_dim=len(property_features),
        max_neighbors=max_neighbors,
        lstm_hidden_dim=lstm_hidden_dim,
        hidden_dim=hidden_dim,
        dropout=0.3
    ).to(device)

    # Initialize optimizer and loss
    optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=1e-5)
    criterion = nn.HuberLoss(delta=1.0)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, mode='min', factor=0.5, patience=5, verbose=True
    )

    # Early-stopping state
    best_val_loss = float('inf')
    best_model_state = None
    patience = 10
    counter = 0

    history = {
        'train_loss': [],
        'val_loss': [],
        'val_rmse': [],
        'val_mae': [],
        'lr': []
    }

    for epoch in range(epochs):
        # Training
        # NOTE(review): the gradient step runs on `val_data_obj`; the train
        # object returned by prepare_data_for_neighbor_lstm is discarded (it
        # carries no neighbor histories). The "validation" metrics below are
        # therefore computed on the same rows the model is fitted to.
        # Confirm whether this is intended before trusting early stopping.
        model.train()
        optimizer.zero_grad()

        # Forward pass
        out = model(val_data_obj)

        # Calculate loss
        loss = criterion(out, val_data_obj.y)

        # Backward pass and optimize
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
        optimizer.step()

        # Validation
        model.eval()
        with torch.no_grad():
            val_out = model(val_data_obj)
            val_loss = criterion(val_out, val_data_obj.y).item()

            # Convert predictions back to original scale for metrics
            val_pred_orig = np.expm1(target_scaler.inverse_transform(val_out.cpu().numpy()))
            val_true_orig = np.expm1(target_scaler.inverse_transform(val_data_obj.y.cpu().numpy()))

            # Calculate metrics
            val_rmse = np.sqrt(mean_squared_error(val_true_orig, val_pred_orig))
            val_mae = mean_absolute_error(val_true_orig, val_pred_orig)

        # Store history
        history['train_loss'].append(loss.item())
        history['val_loss'].append(val_loss)
        history['val_rmse'].append(val_rmse)
        history['val_mae'].append(val_mae)
        history['lr'].append(optimizer.param_groups[0]['lr'])

        # Print progress
        print(f"Epoch {epoch+1}/{epochs} - Loss: {loss.item():.4f}, Val Loss: {val_loss:.4f}, "
              f"RMSE: {val_rmse:.2f}, MAE: {val_mae:.2f}")

        # Learning rate scheduling
        scheduler.step(val_loss)

        # Early stopping
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            # Clone every tensor: state_dict() returns references to the live
            # parameters, so a shallow dict copy would keep changing with
            # later optimizer steps and restoring it would do nothing.
            best_model_state = {
                name: tensor.detach().clone()
                for name, tensor in model.state_dict().items()
            }
            counter = 0
        else:
            counter += 1

        if counter >= patience:
            print(f"Early stopping at epoch {epoch+1}")
            break

        # Memory management
        if device.type == 'cuda':
            torch.cuda.empty_cache()

    # Load best model
    if best_model_state is not None:
        model.load_state_dict(best_model_state)

    return model, property_scaler, temporal_scaler, target_scaler, history

def predict_with_neighbor_lstm(model, test_data, train_data, neighbor_dict, property_features, 
                             temporal_features, property_scaler, temporal_scaler, target_scaler,
                             max_neighbors=5, seq_length=30, device='cuda'):
    """
    Run a trained NeighborBasedLSTM on ``test_data`` and return predictions.

    The already-fitted scalers are reused to build the inputs. The network
    output is passed through ``target_scaler.inverse_transform`` and then
    ``np.expm1``, so the result assumes the target was log1p-transformed
    before scaling.
    """
    print("\n===== Predicting with NeighborBasedLSTM Model =====")

    # Only the test-side object is needed; the scalers come back unchanged
    _unused_train, batch, _ps, _ts, _ys = prepare_data_for_neighbor_lstm(
        train_data, test_data, neighbor_dict, property_features, temporal_features,
        property_scaler, temporal_scaler, target_scaler,
        max_neighbors=max_neighbors, seq_length=seq_length
    )
    batch = batch.to(device)

    # Inference without dropout / batch-norm updates and without gradients
    model.eval()
    with torch.no_grad():
        scaled_output = model(batch).cpu().numpy()

    # Undo target standardisation, then the log1p transform
    log_scale_prices = target_scaler.inverse_transform(scaled_output)
    return np.expm1(log_scale_prices)

In [2]:
import os
import torch
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import gc

# Function to evaluate predictions
def evaluate_gnn_predictions(y_true, y_pred, print_results=True):
    """
    Score predictions with RMSE, MAE, R² and MAPE.

    MAPE is expressed in percent; 1e-8 is added to the denominator so that
    zero targets do not divide by zero.

    Returns:
        dict with the keys 'rmse', 'mae', 'r2' and 'mape'.
    """
    metrics = {
        'rmse': np.sqrt(mean_squared_error(y_true, y_pred)),
        'mae': mean_absolute_error(y_true, y_pred),
        'r2': r2_score(y_true, y_pred),
        'mape': np.mean(np.abs((y_true - y_pred) / (y_true + 1e-8))) * 100,
    }

    if not print_results:
        return metrics

    # Human-readable report
    print("=== GNN Model Evaluation ===")
    print(f"RMSE: {metrics['rmse']:.2f}")
    print(f"MAE: {metrics['mae']:.2f}")
    print(f"R²: {metrics['r2']:.4f}")
    print(f"MAPE: {metrics['mape']:.2f}%")
    return metrics

def apply_price_transformation(train_data, inverse=False):
    """
    Log-transform the 'price' column, or undo that transform.

    Works on a copy; the input frame is left untouched. In the forward
    direction the untransformed values are kept in 'original_price'.
    """
    frame = train_data.copy()

    if inverse:
        # expm1 is the inverse of log1p
        print("Inverting log transformation for predictions")
        frame['price'] = np.expm1(frame['price'])
        return frame

    print("Applying log transformation to price data")
    frame['original_price'] = frame['price']  # keep the raw values
    frame['price'] = np.log1p(frame['price'])  # log1p copes with zero prices
    return frame

# Create calculated features
def create_calculated_features(df):
    """
    Derive extra feature columns from whichever source columns are present.

    Added columns (each only when its inputs exist, except the two scores,
    which fall back to 0): 'bedroom_ratio', 'amenity_count', 'luxury_score',
    'essential_score' and 'price_range_7d' / '_14d' / '_30d'. Afterwards every
    numeric column containing NaN is filled with its own median. The input
    frame is not modified.
    """
    features = df.copy()

    def flag_share(candidates):
        # Mean of the listed flag columns that exist; 0 when none do
        present = [name for name in candidates if name in features.columns]
        if not present:
            return 0
        return features[present].sum(axis=1) / len(present)

    # Bedrooms per guest; accommodates is floored at 1 to avoid dividing by 0
    if {'bedrooms', 'accommodates'}.issubset(features.columns):
        features['bedroom_ratio'] = features['bedrooms'] / features['accommodates'].clip(lower=1)

    # Row-wise total over every column whose name contains 'has_'
    amenity_flags = features.filter(like='has_').columns
    if len(amenity_flags) > 0:
        features['amenity_count'] = features[amenity_flags].sum(axis=1)

    features['luxury_score'] = flag_share(
        ['has_hot_water', 'has_hair_dryer', 'has_dedicated_workspace',
         'has_tv', 'has_wifi', 'has_shampoo']
    )
    features['essential_score'] = flag_share(
        ['has_essentials', 'has_bed_linens', 'has_kitchen',
         'has_smoke_alarm', 'has_heating']
    )

    # Spread between rolling max and min for each window
    rolling_windows = (
        ('rolling_max_7d', 'rolling_min_7d', 'price_range_7d'),
        ('rolling_max_14d', 'rolling_min_14d', 'price_range_14d'),
        ('rolling_max_30d', 'rolling_min_30d', 'price_range_30d'),
    )
    for high, low, target in rolling_windows:
        if high in features.columns and low in features.columns:
            features[target] = features[high] - features[low]

    # Median-fill any numeric column that still has gaps
    for name in features.select_dtypes(include=['number']).columns:
        column = features[name]
        if column.isnull().any():
            features[name] = column.fillna(column.median())

    return features

def run_neighbor_lstm_with_rolling_window_cv(train_path, train_ids_path, test_ids_path, neighbor_csv_path,
                                          output_dir=None, window_size=35, n_splits=5,
                                          max_neighbors=5, seq_length=30, lstm_hidden_dim=8,
                                          hidden_dim=64, epochs=50, lr=0.001, sample_size=None):
    """
    Run NeighborBasedLSTM model with rolling window cross-validation
    """
    print(f"Processing dataset: {os.path.basename(train_path)}")
    
    # Create output directory if not exists
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    
    try:
        # Load training data
        print("Loading data...")
        train_data = pd.read_csv(train_path)
        
        # Load listing IDs for train/test split
        print("Loading train/test listing IDs...")
        with open(train_ids_path, 'r') as f:
            train_listing_ids = [int(line.strip()) for line in f.readlines()]
        
        with open(test_ids_path, 'r') as f:
            test_listing_ids = [int(line.strip()) for line in f.readlines()]
        
        # Load neighbor data
        neighbor_dict = load_neighbor_data(neighbor_csv_path)
        
        print(f"Loaded {len(train_listing_ids)} train IDs and {len(test_listing_ids)} test IDs")
        
        # For testing - take only a small sample of listings if specified
        if sample_size:
            print(f"Limiting to {sample_size} random listings for testing")
            np.random.seed(42)
            selected_train = np.random.choice(train_listing_ids, int(sample_size * 0.8), replace=False)
            selected_test = np.random.choice(test_listing_ids, int(sample_size * 0.2), replace=False)
            train_listing_ids = selected_train.tolist()
            test_listing_ids = selected_test.tolist()
        
        # Convert date column to datetime
        train_data['date'] = pd.to_datetime(train_data['date'])
        
        # Filter data to include only dates in the desired range
        start_date = pd.to_datetime('2023-07-08')
        end_date = pd.to_datetime('2024-02-08')
        train_data = train_data[(train_data['date'] >= start_date) & (train_data['date'] <= end_date)]
        
        # Apply log transformation to price
        train_data = apply_price_transformation(train_data)
        
        # Create calculated features
        print("Creating calculated features...")
        train_data = create_calculated_features(train_data)
        
        # Check for NaN values in the dataset and fill them
        nan_columns = train_data.columns[train_data.isna().any()].tolist()
        if nan_columns:
            print(f"Warning: Found NaN values in columns: {nan_columns}")
            print("Filling NaN values with column means/medians")
            
            for col in nan_columns:
                if np.issubdtype(train_data[col].dtype, np.number):
                    # Fill with median for numeric columns
                    train_data[col] = train_data[col].fillna(train_data[col].median())
                else:
                    # For non-numeric, fill with mode
                    train_data[col] = train_data[col].fillna(train_data[col].mode()[0])
        
        # Define feature groups
        property_features = [
            'latitude', 'longitude', 'accommodates', 'bedrooms', 'bathrooms',
            'amenity_count', 'luxury_score', 'essential_score'
        ]
        
        temporal_features = [
            'DTF_day_of_week', 'DTF_month', 'DTF_is_weekend',
            'DTF_season_sin', 'DTF_season_cos'
        ]
        
        # Ensure all feature lists only contain columns that exist in the dataset
        property_features = [f for f in property_features if f in train_data.columns]
        temporal_features = [f for f in temporal_features if f in train_data.columns]
        
        print(f"Using {len(property_features)} property features and {len(temporal_features)} temporal features")
        
        # Get unique dates and ensure they're properly sorted
        unique_dates = sorted(train_data['date'].dt.date.unique())
        
        # Create explicit test periods - last 35 days split into 5 equal parts (7 days each)
        last_35_days = unique_dates[-window_size:]
        
        # Define explicit test periods - each 7 days
        test_periods = []
        for i in range(n_splits):
            start_idx = i * (window_size // n_splits)
            end_idx = start_idx + (window_size // n_splits)
            # Make sure we don't go beyond the available data
            if end_idx <= len(last_35_days):
                test_periods.append((last_35_days[start_idx], last_35_days[end_idx-1]))
        
        # Adjust n_splits if we couldn't create enough test periods
        n_splits = len(test_periods)
        
        print(f"Created {n_splits} test periods:")
        for i, (test_start, test_end) in enumerate(test_periods):
            print(f"  Period {i+1}: {test_start} to {test_end}")
        
        # Storage for results
        cv_results = []
        all_predictions = []
        all_targets = []
        split_metrics = []
        
        # Initialize device
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        print(f"Using device: {device}")
        
        # Run time series cross-validation using our explicit test periods
        for i, (test_start, test_end) in enumerate(test_periods):
            print(f"\n===== Split {i+1}/{n_splits} =====")
            
            # Define training period: everything before test_start
            train_end = pd.to_datetime(test_start) - pd.Timedelta(days=1)
            train_end_date = train_end.date()
            
            print(f"Training period: {unique_dates[0]} to {train_end_date}")
            print(f"Testing period: {test_start} to {test_end}")
            
            # Split by date first
            train_date_mask = train_data['date'].dt.date <= train_end_date
            test_date_mask = (train_data['date'].dt.date >= test_start) & (train_data['date'].dt.date <= test_end)
            
            date_filtered_train = train_data[train_date_mask]
            date_filtered_test = train_data[test_date_mask]
            
            # Now further split by listing IDs
            train_id_mask = date_filtered_train['listing_id'].isin(train_listing_ids)
            test_id_mask = date_filtered_test['listing_id'].isin(test_listing_ids)
            
            split_train_data = date_filtered_train[train_id_mask].copy()
            split_test_data = date_filtered_test[test_id_mask].copy()
            
            print(f"Train data: {len(split_train_data)} rows, {len(split_train_data['listing_id'].unique())} unique listings")
            print(f"Test data: {len(split_test_data)} rows, {len(split_test_data['listing_id'].unique())} unique listings")
            
            # Check if we have enough data for this split
            if len(split_train_data) < 100 or len(split_test_data) < 10:
                print(f"Insufficient data for split {i+1}, skipping")
                continue
            
            # Split train data into train and validation
            unique_train_listings = split_train_data['listing_id'].unique()
            train_listings, val_listings = train_test_split(
                unique_train_listings, test_size=0.2, random_state=42
            )
            
            train_subset = split_train_data[split_train_data['listing_id'].isin(train_listings)].copy()
            val_subset = split_train_data[split_train_data['listing_id'].isin(val_listings)].copy()
            
            # Manage memory before training
            gc.collect()
            if device.type == 'cuda':
                torch.cuda.empty_cache()
            
            # Train model for this split
            try:
                print(f"\n----- Training NeighborBasedLSTM Model (Split {i+1}) -----")
                model, property_scaler, temporal_scaler, target_scaler, _ = train_neighbor_lstm_model(
                    train_subset, val_subset, neighbor_dict, property_features, temporal_features,
                    max_neighbors=max_neighbors, seq_length=seq_length, lstm_hidden_dim=lstm_hidden_dim,
                    hidden_dim=hidden_dim, epochs=epochs, lr=lr, device=device
                )
                
                # Evaluate on test data
                print(f"\n----- Evaluating NeighborBasedLSTM on Test Data (Split {i+1}) -----")
                test_predictions = predict_with_neighbor_lstm(
                    model, split_test_data, train_subset, neighbor_dict, property_features, temporal_features,
                    property_scaler, temporal_scaler, target_scaler,
                    max_neighbors=max_neighbors, seq_length=seq_length, device=device
                )
                
                # Get actual test values (original scale)
                test_actuals = split_test_data['original_price'].values if 'original_price' in split_test_data.columns else split_test_data['price'].values
                
                # Evaluate predictions
                metrics = evaluate_gnn_predictions(test_actuals, test_predictions.flatten(), print_results=True)
                
                print(f"Split {i+1} Results - RMSE: {metrics['rmse']:.4f}, MAE: {metrics['mae']:.4f}, R²: {metrics['r2']:.4f}")
                
                # Store results for this split
                split_results = pd.DataFrame({
                    'split': i,
                    'date': split_test_data['date'],
                    'listing_id': split_test_data['listing_id'],
                    'price': test_actuals,
                    'predicted': test_predictions.flatten(),
                    'error': test_actuals - test_predictions.flatten(),
                    'abs_error': np.abs(test_actuals - test_predictions.flatten()),
                    'pct_error': np.abs((test_actuals - test_predictions.flatten()) / (test_actuals + 1e-8)) * 100
                })
                
                cv_results.append(split_results)
                all_predictions.extend(test_predictions.flatten())
                all_targets.extend(test_actuals)
                
                # Save model for this split if output_dir is provided
                if output_dir:
                    model_path = os.path.join(output_dir, f'neighbor_lstm_model_split_{i+1}.pt')
                    torch.save(model.state_dict(), model_path)
                    print(f"Model for split {i+1} saved to {model_path}")
                
                # Store metrics for this split
                split_metrics.append({
                    'split': i,
                    'rmse': metrics['rmse'],
                    'mae': metrics['mae'],
                    'r2': metrics['r2'],
                    'mape': metrics['mape'],
                    'n_samples': len(test_actuals)
                })
                
                # Memory management after each split
                del model, property_scaler, temporal_scaler, target_scaler
                del test_predictions, split_train_data, split_test_data, train_subset, val_subset
                gc.collect()
                if device.type == 'cuda':
                    torch.cuda.empty_cache()
                
            except Exception as e:
                print(f"Error in split {i+1}: {str(e)}")
                import traceback
                traceback.print_exc()
                continue
        
        # Combine all results
        if not cv_results:
            print("No valid splits completed. Check your data and parameters.")
            return None
                
        all_results = pd.concat(cv_results, ignore_index=True)
        
        # Calculate overall metrics
        all_targets_array = np.array(all_targets)
        all_predictions_array = np.array(all_predictions)
        
        overall_metrics = {
            'rmse': np.sqrt(mean_squared_error(all_targets_array, all_predictions_array)),
            'mae': mean_absolute_error(all_targets_array, all_predictions_array),
            'r2': r2_score(all_targets_array, all_predictions_array),
            'mape': np.mean(np.abs((all_targets_array - all_predictions_array) / (all_targets_array + 1e-8))) * 100
        }
        
        # Calculate daily metrics
        all_results['date_str'] = pd.to_datetime(all_results['date']).dt.strftime('%Y-%m-%d')
        
        daily_metrics = []
        for day, group in all_results.groupby('date_str'):
            y_true_day = group['price']
            y_pred_day = group['predicted']
            
            daily_metrics.append({
                'date': day,
                'rmse': np.sqrt(mean_squared_error(y_true_day, y_pred_day)),
                'mae': mean_absolute_error(y_true_day, y_pred_day),
                'r2': r2_score(y_true_day, y_pred_day) if len(set(y_true_day)) > 1 else np.nan,
                'mape': np.mean(np.abs((y_true_day - y_pred_day) / (y_true_day + 1e-8))) * 100,
                'n_samples': len(y_true_day)
            })
        
        daily_metrics_df = pd.DataFrame(daily_metrics)
        daily_metrics_df['date'] = pd.to_datetime(daily_metrics_df['date'])
        daily_metrics_df = daily_metrics_df.sort_values('date')
        
        split_metrics_df = pd.DataFrame(split_metrics)
        
        # Create a results dictionary
        evaluation_results = {
            'overall_metrics': overall_metrics,
            'split_metrics': split_metrics_df,
            'daily_metrics': daily_metrics_df,
            'all_results': all_results,
            'train_listings': len(train_listing_ids),
            'test_listings': len(test_listing_ids)
        }
        
        # Save results if output directory is provided
        if output_dir:
            # Save all results
            results_file = os.path.join(output_dir, 'neighbor_lstm_rolling_window_results.csv')
            all_results.to_csv(results_file, index=False)
            print(f"Results saved to {results_file}")
            
            # Save metrics
            metrics_file = os.path.join(output_dir, 'neighbor_lstm_rolling_window_metrics.csv')
            daily_metrics_df.to_csv(metrics_file, index=False)
            print(f"Daily metrics saved to {metrics_file}")
            
            # Save summary
            with open(os.path.join(output_dir, 'neighbor_lstm_cv_summary.txt'), 'w') as f:
                f.write(f"NeighborBasedLSTM Rolling Window CV Model Summary\n")
                f.write(f"=================================\n\n")
                f.write(f"Window size: {window_size} days\n")
                f.write(f"Number of splits: {n_splits}\n")
                f.write(f"Training period: {unique_dates[0]} to {unique_dates[-1]}\n")
                f.write(f"Number of training listings: {len(train_listing_ids)}\n")
                f.write(f"Number of test listings: {len(test_listing_ids)}\n\n")
                f.write(f"LSTM hidden dimension: {lstm_hidden_dim}\n")
                f.write(f"Maximum neighbors per listing: {max_neighbors}\n\n")
                f.write(f"Overall Metrics:\n")
                for k, v in overall_metrics.items():
                    f.write(f"  {k}: {v:.6f}\n")
        
        # Print summary
        print("\n===== NeighborBasedLSTM ROLLING WINDOW CV SUMMARY =====")
        print(f"Using {len(train_listing_ids)} listings for training and {len(test_listing_ids)} listings for testing")
        
        print("\n=== Overall Metrics ===")
        print(f"RMSE: {overall_metrics['rmse']:.4f}")
        print(f"MAE: {overall_metrics['mae']:.4f}")
        print(f"R²: {overall_metrics['r2']:.4f}")
        print(f"MAPE: {overall_metrics['mape']:.4f}%")
        
        print("\n=== Split Performance ===")
        print(split_metrics_df[['split', 'rmse', 'mae', 'r2', 'n_samples']].to_string(index=False))
        
        # Return evaluation results
        return evaluation_results
    
    except Exception as e:
        print(f"Error in rolling window CV: {str(e)}")
        import traceback
        traceback.print_exc()
        return None

def _fill_missing_values(frame):
    """
    Fill NaN values in ``frame`` column by column and return the frame.

    Numeric columns are filled with their median; every other column is
    filled with its most frequent value (mode). Columns that contain no
    non-null value at all are left untouched because neither a median nor a
    mode exists for them.
    """
    nan_columns = frame.columns[frame.isna().any()].tolist()
    if not nan_columns:
        return frame

    print(f"Warning: Found NaN values in columns: {nan_columns}")
    print("Filling NaN values with column means/medians")

    for col in nan_columns:
        if pd.api.types.is_numeric_dtype(frame[col]):
            # Fill with median for numeric columns
            frame[col] = frame[col].fillna(frame[col].median())
        else:
            # For non-numeric, fill with mode (mode() is empty for all-NaN columns)
            modes = frame[col].mode()
            if not modes.empty:
                frame[col] = frame[col].fillna(modes.iloc[0])
    return frame


def _plot_training_history(history, output_dir=None):
    """
    Draw a 2x2 figure of the training history and optionally save it.

    Reads the 'train_loss', 'val_loss', 'val_rmse', 'val_mae' and 'lr' entries
    of ``history``. When ``output_dir`` is truthy the figure is written to
    'neighbor_lstm_training_history.png' inside it before being shown.
    """
    # NOTE(review): `plt` is expected to be imported elsewhere in this file
    # (matplotlib.pyplot) - it is not among the imports visible at the top.
    plt.figure(figsize=(12, 10))

    # Plot training and validation loss
    plt.subplot(2, 2, 1)
    plt.plot(history['train_loss'], label='Train Loss')
    plt.plot(history['val_loss'], label='Validation Loss')
    plt.title('Training and Validation Loss')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.legend()

    # Validation RMSE, validation MAE and learning rate share the same layout
    single_series_panels = [
        (2, 'val_rmse', 'Validation RMSE', 'RMSE'),
        (3, 'val_mae', 'Validation MAE', 'MAE'),
        (4, 'lr', 'Learning Rate', 'LR'),
    ]
    for position, key, title, ylabel in single_series_panels:
        plt.subplot(2, 2, position)
        plt.plot(history[key], label=title)
        plt.title(title)
        plt.xlabel('Epoch')
        plt.ylabel(ylabel)
    # Still on the last panel (learning rate): use a log axis
    plt.yscale('log')

    plt.tight_layout()

    if output_dir:
        plt.savefig(os.path.join(output_dir, 'neighbor_lstm_training_history.png'))
    plt.show()


def run_neighbor_lstm_model(train_path, train_ids_path, test_ids_path, neighbor_csv_path, 
                          output_dir=None, lstm_hidden_dim=8, hidden_dim=64, max_neighbors=5,
                          seq_length=30, epochs=50, lr=0.001, sample_size=None):
    """
    Train and evaluate the NeighborBasedLSTM model

    Args:
        train_path: CSV file with the listing data (read with ``pd.read_csv``).
        train_ids_path: Text file with one integer training listing ID per line.
        test_ids_path: Text file with one integer test listing ID per line.
        neighbor_csv_path: Path handed to ``load_neighbor_data``.
        output_dir: Optional directory for the plot, model weights, scalers and
            test predictions. Created if missing; nothing is saved when falsy.
        lstm_hidden_dim: Forwarded to ``train_neighbor_lstm_model``.
        hidden_dim: Forwarded to ``train_neighbor_lstm_model``.
        max_neighbors: Forwarded to training and prediction.
        seq_length: Forwarded to training and prediction.
        epochs: Forwarded to ``train_neighbor_lstm_model``.
        lr: Forwarded to ``train_neighbor_lstm_model``.
        sample_size: When truthy, randomly keep about 80% of this many train
            IDs and 20% of this many test IDs (fixed seed 42), capped at the
            number of IDs available.

    Returns:
        ``(model, property_scaler, temporal_scaler, target_scaler, test_metrics)``
        on success, or ``None`` if any step raised (the error and traceback
        are printed).
    """
    try:
        # Create output directory if not exists
        if output_dir:
            os.makedirs(output_dir, exist_ok=True)
        
        # Load training data
        print("Loading data...")
        train_data = pd.read_csv(train_path)
        
        # Load listing IDs for train/test split (blank lines are ignored)
        print("Loading train/test listing IDs...")
        with open(train_ids_path, 'r') as f:
            train_listing_ids = [int(line) for line in f if line.strip()]
        
        with open(test_ids_path, 'r') as f:
            test_listing_ids = [int(line) for line in f if line.strip()]
        
        print(f"Loaded {len(train_listing_ids)} train IDs and {len(test_listing_ids)} test IDs")
        
        # Load neighbor data
        neighbor_dict = load_neighbor_data(neighbor_csv_path)
        
        # For testing - take only a small sample of listings if specified
        if sample_size:
            print(f"Limiting to {sample_size} random listings for testing")
            np.random.seed(42)
            # Cap at the population size: sampling without replacement raises
            # when asked for more items than exist.
            n_train = min(int(sample_size * 0.8), len(train_listing_ids))
            n_test = min(int(sample_size * 0.2), len(test_listing_ids))
            selected_train = np.random.choice(train_listing_ids, n_train, replace=False)
            selected_test = np.random.choice(test_listing_ids, n_test, replace=False)
            train_listing_ids = selected_train.tolist()
            test_listing_ids = selected_test.tolist()
        
        # Convert date column to datetime if needed
        if 'date' in train_data.columns and not pd.api.types.is_datetime64_any_dtype(train_data['date']):
            train_data['date'] = pd.to_datetime(train_data['date'])
        
        # Create calculated features
        print("Creating calculated features...")
        train_data = create_calculated_features(train_data)
        
        # Check for NaN values in the dataset and fill them
        train_data = _fill_missing_values(train_data)
        
        # Split data into train and test based on listing IDs
        train_mask = train_data['listing_id'].isin(train_listing_ids)
        test_mask = train_data['listing_id'].isin(test_listing_ids)
        
        train_df = train_data[train_mask].copy()
        test_df = train_data[test_mask].copy()
        
        print(f"Train data: {len(train_df)} rows, {len(train_df['listing_id'].unique())} unique listings")
        print(f"Test data: {len(test_df)} rows, {len(test_df['listing_id'].unique())} unique listings")
        
        # Define feature groups
        property_features = [
            'latitude', 'longitude', 'accommodates', 'bedrooms', 'bathrooms',
            'amenity_count', 'luxury_score', 'essential_score', 'bedroom_ratio'
        ]
        
        temporal_features = [
            'DTF_day_of_week', 'DTF_month', 'DTF_is_weekend',
            'DTF_season_sin', 'DTF_season_cos'
        ]
        
        # Ensure all feature lists only contain columns that exist in the dataset
        property_features = [f for f in property_features if f in train_df.columns]
        temporal_features = [f for f in temporal_features if f in train_df.columns]
        
        print(f"Using {len(property_features)} property features and {len(temporal_features)} temporal features")
        
        # Apply the price transformation helper to both partitions
        # (presumably a log transform that keeps 'original_price' - confirm in the helper)
        train_df = apply_price_transformation(train_df)
        test_df = apply_price_transformation(test_df)
        
        # Split train data into train and validation by listing, so that no
        # listing appears in both subsets.
        # NOTE(review): `train_test_split` is expected to be imported elsewhere
        # in this file (sklearn.model_selection) - not visible in the top imports.
        unique_train_listings = train_df['listing_id'].unique()
        train_listings, val_listings = train_test_split(
            unique_train_listings, test_size=0.2, random_state=42
        )
        
        train_subset = train_df[train_df['listing_id'].isin(train_listings)].copy()
        val_subset = train_df[train_df['listing_id'].isin(val_listings)].copy()
        
        print(f"Train subset: {len(train_subset)} rows, {len(train_listings)} listings")
        print(f"Validation subset: {len(val_subset)} rows, {len(val_listings)} listings")
        
        # Initialize device
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        print(f"Using device: {device}")
        
        # Memory management before training
        gc.collect()
        if device.type == 'cuda':
            torch.cuda.empty_cache()
        
        # Train NeighborBasedLSTM model
        print("\n===== Training NeighborBasedLSTM Model =====")
        model, property_scaler, temporal_scaler, target_scaler, history = train_neighbor_lstm_model(
            train_subset, val_subset, neighbor_dict, property_features, temporal_features,
            max_neighbors=max_neighbors, seq_length=seq_length, lstm_hidden_dim=lstm_hidden_dim,
            hidden_dim=hidden_dim, epochs=epochs, lr=lr, device=device
        )
        
        # Plot training history
        _plot_training_history(history, output_dir)
        
        # Evaluate on test data
        print("\n===== Evaluating NeighborBasedLSTM on Test Data =====")
        test_predictions = predict_with_neighbor_lstm(
            model, test_df, train_subset, neighbor_dict, property_features, temporal_features,
            property_scaler, temporal_scaler, target_scaler,
            max_neighbors=max_neighbors, seq_length=seq_length, device=device
        )
        # Flatten once and reuse for metrics, plots and the saved table
        flat_predictions = test_predictions.flatten()
        
        # Get actual test values (original scale)
        test_actuals = test_df['original_price'].values if 'original_price' in test_df.columns else test_df['price'].values
        
        # Evaluate predictions
        test_metrics = evaluate_gnn_predictions(test_actuals, flat_predictions, print_results=True)
        
        # Plot results
        plot_gnn_results(test_actuals, flat_predictions, history, output_dir)
        
        # Save model and scalers
        if output_dir:
            torch.save(model.state_dict(), os.path.join(output_dir, 'neighbor_lstm_model.pt'))
            torch.save({
                'property_scaler': property_scaler,
                'temporal_scaler': temporal_scaler,
                'target_scaler': target_scaler,
                'max_neighbors': max_neighbors,
                'seq_length': seq_length,
                'lstm_hidden_dim': lstm_hidden_dim
            }, os.path.join(output_dir, 'neighbor_lstm_scalers.pt'))
            print(f"Model and scalers saved to {output_dir}")
            
            # Save test predictions
            residuals = test_actuals - flat_predictions
            predictions_file = os.path.join(output_dir, 'neighbor_lstm_test_predictions.csv')
            test_results = pd.DataFrame({
                'listing_id': test_df['listing_id'].values,
                'date': test_df['date'].values,
                'actual': test_actuals,
                'predicted': flat_predictions,
                'error': residuals,
                'abs_error': np.abs(residuals),
                # 1e-8 guards against division by zero for zero-priced rows
                'pct_error': np.abs(residuals / (test_actuals + 1e-8)) * 100
            })
            test_results.to_csv(predictions_file, index=False)
            print(f"Test predictions saved to {predictions_file}")
        
        # Return model and metrics
        return model, property_scaler, temporal_scaler, target_scaler, test_metrics
    
    except Exception as e:
        # Top-level boundary: report the failure and signal it with None so
        # callers can check the result instead of crashing.
        print(f"Error in NeighborBasedLSTM model training: {str(e)}")
        import traceback
        traceback.print_exc()
        return None

def _improvement_pct(metric, baseline, candidate):
    """
    Return how much ``candidate`` improves on ``baseline``; positive is better.

    'r2' (higher is better) is reported as the absolute difference in
    percentage points. All other metrics are treated as errors (lower is
    better) and reported as the relative reduction in percent; NaN is
    returned when the baseline is zero and the ratio is undefined.
    """
    if metric == 'r2':
        return (candidate - baseline) * 100
    if baseline == 0:
        return np.nan
    return (baseline - candidate) / baseline * 100


def compare_lstm_dimensions(train_path, train_ids_path, test_ids_path, neighbor_csv_path,
                          output_dir=None, max_neighbors=5, seq_length=30, sample_size=None,
                          lstm_hidden_dim_1=8, lstm_hidden_dim_2=16, epochs=30):
    """
    Compare LSTM models with different hidden dimensions (8 vs 16 by default)

    Runs ``run_neighbor_lstm_model`` once per hidden dimension with otherwise
    identical settings and tabulates the test metrics side by side.

    Args:
        train_path, train_ids_path, test_ids_path, neighbor_csv_path:
            Forwarded unchanged to ``run_neighbor_lstm_model``.
        output_dir: Optional base directory. Each run writes to a
            'lstm_dim_<dim>' sub-directory and the comparison table is saved
            as 'lstm_dim_comparison.csv'. Nothing is saved when falsy.
        max_neighbors: Forwarded to both runs.
        seq_length: Forwarded to both runs.
        sample_size: Forwarded to both runs.
        lstm_hidden_dim_1: Hidden dimension of the baseline model.
        lstm_hidden_dim_2: Hidden dimension of the candidate model; should
            differ from ``lstm_hidden_dim_1`` so the two table columns are
            distinct.
        epochs: Epochs for each run (reduced default for faster comparison).

    Returns:
        A DataFrame with one row per metric ('rmse', 'mae', 'r2', 'mape'), one
        column per model and an 'Improvement %' column in which positive
        values mean the second model is better; ``None`` if either run failed.
    """
    # Create output directory if not exists
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    
    # Train and evaluate one model per hidden dimension
    run_results = []
    for dim in (lstm_hidden_dim_1, lstm_hidden_dim_2):
        print(f"\n===== Running NeighborBasedLSTM Model ({dim} hidden dim) =====")
        run_results.append(run_neighbor_lstm_model(
            train_path=train_path,
            train_ids_path=train_ids_path,
            test_ids_path=test_ids_path,
            neighbor_csv_path=neighbor_csv_path,
            output_dir=os.path.join(output_dir, f'lstm_dim_{dim}') if output_dir else None,
            lstm_hidden_dim=dim,
            max_neighbors=max_neighbors,
            seq_length=seq_length,
            epochs=epochs,
            sample_size=sample_size
        ))
    
    # run_neighbor_lstm_model signals failure by returning None
    if any(result is None for result in run_results):
        print("One or both models failed to run. Check error logs.")
        return None
    
    # The metrics dict is the last element of each result tuple
    metrics_1 = run_results[0][-1]
    metrics_2 = run_results[1][-1]
    
    # Compare metrics
    print("\n===== Model Comparison =====")
    metrics = ['rmse', 'mae', 'r2', 'mape']
    
    comparison_df = pd.DataFrame({
        'Metric': metrics,
        f'LSTM (dim={lstm_hidden_dim_1})': [metrics_1[m] for m in metrics],
        f'LSTM (dim={lstm_hidden_dim_2})': [metrics_2[m] for m in metrics]
    })
    
    # Positive values always mean the second model did better, regardless of
    # whether the metric is an error (lower is better) or r2 (higher is better).
    comparison_df['Improvement %'] = [
        _improvement_pct(m, metrics_1[m], metrics_2[m]) for m in metrics
    ]
    
    print(comparison_df)
    
    # Save comparison if output_dir is provided
    if output_dir:
        comparison_file = os.path.join(output_dir, 'lstm_dim_comparison.csv')
        comparison_df.to_csv(comparison_file, index=False)
        print(f"Model comparison saved to {comparison_file}")
    
    return comparison_df

In [3]:
# Main execution
# Script entry point: configures paths and hyper-parameters, then dispatches
# to one of the three runners according to `run_mode`. Each runner reports its
# own errors and returns None on failure, so success is only announced when a
# real result comes back.
if __name__ == "__main__":
    # Set paths to your data
    train_path = "train_up3.csv"
    train_ids_path = "train_ids.txt"
    test_ids_path = "test_ids.txt"
    neighbor_csv_path = "./neighbor_data/neighbor_dict.csv"  # Path to neighbor information CSV
    
    # Output directory
    output_dir = "./output/neighbor_lstm_model"
    os.makedirs(output_dir, exist_ok=True)
    
    # Model parameters
    lstm_hidden_dim = 8         # Hidden dimension for LSTM (8 or 16)
    hidden_dim = 64             # Hidden dimension size for rest of model
    max_neighbors = 5           # Maximum number of neighbors to consider per listing
    seq_length = 30             # Sequence length for neighbor price history
    epochs = 125                 # Maximum number of epochs
    lr = 0.001                  # Learning rate
    
    # Choose between different run modes
    run_mode = "rolling_window"  # Options: "single", "rolling_window", "compare_dims"
    
    try:
        if run_mode == "single":
            # Run single model training
            result_tuple = run_neighbor_lstm_model(
                train_path=train_path,
                train_ids_path=train_ids_path,
                test_ids_path=test_ids_path,
                neighbor_csv_path=neighbor_csv_path,
                output_dir=output_dir,
                lstm_hidden_dim=lstm_hidden_dim,
                hidden_dim=hidden_dim,
                max_neighbors=max_neighbors,
                seq_length=seq_length,
                epochs=epochs,
                lr=lr,
                sample_size=None  # Set to a number for testing or None for full dataset
            )
            
            if result_tuple is not None:
                model, property_scaler, temporal_scaler, target_scaler, test_metrics = result_tuple
                print("NeighborBasedLSTM model training completed successfully!")
                
                # Print summary of model
                print("\n===== Model Summary =====")
                total_param = sum(p.numel() for p in model.parameters())
                lstm_param = sum(p.numel() for name, p in model.named_parameters() if 'lstm' in name)
                print(f"Total model parameters: {total_param:,}")
                print(f"LSTM parameters: {lstm_param:,}")
                print(f"LSTM parameters as % of total: {lstm_param/total_param*100:.2f}%")
            else:
                print("NeighborBasedLSTM model training failed. Check error logs.")
                
        elif run_mode == "rolling_window":
            # Run with rolling window cross-validation
            results = run_neighbor_lstm_with_rolling_window_cv(
                train_path=train_path,
                train_ids_path=train_ids_path,
                test_ids_path=test_ids_path,
                neighbor_csv_path=neighbor_csv_path,
                output_dir=output_dir,
                window_size=35,  # 5 weeks
                n_splits=5,
                max_neighbors=max_neighbors,
                seq_length=seq_length,
                lstm_hidden_dim=lstm_hidden_dim,
                hidden_dim=hidden_dim,
                epochs=epochs,
                lr=lr,
                sample_size=4500  # Set to a number for testing or None for full dataset
            )
            # The CV runner returns None when it fails or no split completes
            if results is not None:
                print("NeighborBasedLSTM model with rolling window CV completed successfully!")
            else:
                print("NeighborBasedLSTM rolling window CV failed. Check error logs.")
            
        elif run_mode == "compare_dims":
            # Run comparison between 8-dim and 16-dim LSTM models
            comparison = compare_lstm_dimensions(
                train_path=train_path,
                train_ids_path=train_ids_path,
                test_ids_path=test_ids_path,
                neighbor_csv_path=neighbor_csv_path,
                output_dir=output_dir,
                max_neighbors=max_neighbors,
                seq_length=seq_length,
                sample_size=200  # Use small sample for faster comparison
            )
            # `comparison` is a DataFrame on success, so test against None
            # explicitly (DataFrame truthiness is ambiguous).
            if comparison is not None:
                print("LSTM dimension comparison completed successfully!")
            else:
                print("LSTM dimension comparison failed. Check error logs.")
            
        else:
            # Make a mistyped mode visible instead of silently doing nothing
            print(f"Unknown run_mode '{run_mode}'. Options: \"single\", \"rolling_window\", \"compare_dims\"")
            
    except Exception as e:
        print(f"Error running NeighborBasedLSTM model: {str(e)}")
        import traceback
        traceback.print_exc()

Processing dataset: train_up3.csv
Loading data...
Loading train/test listing IDs...
Loading neighbor data from ./neighbor_data/neighbor_dict.csv
Loaded neighbor data for 1573 test listings
Loaded 6291 train IDs and 1573 test IDs
Limiting to 4500 random listings for testing
Applying log transformation to price data
Creating calculated features...
Using 8 property features and 5 temporal features
Created 5 test periods:
  Period 1: 2024-01-05 to 2024-01-11
  Period 2: 2024-01-12 to 2024-01-18
  Period 3: 2024-01-19 to 2024-01-25
  Period 4: 2024-01-26 to 2024-02-01
  Period 5: 2024-02-02 to 2024-02-08
Using device: cuda

===== Split 1/5 =====
Training period: 2023-08-07 to 2024-01-04
Testing period: 2024-01-05 to 2024-01-11
Train data: 517955 rows, 3600 unique listings
Test data: 6300 rows, 900 unique listings

----- Training NeighborBasedLSTM Model (Split 1) -----

===== Training NeighborBasedLSTM Model =====
LSTM hidden dimension: 8, Max neighbors: 5
Preparing data for NeighborBasedLST



Epoch 1/125 - Loss: 0.5953, Val Loss: 0.4114, RMSE: 186.47, MAE: 116.30
Epoch 2/125 - Loss: 0.5187, Val Loss: 0.4062, RMSE: 185.67, MAE: 115.57
Epoch 3/125 - Loss: 0.4644, Val Loss: 0.3998, RMSE: 184.77, MAE: 114.66
Epoch 4/125 - Loss: 0.4289, Val Loss: 0.3923, RMSE: 183.74, MAE: 113.59
Epoch 5/125 - Loss: 0.4016, Val Loss: 0.3841, RMSE: 182.57, MAE: 112.40
Epoch 6/125 - Loss: 0.3853, Val Loss: 0.3755, RMSE: 181.30, MAE: 111.14
Epoch 7/125 - Loss: 0.3711, Val Loss: 0.3666, RMSE: 179.92, MAE: 109.85
Epoch 8/125 - Loss: 0.3596, Val Loss: 0.3577, RMSE: 178.45, MAE: 108.52
Epoch 9/125 - Loss: 0.3497, Val Loss: 0.3489, RMSE: 176.92, MAE: 107.19
Epoch 10/125 - Loss: 0.3418, Val Loss: 0.3400, RMSE: 175.26, MAE: 105.83
Epoch 11/125 - Loss: 0.3335, Val Loss: 0.3312, RMSE: 173.50, MAE: 104.42
Epoch 12/125 - Loss: 0.3268, Val Loss: 0.3223, RMSE: 171.61, MAE: 102.96
Epoch 13/125 - Loss: 0.3203, Val Loss: 0.3132, RMSE: 169.56, MAE: 101.41
Epoch 14/125 - Loss: 0.3159, Val Loss: 0.3038, RMSE: 167.29,



Epoch 2/125 - Loss: 0.4508, Val Loss: 0.3976, RMSE: 183.40, MAE: 113.55
Epoch 3/125 - Loss: 0.4064, Val Loss: 0.3913, RMSE: 182.96, MAE: 112.71
Epoch 4/125 - Loss: 0.3781, Val Loss: 0.3842, RMSE: 182.29, MAE: 111.77
Epoch 5/125 - Loss: 0.3596, Val Loss: 0.3765, RMSE: 181.40, MAE: 110.72
Epoch 6/125 - Loss: 0.3452, Val Loss: 0.3682, RMSE: 180.28, MAE: 109.57
Epoch 7/125 - Loss: 0.3346, Val Loss: 0.3596, RMSE: 178.98, MAE: 108.35
Epoch 8/125 - Loss: 0.3252, Val Loss: 0.3509, RMSE: 177.51, MAE: 107.08
Epoch 9/125 - Loss: 0.3187, Val Loss: 0.3424, RMSE: 175.94, MAE: 105.79
Epoch 10/125 - Loss: 0.3104, Val Loss: 0.3342, RMSE: 174.28, MAE: 104.50
Epoch 11/125 - Loss: 0.3044, Val Loss: 0.3262, RMSE: 172.56, MAE: 103.20
Epoch 12/125 - Loss: 0.2991, Val Loss: 0.3184, RMSE: 170.76, MAE: 101.87
Epoch 13/125 - Loss: 0.2946, Val Loss: 0.3107, RMSE: 168.88, MAE: 100.51
Epoch 14/125 - Loss: 0.2894, Val Loss: 0.3028, RMSE: 166.89, MAE: 99.08
Epoch 15/125 - Loss: 0.2853, Val Loss: 0.2946, RMSE: 164.79,



Epoch 2/125 - Loss: 0.5700, Val Loss: 0.3992, RMSE: 179.35, MAE: 112.94
Epoch 3/125 - Loss: 0.4963, Val Loss: 0.3884, RMSE: 177.20, MAE: 111.27
Epoch 4/125 - Loss: 0.4507, Val Loss: 0.3778, RMSE: 175.22, MAE: 109.59
Epoch 5/125 - Loss: 0.4218, Val Loss: 0.3681, RMSE: 173.48, MAE: 108.04
Epoch 6/125 - Loss: 0.4016, Val Loss: 0.3598, RMSE: 171.96, MAE: 106.68
Epoch 7/125 - Loss: 0.3875, Val Loss: 0.3528, RMSE: 170.67, MAE: 105.53
Epoch 8/125 - Loss: 0.3733, Val Loss: 0.3469, RMSE: 169.56, MAE: 104.56
Epoch 9/125 - Loss: 0.3600, Val Loss: 0.3418, RMSE: 168.59, MAE: 103.71
Epoch 10/125 - Loss: 0.3517, Val Loss: 0.3370, RMSE: 167.69, MAE: 102.92
Epoch 11/125 - Loss: 0.3447, Val Loss: 0.3321, RMSE: 166.74, MAE: 102.11
Epoch 12/125 - Loss: 0.3388, Val Loss: 0.3265, RMSE: 165.66, MAE: 101.18
Epoch 13/125 - Loss: 0.3330, Val Loss: 0.3200, RMSE: 164.35, MAE: 100.08
Epoch 14/125 - Loss: 0.3293, Val Loss: 0.3122, RMSE: 162.74, MAE: 98.74
Epoch 15/125 - Loss: 0.3240, Val Loss: 0.3034, RMSE: 160.80,



Epoch 1/125 - Loss: 0.5189, Val Loss: 0.4003, RMSE: 180.40, MAE: 112.42
Epoch 2/125 - Loss: 0.4519, Val Loss: 0.3932, RMSE: 179.93, MAE: 111.45
Epoch 3/125 - Loss: 0.4106, Val Loss: 0.3862, RMSE: 179.30, MAE: 110.49
Epoch 4/125 - Loss: 0.3856, Val Loss: 0.3792, RMSE: 178.54, MAE: 109.55
Epoch 5/125 - Loss: 0.3700, Val Loss: 0.3721, RMSE: 177.65, MAE: 108.60
Epoch 6/125 - Loss: 0.3551, Val Loss: 0.3650, RMSE: 176.67, MAE: 107.62
Epoch 7/125 - Loss: 0.3460, Val Loss: 0.3578, RMSE: 175.60, MAE: 106.62
Epoch 8/125 - Loss: 0.3372, Val Loss: 0.3506, RMSE: 174.48, MAE: 105.60
Epoch 9/125 - Loss: 0.3291, Val Loss: 0.3434, RMSE: 173.30, MAE: 104.56
Epoch 10/125 - Loss: 0.3214, Val Loss: 0.3361, RMSE: 172.06, MAE: 103.48
Epoch 11/125 - Loss: 0.3167, Val Loss: 0.3284, RMSE: 170.72, MAE: 102.32
Epoch 12/125 - Loss: 0.3121, Val Loss: 0.3202, RMSE: 169.27, MAE: 101.06
Epoch 13/125 - Loss: 0.3053, Val Loss: 0.3115, RMSE: 167.64, MAE: 99.69
Epoch 14/125 - Loss: 0.3021, Val Loss: 0.3022, RMSE: 165.80, 



Epoch 1/125 - Loss: 0.5040, Val Loss: 0.4122, RMSE: 178.96, MAE: 113.43
Epoch 2/125 - Loss: 0.4414, Val Loss: 0.4049, RMSE: 178.68, MAE: 112.38
Epoch 3/125 - Loss: 0.3965, Val Loss: 0.3970, RMSE: 178.20, MAE: 111.28
Epoch 4/125 - Loss: 0.3640, Val Loss: 0.3886, RMSE: 177.51, MAE: 110.10
Epoch 5/125 - Loss: 0.3462, Val Loss: 0.3797, RMSE: 176.61, MAE: 108.86
Epoch 6/125 - Loss: 0.3325, Val Loss: 0.3703, RMSE: 175.54, MAE: 107.59
Epoch 7/125 - Loss: 0.3245, Val Loss: 0.3610, RMSE: 174.35, MAE: 106.29
Epoch 8/125 - Loss: 0.3180, Val Loss: 0.3518, RMSE: 173.08, MAE: 104.99
Epoch 9/125 - Loss: 0.3115, Val Loss: 0.3431, RMSE: 171.78, MAE: 103.73
Epoch 10/125 - Loss: 0.3061, Val Loss: 0.3349, RMSE: 170.47, MAE: 102.50
Epoch 11/125 - Loss: 0.2992, Val Loss: 0.3272, RMSE: 169.16, MAE: 101.32
Epoch 12/125 - Loss: 0.2933, Val Loss: 0.3200, RMSE: 167.87, MAE: 100.19
Epoch 13/125 - Loss: 0.2872, Val Loss: 0.3130, RMSE: 166.56, MAE: 99.06
Epoch 14/125 - Loss: 0.2843, Val Loss: 0.3060, RMSE: 165.18, 