# In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import pandas as pd
from torch_geometric.data import Data
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import os
import gc


def load_neighbor_data(neighbor_csv_path):
    """
    Load neighbor relationships from a CSV file.

    The CSV must provide the columns ``test_listing_id``,
    ``neighbor_listing_id``, ``rank`` and ``distance``.

    Args:
        neighbor_csv_path: Path (or any source accepted by ``pd.read_csv``)
            of the neighbor table.

    Returns:
        dict mapping each test listing id to a list of dicts with the keys
        ``'neighbor_id'``, ``'rank'`` and ``'distance'``, sorted by ascending
        rank. Ids keep the dtype pandas parsed them with (integers stay
        integers).

    Raises:
        KeyError: If one of the required columns is missing.
    """
    print(f"Loading neighbor data from {neighbor_csv_path}")
    neighbor_df = pd.read_csv(neighbor_csv_path)

    # Read the table column-wise instead of with iterrows(): iterrows() packs
    # every row into a single Series, so integer ids are upcast to float64 as
    # soon as `distance` is a float column. Ids above 2**53 would then be
    # rounded and distinct listings would collapse onto the same key.
    required_columns = ['test_listing_id', 'neighbor_listing_id', 'rank', 'distance']
    columns = [neighbor_df[name].tolist() for name in required_columns]

    # Create a dictionary mapping test_listing_id to its neighbors
    neighbor_dict = {}
    for test_id, neighbor_id, rank, distance in zip(*columns):
        neighbor_dict.setdefault(test_id, []).append({
            'neighbor_id': neighbor_id,
            'rank': rank,
            'distance': distance
        })

    # Sort neighbors by rank for each test listing (stable, in place)
    for neighbors in neighbor_dict.values():
        neighbors.sort(key=lambda entry: entry['rank'])

    print(f"Loaded neighbor data for {len(neighbor_dict)} test listings")
    return neighbor_dict

def extract_price_history(listing_data, date, seq_length=30):
    """
    Extract the price history of a listing strictly before a given date.

    Args:
        listing_data: DataFrame with at least ``date`` and ``price`` columns.
        date: Cut-off; only rows with ``listing_data['date'] < date`` are used.
        seq_length: Number of prices to return.

    Returns:
        list of ``seq_length`` prices in chronological order (oldest first).
        When fewer than ``seq_length`` earlier prices exist, the front of the
        list is padded with the oldest available price, or with 0 when there
        is no earlier price at all.
    """
    earlier_rows = listing_data[listing_data['date'] < date].sort_values('date', ascending=False)

    # Most recent first; read the column directly instead of looping over rows
    price_history = earlier_rows['price'].head(seq_length).tolist()

    # Pad if needed. The list is still newest-first here, so appending the
    # last element repeats the oldest known price; after the reversal below
    # that padding sits at the start of the sequence.
    missing = seq_length - len(price_history)
    if missing > 0:
        fill_value = price_history[-1] if price_history else 0
        price_history.extend([fill_value] * missing)

    # Reverse to chronological order
    price_history.reverse()

    return price_history

def prepare_neighbor_data_batch(test_data, train_data, neighbor_dict, max_neighbors=5, seq_length=30):
    """
    Prepare batched neighbor price histories for all test instances.

    Args:
        test_data: DataFrame with ``listing_id`` and ``date`` columns; one
            output row is produced per row, in the same order.
        train_data: DataFrame with ``listing_id``, ``date`` and ``price``
            columns that supplies the neighbors' price histories.
        neighbor_dict: Mapping from test listing id to a list of dicts that
            carry a ``'neighbor_id'`` key (as built by ``load_neighbor_data``).
        max_neighbors: Number of neighbor slots per test instance; only the
            first ``max_neighbors`` entries of each neighbor list are used.
        seq_length: Length of each price history.

    Returns:
        Tuple ``(neighbor_histories, neighbor_masks)``:
        ``neighbor_histories`` is a float32 array of shape
        ``(len(test_data), max_neighbors, seq_length)`` and ``neighbor_masks``
        a bool array of shape ``(len(test_data), max_neighbors)`` that is True
        where a slot was filled. Slots whose neighbor is not present in
        ``train_data`` stay zero/False and keep their position.
    """
    print("Preparing neighbor data batch...")

    # Create lookup dictionary for train listings. A single groupby pass
    # replaces re-filtering the whole frame once per listing id.
    train_listings_dict = {
        listing_id: group.sort_values('date')
        for listing_id, group in train_data.groupby('listing_id', sort=False)
    }

    # Initialize arrays to store neighbor histories and masks
    test_size = len(test_data)
    neighbor_histories = np.zeros((test_size, max_neighbors, seq_length), dtype=np.float32)
    neighbor_masks = np.zeros((test_size, max_neighbors), dtype=bool)

    # Process each test instance; only two columns are needed, so read them
    # directly rather than materialising a Series per row.
    test_rows = zip(test_data['listing_id'].tolist(), test_data['date'].tolist())
    for idx, (test_id, test_date) in enumerate(test_rows):
        neighbors = neighbor_dict.get(test_id)
        if not neighbors:
            continue  # Skip if no neighbors found

        for n_idx, neighbor in enumerate(neighbors[:max_neighbors]):
            # Only use neighbors from the training set
            neighbor_data = train_listings_dict.get(neighbor['neighbor_id'])
            if neighbor_data is None:
                continue

            neighbor_histories[idx, n_idx] = extract_price_history(neighbor_data, test_date, seq_length)
            neighbor_masks[idx, n_idx] = True

    print(f"Prepared neighbor data for {test_size} test instances")
    return neighbor_histories, neighbor_masks

def prepare_data_for_neighbor_lstm(train_data, test_data, neighbor_dict, property_features,
                                 temporal_features, property_scaler=None, temporal_scaler=None,
                                 target_scaler=None, max_neighbors=5, seq_length=30):
    """
    Build the train/test ``Data`` objects consumed by the NeighborBasedLSTM model.

    Any scaler passed as ``None`` is created as a ``StandardScaler`` and fitted
    on ``train_data``; scalers that are supplied are used as they are.

    Returns:
        Tuple ``(train_data_obj, test_data_obj, property_scaler,
        temporal_scaler, target_scaler)``. Only the test object carries
        ``neighbor_histories`` and ``neighbor_mask``.
    """
    print("Preparing data for NeighborBasedLSTM...")

    def _price_column(frame):
        # sklearn scalers expect a 2-D array, hence the single-column reshape
        return frame['price'].values.reshape(-1, 1)

    # Fit whichever scalers the caller did not provide
    if property_scaler is None:
        property_scaler = StandardScaler().fit(train_data[property_features])
    if temporal_scaler is None:
        temporal_scaler = StandardScaler().fit(train_data[temporal_features])
    if target_scaler is None:
        target_scaler = StandardScaler().fit(_price_column(train_data))

    def _scaled(scaler, frame, columns):
        return scaler.transform(frame[columns]).astype(np.float32)

    # Property features
    train_property = _scaled(property_scaler, train_data, property_features)
    test_property = _scaled(property_scaler, test_data, property_features)

    # Temporal features
    train_temporal = _scaled(temporal_scaler, train_data, temporal_features)
    test_temporal = _scaled(temporal_scaler, test_data, temporal_features)

    # Neighbor price histories exist for the test rows only
    histories, masks = prepare_neighbor_data_batch(
        test_data, train_data, neighbor_dict, max_neighbors, seq_length
    )

    def _scaled_target(frame):
        return target_scaler.transform(_price_column(frame)).flatten().astype(np.float32)

    train_target = _scaled_target(train_data)
    test_target = _scaled_target(test_data)

    train_data_obj = Data(
        property_features=torch.FloatTensor(train_property),
        temporal_features=torch.FloatTensor(train_temporal),
        y=torch.FloatTensor(train_target.reshape(-1, 1))
    )
    test_data_obj = Data(
        property_features=torch.FloatTensor(test_property),
        temporal_features=torch.FloatTensor(test_temporal),
        neighbor_histories=torch.FloatTensor(histories),
        neighbor_mask=torch.BoolTensor(masks),
        y=torch.FloatTensor(test_target.reshape(-1, 1))
    )

    return train_data_obj, test_data_obj, property_scaler, temporal_scaler, target_scaler

def predict_with_neighbor_lstm(model, test_data, train_data, neighbor_dict, property_features,
                             temporal_features, property_scaler, temporal_scaler, target_scaler,
                             max_neighbors=5, seq_length=30, device='cuda', log_transformed=True):
    """
    Make predictions using the trained NeighborBasedLSTM model.

    Args:
        model: Trained model; called as ``model(test_data_obj)``.
        test_data, train_data, neighbor_dict, property_features,
        temporal_features: Forwarded to ``prepare_data_for_neighbor_lstm``.
        property_scaler, temporal_scaler, target_scaler: Scalers forwarded to
            ``prepare_data_for_neighbor_lstm``; ``target_scaler`` is also used
            to undo the target scaling of the predictions.
        max_neighbors: Neighbor slots per test row.
        seq_length: Length of each neighbor price history.
        device: Device the test data is moved to before the forward pass.
        log_transformed: Whether the model was trained on ``log1p`` prices.
            When True (the default, matching the previous unconditional
            behaviour) ``np.expm1`` is applied after un-scaling; pass False
            for models trained on raw prices.

    Returns:
        NumPy array of predictions on the original price scale.
    """
    print("\n===== Predicting with NeighborBasedLSTM Model =====")

    # Prepare test data (the train object and returned scalers are not needed here)
    _, test_data_obj, _, _, _ = prepare_data_for_neighbor_lstm(
        train_data, test_data, neighbor_dict, property_features, temporal_features,
        property_scaler, temporal_scaler, target_scaler,
        max_neighbors=max_neighbors, seq_length=seq_length
    )

    # Move to device
    test_data_obj = test_data_obj.to(device)

    # Make predictions
    model.eval()
    with torch.no_grad():
        predictions = model(test_data_obj)

    # Transform back to the (possibly log) price scale
    predictions_np = target_scaler.inverse_transform(predictions.cpu().numpy())

    # Undo log1p only when the targets were actually log-transformed
    if log_transformed:
        return np.expm1(predictions_np)
    return predictions_np

# In [2]:
# Function to evaluate predictions
def evaluate_gnn_predictions(y_true, y_pred, print_results=True):
    """
    Evaluate GNN predictions using multiple metrics.

    Args:
        y_true: Ground-truth values (array-like).
        y_pred: Predicted values (array-like) with the same number of elements.
        print_results: When True, print a formatted summary.

    Returns:
        dict with the keys ``'rmse'``, ``'mae'``, ``'r2'`` and ``'mape'``
        (MAPE expressed in percent).
    """
    # Calculate metrics
    rmse = np.sqrt(mean_squared_error(y_true, y_pred))
    mae = mean_absolute_error(y_true, y_pred)
    r2 = r2_score(y_true, y_pred)

    # Flatten before the element-wise MAPE: subtracting an (n, 1) prediction
    # column from an (n,) target vector would otherwise broadcast to an
    # (n, n) matrix and silently report a meaningless percentage.
    true_flat = np.asarray(y_true, dtype=float).ravel()
    pred_flat = np.asarray(y_pred, dtype=float).ravel()
    # The 1e-8 offset keeps the division finite when a true value is exactly 0
    mape = np.mean(np.abs((true_flat - pred_flat) / (true_flat + 1e-8))) * 100

    # Store metrics in dictionary
    metrics = {
        'rmse': rmse,
        'mae': mae,
        'r2': r2,
        'mape': mape
    }

    # Print results if requested
    if print_results:
        print("=== GNN Model Evaluation ===")
        print(f"RMSE: {rmse:.2f}")
        print(f"MAE: {mae:.2f}")
        print(f"R²: {r2:.4f}")
        print(f"MAPE: {mape:.2f}%")

    return metrics

def apply_price_transformation(train_data, inverse=False):
    """
    Return a copy of ``train_data`` with the ``price`` column log-transformed
    (``log1p``), or mapped back with ``expm1`` when ``inverse`` is True.

    The forward direction also stores the untouched prices in a new
    ``original_price`` column. The input frame is never modified.
    """
    frame = train_data.copy()

    if inverse:
        print("Inverting log transformation for predictions")
        # expm1 undoes log1p
        frame['price'] = np.expm1(frame['price'])
        return frame

    print("Applying log transformation to price data")
    raw_prices = frame['price']
    frame['original_price'] = raw_prices      # keep the raw values around
    frame['price'] = np.log1p(raw_prices)     # log1p copes with zero prices
    return frame

def create_calculated_features(df):
    """
    Derive extra feature columns from whichever source columns are present.

    Works on a copy of ``df`` and returns it. Columns added (when their inputs
    exist): ``bedroom_ratio``, ``amenity_count``, ``luxury_score``,
    ``essential_score`` and ``price_range_{7d,14d,30d}``. The two score columns
    are always created and default to 0 when none of their amenity flags is
    available. Finally, NaNs in numeric columns are replaced by that column's
    median.
    """
    enriched = df.copy()

    def _flag_share(candidates):
        # Fraction of the listed amenity flags that are set, over the flags
        # that actually exist as columns; 0 when none of them exists.
        present = [name for name in candidates if name in enriched.columns]
        if not present:
            return 0
        return enriched[present].sum(axis=1) / len(present)

    # Bedrooms per guest; accommodates is floored at 1 to avoid dividing by 0
    if {'bedrooms', 'accommodates'}.issubset(enriched.columns):
        enriched['bedroom_ratio'] = enriched['bedrooms'] / enriched['accommodates'].clip(lower=1)

    # Total number of amenity flags set on each row
    flag_columns = enriched.filter(like='has_').columns
    if len(flag_columns) > 0:
        enriched['amenity_count'] = enriched[flag_columns].sum(axis=1)

    # Luxury score - specific amenities from the dataset
    enriched['luxury_score'] = _flag_share([
        'has_hot_water', 'has_hair_dryer', 'has_dedicated_workspace',
        'has_tv', 'has_wifi', 'has_shampoo',
    ])

    # Essential score - basic amenities
    enriched['essential_score'] = _flag_share([
        'has_essentials', 'has_bed_linens', 'has_kitchen',
        'has_smoke_alarm', 'has_heating',
    ])

    # Price volatility: spread between rolling max and min per window
    for window in ('7d', '14d', '30d'):
        upper, lower = f'rolling_max_{window}', f'rolling_min_{window}'
        if upper in enriched.columns and lower in enriched.columns:
            enriched[f'price_range_{window}'] = enriched[upper] - enriched[lower]

    # Median-impute every numeric column that still contains NaNs
    for name in enriched.select_dtypes(include=['number']).columns:
        column = enriched[name]
        if column.isnull().any():
            enriched[name] = column.fillna(column.median())

    return enriched

def _improvement_pct(metric, baseline, candidate):
    """Return how much `candidate` improves on `baseline`; positive means better."""
    if metric == 'r2':
        # R² is already a bounded score: report the gain in percentage points
        return (candidate - baseline) * 100
    if baseline == 0:
        # A relative change against a zero baseline is undefined
        return float('nan')
    # Error metrics: a lower value is better, so a drop is a positive improvement
    return (baseline - candidate) / baseline * 100


def compare_lstm_dimensions(train_path, train_ids_path, test_ids_path, neighbor_csv_path,
                          output_dir=None, max_neighbors=5, seq_length=30, sample_size=None):
    """
    Compare LSTM models with different hidden dimensions (8 vs 16).

    Runs ``run_neighbor_lstm_model`` once per hidden dimension with 30 epochs
    each and tabulates RMSE, MAE, R² and MAPE side by side.

    Args:
        train_path, train_ids_path, test_ids_path, neighbor_csv_path:
            Forwarded unchanged to ``run_neighbor_lstm_model``.
        output_dir: Optional directory. When given it is created, each run
            writes to its own ``lstm_dim_<n>`` sub-directory and the
            comparison table is saved as ``lstm_dim_comparison.csv``.
        max_neighbors, seq_length, sample_size: Forwarded unchanged.

    Returns:
        DataFrame with one row per metric and an ``Improvement %`` column in
        which a positive value always means the 16-dim model is better
        (relative reduction for RMSE/MAE/MAPE, percentage-point gain for R²);
        ``None`` when either run returned a falsy result.
    """
    # Create output directory if not exists
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    # Hidden dimensions under comparison: baseline first, candidate second
    lstm_hidden_dim_1 = 8
    lstm_hidden_dim_2 = 16

    results = []
    for hidden_dim in (lstm_hidden_dim_1, lstm_hidden_dim_2):
        print(f"\n===== Running NeighborBasedLSTM Model ({hidden_dim} hidden dim) =====")
        results.append(run_neighbor_lstm_model(
            train_path=train_path,
            train_ids_path=train_ids_path,
            test_ids_path=test_ids_path,
            neighbor_csv_path=neighbor_csv_path,
            output_dir=os.path.join(output_dir, f'lstm_dim_{hidden_dim}') if output_dir else None,
            lstm_hidden_dim=hidden_dim,
            max_neighbors=max_neighbors,
            seq_length=seq_length,
            epochs=30,  # Reduced epochs for faster comparison
            sample_size=sample_size
        ))
    result_1, result_2 = results

    # Check if both models ran successfully
    if not (result_1 and result_2):
        print("One or both models failed to run. Check error logs.")
        return None

    # Each result is a 5-tuple whose last element is the metrics dict
    _, _, _, _, metrics_1 = result_1
    _, _, _, _, metrics_2 = result_2

    # Compare metrics
    print("\n===== Model Comparison =====")
    metrics = ['rmse', 'mae', 'r2', 'mape']

    comparison_df = pd.DataFrame({
        'Metric': metrics,
        f'LSTM (dim={lstm_hidden_dim_1})': [metrics_1[m] for m in metrics],
        f'LSTM (dim={lstm_hidden_dim_2})': [metrics_2[m] for m in metrics]
    })

    # The previous formula reported a rise in an error metric as a positive
    # "improvement"; the sign now matches the column label for every metric.
    comparison_df['Improvement %'] = [
        _improvement_pct(m, metrics_1[m], metrics_2[m]) for m in metrics
    ]

    print(comparison_df)

    # Save comparison if output_dir is provided
    if output_dir:
        comparison_path = os.path.join(output_dir, 'lstm_dim_comparison.csv')
        comparison_df.to_csv(comparison_path, index=False)
        print(f"Model comparison saved to {comparison_path}")

    return comparison_df

# In [3]:
from torch_geometric.nn import GATv2Conv
from sklearn.neighbors import NearestNeighbors

# Enhanced NeighborBasedLSTM with spatial GAT layers
class EnhancedNeighborBasedLSTM(nn.Module):
    """
    Price regressor that fuses four feature branches into a single prediction.

    Branches, as wired in ``forward``:
      * spatial  - two GATv2Conv layers over the listing graph (``data.x``,
                   ``data.edge_index``, ``data.edge_attr``); only the rows
                   selected by ``data.val_mask`` are kept;
      * property - two linear layers with batch norm and a residual connection;
      * temporal - two linear layers with batch norm and a residual connection;
      * neighbor - bidirectional LSTM over each neighbor's price history,
                   pooled across neighbor slots with masked softmax attention
                   and projected to ``hidden_dim``.

    The four ``hidden_dim``-wide branch outputs are concatenated, fused by a
    linear layer and mapped to one value per row.

    Args:
        input_dim: Accepted but never read by this class (the LSTM input size
            is fixed to 1).
        spatial_features_dim: Width of the node features fed to the first GAT layer.
        temporal_features_dim: Width of ``data.temporal_features``.
        property_features_dim: Width of ``data.property_features``.
        max_neighbors: Number of neighbor slots per row of ``data.neighbor_histories``.
        lstm_hidden_dim: Hidden size of each LSTM direction.
        hidden_dim: Width of every branch output and of the fused representation.
        dropout: Dropout probability; the final head uses ``dropout + 0.1``.
        heads: Number of GAT attention heads.
        edge_dim: Edge-attribute width passed to both GAT layers.
    """

    def __init__(self, input_dim, spatial_features_dim, temporal_features_dim, property_features_dim, 
                 max_neighbors=5, lstm_hidden_dim=16, hidden_dim=64, dropout=0.3, heads=4, edge_dim=1):
        super(EnhancedNeighborBasedLSTM, self).__init__()
        
        self.max_neighbors = max_neighbors
        self.lstm_hidden_dim = lstm_hidden_dim
        self.heads = heads
        # Integer division: when hidden_dim is not a multiple of heads, the
        # GAT width below is smaller than hidden_dim and dim_adjust bridges it.
        self.head_dim = hidden_dim // heads
        
        # GAT layers for spatial relationship processing
        # gat_out_dim is the width the batch norms below expect from each GAT
        # layer (presumably the concatenation of all heads - confirm against
        # the GATv2Conv `concat` default).
        gat_out_dim = self.head_dim * heads
        self.gat1 = GATv2Conv(spatial_features_dim, self.head_dim, heads=heads, edge_dim=edge_dim)
        self.gat2 = GATv2Conv(gat_out_dim, self.head_dim, heads=heads, edge_dim=edge_dim)
        
        # Batch normalization for GAT layers
        self.bn1 = nn.BatchNorm1d(gat_out_dim)
        self.bn2 = nn.BatchNorm1d(gat_out_dim)
        
        # Linear adapter from the GAT width to hidden_dim; only created when
        # the two differ (i.e. hidden_dim is not divisible by heads)
        self.dim_adjust = None
        if gat_out_dim != hidden_dim:
            self.dim_adjust = nn.Linear(gat_out_dim, hidden_dim)
        
        # LSTM for processing neighbor price histories
        self.neighbor_lstm = nn.LSTM(
            input_size=1,  # Single feature per timestep (price)
            hidden_size=lstm_hidden_dim,
            num_layers=1,
            batch_first=True,
            bidirectional=True
        )
        
        # Attention mechanism for neighbor contributions: one scalar score per
        # neighbor, computed from the concatenated forward/backward state
        self.attention = nn.Linear(lstm_hidden_dim * 2, 1)
        
        # Projection layer to match the LSTM output width with hidden_dim
        self.neighbor_projection = nn.Linear(lstm_hidden_dim * 2, hidden_dim)
        
        # Processing for property features
        self.property_layer1 = nn.Linear(property_features_dim, hidden_dim)
        self.property_bn1 = nn.BatchNorm1d(hidden_dim)
        self.property_layer2 = nn.Linear(hidden_dim, hidden_dim)
        self.property_bn2 = nn.BatchNorm1d(hidden_dim)
        
        # Processing for temporal features
        self.temporal_layer1 = nn.Linear(temporal_features_dim, hidden_dim)
        self.temporal_bn1 = nn.BatchNorm1d(hidden_dim)
        self.temporal_layer2 = nn.Linear(hidden_dim, hidden_dim)
        self.temporal_bn2 = nn.BatchNorm1d(hidden_dim)
        
        # Feature fusion mechanism over the four concatenated branches
        # (spatial, property, temporal, neighbor), hence hidden_dim * 4
        self.fusion_layer = nn.Linear(hidden_dim * 4, hidden_dim)
        self.fusion_bn = nn.BatchNorm1d(hidden_dim)
        
        # Output layers
        self.fc1 = nn.Linear(hidden_dim, hidden_dim)
        self.fc1_bn = nn.BatchNorm1d(hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, 1)
        
        self.dropout = nn.Dropout(dropout)
        self.dropout_heavy = nn.Dropout(dropout + 0.1)
    
    def forward(self, data):
        """
        Run all four branches and return the price prediction.

        Args:
            data: Object exposing ``property_features``, ``temporal_features``,
                ``neighbor_histories``, ``neighbor_mask``, ``x``,
                ``edge_index``, ``edge_attr`` and ``val_mask``.

        Returns:
            Output of the final linear layer, with one value per row of
            ``data.property_features``.
        """
        # Unpack the data
        property_features = data.property_features
        temporal_features = data.temporal_features
        neighbor_histories = data.neighbor_histories
        neighbor_mask = data.neighbor_mask
        
        # Spatial graph data
        x = data.x  # Spatial features
        edge_index = data.edge_index
        edge_attr = data.edge_attr
        
        # The row count of property_features defines the batch for every
        # non-graph branch
        batch_size = property_features.shape[0]
        
        # Process spatial features with GAT
        spatial_features = self.gat1(x, edge_index, edge_attr=edge_attr)
        spatial_features = F.elu(spatial_features)
        spatial_features = self.bn1(spatial_features)
        spatial_features = self.dropout(spatial_features)
        
        # Second GAT layer with residual connection
        spatial_features_res = spatial_features
        spatial_features = self.gat2(spatial_features, edge_index, edge_attr=edge_attr)
        spatial_features = self.bn2(spatial_features)
        
        # Add residual connection if dimensions match (both GAT layers are
        # built with the same head_dim and heads, so this is expected to hold)
        if spatial_features.shape == spatial_features_res.shape:
            spatial_features = spatial_features + spatial_features_res
        
        # Apply dimension adjustment if needed
        if self.dim_adjust is not None:
            spatial_features = self.dim_adjust(spatial_features)
        
        # Keep only the graph nodes flagged by val_mask. The concatenation
        # further down requires this selection to have exactly batch_size rows.
        spatial_features = spatial_features[data.val_mask]
        
        # Process property features with residual connection
        prop_out = F.relu(self.property_layer1(property_features))
        prop_out = self.property_bn1(prop_out)
        prop_out = self.dropout(prop_out)
        prop_out_res = prop_out
        prop_out = F.relu(self.property_layer2(prop_out))
        prop_out = self.property_bn2(prop_out)
        prop_out = prop_out + prop_out_res  # Residual connection
        
        # Process temporal features with residual connection
        temp_out = F.relu(self.temporal_layer1(temporal_features))
        temp_out = self.temporal_bn1(temp_out)
        temp_out = self.dropout(temp_out)
        temp_out_res = temp_out
        temp_out = F.relu(self.temporal_layer2(temp_out))
        temp_out = self.temporal_bn2(temp_out)
        temp_out = temp_out + temp_out_res  # Residual connection
        
        # Process neighbor histories with LSTM.
        # neighbor_histories is treated as (batch, max_neighbors, seq_len): the
        # view folds the neighbor axis into the batch axis so every history is
        # one single-feature sequence.
        seq_len = neighbor_histories.size(2)
        reshaped_histories = neighbor_histories.view(batch_size * self.max_neighbors, seq_len, 1)
        
        lstm_out, (h_n, _) = self.neighbor_lstm(reshaped_histories)
        
        # Single-layer bidirectional LSTM: h_n[0] is the final forward state,
        # h_n[1] the final backward state
        h_forward = h_n[0]
        h_backward = h_n[1]
        h_combined = torch.cat([h_forward, h_backward], dim=1)
        
        # Unfold back to (batch, max_neighbors, lstm_hidden_dim * 2)
        h_combined = h_combined.view(batch_size, self.max_neighbors, -1)
        
        # Apply attention to weight the neighbors' contributions
        attention_scores = self.attention(h_combined)
        attention_scores = attention_scores.squeeze(-1)
        
        # Slots where neighbor_mask is False get a very negative score so the
        # softmax gives them ~0 weight. NOTE: if every slot of a row is masked,
        # all scores are equal and the softmax weights that row's padded slots
        # uniformly.
        attention_scores = attention_scores.masked_fill(~neighbor_mask, -1e9)
        attention_weights = F.softmax(attention_scores, dim=1).unsqueeze(-1)
        
        weighted_features = h_combined * attention_weights
        neighbor_context = weighted_features.sum(dim=1)  # [batch, lstm_hidden_dim*2]
        
        # Project neighbor context to match hidden_dim
        neighbor_context = self.neighbor_projection(neighbor_context)
        
        # Fuse the four branches: concatenate along the feature axis, then
        # linear layer + batch norm + ReLU
        concatenated_features = torch.cat([spatial_features, prop_out, temp_out, neighbor_context], dim=1)
        fused_features = self.fusion_layer(concatenated_features)
        fused_features = self.fusion_bn(fused_features)
        fused_features = F.relu(fused_features)  # Add non-linearity for better feature extraction
        
        # Final prediction layers
        out = F.relu(self.fc1(fused_features))
        out = self.fc1_bn(out)
        out = self.dropout_heavy(out)
        price_prediction = self.fc2(out)
        
        return price_prediction
    
def _combined_edge_weight(feat_a, feat_b, distance, feature_weight):
    """Blend inverse geographic distance with clipped cosine feature similarity."""
    norm_product = np.linalg.norm(feat_a) * np.linalg.norm(feat_b)
    if norm_product > 1e-8:  # Avoid division by zero
        feat_sim = np.dot(feat_a, feat_b) / norm_product
    else:
        feat_sim = 0.0

    # Inverse distance; the epsilon keeps coincident coordinates finite
    geo_weight = 1.0 / (distance + 1e-6)

    # (1 - alpha) * geo_weight + alpha * similarity, with negative similarity clipped to 0
    return float((1 - feature_weight) * geo_weight + feature_weight * max(0, feat_sim))


def build_enhanced_spatial_graph_for_new_listings(train_data, test_data, k=10, feature_weight=0.3):
    """
    Build a graph with edge weights based on both geographic and feature
    similarity for new listings.

    Node indexing: training listings occupy indices ``0 .. len(train_data)-1``
    in row order, test listings follow at ``len(train_data) + test_row``.

    Edges:
      * every test listing is linked, in both directions, to its
        ``min(k, n_train)`` geographically nearest training listings;
      * when there are between 2 and 5000 training listings, each training
        listing additionally gets directed edges to the other listings among
        its ``min(5, n_train - 1)`` nearest training neighbors (the query
        usually returns the listing itself, which is skipped).

    Args:
        train_data: DataFrame with ``latitude`` and ``longitude`` columns and,
            optionally, ``accommodates``/``bedrooms``/``bathrooms``.
        test_data: DataFrame with the same columns for the new listings.
        k: Number of training neighbors per test listing.
        feature_weight: Share of the feature-similarity term in the edge
            weight (the geographic term gets ``1 - feature_weight``).

    Returns:
        Tuple ``(edge_index, edge_attr)``: a ``torch.long`` tensor of shape
        ``(2, num_edges)`` and a ``torch.float32`` tensor of shape
        ``(num_edges, 1)``.
    """
    # Extract coordinates
    train_coords = train_data[['latitude', 'longitude']].values
    test_coords = test_data[['latitude', 'longitude']].values
    train_offset = len(train_data)

    print(f"Building enhanced spatial graph with {len(test_coords)} test listings and {k} nearest neighbors...")

    # Extract and normalize key features for similarity calculation
    features = ['accommodates', 'bedrooms', 'bathrooms']
    available_features = [f for f in features if f in train_data.columns]

    if available_features:
        scaler = StandardScaler()
        train_features = scaler.fit_transform(train_data[available_features].fillna(0))
        test_features = scaler.transform(test_data[available_features].fillna(0))
    else:
        # Fallback if no features are available: constant vectors, so the
        # cosine similarity is 1 for every pair
        print("Warning: No property features available for similarity calculation")
        train_features = np.ones((len(train_coords), 1))
        test_features = np.ones((len(test_coords), 1))

    # Find k nearest neighbors for each test listing (named `knn` so the
    # module-level `nn` alias for torch.nn is not shadowed)
    knn = NearestNeighbors(n_neighbors=min(k, len(train_coords)))
    knn.fit(train_coords)
    distances, indices = knn.kneighbors(test_coords)

    # Create edge indices and attributes
    edge_index = []
    edge_attr = []

    for test_idx, (neighbor_indices, neighbor_distances) in enumerate(zip(indices, distances)):
        test_feat = test_features[test_idx]
        test_node = test_idx + train_offset

        for train_idx, distance in zip(neighbor_indices, neighbor_distances):
            train_node = int(train_idx)
            combined_weight = _combined_edge_weight(
                test_feat, train_features[train_node], distance, feature_weight
            )

            # Add edge from test listing to train listing
            edge_index.append([test_node, train_node])
            edge_attr.append([combined_weight])

            # Add reverse edge
            edge_index.append([train_node, test_node])
            edge_attr.append([combined_weight])

    # Add edges between training listings (optional, for smaller datasets).
    # At least two training listings are required: with a single one the
    # neighbor count would be 0, which NearestNeighbors rejects.
    if 1 < len(train_coords) <= 5000:
        train_knn = NearestNeighbors(n_neighbors=min(5, len(train_coords) - 1))
        train_knn.fit(train_coords)
        train_distances, train_indices = train_knn.kneighbors(train_coords)

        for train_idx, (neighbor_indices, neighbor_distances) in enumerate(zip(train_indices, train_distances)):
            for neighbor_idx, distance in zip(neighbor_indices, neighbor_distances):
                if train_idx == neighbor_idx:  # Skip self-loops
                    continue

                neighbor_node = int(neighbor_idx)
                combined_weight = _combined_edge_weight(
                    train_features[train_idx], train_features[neighbor_node], distance, feature_weight
                )

                edge_index.append([train_idx, neighbor_node])
                edge_attr.append([combined_weight])

    # Convert to tensors with explicit dtype
    edge_index_tensor = torch.tensor(edge_index, dtype=torch.long).t().contiguous()
    edge_attr_tensor = torch.tensor(edge_attr, dtype=torch.float32)

    print(f"Created graph with {edge_index_tensor.shape[1]} edges")

    return edge_index_tensor, edge_attr_tensor

def prepare_data_for_enhanced_neighbor_lstm(train_data, test_data, neighbor_dict,
                                         spatial_features, temporal_features, property_features,
                                         property_scaler=None, temporal_scaler=None,
                                         spatial_scaler=None, target_scaler=None,
                                         max_neighbors=5, seq_length=30, k=10, feature_weight=0.3):
    """
    Prepare data for the EnhancedNeighborBasedLSTM model with spatial GAT.

    Any scaler passed as ``None`` is created as a ``StandardScaler`` and fitted
    on ``train_data``; supplied scalers are used as they are.

    The returned ``Data`` object mixes two row spaces:
      * graph fields (``x``, ``edge_index``, ``edge_attr``, ``train_mask``,
        ``val_mask``) cover training nodes first, then test nodes;
      * ``property_features``, ``temporal_features``, ``neighbor_histories``,
        ``neighbor_mask`` and ``y`` hold the test rows only.

    Args:
        train_data, test_data: DataFrames with the listed feature columns plus
            ``price`` and whatever the neighbor/graph helpers require.
        neighbor_dict: Mapping from test listing id to its ranked neighbors.
        spatial_features, temporal_features, property_features: Column names
            for the three feature groups.
        property_scaler, temporal_scaler, spatial_scaler, target_scaler:
            Optional pre-fitted scalers.
        max_neighbors, seq_length: Forwarded to ``prepare_neighbor_data_batch``.
        k, feature_weight: Forwarded to
            ``build_enhanced_spatial_graph_for_new_listings``.

    Returns:
        Tuple ``(data_obj, property_scaler, temporal_scaler, spatial_scaler,
        target_scaler)``.
    """
    print("Preparing data for EnhancedNeighborBasedLSTM...")

    # Initialize or use provided scalers
    if property_scaler is None:
        property_scaler = StandardScaler()
        property_scaler.fit(train_data[property_features])

    if temporal_scaler is None:
        temporal_scaler = StandardScaler()
        temporal_scaler.fit(train_data[temporal_features])

    if spatial_scaler is None:
        spatial_scaler = StandardScaler()
        spatial_scaler.fit(train_data[spatial_features])

    if target_scaler is None:
        target_scaler = StandardScaler()
        target_scaler.fit(train_data['price'].values.reshape(-1, 1))

    # Property, temporal and target tensors are built for the test rows only,
    # so the training-side transforms of those groups are not computed.
    X_test_property = property_scaler.transform(test_data[property_features]).astype(np.float32)
    X_test_temporal = temporal_scaler.transform(test_data[temporal_features]).astype(np.float32)

    # Spatial features are needed for every graph node (train and test)
    X_train_spatial = spatial_scaler.transform(train_data[spatial_features]).astype(np.float32)
    X_test_spatial = spatial_scaler.transform(test_data[spatial_features]).astype(np.float32)

    # Prepare neighbor histories for test data
    neighbor_histories, neighbor_masks = prepare_neighbor_data_batch(
        test_data, train_data, neighbor_dict, max_neighbors, seq_length
    )

    # Scale the target variable
    y_test = target_scaler.transform(test_data['price'].values.reshape(-1, 1)).flatten().astype(np.float32)

    # Build spatial graph
    edge_index, edge_attr = build_enhanced_spatial_graph_for_new_listings(
        train_data, test_data, k=k, feature_weight=feature_weight
    )

    # Combine spatial features for graph: training nodes first, test nodes after
    X_combined_spatial = np.vstack([X_train_spatial, X_test_spatial])
    n_train = len(X_train_spatial)
    n_nodes = len(X_combined_spatial)

    # Node masks: train_mask flags the leading training nodes, val_mask the
    # trailing test nodes
    train_mask = torch.zeros(n_nodes, dtype=torch.bool)
    train_mask[:n_train] = True
    val_mask = torch.zeros(n_nodes, dtype=torch.bool)
    val_mask[n_train:] = True

    # Create data object
    data_obj = Data(
        x=torch.FloatTensor(X_combined_spatial),
        edge_index=edge_index,
        edge_attr=edge_attr,
        property_features=torch.FloatTensor(X_test_property),
        temporal_features=torch.FloatTensor(X_test_temporal),
        neighbor_histories=torch.FloatTensor(neighbor_histories),
        neighbor_mask=torch.BoolTensor(neighbor_masks),
        y=torch.FloatTensor(y_test.reshape(-1, 1)),
        train_mask=train_mask,
        val_mask=val_mask,
    )

    print(f"Prepared data with {n_nodes} nodes and {edge_index.shape[1]} edges")

    return data_obj, property_scaler, temporal_scaler, spatial_scaler, target_scaler

def train_enhanced_neighbor_lstm_model(train_data, test_data, neighbor_dict, spatial_features,
                                    temporal_features, property_features, max_neighbors=5, 
                                    seq_length=30, lstm_hidden_dim=16, hidden_dim=64,
                                    epochs=50, lr=0.001, device='cuda', k=10, feature_weight=0.3):
    """
    Train the EnhancedNeighborBasedLSTM model with LR scheduling and early stopping.

    Args:
        train_data, test_data, neighbor_dict: Forwarded unchanged to
            ``prepare_data_for_enhanced_neighbor_lstm``.
        spatial_features, temporal_features, property_features: Lists of column
            names; their lengths define the model's input dimensions.
        max_neighbors, seq_length, k, feature_weight: Forwarded to data
            preparation (``max_neighbors`` is also passed to the model).
        lstm_hidden_dim, hidden_dim: Hidden sizes passed to the model.
        epochs: Maximum number of full-batch training epochs.
        lr: Learning rate for Adam.
        device: A ``torch.device`` or a device string such as ``'cuda'``/``'cpu'``.

    Returns:
        Tuple ``(model, property_scaler, temporal_scaler, spatial_scaler,
        target_scaler, history)``. ``history`` maps ``'train_loss'``,
        ``'val_loss'``, ``'val_rmse'``, ``'val_mae'`` and ``'lr'`` to lists with
        one entry per completed epoch. The returned model carries the weights of
        the epoch with the lowest validation loss.
    """
    import copy

    print("\n===== Training EnhancedNeighborBasedLSTM Model =====")
    print(f"LSTM hidden dimension: {lstm_hidden_dim}, Max neighbors: {max_neighbors}")

    # Accept both strings (the default is 'cuda') and torch.device objects;
    # the memory-management step below reads ``device.type``.
    device = torch.device(device)

    # Prepare data
    data_obj, property_scaler, temporal_scaler, spatial_scaler, target_scaler = prepare_data_for_enhanced_neighbor_lstm(
        train_data, test_data, neighbor_dict, spatial_features, temporal_features, property_features,
        max_neighbors=max_neighbors, seq_length=seq_length, k=k, feature_weight=feature_weight
    )

    # Move data to device
    data_obj = data_obj.to(device)

    # Initialize model
    model = EnhancedNeighborBasedLSTM(
        input_dim=1,  # Single price feature
        spatial_features_dim=len(spatial_features),
        temporal_features_dim=len(temporal_features),
        property_features_dim=len(property_features),
        max_neighbors=max_neighbors,
        lstm_hidden_dim=lstm_hidden_dim,
        hidden_dim=hidden_dim,
        dropout=0.3,
        heads=4,
        edge_dim=1
    ).to(device)

    # Initialize optimizer and loss
    optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=1e-5)
    criterion = nn.HuberLoss(delta=1.0)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, mode='min', factor=0.5, patience=5, verbose=True
    )

    # Early-stopping bookkeeping
    best_val_loss = float('inf')
    best_model_state = None
    patience = 10
    counter = 0

    history = {
        'train_loss': [],
        'val_loss': [],
        'val_rmse': [],
        'val_mae': [],
        'lr': []
    }

    for epoch in range(epochs):
        # Training step (single full-batch update per epoch)
        model.train()
        optimizer.zero_grad()
        out = model(data_obj)
        loss = criterion(out, data_obj.y)
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
        optimizer.step()

        # Validation
        # NOTE(review): this loss is evaluated against the same targets
        # (``data_obj.y``) as the training loss above, so it differs only by
        # eval-mode behaviour and is not a held-out estimate.
        model.eval()
        with torch.no_grad():
            val_out = model(data_obj)
            val_loss_value = criterion(val_out, data_obj.y).item()

            # Convert predictions back to original scale for metrics.
            # expm1 presumably undoes a log1p applied to prices upstream -- TODO confirm.
            val_pred_orig = np.expm1(target_scaler.inverse_transform(val_out.cpu().numpy()))
            val_true_orig = np.expm1(target_scaler.inverse_transform(data_obj.y.cpu().numpy()))

            val_rmse = np.sqrt(mean_squared_error(val_true_orig, val_pred_orig))
            val_mae = mean_absolute_error(val_true_orig, val_pred_orig)

        # Store history
        train_loss_value = loss.item()
        history['train_loss'].append(train_loss_value)
        history['val_loss'].append(val_loss_value)
        history['val_rmse'].append(val_rmse)
        history['val_mae'].append(val_mae)
        history['lr'].append(optimizer.param_groups[0]['lr'])

        # Print progress
        print(f"Epoch {epoch+1}/{epochs} - Loss: {train_loss_value:.4f}, Val Loss: {val_loss_value:.4f}, "
              f"RMSE: {val_rmse:.2f}, MAE: {val_mae:.2f}")

        # Learning rate scheduling
        scheduler.step(val_loss_value)

        # Early stopping
        if val_loss_value < best_val_loss:
            best_val_loss = val_loss_value
            # Deep copy is required: ``state_dict()`` returns references to the
            # live parameter tensors, so a shallow ``dict.copy()`` would keep
            # changing as training continues.
            best_model_state = copy.deepcopy(model.state_dict())
            counter = 0
        else:
            counter += 1

        if counter >= patience:
            print(f"Early stopping at epoch {epoch+1}")
            break

        # Memory management
        if device.type == 'cuda':
            torch.cuda.empty_cache()

    # Restore the best weights seen during training
    if best_model_state is not None:
        model.load_state_dict(best_model_state)

    return model, property_scaler, temporal_scaler, spatial_scaler, target_scaler, history

def run_enhanced_neighbor_lstm_with_rolling_window_cv(train_path, train_ids_path, test_ids_path, neighbor_csv_path,
                                                   output_dir=None, window_size=35, n_splits=5,
                                                   max_neighbors=5, seq_length=30, lstm_hidden_dim=8,
                                                   hidden_dim=64, epochs=50, lr=0.001, sample_size=None):
    """
    Run EnhancedNeighborBasedLSTM model with rolling window cross-validation.

    The last ``window_size`` unique dates of the (date-filtered) dataset are cut
    into ``n_splits`` consecutive test periods of ``window_size // n_splits``
    dates each. For every period a fresh model is trained on rows dated before
    the period (restricted to the training listing IDs) and evaluated on rows
    inside the period (restricted to the test listing IDs).

    Args:
        train_path: CSV file with the full listing/date panel.
        train_ids_path, test_ids_path: Text files with one integer listing ID
            per line (blank lines are ignored).
        neighbor_csv_path: CSV consumed by ``load_neighbor_data``.
        output_dir: If given, per-split model weights, result CSVs and a text
            summary are written here.
        window_size: Number of trailing unique dates used for testing.
        n_splits: Number of test periods carved out of the window.
        max_neighbors, seq_length, lstm_hidden_dim, hidden_dim: Model/data
            hyper-parameters.
        epochs: Maximum epochs per split (early stopping with patience 10).
        lr: Learning rate for Adam.
        sample_size: If truthy, randomly keep ``0.8 * sample_size`` train IDs
            and ``0.2 * sample_size`` test IDs (seeded with 42).

    Returns:
        Dict with keys ``'overall_metrics'``, ``'split_metrics'``,
        ``'daily_metrics'``, ``'all_results'``, ``'train_listings'`` and
        ``'test_listings'``; ``None`` if no split completed or an error occurred
        inside the main ``try`` block.
    """
    import copy

    print(f"Processing dataset: {os.path.basename(train_path)}")

    # Create output directory if not exists
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    try:
        # Load training data
        print("Loading data...")
        train_data = pd.read_csv(train_path)

        # Load listing IDs for train/test split; skip blank lines so a stray
        # empty line does not abort the run with int('').
        print("Loading train/test listing IDs...")
        with open(train_ids_path, 'r') as f:
            train_listing_ids = [int(line) for line in f if line.strip()]

        with open(test_ids_path, 'r') as f:
            test_listing_ids = [int(line) for line in f if line.strip()]

        # Load neighbor data
        neighbor_dict = load_neighbor_data(neighbor_csv_path)

        print(f"Loaded {len(train_listing_ids)} train IDs and {len(test_listing_ids)} test IDs")

        # For testing - take only a small sample of listings if specified
        if sample_size:
            print(f"Limiting to {sample_size} random listings for testing")
            np.random.seed(42)
            selected_train = np.random.choice(train_listing_ids, int(sample_size * 0.8), replace=False)
            selected_test = np.random.choice(test_listing_ids, int(sample_size * 0.2), replace=False)
            train_listing_ids = selected_train.tolist()
            test_listing_ids = selected_test.tolist()

        # Convert date column to datetime
        train_data['date'] = pd.to_datetime(train_data['date'])

        # Filter data to include only dates in the desired range
        start_date = pd.to_datetime('2023-07-08')
        end_date = pd.to_datetime('2024-02-08')
        train_data = train_data[(train_data['date'] >= start_date) & (train_data['date'] <= end_date)]

        # Apply log transformation to price
        train_data = apply_price_transformation(train_data)

        # Create calculated features
        print("Creating calculated features...")
        train_data = create_calculated_features(train_data)

        # Check for NaN values in the dataset and fill them
        nan_columns = train_data.columns[train_data.isna().any()].tolist()
        if nan_columns:
            print(f"Warning: Found NaN values in columns: {nan_columns}")
            print("Filling NaN values with column means/medians")

            for col in nan_columns:
                if np.issubdtype(train_data[col].dtype, np.number):
                    # Fill with median for numeric columns
                    train_data[col] = train_data[col].fillna(train_data[col].median())
                else:
                    # For non-numeric, fill with mode
                    train_data[col] = train_data[col].fillna(train_data[col].mode()[0])

        # Define feature groups
        spatial_features = [
            'latitude', 'longitude'
        ]

        property_features = [
            'accommodates', 'bedrooms', 'bathrooms',
            'amenity_count', 'luxury_score', 'essential_score'
        ]

        temporal_features = [
            'DTF_day_of_week', 'DTF_month', 'DTF_is_weekend',
            'DTF_season_sin', 'DTF_season_cos'
        ]

        # Ensure all feature lists only contain columns that exist in the dataset
        spatial_features = [f for f in spatial_features if f in train_data.columns]
        property_features = [f for f in property_features if f in train_data.columns]
        temporal_features = [f for f in temporal_features if f in train_data.columns]

        print(f"Using {len(spatial_features)} spatial features, {len(property_features)} property features, and {len(temporal_features)} temporal features")

        # Calendar date of every row, computed once and reused by every split
        # (``.dt.date`` builds Python objects and is expensive to repeat).
        row_dates = train_data['date'].dt.date

        # Get unique dates and ensure they're properly sorted
        unique_dates = sorted(row_dates.unique())

        # Test window: the last ``window_size`` unique dates
        # (35 dates -> 5 periods of 7 dates with the defaults).
        last_35_days = unique_dates[-window_size:]

        # Define explicit, equally sized test periods
        period_length = window_size // n_splits
        test_periods = []
        for i in range(n_splits):
            start_idx = i * period_length
            end_idx = start_idx + period_length
            # Make sure we don't go beyond the available data
            if end_idx <= len(last_35_days):
                test_periods.append((last_35_days[start_idx], last_35_days[end_idx-1]))

        # Adjust n_splits if we couldn't create enough test periods
        n_splits = len(test_periods)

        print(f"Created {n_splits} test periods:")
        for i, (test_start, test_end) in enumerate(test_periods):
            print(f"  Period {i+1}: {test_start} to {test_end}")

        # Storage for results
        cv_results = []
        all_predictions = []
        all_targets = []
        split_metrics = []

        # Initialize device
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        print(f"Using device: {device}")

        # Run time series cross-validation using our explicit test periods
        for i, (test_start, test_end) in enumerate(test_periods):
            print(f"\n===== Split {i+1}/{n_splits} =====")

            # Define training period: everything before test_start
            train_end = pd.to_datetime(test_start) - pd.Timedelta(days=1)
            train_end_date = train_end.date()

            print(f"Training period: {unique_dates[0]} to {train_end_date}")
            print(f"Testing period: {test_start} to {test_end}")

            # Split by date first
            train_date_mask = row_dates <= train_end_date
            test_date_mask = (row_dates >= test_start) & (row_dates <= test_end)

            date_filtered_train = train_data[train_date_mask]
            date_filtered_test = train_data[test_date_mask]

            # Now further split by listing IDs
            train_id_mask = date_filtered_train['listing_id'].isin(train_listing_ids)
            test_id_mask = date_filtered_test['listing_id'].isin(test_listing_ids)

            split_train_data = date_filtered_train[train_id_mask].copy()
            split_test_data = date_filtered_test[test_id_mask].copy()

            print(f"Train data: {len(split_train_data)} rows, {len(split_train_data['listing_id'].unique())} unique listings")
            print(f"Test data: {len(split_test_data)} rows, {len(split_test_data['listing_id'].unique())} unique listings")

            # Check if we have enough data for this split
            if len(split_train_data) < 100 or len(split_test_data) < 10:
                print(f"Insufficient data for split {i+1}, skipping")
                continue

            # Manage memory before training
            gc.collect()
            if device.type == 'cuda':
                torch.cuda.empty_cache()

            # Train model for this split; a failure skips the split, not the run
            try:
                print(f"\n----- Training EnhancedNeighborBasedLSTM Model (Split {i+1}) -----")

                # Prepare data for enhanced model
                data_obj, property_scaler, temporal_scaler, spatial_scaler, target_scaler = prepare_data_for_enhanced_neighbor_lstm(
                    split_train_data, split_test_data, neighbor_dict, 
                    spatial_features, temporal_features, property_features,
                    max_neighbors=max_neighbors, seq_length=seq_length
                )

                # Move data to device
                data_obj = data_obj.to(device)

                # Initialize enhanced model
                model = EnhancedNeighborBasedLSTM(
                    input_dim=1,
                    spatial_features_dim=len(spatial_features),
                    temporal_features_dim=len(temporal_features),
                    property_features_dim=len(property_features),
                    max_neighbors=max_neighbors,
                    lstm_hidden_dim=lstm_hidden_dim,
                    hidden_dim=hidden_dim,
                    dropout=0.3,
                    heads=4,
                    edge_dim=1
                ).to(device)

                # Initialize optimizer and loss
                optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=1e-5)
                criterion = nn.HuberLoss(delta=1.0)
                scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
                    optimizer, mode='min', factor=0.5, patience=5, verbose=True
                )

                # Early-stopping bookkeeping
                best_val_loss = float('inf')
                best_model_state = None
                patience = 10
                counter = 0

                for epoch in range(epochs):
                    # Training step (single full-batch update per epoch)
                    model.train()
                    optimizer.zero_grad()
                    out = model(data_obj)
                    loss = criterion(out, data_obj.y)
                    loss.backward()
                    torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
                    optimizer.step()

                    # Validation
                    # NOTE(review): evaluated on the same targets as the
                    # training loss (``data_obj.y``), not on held-out rows.
                    model.eval()
                    with torch.no_grad():
                        val_loss_value = criterion(model(data_obj), data_obj.y).item()

                    # Print progress
                    print(f"Epoch {epoch+1}/{epochs} - Loss: {loss.item():.4f}, Val Loss: {val_loss_value:.4f}")

                    # Learning rate scheduling
                    scheduler.step(val_loss_value)

                    # Early stopping
                    if val_loss_value < best_val_loss:
                        best_val_loss = val_loss_value
                        # Deep copy: ``state_dict()`` hands out references to
                        # the live parameters, so a shallow ``dict.copy()``
                        # would silently track later epochs.
                        best_model_state = copy.deepcopy(model.state_dict())
                        counter = 0
                    else:
                        counter += 1

                    if counter >= patience:
                        print(f"Early stopping at epoch {epoch+1}")
                        break

                    # Memory management
                    if device.type == 'cuda':
                        torch.cuda.empty_cache()

                # Restore the best weights seen during training
                if best_model_state is not None:
                    model.load_state_dict(best_model_state)

                # Get predictions
                model.eval()
                with torch.no_grad():
                    predictions = model(data_obj)
                    predictions_np = target_scaler.inverse_transform(predictions.cpu().numpy())
                    # expm1 presumably undoes the log transform applied by
                    # apply_price_transformation -- TODO confirm.
                    predictions_orig = np.expm1(predictions_np)

                # Get actual test values (original scale)
                test_actuals = split_test_data['original_price'].values if 'original_price' in split_test_data.columns else split_test_data['price'].values

                # Evaluate predictions
                metrics = evaluate_gnn_predictions(test_actuals, predictions_orig.flatten(), print_results=True)

                print(f"Split {i+1} Results - RMSE: {metrics['rmse']:.4f}, MAE: {metrics['mae']:.4f}, R²: {metrics['r2']:.4f}")

                # Store results for this split
                split_results = pd.DataFrame({
                    'split': i,
                    'date': split_test_data['date'],
                    'listing_id': split_test_data['listing_id'],
                    'price': test_actuals,
                    'predicted': predictions_orig.flatten(),
                    'error': test_actuals - predictions_orig.flatten(),
                    'abs_error': np.abs(test_actuals - predictions_orig.flatten()),
                    'pct_error': np.abs((test_actuals - predictions_orig.flatten()) / (test_actuals + 1e-8)) * 100
                })

                cv_results.append(split_results)
                all_predictions.extend(predictions_orig.flatten())
                all_targets.extend(test_actuals)

                # Save model for this split if output_dir is provided
                if output_dir:
                    model_path = os.path.join(output_dir, f'enhanced_neighbor_lstm_model_split_{i+1}.pt')
                    torch.save(model.state_dict(), model_path)
                    print(f"Model for split {i+1} saved to {model_path}")

                # Store metrics for this split
                split_metrics.append({
                    'split': i,
                    'rmse': metrics['rmse'],
                    'mae': metrics['mae'],
                    'r2': metrics['r2'],
                    'mape': metrics['mape'],
                    'n_samples': len(test_actuals)
                })

                # Memory management after each split
                del model, property_scaler, temporal_scaler, spatial_scaler, target_scaler
                del predictions_orig, data_obj, split_train_data, split_test_data
                gc.collect()
                if device.type == 'cuda':
                    torch.cuda.empty_cache()

            except Exception as e:
                print(f"Error in split {i+1}: {str(e)}")
                import traceback
                traceback.print_exc()
                continue

        # Combine all results
        if not cv_results:
            print("No valid splits completed. Check your data and parameters.")
            return None

        all_results = pd.concat(cv_results, ignore_index=True)

        # Calculate overall metrics
        all_targets_array = np.array(all_targets)
        all_predictions_array = np.array(all_predictions)

        overall_metrics = {
            'rmse': np.sqrt(mean_squared_error(all_targets_array, all_predictions_array)),
            'mae': mean_absolute_error(all_targets_array, all_predictions_array),
            'r2': r2_score(all_targets_array, all_predictions_array),
            'mape': np.mean(np.abs((all_targets_array - all_predictions_array) / (all_targets_array + 1e-8))) * 100
        }

        # Calculate daily metrics
        all_results['date_str'] = pd.to_datetime(all_results['date']).dt.strftime('%Y-%m-%d')

        daily_metrics = []
        for day, group in all_results.groupby('date_str'):
            y_true_day = group['price']
            y_pred_day = group['predicted']

            daily_metrics.append({
                'date': day,
                'rmse': np.sqrt(mean_squared_error(y_true_day, y_pred_day)),
                'mae': mean_absolute_error(y_true_day, y_pred_day),
                # R² is undefined when all true values of the day are identical
                'r2': r2_score(y_true_day, y_pred_day) if len(set(y_true_day)) > 1 else np.nan,
                'mape': np.mean(np.abs((y_true_day - y_pred_day) / (y_true_day + 1e-8))) * 100,
                'n_samples': len(y_true_day)
            })

        daily_metrics_df = pd.DataFrame(daily_metrics)
        daily_metrics_df['date'] = pd.to_datetime(daily_metrics_df['date'])
        daily_metrics_df = daily_metrics_df.sort_values('date')

        split_metrics_df = pd.DataFrame(split_metrics)

        # Create a results dictionary
        evaluation_results = {
            'overall_metrics': overall_metrics,
            'split_metrics': split_metrics_df,
            'daily_metrics': daily_metrics_df,
            'all_results': all_results,
            'train_listings': len(train_listing_ids),
            'test_listings': len(test_listing_ids)
        }

        # Save results if output directory is provided
        if output_dir:
            # Save all results
            results_file = os.path.join(output_dir, 'enhanced_neighbor_lstm_rolling_window_results.csv')
            all_results.to_csv(results_file, index=False)
            print(f"Results saved to {results_file}")

            # Save metrics
            metrics_file = os.path.join(output_dir, 'enhanced_neighbor_lstm_rolling_window_metrics.csv')
            daily_metrics_df.to_csv(metrics_file, index=False)
            print(f"Daily metrics saved to {metrics_file}")

            # Save summary
            with open(os.path.join(output_dir, 'enhanced_neighbor_lstm_cv_summary.txt'), 'w') as f:
                f.write("EnhancedNeighborBasedLSTM Rolling Window CV Model Summary\n")
                f.write("=================================\n\n")
                f.write(f"Window size: {window_size} days\n")
                f.write(f"Number of splits: {n_splits}\n")
                f.write(f"Training period: {unique_dates[0]} to {unique_dates[-1]}\n")
                f.write(f"Number of training listings: {len(train_listing_ids)}\n")
                f.write(f"Number of test listings: {len(test_listing_ids)}\n\n")
                f.write(f"LSTM hidden dimension: {lstm_hidden_dim}\n")
                f.write(f"Maximum neighbors per listing: {max_neighbors}\n\n")
                f.write("Overall Metrics:\n")
                for metric_name, metric_value in overall_metrics.items():
                    f.write(f"  {metric_name}: {metric_value:.6f}\n")

        # Print summary
        print("\n===== EnhancedNeighborBasedLSTM ROLLING WINDOW CV SUMMARY =====")
        print(f"Using {len(train_listing_ids)} listings for training and {len(test_listing_ids)} listings for testing")

        print("\n=== Overall Metrics ===")
        print(f"RMSE: {overall_metrics['rmse']:.4f}")
        print(f"MAE: {overall_metrics['mae']:.4f}")
        print(f"R²: {overall_metrics['r2']:.4f}")
        print(f"MAPE: {overall_metrics['mape']:.4f}%")

        print("\n=== Split Performance ===")
        print(split_metrics_df[['split', 'rmse', 'mae', 'r2', 'n_samples']].to_string(index=False))

        # Return evaluation results
        return evaluation_results

    except Exception as e:
        # Top-level boundary: report the failure and signal it with None
        print(f"Error in rolling window CV: {str(e)}")
        import traceback
        traceback.print_exc()
        return None

def run_enhanced_neighbor_lstm_model(train_path, train_ids_path, test_ids_path, neighbor_csv_path, 
                                  output_dir=None, lstm_hidden_dim=8, hidden_dim=64, max_neighbors=5,
                                  seq_length=30, epochs=50, lr=0.001, sample_size=None):
    """
    Train and evaluate the enhanced neighbor LSTM model on a single train/test split.

    Rows of ``train_path`` are partitioned by listing ID (no date-based split):
    listings in ``train_ids_path`` form the training frame, listings in
    ``test_ids_path`` the test frame. The model is trained with
    ``train_enhanced_neighbor_lstm_model``, its training history is plotted, and
    predictions for the test frame are evaluated and optionally saved.

    Args:
        train_path: CSV file with the full listing/date panel.
        train_ids_path, test_ids_path: Text files with one integer listing ID
            per line (blank lines are ignored).
        neighbor_csv_path: CSV consumed by ``load_neighbor_data``.
        output_dir: If given, the history plot, model weights, scalers and test
            predictions are written here.
        lstm_hidden_dim, hidden_dim, max_neighbors, seq_length: Model/data
            hyper-parameters.
        epochs: Maximum number of training epochs.
        lr: Learning rate.
        sample_size: If truthy, randomly keep ``0.8 * sample_size`` train IDs
            and ``0.2 * sample_size`` test IDs (seeded with 42).

    Returns:
        Tuple ``(model, property_scaler, temporal_scaler, spatial_scaler,
        target_scaler, test_metrics)`` on success, ``None`` if any step raised.
    """
    # Use the same initialization as the original function
    try:
        # Create output directory if not exists
        if output_dir:
            os.makedirs(output_dir, exist_ok=True)

        # Load training data
        print("Loading data...")
        train_data = pd.read_csv(train_path)

        # Load listing IDs for train/test split; skip blank lines so a stray
        # empty line does not abort the run with int('').
        print("Loading train/test listing IDs...")
        with open(train_ids_path, 'r') as f:
            train_listing_ids = [int(line) for line in f if line.strip()]

        with open(test_ids_path, 'r') as f:
            test_listing_ids = [int(line) for line in f if line.strip()]

        print(f"Loaded {len(train_listing_ids)} train IDs and {len(test_listing_ids)} test IDs")

        # Load neighbor data
        neighbor_dict = load_neighbor_data(neighbor_csv_path)

        # Optional down-sampling of listings for quick experiments
        if sample_size:
            print(f"Limiting to {sample_size} random listings for testing")
            np.random.seed(42)
            selected_train = np.random.choice(train_listing_ids, int(sample_size * 0.8), replace=False)
            selected_test = np.random.choice(test_listing_ids, int(sample_size * 0.2), replace=False)
            train_listing_ids = selected_train.tolist()
            test_listing_ids = selected_test.tolist()

        # Parse dates only if the column exists and is not already datetime
        if 'date' in train_data.columns and not pd.api.types.is_datetime64_any_dtype(train_data['date']):
            train_data['date'] = pd.to_datetime(train_data['date'])

        # Create calculated features
        print("Creating calculated features...")
        train_data = create_calculated_features(train_data)

        # Fill NaN values: median for numeric columns, mode otherwise
        nan_columns = train_data.columns[train_data.isna().any()].tolist()
        if nan_columns:
            print(f"Warning: Found NaN values in columns: {nan_columns}")
            print("Filling NaN values with column means/medians")

            for col in nan_columns:
                if np.issubdtype(train_data[col].dtype, np.number):
                    train_data[col] = train_data[col].fillna(train_data[col].median())
                else:
                    train_data[col] = train_data[col].fillna(train_data[col].mode()[0])

        # Split data into train and test based on listing IDs
        train_mask = train_data['listing_id'].isin(train_listing_ids)
        test_mask = train_data['listing_id'].isin(test_listing_ids)

        train_df = train_data[train_mask].copy()
        test_df = train_data[test_mask].copy()

        print(f"Train data: {len(train_df)} rows, {len(train_df['listing_id'].unique())} unique listings")
        print(f"Test data: {len(test_df)} rows, {len(test_df['listing_id'].unique())} unique listings")

        # Define feature groups, now including spatial features
        spatial_features = [
            'latitude', 'longitude'
        ]

        property_features = [
            'accommodates', 'bedrooms', 'bathrooms',
            'amenity_count', 'luxury_score', 'essential_score', 'bedroom_ratio'
        ]

        temporal_features = [
            'DTF_day_of_week', 'DTF_month', 'DTF_is_weekend',
            'DTF_season_sin', 'DTF_season_cos'
        ]

        # Ensure all feature lists only contain columns that exist in the dataset
        spatial_features = [f for f in spatial_features if f in train_df.columns]
        property_features = [f for f in property_features if f in train_df.columns]
        temporal_features = [f for f in temporal_features if f in train_df.columns]

        print(f"Using {len(spatial_features)} spatial features, {len(property_features)} property features, and {len(temporal_features)} temporal features")

        # Apply log transformation to prices
        train_df = apply_price_transformation(train_df)
        test_df = apply_price_transformation(test_df)

        # Initialize device
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        print(f"Using device: {device}")

        # Memory management before training
        gc.collect()
        if device.type == 'cuda':
            torch.cuda.empty_cache()

        # Train enhanced model
        print("\n===== Training Enhanced NeighborBasedLSTM Model with Spatial GAT =====")
        model, property_scaler, temporal_scaler, spatial_scaler, target_scaler, history = train_enhanced_neighbor_lstm_model(
            train_df, test_df, neighbor_dict, 
            spatial_features, temporal_features, property_features,
            max_neighbors=max_neighbors, 
            seq_length=seq_length, 
            lstm_hidden_dim=lstm_hidden_dim,
            hidden_dim=hidden_dim, 
            epochs=epochs, 
            lr=lr, 
            device=device
        )

        # Plot training history as a 2x2 grid: loss, RMSE, MAE, learning rate
        fig = plt.figure(figsize=(12, 10))
        plt.subplot(2, 2, 1)
        plt.plot(history['train_loss'], label='Train Loss')
        plt.plot(history['val_loss'], label='Validation Loss')
        plt.title('Training and Validation Loss')
        plt.xlabel('Epoch')
        plt.ylabel('Loss')
        plt.legend()

        plt.subplot(2, 2, 2)
        plt.plot(history['val_rmse'], label='Validation RMSE')
        plt.title('Validation RMSE')
        plt.xlabel('Epoch')
        plt.ylabel('RMSE')

        plt.subplot(2, 2, 3)
        plt.plot(history['val_mae'], label='Validation MAE')
        plt.title('Validation MAE')
        plt.xlabel('Epoch')
        plt.ylabel('MAE')

        plt.subplot(2, 2, 4)
        plt.plot(history['lr'], label='Learning Rate')
        plt.title('Learning Rate')
        plt.xlabel('Epoch')
        plt.ylabel('LR')
        plt.yscale('log')

        plt.tight_layout()

        if output_dir:
            plt.savefig(os.path.join(output_dir, 'enhanced_neighbor_lstm_training_history.png'))
        plt.show()
        # Release the figure so repeated runs do not accumulate open figures
        plt.close(fig)

        # Rebuild the data object with the already-fitted scalers for evaluation.
        # NOTE(review): the four scalers are passed positionally; this assumes
        # they are the 7th-10th parameters of the helper -- confirm against its
        # signature.
        data_obj, _, _, _, _ = prepare_data_for_enhanced_neighbor_lstm(
            train_df, test_df, neighbor_dict, 
            spatial_features, temporal_features, property_features,
            property_scaler, temporal_scaler, spatial_scaler, target_scaler,
            max_neighbors=max_neighbors, seq_length=seq_length
        )

        # Get predictions
        model.eval()
        with torch.no_grad():
            data_obj = data_obj.to(device)
            predictions = model(data_obj)
            predictions_np = target_scaler.inverse_transform(predictions.cpu().numpy())
            # expm1 presumably undoes the log transform applied by
            # apply_price_transformation -- TODO confirm.
            predictions_orig = np.expm1(predictions_np)

        # Get actual test values (prefer the untransformed column when present)
        test_actuals = test_df['original_price'].values if 'original_price' in test_df.columns else test_df['price'].values

        # Evaluate predictions
        test_metrics = evaluate_gnn_predictions(test_actuals, predictions_orig.flatten(), print_results=True)

        # Plot results
        plot_gnn_results(test_actuals, predictions_orig.flatten(), history, output_dir)

        # Save model and scalers
        if output_dir:
            torch.save(model.state_dict(), os.path.join(output_dir, 'enhanced_neighbor_lstm_model.pt'))
            torch.save({
                'property_scaler': property_scaler,
                'temporal_scaler': temporal_scaler,
                'spatial_scaler': spatial_scaler,
                'target_scaler': target_scaler,
                'max_neighbors': max_neighbors,
                'seq_length': seq_length,
                'lstm_hidden_dim': lstm_hidden_dim
            }, os.path.join(output_dir, 'enhanced_neighbor_lstm_scalers.pt'))
            print(f"Model and scalers saved to {output_dir}")

            # Save test predictions
            test_results = pd.DataFrame({
                'listing_id': test_df['listing_id'].values,
                'date': test_df['date'].values,
                'actual': test_actuals,
                'predicted': predictions_orig.flatten(),
                'error': test_actuals - predictions_orig.flatten(),
                'abs_error': np.abs(test_actuals - predictions_orig.flatten()),
                'pct_error': np.abs((test_actuals - predictions_orig.flatten()) / (test_actuals + 1e-8)) * 100
            })
            test_results.to_csv(os.path.join(output_dir, 'enhanced_neighbor_lstm_test_predictions.csv'), index=False)
            print(f"Test predictions saved to {os.path.join(output_dir, 'enhanced_neighbor_lstm_test_predictions.csv')}")

        # Return model and metrics
        return model, property_scaler, temporal_scaler, spatial_scaler, target_scaler, test_metrics

    except Exception as e:
        # Top-level boundary: report the failure and signal it with None
        print(f"Error in Enhanced NeighborBasedLSTM model training: {str(e)}")
        import traceback
        traceback.print_exc()
        return None

if __name__ == "__main__":
    # Script entry point: configure data paths and hyperparameters, then run
    # the enhanced neighbor-based LSTM pipeline in the selected run mode.

    # Set paths to your data
    train_path = "train_up3.csv"
    train_ids_path = "train_ids.txt"
    test_ids_path = "test_ids.txt"
    neighbor_csv_path = "./neighbor_data/neighbor_dict.csv"  # Path to neighbor information CSV

    # Output directory; created up front so it exists before either run mode is started
    output_dir = "./output/enhanced_neighbor_lstm_model"
    os.makedirs(output_dir, exist_ok=True)

    # Model parameters
    lstm_hidden_dim = 32        # Hidden dimension for LSTM
    hidden_dim = 64             # Hidden dimension size for rest of model
    max_neighbors = 5           # Maximum number of neighbors to consider per listing
    seq_length = 60             # Sequence length for neighbor price history
    epochs = 125                # Maximum number of epochs
    lr = 0.001                  # Learning rate

    # Run modes that this entry point actually implements. Any other value
    # (including the formerly advertised "compare_dims") is reported below
    # instead of being silently ignored.
    supported_run_modes = ("single", "rolling_window")
    run_mode = "rolling_window"

    try:
        if run_mode == "single":
            # Single train/test run of the enhanced model
            result_tuple = run_enhanced_neighbor_lstm_model(
                train_path=train_path,
                train_ids_path=train_ids_path,
                test_ids_path=test_ids_path,
                neighbor_csv_path=neighbor_csv_path,
                output_dir=output_dir,
                lstm_hidden_dim=lstm_hidden_dim,
                hidden_dim=hidden_dim,
                max_neighbors=max_neighbors,
                seq_length=seq_length,
                epochs=epochs,
                lr=lr,
                sample_size=None  # Set to a number for testing or None for full dataset
            )

            if result_tuple:
                model, property_scaler, temporal_scaler, spatial_scaler, target_scaler, test_metrics = result_tuple
                print("Enhanced NeighborBasedLSTM model training completed successfully!")

                # Print summary of model: parameter counts split by whether
                # the parameter name contains 'lstm' or 'gat'
                print("\n===== Model Summary =====")
                total_param = sum(p.numel() for p in model.parameters())
                lstm_param = sum(p.numel() for name, p in model.named_parameters() if 'lstm' in name)
                gat_param = sum(p.numel() for name, p in model.named_parameters() if 'gat' in name)
                print(f"Total model parameters: {total_param:,}")
                print(f"LSTM parameters: {lstm_param:,}")
                print(f"GAT parameters: {gat_param:,}")
                print(f"LSTM parameters as % of total: {lstm_param/total_param*100:.2f}%")
                print(f"GAT parameters as % of total: {gat_param/total_param*100:.2f}%")
            else:
                # A falsy result means no model came back; presumably the
                # training routine already printed its own error details.
                print("Enhanced NeighborBasedLSTM model training did not return a result.")

        elif run_mode == "rolling_window":
            # Run with rolling window cross-validation using enhanced model
            results = run_enhanced_neighbor_lstm_with_rolling_window_cv(
                train_path=train_path,
                train_ids_path=train_ids_path,
                test_ids_path=test_ids_path,
                neighbor_csv_path=neighbor_csv_path,
                output_dir=output_dir,
                window_size=35,  # 5 weeks
                n_splits=5,
                max_neighbors=max_neighbors,
                seq_length=seq_length,
                lstm_hidden_dim=lstm_hidden_dim,
                hidden_dim=hidden_dim,
                epochs=epochs,
                lr=lr,
                sample_size=None  # Set to a number for testing or None for full dataset
            )
            # NOTE(review): printed regardless of what `results` holds —
            # confirm the CV function's return contract before gating on it.
            print("EnhancedNeighborBasedLSTM model with rolling window CV completed successfully!")

        else:
            # Make a mistyped or unimplemented mode visible instead of exiting quietly
            print(f"Unsupported run_mode {run_mode!r}; expected one of {supported_run_modes}")
    except Exception as e:
        # Top-level boundary: report the failure with a full traceback
        print(f"Error running Enhanced NeighborBasedLSTM model: {str(e)}")
        import traceback
        traceback.print_exc()

Processing dataset: train_up3.csv
Loading data...
Loading train/test listing IDs...
Loading neighbor data from ./neighbor_data/neighbor_dict.csv
Loaded neighbor data for 1573 test listings
Loaded 6291 train IDs and 1573 test IDs
Applying log transformation to price data
Creating calculated features...
Using 2 spatial features, 6 property features, and 5 temporal features
Created 5 test periods:
  Period 1: 2024-01-05 to 2024-01-11
  Period 2: 2024-01-12 to 2024-01-18
  Period 3: 2024-01-19 to 2024-01-25
  Period 4: 2024-01-26 to 2024-02-01
  Period 5: 2024-02-02 to 2024-02-08
Using device: cuda

===== Split 1/5 =====
Training period: 2023-08-07 to 2024-01-04
Testing period: 2024-01-05 to 2024-01-11
Train data: 903142 rows, 6291 unique listings
Test data: 11011 rows, 1573 unique listings

----- Training EnhancedNeighborBasedLSTM Model (Split 1) -----
Preparing data for EnhancedNeighborBasedLSTM...
Preparing neighbor data batch...
Prepared neighbor data for 11011 test instances
Building 



Epoch 1/125 - Loss: 0.5653, Val Loss: 0.3517
Epoch 2/125 - Loss: 0.4492, Val Loss: 0.3449
Epoch 3/125 - Loss: 0.3646, Val Loss: 0.3373
Epoch 4/125 - Loss: 0.3261, Val Loss: 0.3290
Epoch 5/125 - Loss: 0.3120, Val Loss: 0.3209
Epoch 6/125 - Loss: 0.3046, Val Loss: 0.3133
Epoch 7/125 - Loss: 0.2941, Val Loss: 0.3068
Epoch 8/125 - Loss: 0.2841, Val Loss: 0.3013
Epoch 9/125 - Loss: 0.2763, Val Loss: 0.2966
Epoch 10/125 - Loss: 0.2658, Val Loss: 0.2922
Epoch 11/125 - Loss: 0.2653, Val Loss: 0.2874
Epoch 12/125 - Loss: 0.2556, Val Loss: 0.2817
Epoch 13/125 - Loss: 0.2537, Val Loss: 0.2749
Epoch 14/125 - Loss: 0.2529, Val Loss: 0.2669
Epoch 15/125 - Loss: 0.2436, Val Loss: 0.2578
Epoch 16/125 - Loss: 0.2428, Val Loss: 0.2480
Epoch 17/125 - Loss: 0.2428, Val Loss: 0.2383
Epoch 18/125 - Loss: 0.2381, Val Loss: 0.2290
Epoch 19/125 - Loss: 0.2292, Val Loss: 0.2203
Epoch 20/125 - Loss: 0.2299, Val Loss: 0.2127
Epoch 21/125 - Loss: 0.2285, Val Loss: 0.2063
Epoch 22/125 - Loss: 0.2269, Val Loss: 0.20



Epoch 1/125 - Loss: 0.5250, Val Loss: 0.3386
Epoch 2/125 - Loss: 0.4076, Val Loss: 0.3308
Epoch 3/125 - Loss: 0.3449, Val Loss: 0.3231
Epoch 4/125 - Loss: 0.3280, Val Loss: 0.3167
Epoch 5/125 - Loss: 0.3168, Val Loss: 0.3118
Epoch 6/125 - Loss: 0.3028, Val Loss: 0.3081
Epoch 7/125 - Loss: 0.2907, Val Loss: 0.3052
Epoch 8/125 - Loss: 0.2805, Val Loss: 0.3027
Epoch 9/125 - Loss: 0.2761, Val Loss: 0.3000
Epoch 10/125 - Loss: 0.2734, Val Loss: 0.2963
Epoch 11/125 - Loss: 0.2675, Val Loss: 0.2910
Epoch 12/125 - Loss: 0.2648, Val Loss: 0.2844
Epoch 13/125 - Loss: 0.2632, Val Loss: 0.2767
Epoch 14/125 - Loss: 0.2538, Val Loss: 0.2680
Epoch 15/125 - Loss: 0.2502, Val Loss: 0.2585
Epoch 16/125 - Loss: 0.2499, Val Loss: 0.2491
Epoch 17/125 - Loss: 0.2459, Val Loss: 0.2398
Epoch 18/125 - Loss: 0.2415, Val Loss: 0.2309
Epoch 19/125 - Loss: 0.2375, Val Loss: 0.2229
Epoch 20/125 - Loss: 0.2376, Val Loss: 0.2161
Epoch 21/125 - Loss: 0.2323, Val Loss: 0.2103
Epoch 22/125 - Loss: 0.2353, Val Loss: 0.20



Epoch 1/125 - Loss: 0.4735, Val Loss: 0.3490
Epoch 2/125 - Loss: 0.3772, Val Loss: 0.3410
Epoch 3/125 - Loss: 0.3296, Val Loss: 0.3333
Epoch 4/125 - Loss: 0.3066, Val Loss: 0.3265
Epoch 5/125 - Loss: 0.2999, Val Loss: 0.3205
Epoch 6/125 - Loss: 0.2866, Val Loss: 0.3154
Epoch 7/125 - Loss: 0.2799, Val Loss: 0.3110
Epoch 8/125 - Loss: 0.2682, Val Loss: 0.3068
Epoch 9/125 - Loss: 0.2682, Val Loss: 0.3021
Epoch 10/125 - Loss: 0.2534, Val Loss: 0.2971
Epoch 11/125 - Loss: 0.2579, Val Loss: 0.2912
Epoch 12/125 - Loss: 0.2516, Val Loss: 0.2845
Epoch 13/125 - Loss: 0.2513, Val Loss: 0.2769
Epoch 14/125 - Loss: 0.2452, Val Loss: 0.2689
Epoch 15/125 - Loss: 0.2406, Val Loss: 0.2605
Epoch 16/125 - Loss: 0.2391, Val Loss: 0.2519
Epoch 17/125 - Loss: 0.2341, Val Loss: 0.2437
Epoch 18/125 - Loss: 0.2287, Val Loss: 0.2361
Epoch 19/125 - Loss: 0.2280, Val Loss: 0.2292
Epoch 20/125 - Loss: 0.2258, Val Loss: 0.2228
Epoch 21/125 - Loss: 0.2215, Val Loss: 0.2170
Epoch 22/125 - Loss: 0.2243, Val Loss: 0.21



Epoch 1/125 - Loss: 0.4555, Val Loss: 0.3506
Epoch 2/125 - Loss: 0.3724, Val Loss: 0.3441
Epoch 3/125 - Loss: 0.3367, Val Loss: 0.3369
Epoch 4/125 - Loss: 0.3179, Val Loss: 0.3297
Epoch 5/125 - Loss: 0.2975, Val Loss: 0.3231
Epoch 6/125 - Loss: 0.2881, Val Loss: 0.3171
Epoch 7/125 - Loss: 0.2757, Val Loss: 0.3116
Epoch 8/125 - Loss: 0.2645, Val Loss: 0.3063
Epoch 9/125 - Loss: 0.2614, Val Loss: 0.3009
Epoch 10/125 - Loss: 0.2568, Val Loss: 0.2954
Epoch 11/125 - Loss: 0.2511, Val Loss: 0.2894
Epoch 12/125 - Loss: 0.2445, Val Loss: 0.2829
Epoch 13/125 - Loss: 0.2430, Val Loss: 0.2760
Epoch 14/125 - Loss: 0.2369, Val Loss: 0.2690
Epoch 15/125 - Loss: 0.2396, Val Loss: 0.2616
Epoch 16/125 - Loss: 0.2303, Val Loss: 0.2543
Epoch 17/125 - Loss: 0.2290, Val Loss: 0.2471
Epoch 18/125 - Loss: 0.2232, Val Loss: 0.2400
Epoch 19/125 - Loss: 0.2223, Val Loss: 0.2333
Epoch 20/125 - Loss: 0.2191, Val Loss: 0.2267
Epoch 21/125 - Loss: 0.2181, Val Loss: 0.2206
Epoch 22/125 - Loss: 0.2126, Val Loss: 0.21



Epoch 1/125 - Loss: 0.5846, Val Loss: 0.3532
Epoch 2/125 - Loss: 0.4482, Val Loss: 0.3473
Epoch 3/125 - Loss: 0.3725, Val Loss: 0.3410
Epoch 4/125 - Loss: 0.3404, Val Loss: 0.3346
Epoch 5/125 - Loss: 0.3233, Val Loss: 0.3288
Epoch 6/125 - Loss: 0.3104, Val Loss: 0.3237
Epoch 7/125 - Loss: 0.3056, Val Loss: 0.3192
Epoch 8/125 - Loss: 0.2911, Val Loss: 0.3153
Epoch 9/125 - Loss: 0.2727, Val Loss: 0.3117
Epoch 10/125 - Loss: 0.2742, Val Loss: 0.3083
Epoch 11/125 - Loss: 0.2723, Val Loss: 0.3043
Epoch 12/125 - Loss: 0.2703, Val Loss: 0.2992
Epoch 13/125 - Loss: 0.2590, Val Loss: 0.2929
Epoch 14/125 - Loss: 0.2601, Val Loss: 0.2852
Epoch 15/125 - Loss: 0.2504, Val Loss: 0.2763
Epoch 16/125 - Loss: 0.2472, Val Loss: 0.2666
Epoch 17/125 - Loss: 0.2448, Val Loss: 0.2562
Epoch 18/125 - Loss: 0.2383, Val Loss: 0.2454
Epoch 19/125 - Loss: 0.2350, Val Loss: 0.2351
Epoch 20/125 - Loss: 0.2373, Val Loss: 0.2257
Epoch 21/125 - Loss: 0.2322, Val Loss: 0.2175
Epoch 22/125 - Loss: 0.2294, Val Loss: 0.21