In [5]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset
from collections import deque, namedtuple
import random
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from tqdm import tqdm
import os
import gc

ImportError: DLL load failed while importing _C: The specified module could not be found.

In [None]:
# Seed every random number generator used in this file so runs are repeatable
random.seed(42)
np.random.seed(42)
torch.manual_seed(42)

# Prefer CUDA when PyTorch reports it as available, otherwise stay on the CPU
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(f"Using device: {device}")

# One stored transition: (state, action, reward, next_state, done)
Experience = namedtuple(
    'Experience',
    ('state', 'action', 'reward', 'next_state', 'done'),
)

class ReplayBuffer:
    """Bounded store of past transitions with uniform random sampling."""

    def __init__(self, buffer_size, batch_size):
        """Create an empty buffer.

        Args:
            buffer_size (int): Maximum number of transitions kept
            batch_size (int): Upper bound on the size of a sampled batch
        """
        self.batch_size = batch_size
        # A deque with maxlen discards the oldest entry once it is full
        self.memory = deque(maxlen=buffer_size)

    def add(self, state, action, reward, next_state, done):
        """Append one transition to the buffer."""
        self.memory.append(Experience(state, action, reward, next_state, done))

    def sample(self):
        """Draw a random batch and return it as five tensors on `device`.

        The batch holds `batch_size` transitions, or every stored transition
        when fewer than that are available.
        """
        count = min(self.batch_size, len(self.memory))
        batch = random.sample(self.memory, k=count)

        def stacked(field):
            # One row per sampled transition
            return np.vstack([getattr(item, field) for item in batch])

        states = torch.from_numpy(stacked('state')).float().to(device)
        actions = torch.from_numpy(stacked('action')).long().to(device)
        rewards = torch.from_numpy(stacked('reward')).float().to(device)
        next_states = torch.from_numpy(stacked('next_state')).float().to(device)
        # Done flags go through uint8 before becoming a float tensor
        dones = torch.from_numpy(stacked('done').astype(np.uint8)).float().to(device)

        return states, actions, rewards, next_states, dones

    def __len__(self):
        """Number of transitions currently stored."""
        return len(self.memory)

class QNetwork(nn.Module):
    """Deep Q-Network for real estate decision making.

    A stack of Linear -> BatchNorm1d -> ReLU -> Dropout blocks followed by a
    final Linear layer that emits one value per action.
    """

    def __init__(self, state_size, action_size, hidden_layers=(256, 128, 64)):
        """Initialize network parameters and build model

        Args:
            state_size (int): Dimension of each state
            action_size (int): Dimension of each action
            hidden_layers (sequence of int): Hidden layer sizes. An empty
                sequence yields a single Linear layer from state to actions.
        """
        super().__init__()

        # Immutable default + local copy: the argument is never shared or mutated
        hidden_layers = list(hidden_layers)
        widths = [state_size, *hidden_layers]

        # Input and hidden layers: consecutive width pairs
        self.layers = nn.ModuleList(
            [nn.Linear(n_in, n_out) for n_in, n_out in zip(widths, widths[1:])]
        )

        # Output layer
        self.layers.append(nn.Linear(widths[-1], action_size))

        # Batch normalization, one per hidden layer
        self.batch_norm_layers = nn.ModuleList(
            [nn.BatchNorm1d(width) for width in hidden_layers]
        )

        # Dropout for regularization
        self.dropout = nn.Dropout(0.2)

    def forward(self, state):
        """Map a batch of states to a batch of action values."""
        x = state
        # zip stops after the hidden blocks because there is one norm layer
        # fewer than there are linear layers; the output layer is applied below
        for linear, norm in zip(self.layers, self.batch_norm_layers):
            x = self.dropout(F.relu(norm(linear(x))))

        return self.layers[-1](x)

class RealEstateAgent:
    """Agent that interacts with and learns from the real estate environment"""

    def __init__(self, state_size, action_size, hidden_layers=(256, 128, 64), seed=42):
        """Initialize an Agent object

        Args:
            state_size (int): Dimension of each state
            action_size (int): Dimension of each action
            hidden_layers (sequence of int): Hidden layer sizes
            seed (int): Random seed
        """
        self.state_size = state_size
        self.action_size = action_size
        # random.seed() returns None, so seed the generator and keep the value
        # itself; previously self.seed was always None
        random.seed(seed)
        self.seed = seed

        # Q-Network (local is trained, target supplies the bootstrap values)
        hidden_layers = list(hidden_layers)
        self.qnetwork_local = QNetwork(state_size, action_size, hidden_layers).to(device)
        self.qnetwork_target = QNetwork(state_size, action_size, hidden_layers).to(device)
        self.optimizer = optim.Adam(self.qnetwork_local.parameters(), lr=5e-4)

        # Hyperparameters
        self.gamma = 0.99    # discount factor
        self.tau = 1e-3      # for soft update of target parameters
        self.update_every = 4  # how often to update the network
        self.batch_size = 64   # minibatch size

        # Replay memory; shares the minibatch size used by step()
        self.memory = ReplayBuffer(buffer_size=10000, batch_size=self.batch_size)

        # Initialize time step (for updating every update_every steps)
        self.t_step = 0

        # Exploration parameters
        self.epsilon = 1.0     # exploration rate
        self.epsilon_decay = 0.995  # decay rate
        self.epsilon_min = 0.01     # minimum exploration rate

        # Portfolio tracking
        self.portfolio = {}    # Dictionary to track owned properties
        self.cash = 1000000    # Starting cash (can be adjusted)
        self.net_worth_history = []  # Track net worth over time

        # Holding period preferences based on net worth
        self.net_worth_threshold_1 = 2000000  # First threshold
        self.net_worth_threshold_2 = 5000000  # Second threshold

    def step(self, state, action, reward, next_state, done):
        """Save experience in replay memory, and use random sample to learn"""
        # Save experience in replay memory
        self.memory.add(state, action, reward, next_state, done)

        # Learn every UPDATE_EVERY time steps
        self.t_step = (self.t_step + 1) % self.update_every
        if self.t_step == 0 and len(self.memory) > self.batch_size:
            experiences = self.memory.sample()
            self.learn(experiences)

    def act(self, state, eval_mode=False):
        """Choose an action for `state` with an epsilon-greedy rule.

        Args:
            state (array_like): Current state
            eval_mode (bool): When True the exploration probability is 0
        """
        # Object-dtype arrays are stacked and converted to float32 first
        needs_stacking = isinstance(state, np.ndarray) and state.dtype == np.dtype('O')
        if needs_stacking:
            state = np.vstack(state).astype(np.float32)

        state_tensor = torch.from_numpy(state).float().unsqueeze(0).to(device)

        explore_prob = 0.0 if eval_mode else self.epsilon

        # Score the state with the local network in eval mode and without
        # gradients, then put the network back into training mode
        self.qnetwork_local.eval()
        with torch.no_grad():
            q_values = self.qnetwork_local(state_tensor)
        self.qnetwork_local.train()

        # Exploit when the draw exceeds the exploration probability
        if random.random() > explore_prob:
            return np.argmax(q_values.cpu().data.numpy())
        return random.choice(np.arange(self.action_size))

    def learn(self, experiences):
        """Update value parameters using given batch of experience tuples

        Args:
            experiences (Tuple[torch.Tensor]): tuple of (s, a, r, s', done)
        """
        states, actions, rewards, next_states, dones = experiences

        # Get max predicted Q values (for next states) from target model
        Q_targets_next = self.qnetwork_target(next_states).detach().max(1)[0].unsqueeze(1)

        # Compute Q targets for current states
        Q_targets = rewards + (self.gamma * Q_targets_next * (1 - dones))

        # Get expected Q values from local model
        Q_expected = self.qnetwork_local(states).gather(1, actions)

        # Compute loss
        loss = F.mse_loss(Q_expected, Q_targets)

        # Minimize the loss
        self.optimizer.zero_grad()
        loss.backward()
        # Clip gradients to prevent exploding gradients
        torch.nn.utils.clip_grad_norm_(self.qnetwork_local.parameters(), 1)
        self.optimizer.step()

        # Soft update target network
        self.soft_update(self.qnetwork_local, self.qnetwork_target)

        # Update epsilon
        self.epsilon = max(self.epsilon_min, self.epsilon * self.epsilon_decay)

    def soft_update(self, local_model, target_model):
        """Soft update model parameters:
        ÃŽÂ¸_target = Ãâ€ž*ÃŽÂ¸_local + (1 - Ãâ€ž)*ÃŽÂ¸_target

        Args:
            local_model (PyTorch model): source model
            target_model (PyTorch model): target model
            tau (float): interpolation parameter
        """
        for target_param, local_param in zip(target_model.parameters(), local_model.parameters()):
            target_param.data.copy_(self.tau * local_param.data + (1.0 - self.tau) * target_param.data)

    def update_portfolio(self, property_id, action_type, price, year):
        """Update agent's portfolio based on actions taken

        Args:
            property_id (str): Property ID
            action_type (str): 'buy' or 'sell'
            price (float): Transaction price
            year (int): Transaction year
        """
        if action_type == 'buy':
            if self.cash >= price:
                self.portfolio[property_id] = {
                    'purchase_year': year,
                    'purchase_price': price,
                    'recommended_holding_period': self.get_recommended_holding_period()
                }
                self.cash -= price
                return True
            else:
                return False  # Not enough cash

        elif action_type == 'sell':
            if property_id in self.portfolio:
                self.cash += price
                purchased_price = self.portfolio[property_id]['purchase_price']
                profit = price - purchased_price
                del self.portfolio[property_id]
                return profit
            else:
                return 0  # Property not in portfolio

    def get_recommended_holding_period(self):
        """Determine recommended holding period based on net worth"""
        net_worth = self.calculate_net_worth()

        if net_worth < self.net_worth_threshold_1:
            # Early phase: shorter holding periods
            return random.randint(2, 3)
        elif net_worth < self.net_worth_threshold_2:
            # Middle phase: medium holding periods
            return random.randint(3, 4)
        else:
            # High net worth phase: longer holding periods
            return random.randint(4, 5)

    def calculate_net_worth(self, current_property_values=None):
        """Calculate current net worth including cash and property values"""
        if current_property_values is None:
            # If no property values provided, use purchase prices
            property_value_sum = sum(p['purchase_price'] for p in self.portfolio.values())
        else:
            # Use provided current values
            property_value_sum = sum(current_property_values.get(pid, p['purchase_price'])
                                    for pid, p in self.portfolio.items())

        return self.cash + property_value_sum

    def record_net_worth(self, current_property_values):
        """Record current net worth for tracking"""
        net_worth = self.calculate_net_worth(current_property_values)
        self.net_worth_history.append(net_worth)
        return net_worth

    def display_portfolio_details(self, data_processor):
        """Display detailed information about properties in the portfolio"""
        print("\n========== PORTFOLIO DETAILS ==========")
        print(f"Total Properties: {len(self.portfolio)}")
        print(f"Cash Balance: ${self.cash:,.2f}")

        if not self.portfolio:
            print("No properties currently in portfolio.")
            return

        # Get property listings for location data
        property_listings = data_processor.datasets['property_listings']

        # Set up a table format
        headers = ["ZPID", "Location", "Purchase Price", "Purchase Year", "Holding Period", "Est. Years to Profit"]
        row_format = "{:<12} {:<30} {:<15} {:<15} {:<15} {:<20}"
        print(row_format.format(*headers))
        print("-" * 100)

        # Display each property
        for zpid, details in self.portfolio.items():
            # Get property info
            property_info = property_listings[property_listings['zpid'] == zpid]

            if not property_info.empty:
                # Extract location data
                city = property_info['city'].values[0] if 'city' in property_info.columns else 'Unknown'
                state = property_info['state'].values[0] if 'state' in property_info.columns else 'Unknown'
                location = f"{city}, {state}"

                # Calculate estimated years to profit
                est_years_to_profit = self.estimate_years_to_profit(zpid, details, data_processor)

                # Format and print the row
                row = [
                    zpid,
                    location,
                    f"${details['purchase_price']:,.2f}",
                    details['purchase_year'],
                    details['recommended_holding_period'],
                    f"{est_years_to_profit:.1f} years"
                ]
                print(row_format.format(*row))

        print("=======================================\n")

    def estimate_years_to_profit(self, property_id, property_details, data_processor):
        """Estimate years to profitability based on historical price trends"""
        purchase_price = property_details['purchase_price']
        purchase_year = property_details['purchase_year']

        # Get historical price data for similar properties in the same area
        property_info = data_processor.datasets['property_listings'][
            data_processor.datasets['property_listings']['zpid'] == property_id]

        if property_info.empty:
            # Default to recommended holding period if no property info available
            return property_details['recommended_holding_period']

        # Get price history for this property or similar properties
        price_history = data_processor.datasets['price_history']

        # Try to get this property's specific price history
        property_price_history = price_history[price_history['zpid'] == property_id]

        # If no specific history, try to get similar properties (e.g., same city/state)
        if len(property_price_history) < 2:  # Need at least 2 points to calculate trend
            # Get city and state
            city = property_info['city'].values[0] if 'city' in property_info.columns else None
            state = property_info['state'].values[0] if 'state' in property_info.columns else None

            # Find similar properties
            similar_properties = data_processor.datasets['property_listings'][
                (data_processor.datasets['property_listings']['city'] == city) &
                (data_processor.datasets['property_listings']['state'] == state)
            ]['zpid'].unique()

            # Get price history for similar properties
            property_price_history = price_history[price_history['zpid'].isin(similar_properties)]

        # Calculate average annual appreciation rate from historical data
        if len(property_price_history) >= 2:
            property_price_history['dateOfEvent'] = pd.to_datetime(property_price_history['dateOfEvent'])
            property_price_history = property_price_history.sort_values('dateOfEvent')

            # Group by year and get average price
            yearly_prices = property_price_history.groupby(
                property_price_history['dateOfEvent'].dt.year)['price'].mean()

            if len(yearly_prices) >= 2:
                # Calculate year-over-year growth rates
                yearly_growth_rates = yearly_prices.pct_change().dropna()

                # Get average annual growth rate
                avg_growth_rate = yearly_growth_rates.mean()

                # Ensure we have a valid growth rate
                if pd.notnull(avg_growth_rate) and avg_growth_rate > 0:
                    # Calculate years to profit based on growth rate
                    # Formula: FV = PV * (1 + r)^t â†’ solve for t
                    # t = log(FV/PV) / log(1 + r)
                    # We want FV to be at least 10% more than PV for profit (after costs)
                    target_value = purchase_price * 1.1  # 10% profit
                    years_to_profit = np.log(target_value / purchase_price) / np.log(1 + avg_growth_rate)
                    return max(1, years_to_profit)  # Minimum 1 year

        # Default fallback: use recommended holding period
        return property_details['recommended_holding_period']

class RealEstateDataProcessor:
    """Class to handle real estate data loading, processing and feature engineering"""

    def __init__(self, data_dir='./'):
        """Initialize the data processor

        Args:
            data_dir (str): Directory containing the dataset files
        """
        self.data_dir = data_dir
        self.scaler = None
        self.datasets = {}
        self.property_sets = {}  # For train/val/test/prod splits
        self.last_updated_years = {}  # Track when each property was last updated
        self.property_values_by_year = {}  # Track property values by year

    def load_datasets(self):
        """Load all dataset files from self.data_dir.

        Each CSV is passed through reduce_memory_usage() and stored in
        self.datasets. Afterwards price_history['dateOfEvent'] is parsed to
        datetimes, property_listings gains a 'year' column taken from
        'lastUpdated', and self.years is set to the sorted price-history years
        from 2000 onwards.

        Returns:
            bool: True on success, False when a file is missing.
        """
        print("Loading datasets...")

        csv_files = {
            'mortgage_info': 'listing_mortgage_info.csv',
            'nearby_homes': 'listing_nearby_homes.csv',
            'price_history': 'listing_price_history.csv',
            'schools_info': 'listing_schools_info.csv',
            'subtype': 'listing_subtype.csv',
            'tax_info': 'listing_tax_info.csv',
            'property_listings': 'property_listings.csv',
        }

        try:
            self.datasets = {}
            for name, filename in csv_files.items():
                frame = pd.read_csv(os.path.join(self.data_dir, filename))
                self.datasets[name] = reduce_memory_usage(frame)
        except FileNotFoundError as e:
            print(f"Error loading datasets: {e}")
            print("Please ensure all required CSV files are in the specified directory.")
            return False

        print("All datasets loaded successfully!")
        print(f"Main property dataset shape: {self.datasets['property_listings'].shape}")

        # Extract historical years from price_history
        price_history = self.datasets['price_history']
        price_history['dateOfEvent'] = pd.to_datetime(price_history['dateOfEvent'])
        event_years = price_history['dateOfEvent'].dt.year

        # Extract current property years from lastUpdated
        listings = self.datasets['property_listings']
        listings['year'] = pd.to_datetime(listings['lastUpdated']).dt.year

        # Use all years from 2000 onwards; dropna() skips unparseable dates
        # and int() keeps the entries plain integers
        self.years = sorted(int(year) for year in event_years.dropna().unique() if year >= 2000)

        # min()/max() raise on an empty list, so only report a span when
        # there is one
        if self.years:
            print(f"Dataset spans years: {min(self.years)} to {max(self.years)}")
        else:
            print("Warning: no price history events from 2000 onwards were found.")
        print(f"Total years available: {len(self.years)}")

        return True


    def split_data_by_properties(self):
        """Split data by properties while maintaining the full timeline
        Uses property-based split instead of time-based split.
        Only considers 30% of properties for all processing.
        """
        if not self.datasets:
            print("Datasets not loaded. Call load_datasets() first.")
            return

        # Get all unique property IDs
        all_properties = self.datasets['property_listings']['zpid'].unique()
        print(f"Total unique properties: {len(all_properties)}")

        # Shuffle the properties for random assignment
        np.random.shuffle(all_properties)
        
        # Select only 30% of properties for consideration
        considered_count = int(len(all_properties) * 0.3)
        considered_properties = all_properties[:considered_count]
        print(f"Considering only {considered_count} properties ({considered_count/len(all_properties):.1%} of total)")
        
        # Split the considered 30% into train (60%), validation (15%), test (15%), and production (10%)
        train_size = int(len(considered_properties) * 0.15)
        val_size = int(len(considered_properties) * 0.15)
        test_size = int(len(considered_properties) * 0.15)

        self.property_sets = {
            'train': considered_properties[:train_size],
            'val': considered_properties[train_size:train_size + val_size],
            'test': considered_properties[train_size + val_size:train_size + val_size + test_size],
            'prod': considered_properties[train_size + val_size + test_size:]
        }

        # Print statistics
        for split, props in self.property_sets.items():
            print(f"{split} set: {len(props)} properties ({len(props)/len(all_properties):.1%} of total)")

        return self.property_sets


    def engineer_features(self):
        """Engineer features with focus on historical price data integration"""
        if not self.datasets or not self.property_sets:
            print("Datasets not loaded or not split. Call load_datasets() and split_data_by_properties() first.")
            return

        print("Engineering features with historical data integration...")

        # Get all properties from our splits (the 30% considered)
        considered_properties = np.concatenate(list(self.property_sets.values()))
        
        # 1. Start with price history to create a base DataFrame with historical data
        price_history = self.datasets['price_history'].copy()
        # Filter to only include our considered properties
        price_history = price_history[price_history['zpid'].isin(considered_properties)]
        
        price_history['dateOfEvent'] = pd.to_datetime(price_history['dateOfEvent'])
        price_history['year'] = price_history['dateOfEvent'].dt.year

        # Filter for years 2000 and later
        price_history = price_history[price_history['year'] >= 2000]

        # Group by zpid and year to get price per year (if multiple events per year)
        yearly_prices = price_history.groupby(['zpid', 'year'])['price'].mean().reset_index()

        # Calculate year-over-year price changes
        yearly_prices = yearly_prices.sort_values(['zpid', 'year'])
        yearly_prices['prev_price'] = yearly_prices.groupby('zpid')['price'].shift(1)
        yearly_prices['price_yoy_change'] = (yearly_prices['price'] - yearly_prices['prev_price']) / yearly_prices['prev_price']

        # 2. Get property information from property_listings
        properties = self.datasets['property_listings'].copy()
        # Filter to only include considered properties
        properties = properties[properties['zpid'].isin(considered_properties)]
        
        properties['lastUpdated'] = pd.to_datetime(properties['lastUpdated'])
        properties['year_updated'] = properties['lastUpdated'].dt.year

        # 3. Create a base DataFrame with all property-year combinations
        # Use only our considered properties
        all_properties = considered_properties
        
        # Get all years from 2000 to present
        all_years = list(range(2000, max(properties['year_updated'].max(),
                                        price_history['year'].max()) + 1))

        # Create a cross-product of properties and years
        property_years = []
        for prop in all_properties:
            for year in all_years:
                property_years.append({'zpid': prop, 'year': year})

        # Create base DataFrame
        df = pd.DataFrame(property_years)

        # 4. Merge property information
        # First get the latest property info
        latest_properties = properties.sort_values('lastUpdated').groupby('zpid').last().reset_index()

        # Merge with base DataFrame
        df = pd.merge(df, latest_properties[['zpid', 'price', 'livingArea', 'bedrooms', 'bathrooms',
                                          'yearBuilt', 'homeType', 'homeStatus', 'state',
                                          'pageViewCount', 'favoriteCount']],
                    on='zpid', how='left')

        # 5. Merge historical prices
        df = pd.merge(df, yearly_prices[['zpid', 'year', 'price_yoy_change']],
                    on=['zpid', 'year'], how='left')

        # For historical years, update property price using the price from price_history
        historical_prices = pd.merge(df[['zpid', 'year']],
                                    yearly_prices[['zpid', 'year', 'price']],
                                    on=['zpid', 'year'], how='left')

        # Update prices where historical data exists
        mask = historical_prices['price'].notna()
        if mask.any():
            df.loc[mask, 'price'] = historical_prices.loc[mask, 'price'].values

        # 6. Add basic features
        df.loc[:, 'price_per_sqft'] = df['price'] / df['livingArea']
        df.loc[:, 'age'] = df['year'] - df['yearBuilt']
        df.loc[:, 'bed_bath_ratio'] = df['bedrooms'] / df['bathrooms']

        # 7. Add tax and mortgage features
        tax_info = self.datasets['tax_info'][['zpid', 'taxPaid', 'valueIncreaseRate']].copy()
        # Filter to only include considered properties
        tax_info = tax_info[tax_info['zpid'].isin(considered_properties)]
        df = pd.merge(df, tax_info, on='zpid', how='left')

        mortgage_info = self.datasets['mortgage_info'][['zpid', 'rate']].copy()
        # Filter to only include considered properties
        mortgage_info = mortgage_info[mortgage_info['zpid'].isin(considered_properties)]
        df = pd.merge(df, mortgage_info, on='zpid', how='left')

        # 8. Add school ratings
        schools = self.datasets['schools_info'][['zpid', 'schoolRating']].copy()
        # Filter to only include considered properties
        schools = schools[schools['zpid'].isin(considered_properties)]
        school_ratings = schools.groupby('zpid')['schoolRating'].mean().reset_index()
        school_ratings.rename(columns={'schoolRating': 'avg_school_rating'}, inplace=True)
        df = pd.merge(df, school_ratings, on='zpid', how='left')

        # 9. Add nearby home comparison
        nearby = self.datasets['nearby_homes'][['zpid', 'priceComp']].copy()
        # Filter to only include considered properties
        nearby = nearby[nearby['zpid'].isin(considered_properties)]
        nearby_avg_price = nearby.groupby('zpid')['priceComp'].mean().reset_index()
        nearby_avg_price.rename(columns={'priceComp': 'nearby_avg_price'}, inplace=True)
        df = pd.merge(df, nearby_avg_price, on='zpid', how='left')
        df.loc[:, 'price_vs_nearby'] = df['price'] / df['nearby_avg_price']
        
        # 10. One-hot encoding
        for col in ['homeType', 'homeStatus', 'state']:
            if col in df.columns:
                top_values = df[col].value_counts().nlargest(5).index
                for val in top_values:
                    df.loc[:, f'{col}_{val}'] = (df[col] == val).astype('int8')

        # Drop original categorical columns to save memory
        df = df.drop(['homeType', 'homeStatus', 'state'], axis=1, errors='ignore')

        # 11. Handle missing values
        df.fillna({
            'price_yoy_change': 0,
            'valueIncreaseRate': df['valueIncreaseRate'].median(),
            'taxPaid': df['taxPaid'].median(),
            'rate': 0.05,  # Default mortgage rate
            'avg_school_rating': df['avg_school_rating'].median(),
            'nearby_avg_price': df['nearby_avg_price'].median(),
            'price_vs_nearby': 1,
            'pageViewCount': 0,
            'favoriteCount': 0
        }, inplace=True)

        # 12. Store property values by year for reward calculation
        for year in df['year'].unique():
            year_data = df[df['year'] == year][['zpid', 'price']]
            self.property_values_by_year[year] = dict(zip(year_data['zpid'].values, year_data['price'].values))

        # 13. Final cleanup
        df.replace([np.inf, -np.inf], np.finfo('float32').max, inplace=True)

        # Update main dataframe
        self.datasets['processed_data'] = df

        print(f"Feature engineering complete. Final dataframe shape: {df.shape}")
        print(f"Years available in processed data: {sorted(df['year'].unique())}")
        return df


    def prepare_state_features(self):
        """Prepare the state features for the DQN model.

        Selects the feature columns from self.datasets['processed_data'],
        standardizes the numerical ones with a scaler fitted on the training
        properties only, and stores the result in
        self.datasets['state_features'].

        Returns:
            tuple: (features DataFrame including 'zpid' and 'year',
            number of state features), or None when features have not been
            engineered yet.
        """
        if 'processed_data' not in self.datasets:
            print("Features not engineered. Call engineer_features() first.")
            return

        df = self.datasets['processed_data']

        # Check for and add default values for missing columns
        missing_columns = {
            'rate': 0.05,  # Default mortgage rate
            'pageViewCount': 0,  # Default page views
            'favoriteCount': 0   # Default favorites
        }

        for col, default_val in missing_columns.items():
            if col not in df.columns:
                print(f"Warning: Column '{col}' not found, adding with default value {default_val}")
                df[col] = default_val

        # Feature selection; the order defines the layout of the state vector
        feature_columns = [
            'price', 'livingArea', 'bedrooms', 'bathrooms', 'yearBuilt', 'age',
            'price_per_sqft', 'bed_bath_ratio', 'price_yoy_change', 'taxPaid',
            'valueIncreaseRate', 'rate', 'avg_school_rating', 'nearby_avg_price',
            'price_vs_nearby', 'pageViewCount', 'favoriteCount'
        ]
        # Add the one-hot encoded columns for homeType, homeStatus, and state
        one_hot_columns = [col for col in df.columns if col.startswith(('homeType_', 'homeStatus_', 'state_'))]
        feature_columns.extend(one_hot_columns)

        # Take an explicit copy: the scaled values are written into this frame
        # below, and it must not be a view/slice of processed_data
        features_df = df[feature_columns + ['zpid', 'year']].copy()

        # Columns that get standardized.
        # NOTE(review): 'yearBuilt' is a feature but is not in this list, so it
        # reaches the model unscaled - confirm whether that is intended.
        numerical_features = [
            'price', 'livingArea', 'bedrooms', 'bathrooms', 'age',
            'price_per_sqft', 'bed_bath_ratio', 'price_yoy_change', 'taxPaid',
            'valueIncreaseRate', 'rate', 'avg_school_rating', 'nearby_avg_price',
            'price_vs_nearby', 'pageViewCount', 'favoriteCount'
        ]

        # Fit scaler on training data only to prevent data leakage
        is_train = features_df['zpid'].isin(self.property_sets['train'])
        self.scaler = StandardScaler()
        self.scaler.fit(features_df.loc[is_train, numerical_features])

        # Transform all features
        features_df[numerical_features] = self.scaler.transform(features_df[numerical_features])

        # Store the prepared features
        self.datasets['state_features'] = features_df

        # Return the feature dimension for the DQN model
        self.state_size = len(feature_columns)
        print(f"State features prepared. State dimension: {self.state_size}")

        return features_df, self.state_size

    def get_properties_for_year(self, year, split):
        """Get properties available in a specific year for a specific data split"""
        if 'state_features' not in self.datasets:
            print("State features not prepared. Call prepare_state_features() first.")
            return None

        # Get properties for the given split
        split_properties = self.property_sets[split]

        # Get data for the given year filtered by split properties
        year_data = self.datasets['state_features'][
            (self.datasets['state_features']['year'] == year) &
            (self.datasets['state_features']['zpid'].isin(split_properties))
        ]

        return year_data

    def get_property_state(self, property_id, year):
        """Get the state representation for a specific property and year"""
        if 'state_features' not in self.datasets:
            print("State features not prepared. Call prepare_state_features() first.")
            return None

        # Get data for the specific property and year
        property_data = self.datasets['state_features'][
            (self.datasets['state_features']['zpid'] == property_id) &
            (self.datasets['state_features']['year'] == year)
        ]

        if property_data.empty:
            return None

        # Return state features (excluding zpid and year)
        state_features = property_data.drop(['zpid', 'year'], axis=1).values[0]

        return state_features

    def get_property_value(self, property_id, year):
        """Get the value of a property in a specific year"""
        return self.property_values_by_year.get(year, {}).get(property_id, None)


def reduce_memory_usage(df):
    """Reduce the memory usage of a DataFrame by downcasting its columns.

    The frame is modified in place and also returned. Per column:

    * categorical columns are left untouched;
    * ``int*`` columns are cast to the narrowest of int8/int16/int32 that
      holds every value (bounds are inclusive, so a column is never widened);
    * ``float*`` columns wider than 32 bits are cast to float32 when all
      values fit its range. float16 is deliberately never used: it keeps only
      about 3 significant decimal digits, so values such as 50001.0 would be
      silently rounded to 50000.0;
    * object columns whose number of distinct values is below half the row
      count become ``category``; nulls are first replaced by the label
      'Unknown'.

    Memory usage before and after is printed, along with the reduction in
    percent.

    Args:
        df: The pandas DataFrame to optimize.

    Returns:
        The same DataFrame object, with downcast columns.
    """
    start_mem = df.memory_usage().sum() / 1024**2
    print(f'Memory usage before optimization: {start_mem:.2f} MB')

    # Integer targets, narrowest first, so the first fit is the smallest one.
    int_candidates = (np.int8, np.int16, np.int32)
    float32_limits = np.finfo(np.float32)

    for col in df.columns:
        col_type = df[col].dtype

        # isinstance check replaces the deprecated pd.api.types.is_categorical_dtype.
        if isinstance(col_type, pd.CategoricalDtype):
            continue

        if col_type == object:
            # Low-cardinality object columns are cheaper as categoricals.
            if df[col].nunique() < df.shape[0] * 0.5:
                if df[col].isna().any():
                    # Give nulls an explicit label before categorizing.
                    df[col] = df[col].astype(object).fillna('Unknown').astype('category')
                else:
                    df[col] = df[col].astype('category')
            continue

        type_name = str(col_type)
        is_int = type_name.startswith('int')
        is_float = type_name.startswith('float')
        if not (is_int or is_float):
            # Booleans, datetimes, etc. are not downcast; skip the min/max scan.
            continue

        # Range of the non-null values; an empty or all-null column is treated as [0, 0].
        non_null_values = df[col].dropna()
        if len(non_null_values) > 0:
            c_min = non_null_values.min()
            c_max = non_null_values.max()
        else:
            c_min = 0
            c_max = 0

        if is_int:
            for candidate in int_candidates:
                limits = np.iinfo(candidate)
                if limits.min <= c_min and c_max <= limits.max:
                    df[col] = df[col].astype(candidate)
                    break
        elif (col_type.itemsize > 4
              and c_min > float32_limits.min
              and c_max < float32_limits.max):
            df[col] = df[col].astype(np.float32)

    end_mem = df.memory_usage().sum() / 1024**2
    print(f'Memory usage after optimization: {end_mem:.2f} MB')
    # Guard the percentage against a zero-byte starting footprint.
    reduction = 100 * (start_mem - end_mem) / start_mem if start_mem > 0 else 0.0
    print(f'Reduced by {reduction:.1f}%')

    return df

def train_agent(agent, data_processor, split='train', num_epochs=5, initial_cash=1000000):
    """Train the agent by replaying the yearly market of one data split.

    Each epoch resets the agent's portfolio, cash and net-worth history, then
    walks ``data_processor.years`` in order. Within a year the agent first
    decides whether to sell holdings that reached their recommended holding
    period, then whether to buy each property listed for that year. Every
    executed sale or purchase is passed to ``agent.step`` as one transition.

    Args:
        agent: The agent to train. Must provide ``act``, ``step``,
            ``update_portfolio``, ``record_net_worth`` and
            ``display_portfolio_details`` plus the ``portfolio``, ``cash``
            and ``net_worth_history`` attributes.
        data_processor: Source of ``years``, ``get_properties_for_year``,
            ``get_property_state`` and ``get_property_value``.
        split (str): Name of the data split to train on.
        num_epochs (int): Number of passes over the full range of years.
        initial_cash: Cash balance the agent starts every epoch with.

    Returns:
        list: One entry per epoch, the final net worth minus ``initial_cash``.
    """
    print(f"Training agent on {split} split for {num_epochs} epochs...")

    years = data_processor.years
    last_year_index = len(years) - 1
    returns_history = []

    for epoch in range(num_epochs):
        print(f"Epoch {epoch+1}/{num_epochs}")

        # Every epoch starts from an empty portfolio and the full cash balance.
        agent.portfolio = {}
        agent.cash = initial_cash
        agent.net_worth_history = []

        for i, year in enumerate(tqdm(years, desc="Processing years")):
            year_data = data_processor.get_properties_for_year(year, split)

            if year_data is None or year_data.empty:
                print(f"No data available for year {year}, split {split}")
                continue

            # Transitions recorded in the final year end the episode.
            done = (i == last_year_index)

            # Properties already traded this year are not considered again.
            acted_properties = set()

            # Snapshot the matured holdings first: selling changes the
            # portfolio, which must not happen while iterating over it.
            properties_to_sell = [
                prop_id
                for prop_id, details in agent.portfolio.items()
                if year - details['purchase_year'] >= details['recommended_holding_period']
            ]

            for prop_id in properties_to_sell:
                current_value = data_processor.get_property_value(prop_id, year)
                if current_value is None:
                    continue

                prop_state = data_processor.get_property_state(prop_id, year)
                if prop_state is None:
                    continue

                # In this phase action 1 means "sell", 0 means "keep holding".
                action = agent.act(prop_state)
                if action != 1:
                    continue

                # Read the purchase price BEFORE selling: the holding is
                # presumably removed from the portfolio by the sale, after
                # which the lookup would fall back to the sale price and
                # distort the reward.
                purchase_price = agent.portfolio.get(prop_id, {}).get('purchase_price', current_value)
                profit = agent.update_portfolio(prop_id, 'sell', current_value, year)

                # Reward is the profit relative to what was paid.
                reward = profit / purchase_price if purchase_price > 0 else 0

                # Simplification: the next state is the same as the current state.
                agent.step(prop_state, np.array([action]), reward, prop_state, done)
                acted_properties.add(prop_id)

            # Only the ids are needed, so iterate the column directly instead
            # of iterrows(), which is slow and may upcast the ids to float.
            for prop_id in year_data['zpid']:
                if prop_id in acted_properties:
                    continue

                prop_state = data_processor.get_property_state(prop_id, year)
                if prop_state is None:
                    continue

                # In this phase action 1 means "buy", 0 means "pass".
                action = agent.act(prop_state)
                if action != 1:
                    continue

                property_price = data_processor.get_property_value(prop_id, year)
                affordable = property_price is not None and agent.cash >= property_price
                if not affordable:
                    continue

                # Learn only from purchases that actually went through,
                # matching how evaluate_agent treats the success flag.
                if not agent.update_portfolio(prop_id, 'buy', property_price, year):
                    continue

                # Small fixed penalty standing in for transaction costs.
                reward = -0.01

                # Simplification: the next state is the same as the current state.
                agent.step(prop_state, np.array([action]), reward, prop_state, done)
                acted_properties.add(prop_id)

            # NOTE(review): only action-1 transitions ever reach agent.step, so
            # "hold/pass" decisions are never stored - confirm this is intended.

            # Value the portfolio at this year's prices; holdings without a
            # known value for the year are left out.
            current_property_values = {}
            for prop_id in agent.portfolio:
                value = data_processor.get_property_value(prop_id, year)
                if value is not None:
                    current_property_values[prop_id] = value

            net_worth = agent.record_net_worth(current_property_values)
            print(f"Year {year} - Net Worth: ${net_worth:,.2f}, Cash: ${agent.cash:,.2f}, Properties: {len(agent.portfolio)}")

        if agent.net_worth_history:
            epoch_return = agent.net_worth_history[-1] - initial_cash
        else:
            # No year had data for this split, so no net worth was recorded;
            # the agent still holds whatever cash it has.
            epoch_return = agent.cash - initial_cash
        returns_history.append(epoch_return)
        print(f"Epoch {epoch+1} Return: ${epoch_return:,.2f}")

        # Display detailed portfolio information after each epoch
        agent.display_portfolio_details(data_processor)

    return returns_history

def evaluate_agent(agent, data_processor, split='test', initial_cash=1000000):
    """Evaluate the agent on the specified data split.

    The agent's portfolio, cash and net-worth history are reset, then the
    years of ``data_processor.years`` are replayed in order with the agent
    acting in evaluation mode (``agent.act(..., eval_mode=True)``). Each year
    the agent may sell holdings that reached their recommended holding period
    and may buy properties listed for that year. A summary is printed and the
    net-worth curve is saved to ``agent_net_worth_<split>.png`` and shown.

    Args:
        agent (RealEstateAgent): The trained agent
        data_processor (RealEstateDataProcessor): The data processor
        split (str): The data split to use ('val', 'test', 'prod')
        initial_cash: Cash balance the agent starts the evaluation with.

    Returns:
        dict: ``final_net_worth``, ``total_return`` (final net worth minus
        ``initial_cash``), ``roi`` (total return as a percentage of
        ``initial_cash``) and ``transactions`` (list of dicts, one per
        executed buy or sell).
    """
    print(f"Evaluating agent on {split} split...")

    years = data_processor.years

    # Reset agent's portfolio for evaluation
    agent.portfolio = {}
    agent.cash = initial_cash
    agent.net_worth_history = []

    transactions = []
    # Years for which a net worth was recorded; years without data are
    # skipped, so this can be shorter than `years`.
    recorded_years = []

    for year in tqdm(years, desc="Evaluating years"):
        year_data = data_processor.get_properties_for_year(year, split)

        if year_data is None or year_data.empty:
            print(f"No data available for year {year}, split {split}")
            continue

        # Properties already traded this year are not considered again.
        acted_properties = set()

        # Snapshot the matured holdings first: selling changes the portfolio,
        # which must not happen while iterating over it.
        properties_to_sell = [
            prop_id
            for prop_id, details in agent.portfolio.items()
            if year - details['purchase_year'] >= details['recommended_holding_period']
        ]

        for prop_id in properties_to_sell:
            current_value = data_processor.get_property_value(prop_id, year)
            if current_value is None:
                continue

            prop_state = data_processor.get_property_state(prop_id, year)
            if prop_state is None:
                continue

            # In this phase action 1 means "sell", 0 means "keep holding".
            action = agent.act(prop_state, eval_mode=True)
            if action != 1:
                continue

            # Capture the purchase year before the sale updates the portfolio.
            purchase_year = agent.portfolio[prop_id]['purchase_year']
            profit = agent.update_portfolio(prop_id, 'sell', current_value, year)

            transactions.append({
                'year': year,
                'action': 'sell',
                'property_id': prop_id,
                'price': current_value,
                'profit': profit,
                'holding_period': year - purchase_year
            })
            acted_properties.add(prop_id)

        # Only the ids are needed, so iterate the column directly instead of
        # iterrows(), which is slow and may upcast the ids to float.
        for prop_id in year_data['zpid']:
            if prop_id in acted_properties:
                continue

            prop_state = data_processor.get_property_state(prop_id, year)
            if prop_state is None:
                continue

            # In this phase action 1 means "buy", 0 means "pass".
            action = agent.act(prop_state, eval_mode=True)
            if action != 1:
                continue

            property_price = data_processor.get_property_value(prop_id, year)
            affordable = property_price is not None and agent.cash >= property_price
            if not affordable:
                continue

            # Record the purchase only when the portfolio accepted it.
            if agent.update_portfolio(prop_id, 'buy', property_price, year):
                transactions.append({
                    'year': year,
                    'action': 'buy',
                    'property_id': prop_id,
                    'price': property_price
                })
                acted_properties.add(prop_id)

        # Value the portfolio at this year's prices; holdings without a known
        # value for the year are left out.
        current_property_values = {}
        for prop_id in agent.portfolio:
            value = data_processor.get_property_value(prop_id, year)
            if value is not None:
                current_property_values[prop_id] = value

        net_worth = agent.record_net_worth(current_property_values)
        recorded_years.append(year)
        print(f"Year {year} - Net Worth: ${net_worth:,.2f}, Cash: ${agent.cash:,.2f}, Properties: {len(agent.portfolio)}")

    history = agent.net_worth_history
    # If no year had data nothing was recorded; the agent then simply still
    # holds its cash, so use that instead of indexing an empty history.
    final_net_worth = history[-1] if len(history) > 0 else agent.cash
    final_return = final_net_worth - initial_cash
    roi = (final_return / initial_cash) * 100 if initial_cash else 0.0  # ROI as percentage

    print(f"\nEvaluation Results on {split} split:")
    print(f"Final Net Worth: ${final_net_worth:,.2f}")
    print(f"Total Return: ${final_return:,.2f}")
    print(f"ROI: {roi:.2f}%")
    print(f"Total Transactions: {len(transactions)}")

    if transactions:
        sells = [t for t in transactions if t['action'] == 'sell']
        buy_count = len(transactions) - len(sells)

        print(f"Buy Transactions: {buy_count}")
        print(f"Sell Transactions: {len(sells)}")

        # Averages exist only when something was sold; buy-only runs have no
        # 'profit' or 'holding_period' fields at all.
        if sells:
            sells_df = pd.DataFrame(sells)
            avg_profit = sells_df['profit'].mean()
            avg_holding = sells_df['holding_period'].mean()
            print(f"Average Profit per Sale: ${avg_profit:,.2f}")
            print(f"Average Holding Period: {avg_holding:.1f} years")

    # Plot against the years that were actually recorded so skipped years do
    # not shift the x-axis. Falls back to the leading years if the history
    # length differs (assumes record_net_worth appends one entry per call).
    if len(recorded_years) == len(history):
        plot_years = recorded_years
    else:
        plot_years = years[:len(history)]

    fig = plt.figure(figsize=(12, 6))
    plt.plot(plot_years, history)
    plt.title(f'Agent Net Worth Over Time ({split} split)')
    plt.xlabel('Year')
    plt.ylabel('Net Worth ($)')
    plt.grid(True)
    plt.savefig(f'agent_net_worth_{split}.png')
    plt.show()
    # Release the figure so repeated evaluations do not accumulate open figures.
    plt.close(fig)

    return {
        'final_net_worth': final_net_worth,
        'total_return': final_return,
        'roi': roi,
        'transactions': transactions
    }

def main():
    """Run the full pipeline: load data, build features, train, then evaluate.

    Stops early (after printing a message) when the datasets cannot be
    loaded. Otherwise the agent is trained on the 'train' split for five
    epochs and evaluated on the 'val', 'test' and 'prod' splits, in that
    order, and a per-split comparison of ROI and final net worth is printed.
    """
    data_processor = RealEstateDataProcessor(data_dir='./')

    # Nothing else can run without the raw datasets.
    if not data_processor.load_datasets():
        print("Failed to load datasets. Exiting.")
        return

    # Data preparation: property split, feature engineering, state features.
    data_processor.split_data_by_properties()
    data_processor.engineer_features()
    print("TWAS SPLIT")  # progress marker
    _features, state_size = data_processor.prepare_state_features()
    print("TWAS PREPARED")  # progress marker

    # Two actions: 0 = Hold/Don't Buy, 1 = Buy/Sell
    action_size = 2
    agent = RealEstateAgent(state_size=state_size, action_size=action_size)
    print("HEEEEEEEEEEEERE")  # progress marker

    train_agent(agent, data_processor, split='train', num_epochs=5)

    # Evaluate on each held-out split in turn; every split is always run.
    results = {
        split_name: evaluate_agent(agent, data_processor, split=split_name)
        for split_name in ('val', 'test', 'prod')
    }

    print("\nPerformance Comparison:")
    print("Split\tROI\tFinal Net Worth")
    for split_name, metrics in results.items():
        print(f"{split_name}\t{metrics['roi']:.2f}%\t${metrics['final_net_worth']:,.2f}")

    print("\nTraining complete!")

# Run the full workflow only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
