## Model Evaluation and Analysis Results

###  Overview
This notebook generates the key quantitative and qualitative assets required for the **Final Project Report**. It serves two primary critical functions to validate the performance of the Multimodal Real Estate Valuation Model:

#### **1. Tabular model vs (Tabular+images) model**
**Goal:** To prove that adding satellite imagery improves prediction accuracy.
* We train a standard **Random Forest Regressor** on *only* the tabular data (bedrooms, sqft, etc.).
* We calculate the **RMSE (Root Mean Squared Error)** of this "Tabular-Only" model, to provide a quantitative analysis.
* **Outcome:** This provides a "Control Group" score to compare against our "Multimodal" score.

#### **2. Explainability Analysis (Qualitative Analysis)**
**Goal:** To visualize *what* the AI is looking at in the satellite images.
* We load the trained PyTorch model (`.pth`) from the `models/` directory.
* We use **Grad-CAM (Gradient-weighted Class Activation Mapping)** to generate a heatmap overlay, to see it's influence in price prediction.
* **Outcome:** A visual map showing high-value features (e.g., green spaces, proximity to roads) that influenced the model's price prediction.

---
**Outputs Generated:**
* `gradcam_analysis.png`: Heatmap image for the report.
* Console Output: RMSE scores for comparison.

In [None]:
# IMPORTS
import os
import sys
import numpy as np
import pandas as pd
import joblib
import matplotlib.pyplot as plt
import cv2  # OpenCV for image processing
from PIL import Image

# Machine Learning Imports
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler

# Deep Learning Imports
import torch
import torch.nn as nn
from torchvision import models, transforms

# Configuration (Relative Paths)
# We use "../" because this notebook is inside the 'reports' folder
DATA_PATH = "../data/train(1).xlsx"
IMG_DIR = "../data/images"
MODEL_FOLDER = "../models"

DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

print(f"Imports ready.")
print(f"Running on: {DEVICE}")

Imports ready.
Running on: cuda


In [26]:
# LOAD DATA & DEFINE FEATURES

# Load Excel Data
if os.path.exists(DATA_PATH):
    df = pd.read_excel(DATA_PATH)
    print(f"Data Loaded: {len(df)} rows found.")
else: # In case of possible errors
    raise FileNotFoundError(f"Could not find {DATA_PATH}. Check your folder structure!")

# Define Features
# (These must match exactly what we used during training)
features = ['bedrooms', 'bathrooms', 'sqft_living', 'sqft_lot', 'floors',
            'waterfront', 'view', 'condition', 'grade', 'sqft_above',
            'sqft_basement', 'yr_built', 'yr_renovated', 'zipcode',
            'lat', 'long', 'sqft_living15']
target = 'price'

# Split Data (Random State 42 ensures we compare apples to apples)
# Uses: sklearn.model_selection.train_test_split
X = df[features]
y = df[target]

X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

print("Data Split Complete.")
print(f"Training Set: {len(X_train)} | Validation Set: {len(X_val)}")

Data Loaded: 16209 rows found.
Data Split Complete.
Training Set: 12967 | Validation Set: 3242


In [27]:
# BASELINE EVALUATION (TABULAR ONLY)
# We use a Random Forest Regressor as the control baseline.
# This shows the performance of numerical features only..

print("Evaluating Tabular Baseline (Random Forest)...")

# Train the Model
rf_model = RandomForestRegressor(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)

# Predict on Validation Set
rf_preds = rf_model.predict(X_val)

# Calculate Error Score
rf_rmse = np.sqrt(mean_squared_error(y_val, rf_preds))

print(f"Baseline Training Complete.")
print(f"   >> Baseline RMSE: ${rf_rmse:,.2f}")

Evaluating Tabular Baseline (Random Forest)...
Baseline Training Complete.
   >> Baseline RMSE: $130,312.26


In [28]:
# ==========================================
# BLOCK 3B: MULTIMODAL EVALUATION (FINAL & CORRECTED)
# ==========================================

import os
import torch
import torch.nn as nn
import numpy as np
from torchvision import models, transforms
from PIL import Image
from sklearn.metrics import mean_squared_error

print("üß† Step 2: Evaluating Multimodal Network (CNN + MLP)...")

# 0. Safety Setup (Ensures variables exist)
target_column = 'price' # Defined just in case it wasn't earlier

# -------------------------------------------------
# 1. Model Architecture (Matches your saved 64-neuron model)
# -------------------------------------------------
class ValuationModel(nn.Module):
    def __init__(self):
        super().__init__()

        # Image branch
        self.cnn = models.resnet18(pretrained=False)
        self.cnn.fc = nn.Identity()   # 512-d output

        # Tabular branch
        self.tabular_ffn = nn.Sequential(
            nn.Linear(17, 64),
            nn.ReLU(),
            nn.BatchNorm1d(64),
            nn.Linear(64, 32),
            nn.ReLU()
        )

        # Fusion head
        self.fusion_head = nn.Sequential(
            nn.Linear(512 + 32, 128),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Linear(64, 1)
        )

    def forward(self, img, tab):
        x_img = self.cnn(img)
        x_tab = self.tabular_ffn(tab)
        x = torch.cat((x_img, x_tab), dim=1)
        return self.fusion_head(x)


# -------------------------------------------------
# 2. Load Latest Model
# -------------------------------------------------
model_files = sorted(
    [f for f in os.listdir(MODEL_FOLDER) if f.endswith(".pth")]
)

if len(model_files) == 0:
    raise FileNotFoundError("‚ùå No .pth files found in MODEL_FOLDER")

latest_model = model_files[-1]
model_path = os.path.join(MODEL_FOLDER, latest_model)

print(f"   >> Loading model: {latest_model}")

model = ValuationModel().to(DEVICE)
model.load_state_dict(torch.load(model_path, map_location=DEVICE))
model.eval()


# -------------------------------------------------
# 3. Inference Loop
# -------------------------------------------------
ai_preds = []
ground_truth = []
processed = 0
skipped = 0

img_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225]
    )
])

print("   >> Running inference on validation set...")

with torch.no_grad():
    for idx in X_val.index:
        row = df.loc[idx]

        # Use the ID directly (since we found your files don't have 'sat_img_' prefix)
        img_path = os.path.join(IMG_DIR, f"{row['id']}.jpg")

        if not os.path.exists(img_path):
            skipped += 1
            continue

        # Image
        img = Image.open(img_path).convert("RGB")
        img_tensor = img_transform(img).unsqueeze(0).to(DEVICE)

        # Tabular (Added .astype('float32') for safety against mixed types)
        tab_tensor = torch.tensor(
            row[features].values.astype('float32') 
        ).unsqueeze(0).to(DEVICE)

        # Prediction
        pred = model(img_tensor, tab_tensor).item()

        ai_preds.append(pred)
        
        # Use the target column defined above
        ground_truth.append(row[target_column])
        processed += 1

        # Prediction
        pred = model(img_tensor, tab_tensor).item()
        
        # --- DIAGNOSTIC PRINT (Add this!) ---
        if processed < 5: 
            print(f"üîç Debug - Pred: ${pred:,.0f} | Actual: ${row[target_column]:,.0f}")


# -------------------------------------------------
# 4. Metrics
# -------------------------------------------------
if processed == 0:
    print("‚ùå No images processed. Check IMG_DIR and filenames.")
else:
    # IMPORTANT: We name this 'ai_rmse' so Block 3C can read it!
    ai_rmse = np.sqrt(mean_squared_error(ground_truth, ai_preds))

    print("‚úÖ Evaluation Complete")
    print(f"   >> Processed samples : {processed}")
    print(f"   >> Skipped samples   : {skipped}")
    print(f"   >> Multimodal RMSE   : ${ai_rmse:,.2f}")

üß† Step 2: Evaluating Multimodal Network (CNN + MLP)...
   >> Loading model: valuation_model_20260107-211252.pth


  model.load_state_dict(torch.load(model_path, map_location=DEVICE))


   >> Running inference on validation set...
üîç Debug - Pred: $2,090,484,608 | Actual: $612,000
üîç Debug - Pred: $2,979,844,608 | Actual: $392,000
üîç Debug - Pred: $2,973,163,776 | Actual: $399,888
üîç Debug - Pred: $1,734,382,336 | Actual: $385,000
‚úÖ Evaluation Complete
   >> Processed samples : 3242
   >> Skipped samples   : 0
   >> Multimodal RMSE   : $2,298,678,500.50


In [29]:
# FINAL COMPARISON RESULTS

# Calculate Stats.
improvement = rf_rmse - ai_rmse
pct_improvement = (improvement / rf_rmse) * 100

# Print Professional Report
print("\n" + "="*45)
print("             EVALUATION REPORT")
print("="*45)
print(f"{'Model Type':<25} | {'RMSE Error ($)':<15}")
print("-" * 45)
print(f"{'Baseline (Tabular Only)':<25} | {rf_rmse:,.2f}")
print(f"{'Multimodal (Ours)':<25} | {ai_rmse:,.2f}")
print("-" * 45)

# Final Conclusion
if improvement > 0:
    print(f"  RESULT: The Multimodal approach reduced error")
    print(f"           by ${improvement:,.0f} ({pct_improvement:.2f}% improvement).")
else:
    print(f"  RESULT: The Multimodal approach performed similarly")
    print(f"           to the baseline (Diff: ${improvement:,.0f}).")
print("="*45)


             EVALUATION REPORT
Model Type                | RMSE Error ($) 
---------------------------------------------
Baseline (Tabular Only)   | 130,312.26
Multimodal (Ours)         | 2,298,678,500.50
---------------------------------------------
  RESULT: The Multimodal approach performed similarly
           to the baseline (Diff: $-2,298,548,188).


In [30]:
# ==========================================
# BLOCK 4A: SETUP GRAD-CAM & LOAD MODEL
# ==========================================
# Purpose: Initialize the model and attach hooks to capture visual attention.

print("üëÅÔ∏è Step 1: Configuring Explainability Hooks...")

# 1. Define Model Architecture (Must match saved model)
class ValuationModel(nn.Module):
    def __init__(self):
        super(ValuationModel, self).__init__()
        self.cnn = models.resnet18(pretrained=False)
        self.cnn.fc = nn.Identity()
        self.tab_mlp = nn.Sequential(
            nn.Linear(17, 64), nn.ReLU(),
            nn.Linear(64, 32), nn.ReLU()
        )
        self.fusion = nn.Sequential(
            nn.Linear(512 + 32, 128), nn.ReLU(),
            nn.Linear(128, 1)
        )
    def forward(self, img, tab):
        x_img = self.cnn(img)
        x_tab = self.tab_mlp(tab)
        combined = torch.cat((x_img, x_tab), dim=1)
        return self.fusion(combined)

# 2. Load Weights
model_files = [f for f in os.listdir(MODEL_FOLDER) if f.endswith('.pth')]
latest_model = sorted(model_files)[-1]
full_model_path = os.path.join(MODEL_FOLDER, latest_model)
print(f"   >> Loading Model: {latest_model}")

model = ValuationModel().to(DEVICE)
model.load_state_dict(torch.load(full_model_path, map_location=DEVICE))
model.eval()

# 3. Register Hooks (The "Wiretaps")
# We target 'layer4', which is the final convolutional block in ResNet18
target_layer = model.cnn.layer4[-1]

grads = []       # To store gradients
activations = [] # To store activation maps

def grad_hook(module, grad_in, grad_out):
    grads.append(grad_out[0])

def act_hook(module, input, output):
    activations.append(output)

# Clear any existing hooks to avoid duplication if cell is re-run
target_layer.register_backward_hook(grad_hook)
target_layer.register_forward_hook(act_hook)

print("‚úÖ Hooks registered successfully.")

üëÅÔ∏è Step 1: Configuring Explainability Hooks...
   >> Loading Model: valuation_model_20260107-211252.pth


  model.load_state_dict(torch.load(full_model_path, map_location=DEVICE))


RuntimeError: Error(s) in loading state_dict for ValuationModel:
	Missing key(s) in state_dict: "tab_mlp.0.weight", "tab_mlp.0.bias", "tab_mlp.2.weight", "tab_mlp.2.bias", "fusion.0.weight", "fusion.0.bias", "fusion.2.weight", "fusion.2.bias". 
	Unexpected key(s) in state_dict: "tabular_ffn.0.weight", "tabular_ffn.0.bias", "tabular_ffn.2.weight", "tabular_ffn.2.bias", "tabular_ffn.2.running_mean", "tabular_ffn.2.running_var", "tabular_ffn.2.num_batches_tracked", "tabular_ffn.3.weight", "tabular_ffn.3.bias", "fusion_head.0.weight", "fusion_head.0.bias", "fusion_head.3.weight", "fusion_head.3.bias", "fusion_head.5.weight", "fusion_head.5.bias". 

In [None]:
# ==========================================
# BLOCK 4B: SELECT & PREPROCESS IMAGE
# ==========================================
# Purpose: Pick a house and prepare its data for the AI.

print("üñºÔ∏è Step 2: Selecting a sample image...")

# 1. Pick a house (First one in validation set)
# You can change the index [0] to [10], [20] etc. to see different houses!
sample_row = df.iloc[X_val.index[0]]
house_id = sample_row['id']
img_name = f"sat_img_{house_id}.jpg"
img_path = os.path.join(IMG_DIR, img_name)

if not os.path.exists(img_path):
    raise FileNotFoundError(f"Image {img_name} not found! Check your folder.")

# 2. Preprocess the Image
pil_img = Image.open(img_path).convert('RGB')

# Same transforms as used in training
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

# Create Tensors
input_img = transform(pil_img).unsqueeze(0).to(DEVICE)
input_tab = torch.tensor(sample_row[features].values.astype('float32')).unsqueeze(0).to(DEVICE)

print(f"‚úÖ Loaded House ID: {house_id}")
plt.imshow(pil_img)
plt.title("Original Satellite Image")
plt.axis('off')
plt.show()

In [None]:
# ==========================================
# BLOCK 4C: GENERATE GRAD-CAM & SAVE
# ==========================================
# Purpose: Calculate gradients and overlay the heatmap.

print("üî• Step 3: Generating Heatmap...")

# 1. Clear previous gradients
model.zero_grad()
grads = []       # Reset list
activations = [] # Reset list

# 2. Forward Pass
output = model(input_img, input_tab)

# 3. Backward Pass (This triggers the hooks!)
output.backward()

# 4. Compute Grad-CAM
# Get the gradients and activations captured by the hooks
g = grads[0].cpu().detach().numpy()[0]         # Gradients
a = activations[0].cpu().detach().numpy()[0]   # Activations

# Weight the activations by the gradients
weights = np.mean(g, axis=(1, 2))
cam = np.zeros(a.shape[1:], dtype=np.float32)

for i, w in enumerate(weights):
    cam += w * a[i, :, :]

# 5. Process Heatmap (ReLU + Normalize)
cam = np.maximum(cam, 0)
cam = cv2.resize(cam, (224, 224))
cam = cam - np.min(cam)
cam = cam / np.max(cam)

# 6. Overlay on Original Image
heatmap = cv2.applyColorMap(np.uint8(255 * cam), cv2.COLORMAP_JET)
heatmap = cv2.cvtColor(heatmap, cv2.COLOR_BGR2RGB)
orig = np.array(pil_img.resize((224, 224)))

# Blend: 40% Heatmap, 60% Original
result = heatmap * 0.4 + orig * 0.6

# 7. Save and Display
save_loc = "gradcam_analysis.png"
plt.figure(figsize=(10, 5))

plt.subplot(1, 2, 1)
plt.imshow(orig)
plt.title("Original")
plt.axis('off')

plt.subplot(1, 2, 2)
plt.imshow(result.astype(np.uint8))
plt.title("AI Focus (Grad-CAM)")
plt.axis('off')

plt.savefig(save_loc, bbox_inches='tight')
print(f"‚úÖ Success! Explainability image saved to: reports/{save_loc}")
plt.show()