# Model Name: Qwen/Qwen2.5-VL-7B-Instruct
# Dataset: Mixed Image Tabular Data

In [7]:
!pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121

Defaulting to user installation because normal site-packages is not writeable
Looking in indexes: https://download.pytorch.org/whl/cu121


In [2]:
import torch
print("PyTorch Version:", torch.__version__)
print("CUDA Available:", torch.cuda.is_available())

if torch.cuda.is_available():
    print("GPU Name:", torch.cuda.get_device_name(0))
else:
    print("No GPU detected.")

PyTorch Version: 2.5.1+cu124
CUDA Available: True
GPU Name: NVIDIA A100 80GB PCIe


In [8]:
!pip install git+https://github.com/huggingface/transformers accelerate
!pip install qwen-vl-utils[decord]==0.0.8

Defaulting to user installation because normal site-packages is not writeable
Collecting git+https://github.com/huggingface/transformers
  Cloning https://github.com/huggingface/transformers to /tmp/gjenni-tmpdir-MYds2U/pip-req-build-o_hwfqll
  Running command git clone --filter=blob:none --quiet https://github.com/huggingface/transformers /tmp/gjenni-tmpdir-MYds2U/pip-req-build-o_hwfqll
  Resolved https://github.com/huggingface/transformers to commit a5c6172c81d69a6fa2c3b1340d72fc669b941dcd
  Installing build dependencies ... [?25ldone
[?25h  Getting requirements to build wheel ... [?25ldone
[?25h  Preparing metadata (pyproject.toml) ... [?25ldone
Defaulting to user installation because normal site-packages is not writeable


### Load the model and processor

In [1]:
import pandas as pd
import os
from transformers import Qwen2_5_VLForConditionalGeneration, AutoTokenizer, AutoProcessor
from qwen_vl_utils import process_vision_info
import torch

# Load the model on the GPU
model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
    "Qwen/Qwen2.5-VL-7B-Instruct", torch_dtype=torch.float16, device_map="auto"
)

# Load the processor
processor = AutoProcessor.from_pretrained("Qwen/Qwen2.5-VL-7B-Instruct")

print("Model and Processor Loaded Successfully!")

Loading checkpoint shards:   0%|          | 0/5 [00:00<?, ?it/s]

Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.


Model and Processor Loaded Successfully!


## AND Case
### Single Inference Testing

In [25]:
import pandas as pd
from PIL import Image
import torch
import re


# Load tabular data and one row
csv_path = '/mnt/data/oe215/env/guna/tabular_image_inference/data/tabular_data/mixed_table/attic_GT_IMG_AND_and_GT_IMG_OR_Labels.csv'
df = pd.read_csv(csv_path)
row = df.iloc[375]

# Load corresponding image
image_id = str(int(row['gid'])) + ".png"
image_path = f"/mnt/data/oe215/env/guna/tabular_image_inference/data/image_data/mixed/{image_id}"
image = Image.open(image_path).convert("RGB")

# ✅ Final answer extractor using last YES/NO in response
def extract_final_yes_no(text):
    text = text.strip().lower()
    text = re.sub(r"[^\w\s]", "", text)  # remove punctuation and formatting (**)
    words = text.split()
    for word in reversed(words):
        if word == "yes":
            return "yes"
        elif word == "no":
            return "no"
    return "unknown"

# Convert answer to binary
def parse_binary_from_answer(answer):
    answer = answer.strip().lower()
    if answer == "yes":
        return 1
    elif answer == "no":
        return 0
    return -1

# Tabular prompt function
def predict_from_tabular(row, processor, model):
    prompt = (
        f"Given the following building attributes:\n"
        f"- Building ID: {row['gid']}\n"
        f"- Maximum roof slope: {row['slope_max']} degrees\n"
        f"- Maximum roof height: {row['roof_height_max']} meters\n\n"
        "If the maximum roof slope is greater than or equal to 30 degrees AND the maximum roof height is greater than or equal to 4 meters, "
        "predict that the attic is used as a living space.\n\n"
        "**At the beginning of your response, output only 'Yes' or 'No'. Do not repeat the input. Do not explain.**"
    )
    inputs = processor(text=prompt, return_tensors="pt").to(model.device)
    with torch.no_grad():
        outputs = model.generate(**inputs, max_new_tokens=10)
    decoded = processor.batch_decode(outputs, skip_special_tokens=True)[0].strip()
    print("🔍 Tabular raw output:\n", decoded)
    return extract_final_yes_no(decoded)

# Dormer detection prompt
def detect_dormer(image, processor, model):
    prompt = "Does the building roof have dormers? Answer with a yes or no."
    messages = [{"role": "user", "content": [{"type": "image", "image": image}, {"type": "text", "text": prompt}]}]
    prompt_str = processor.apply_chat_template(messages, tokenize=False)
    inputs = processor(text=prompt_str, images=image, return_tensors="pt").to(model.device)
    with torch.no_grad():
        outputs = model.generate(**inputs, max_new_tokens=10)
    decoded = processor.batch_decode(outputs, skip_special_tokens=True)[0].strip()
    print("🔍 Dormer raw output:\n", decoded)
    return extract_final_yes_no(decoded)

# Run both inference stages
tabular_result = predict_from_tabular(row, processor, model)
dormer_result = detect_dormer(image, processor, model)

# Convert to binary
tabular_binary = parse_binary_from_answer(tabular_result)
dormer_binary = parse_binary_from_answer(dormer_result)

# Final prediction logic
if tabular_binary == 1 or dormer_binary == 1:
    final_prediction = 1
elif tabular_binary == 0 and dormer_binary == 0:
    final_prediction = 0
else:
    final_prediction = -1  # unknown

# Output final results
print(f"\n📐 Tabular condition (slope ≥ 30 AND height ≥ 4): {tabular_result} → {tabular_binary}")
print(f"🪟 Dormer detected: {dormer_result} → {dormer_binary}")
print(f"🏠 Final Attic Usability Prediction: {'Yes' if final_prediction == 1 else 'No' if final_prediction == 0 else 'Unknown'}")


🔍 Tabular raw output:
 Given the following building attributes:
- Building ID: 3581635.0
- Maximum roof slope: 33.02212920656211 degrees
- Maximum roof height: 5.129999999999996 meters

If the maximum roof slope is greater than or equal to 30 degrees AND the maximum roof height is greater than or equal to 4 meters, predict that the attic is used as a living space.

**At the beginning of your response, output only 'Yes' or 'No'. Do not repeat the input. Do not explain.** Yes
🔍 Dormer raw output:
 system
You are a helpful assistant.
user
Does the building roof have dormers? Answer with a yes or no.
 addCriterion
No

📐 Tabular condition (slope ≥ 30 AND height ≥ 4): yes → 1
🪟 Dormer detected: no → 0
🏠 Final Attic Usability Prediction: Yes


### Image Inference Check

In [None]:
from PIL import Image
import torch
from modelscope import Qwen2_5_VLForConditionalGeneration
from transformers import AutoProcessor
import re


# Load a test image
image_path = "/mnt/data/oe215/env/guna/tabular_image_inference/data/image_data/mixed/3581560.png"
image = Image.open(image_path).convert("RGB")

# Function to extract yes/no from final words
def extract_final_yes_no(text):
    text = text.strip().lower()
    text = re.sub(r"[^\w\s]", "", text)  # remove punctuation like ** or .
    words = text.split()
    for word in reversed(words):
        if word == "yes":
            return "yes"
        elif word == "no":
            return "no"
    return "unknown"

# Function to run dormer inference on a single image
def detect_dormer_single_image(image, processor, model):
    prompt = "D?"
    messages = [{"role": "user", "content": [{"type": "image", "image": image}, {"type": "text", "text": prompt}]}]
    prompt_str = processor.apply_chat_template(messages, tokenize=False)
    inputs = processor(text=prompt_str, images=image, return_tensors="pt").to(model.device)

    with torch.no_grad():
        outputs = model.generate(**inputs, max_new_tokens=10)

    decoded = processor.batch_decode(outputs, skip_special_tokens=True)[0].strip()
    print("🔍 Model raw output:\n", decoded)

    return extract_final_yes_no(decoded)

# Run single image inference
dormer_prediction = detect_dormer_single_image(image, processor, model)
print(f"\n🪟 Dormer Prediction: {dormer_prediction.capitalize()}")


In [29]:
from PIL import Image
import torch


# Load your test aerial image
image_path = "/mnt/data/oe215/env/guna/tabular_image_inference/data/image_data/mixed/3581585.png"
image = Image.open(image_path).convert("RGB")

# Prompt: General image description
def create_general_description_prompt():
    return (
        "Please describe the contents of this aerial image.\n"
        "Focus on the building structure, roof shape, and any notable features such as dormers, skylights, chimneys, or surrounding environment.\n"
        "Keep the response short and factual."
    )

# Function to run inference
def infer_general_description(image, processor, model):
    prompt = create_general_description_prompt()
    messages = [{"role": "user", "content": [{"type": "image", "image": image}, {"type": "text", "text": prompt}]}]
    prompt_str = processor.apply_chat_template(messages, tokenize=False)

    inputs = processor(text=prompt_str, images=image, return_tensors="pt").to(model.device)

    with torch.no_grad():
        outputs = model.generate(**inputs, max_new_tokens=100)

    decoded = processor.batch_decode(outputs, skip_special_tokens=True)[0].strip()
    return decoded

# Run the test
description = infer_general_description(image, processor, model)
print("📸 General Image Description:\n")
print(description)


📸 General Image Description:

system
You are a helpful assistant.
user
Please describe the contents of this aerial image.
Focus on the building structure, roof shape, and any notable features such as dormers, skylights, chimneys, or surrounding environment.
Keep the response short and factual.
 addCriterion
The image shows an aerial view of a large, rectangular building with a flat roof. The building has several sections, possibly indicating different wings or functional areas. There are no visible dormers, skylights, or chimneys in the image. The surrounding area includes greenery, suggesting a landscaped environment, and there appears to be a parking lot adjacent to the building.


### Batch Inference Testing

In [10]:
import os
import pandas as pd
from PIL import Image
import torch
import re
from tqdm import tqdm

# Load data
csv_path = '/mnt/data/oe215/env/guna/tabular_image_inference/data/tabular_data/mixed_table/attic_GT_IMG_AND_and_GT_IMG_OR_Labels.csv'
df = pd.read_csv(csv_path)

# Image directory
image_dir = '/mnt/data/oe215/env/guna/tabular_image_inference/data/image_data/mixed/'

# Prediction helpers
def extract_final_yes_no(text):
    text = text.strip().lower()
    text = re.sub(r"[^\w\s]", "", text)
    words = text.split()
    for word in reversed(words):
        if word == "yes":
            return "yes"
        elif word == "no":
            return "no"
    return "unknown"

def parse_binary_from_answer(answer):
    return {"yes": 1, "no": 0}.get(answer.strip().lower(), -1)

def predict_from_tabular(row):
    prompt = (
        f"Given the following building attributes:\n"
        f"- Building ID: {row['gid']}\n"
        f"- Maximum roof slope: {row['slope_max']} degrees\n"
        f"- Maximum roof height: {row['roof_height_max']} meters\n\n"
        "If the maximum roof slope is greater than or equal to 30 degrees AND the maximum roof height is greater than or equal to 4 meters, "
        "predict that the attic is used as a living space.\n\n"
        "**At the beginning of your response, output only 'Yes' or 'No'. Do not repeat the input. Do not explain.**"
    )
    inputs = processor(text=prompt, return_tensors="pt").to(model.device)
    with torch.no_grad():
        outputs = model.generate(**inputs, max_new_tokens=10)
    decoded = processor.batch_decode(outputs, skip_special_tokens=True)[0].strip()
    return extract_final_yes_no(decoded)

def detect_dormer(image):
    prompt = "Does the building roof have dormers? Answer with a yes or no."
    messages = [{"role": "user", "content": [{"type": "image", "image": image}, {"type": "text", "text": prompt}]}]
    prompt_str = processor.apply_chat_template(messages, tokenize=False)
    inputs = processor(text=prompt_str, images=image, return_tensors="pt").to(model.device)
    with torch.no_grad():
        outputs = model.generate(**inputs, max_new_tokens=10)
    decoded = processor.batch_decode(outputs, skip_special_tokens=True)[0].strip()
    return extract_final_yes_no(decoded)

# Run batch
predictions = []
for _, row in tqdm(df.iterrows(), total=len(df)):
    try:
        image_id = f"{int(row['gid'])}.png"
        image_path = os.path.join(image_dir, image_id)

        if not os.path.exists(image_path):
            predictions.append(-1)
            continue

        image = Image.open(image_path).convert("RGB")
        tabular = predict_from_tabular(row)
        dormer = detect_dormer(image)

        tab = parse_binary_from_answer(tabular)
        dor = parse_binary_from_answer(dormer)

        if tab == 1 or dor == 1:
            predictions.append(1)
        elif tab == 0 and dor == 0:
            predictions.append(0)
        else:
            predictions.append(-1)

    except Exception as e:
        predictions.append(-1)

# Save predictions
df["pred_img_AND"] = predictions
output_dir = '/mnt/data/oe215/env/guna/tabular_image_inference/outputs/predictions'
os.makedirs(output_dir, exist_ok=True)
output_path = os.path.join(output_dir, 'mixed_img_predictions_AND.csv')
df.to_csv(output_path, index=False)
print("✅ Saved to:", output_path)


100%|██████████| 745/745 [01:42<00:00,  7.27it/s]

✅ Saved to: /mnt/data/oe215/env/guna/tabular_image_inference/outputs/predictions/mixed_img_predictions_AND.csv





### Evaluation

In [11]:
import pandas as pd
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import matplotlib.pyplot as plt
import seaborn as sns

# 1. Load the predictions CSV
df = pd.read_csv('/mnt/data/oe215/env/guna/tabular_image_inference/outputs/predictions/mixed_img_predictions_AND.csv')

# 2. Define ground truth and model predictions
y_true = df['GT_IMG_AND']
y_pred = df['pred_img_AND']

# 3. Calculate Accuracy
accuracy = accuracy_score(y_true, y_pred)
print(f"\n✅ Accuracy: {accuracy:.4f}")

# 4. Show Confusion Matrix
cm = confusion_matrix(y_true, y_pred)
print("\n🧮 Confusion Matrix (Numbers):")
print(cm)

# 5. Classification Report
report = classification_report(y_true, y_pred, target_names=["Not Used", "Used"])
print("\n📄 Classification Report:")
print(report)

# 4. Create figure
fig, ax = plt.subplots(figsize=(6,5))

# 5. Plot Confusion Matrix
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', 
            xticklabels=["Predicted Not Used", "Predicted Used"],
            yticklabels=["Actual Not Used", "Actual Used"],
            ax=ax)
ax.set_title('🏠 Confusion Matrix for Attic Living Space Prediction')
ax.set_xlabel('Predicted Label')
ax.set_ylabel('True Label')

# 6. Tight Layout and Save
plt.tight_layout()

output_dir = '/mnt/data/oe215/env/guna/tabular_image_inference/outputs/evaluations/confusion_matrix'
os.makedirs(output_dir, exist_ok=True)

save_path = os.path.join(output_dir, 'conf_mat_img_mixed_AND.png')
plt.savefig(save_path)

# 7. Close the figure AFTER saving
plt.close()

print(f"\n✅ Confusion matrix plot saved successfully at: {save_path}")


✅ Accuracy: 0.6899

🧮 Confusion Matrix (Numbers):
[[271 110]
 [121 243]]

📄 Classification Report:
              precision    recall  f1-score   support

    Not Used       0.69      0.71      0.70       381
        Used       0.69      0.67      0.68       364

    accuracy                           0.69       745
   macro avg       0.69      0.69      0.69       745
weighted avg       0.69      0.69      0.69       745


✅ Confusion matrix plot saved successfully at: /mnt/data/oe215/env/guna/tabular_image_inference/outputs/evaluations/confusion_matrix/conf_mat_img_mixed_AND.png


  plt.tight_layout()
  plt.savefig(save_path)


## OR Case
### Single Inference Testing

In [4]:
import pandas as pd
from PIL import Image
import torch
import re


# Load tabular data and one row
csv_path = '/mnt/data/oe215/env/guna/tabular_image_inference/data/tabular_data/mixed_table/attic_GT_IMG_AND_and_GT_IMG_OR_Labels.csv'
df = pd.read_csv(csv_path)
row = df.iloc[1]

# Load corresponding image
image_id = str(int(row['gid'])) + ".png"
image_path = f"/mnt/data/oe215/env/guna/tabular_image_inference/data/image_data/mixed/{image_id}"
image = Image.open(image_path).convert("RGB")

# ✅ Final answer extractor using last YES/NO in response
def extract_final_yes_no(text):
    text = text.strip().lower()
    text = re.sub(r"[^\w\s]", "", text)  # remove punctuation and formatting (**)
    words = text.split()
    for word in reversed(words):
        if word == "yes":
            return "yes"
        elif word == "no":
            return "no"
    return "unknown"

# Convert answer to binary
def parse_binary_from_answer(answer):
    answer = answer.strip().lower()
    if answer == "yes":
        return 1
    elif answer == "no":
        return 0
    return -1

# Tabular prompt function
def predict_from_tabular(row, processor, model):
    prompt = (
        f"Given the following building attributes:\n"
        f"- Building ID: {row['gid']}\n"
        f"- Maximum roof slope: {row['slope_max']} degrees\n"
        f"- Maximum roof height: {row['roof_height_max']} meters\n\n"
        "If the maximum roof slope is greater than or equal to 30 degrees OR the maximum roof height is greater than or equal to 4 meters, "
        "predict that the attic is used as a living space.\n\n"
        "**At the beginning of your response, output only 'Yes' or 'No'. Do not repeat the input. Do not explain.**"
    )
    inputs = processor(text=prompt, return_tensors="pt").to(model.device)
    with torch.no_grad():
        outputs = model.generate(**inputs, max_new_tokens=10)
    decoded = processor.batch_decode(outputs, skip_special_tokens=True)[0].strip()
    print("🔍 Tabular raw output:\n", decoded)
    return extract_final_yes_no(decoded)

# Dormer detection prompt
def detect_dormer(image, processor, model):
    prompt = "Does the building roof have dormers? Answer with a yes or no."
    messages = [{"role": "user", "content": [{"type": "image", "image": image}, {"type": "text", "text": prompt}]}]
    prompt_str = processor.apply_chat_template(messages, tokenize=False)
    inputs = processor(text=prompt_str, images=image, return_tensors="pt").to(model.device)
    with torch.no_grad():
        outputs = model.generate(**inputs, max_new_tokens=10)
    decoded = processor.batch_decode(outputs, skip_special_tokens=True)[0].strip()
    print("🔍 Dormer raw output:\n", decoded)
    return extract_final_yes_no(decoded)

# Run both inference stages
tabular_result = predict_from_tabular(row, processor, model)
dormer_result = detect_dormer(image, processor, model)

# Convert to binary
tabular_binary = parse_binary_from_answer(tabular_result)
dormer_binary = parse_binary_from_answer(dormer_result)

# Final prediction logic
if tabular_binary == 1 or dormer_binary == 1:
    final_prediction = 1
elif tabular_binary == 0 and dormer_binary == 0:
    final_prediction = 0
else:
    final_prediction = -1  # unknown

# Output final results
print(f"\n📐 Tabular condition (slope ≥ 30 AND height ≥ 4): {tabular_result} → {tabular_binary}")
print(f"🪟 Dormer detected: {dormer_result} → {dormer_binary}")
print(f"🏠 Final Attic Usability Prediction: {'Yes' if final_prediction == 1 else 'No' if final_prediction == 0 else 'Unknown'}")


🔍 Tabular raw output:
 Given the following building attributes:
- Building ID: 3581585.0
- Maximum roof slope: 27.147534846875136 degrees
- Maximum roof height: 1.8739999999999952 meters

If the maximum roof slope is greater than or equal to 30 degrees OR the maximum roof height is greater than or equal to 4 meters, predict that the attic is used as a living space.

**At the beginning of your response, output only 'Yes' or 'No'. Do not repeat the input. Do not explain.** No
🔍 Dormer raw output:
 system
You are a helpful assistant.
user
Does the building roof have dormers? Answer with a yes or no.
 addCriterion
No

📐 Tabular condition (slope ≥ 30 AND height ≥ 4): no → 0
🪟 Dormer detected: no → 0
🏠 Final Attic Usability Prediction: No


### Batch Inference Testing

In [6]:
import os
import pandas as pd
from PIL import Image
import torch
import re
from tqdm import tqdm

# Load data
csv_path = '/mnt/data/oe215/env/guna/tabular_image_inference/data/tabular_data/mixed_table/attic_GT_IMG_AND_and_GT_IMG_OR_Labels.csv'
df = pd.read_csv(csv_path)

# Image directory
image_dir = '/mnt/data/oe215/env/guna/tabular_image_inference/data/image_data/mixed/'

# Prediction helpers
def extract_final_yes_no(text):
    text = text.strip().lower()
    text = re.sub(r"[^\w\s]", "", text)
    words = text.split()
    for word in reversed(words):
        if word == "yes":
            return "yes"
        elif word == "no":
            return "no"
    return "unknown"

def parse_binary_from_answer(answer):
    return {"yes": 1, "no": 0}.get(answer.strip().lower(), -1)

def predict_from_tabular(row):
    prompt = (
        f"Given the following building attributes:\n"
        f"- Building ID: {row['gid']}\n"
        f"- Maximum roof slope: {row['slope_max']} degrees\n"
        f"- Maximum roof height: {row['roof_height_max']} meters\n\n"
        "If the maximum roof slope is greater than or equal to 30 degrees OR the maximum roof height is greater than or equal to 4 meters, "
        "predict that the attic is used as a living space.\n\n"
        "**At the beginning of your response, output only 'Yes' or 'No'. Do not repeat the input. Do not explain.**"
    )
    inputs = processor(text=prompt, return_tensors="pt").to(model.device)

    with torch.no_grad():
        outputs = model.generate(**inputs, max_new_tokens=10)
    decoded = processor.batch_decode(outputs, skip_special_tokens=True)[0].strip()
    return extract_final_yes_no(decoded)

def detect_dormer(image):
    prompt = "Does the building roof have dormers? Answer with a yes or no."
    messages = [{"role": "user", "content": [{"type": "image", "image": image}, {"type": "text", "text": prompt}]}]
    prompt_str = processor.apply_chat_template(messages, tokenize=False)
    inputs = processor(text=prompt_str, images=image, return_tensors="pt").to(model.device)

    with torch.no_grad():
        outputs = model.generate(**inputs, max_new_tokens=10)
    decoded = processor.batch_decode(outputs, skip_special_tokens=True)[0].strip()
    return extract_final_yes_no(decoded)

# Run batch
predictions = []
for _, row in tqdm(df.iterrows(), total=len(df)):
    try:
        image_id = f"{int(row['gid'])}.png"
        image_path = os.path.join(image_dir, image_id)

        if not os.path.exists(image_path):
            predictions.append(-1)
            continue

        image = Image.open(image_path).convert("RGB")
        tabular = predict_from_tabular(row)
        dormer = detect_dormer(image)

        tab = parse_binary_from_answer(tabular)
        dor = parse_binary_from_answer(dormer)

        if tab == 1 or dor == 1:
            predictions.append(1)
        elif tab == 0 and dor == 0:
            predictions.append(0)
        else:
            predictions.append(-1)

    except Exception as e:
        predictions.append(-1)

# Save predictions
df["pred_img_OR"] = predictions
output_dir = '/mnt/data/oe215/env/guna/tabular_image_inference/outputs/predictions'
os.makedirs(output_dir, exist_ok=True)
output_path = os.path.join(output_dir, 'mixed_img_predictions_OR.csv')
df.to_csv(output_path, index=False)
print("✅ Saved to:", output_path)


100%|██████████| 745/745 [01:42<00:00,  7.24it/s]

✅ Saved to: /mnt/data/oe215/env/guna/tabular_image_inference/outputs/predictions/mixed_img_predictions_OR.csv





### Evaluation

In [8]:
import pandas as pd
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import matplotlib.pyplot as plt
import seaborn as sns

# 1. Load the predictions CSV
df = pd.read_csv('/mnt/data/oe215/env/guna/tabular_image_inference/outputs/predictions/mixed_img_predictions_OR.csv')

# 2. Define ground truth and model predictions
y_true = df['GT_IMG_OR']
y_pred = df['pred_img_OR']

# 3. Calculate Accuracy
accuracy = accuracy_score(y_true, y_pred)
print(f"\n✅ Accuracy: {accuracy:.4f}")

# 4. Show Confusion Matrix
cm = confusion_matrix(y_true, y_pred)
print("\n🧮 Confusion Matrix (Numbers):")
print(cm)

# 5. Classification Report
report = classification_report(y_true, y_pred, target_names=["Not Used", "Used"])
print("\n📄 Classification Report:")
print(report)

# 4. Create figure
fig, ax = plt.subplots(figsize=(6,5))

# 5. Plot Confusion Matrix
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', 
            xticklabels=["Predicted Not Used", "Predicted Used"],
            yticklabels=["Actual Not Used", "Actual Used"],
            ax=ax)
ax.set_title('🏠 Confusion Matrix for Attic Living Space Prediction')
ax.set_xlabel('Predicted Label')
ax.set_ylabel('True Label')

# 6. Tight Layout and Save
plt.tight_layout()

output_dir = '/mnt/data/oe215/env/guna/tabular_image_inference/outputs/evaluations/confusion_matrix'
os.makedirs(output_dir, exist_ok=True)

save_path = os.path.join(output_dir, 'conf_mat_img_mixed_OR.png')
plt.savefig(save_path)

# 7. Close the figure AFTER saving
plt.close()

print(f"\n✅ Confusion matrix plot saved successfully at: {save_path}")


✅ Accuracy: 0.6711

🧮 Confusion Matrix (Numbers):
[[194  83]
 [162 306]]

📄 Classification Report:
              precision    recall  f1-score   support

    Not Used       0.54      0.70      0.61       277
        Used       0.79      0.65      0.71       468

    accuracy                           0.67       745
   macro avg       0.67      0.68      0.66       745
weighted avg       0.70      0.67      0.68       745


✅ Confusion matrix plot saved successfully at: /mnt/data/oe215/env/guna/tabular_image_inference/outputs/evaluations/confusion_matrix/conf_mat_img_mixed_OR.png


  plt.tight_layout()
  plt.savefig(save_path)
