# Model Name: Qwen/Qwen2.5-VL-7B-Instruct
# Dataset: Gable Tabular Data

In [1]:
# Shell escape: print the NVIDIA driver/GPU status table for this machine.
!nvidia-smi

Sun May  4 14:37:29 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.54.14              Driver Version: 550.54.14      CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA A100 80GB PCIe          On  |   00000000:3D:00.0 Off |                    0 |
| N/A   43C    P0             47W /  300W |       0MiB /  81920MiB |      0%      Default |
|                                         |                        |             Disabled |
+-----------------------------------------+------------------------+----------------------+
                                                

In [3]:
!pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121

Defaulting to user installation because normal site-packages is not writeable
Looking in indexes: https://download.pytorch.org/whl/cu121


In [4]:
import torch
# Report the installed PyTorch build.
print("PyTorch Version:", torch.__version__)

# Ask for CUDA availability once and reuse the answer for both output lines.
cuda_ready = torch.cuda.is_available()
print("CUDA Available:", cuda_ready)

# Either name device 0 or state that no GPU was found.
gpu_report = ("GPU Name:", torch.cuda.get_device_name(0)) if cuda_ready else ("No GPU detected.",)
print(*gpu_report)

PyTorch Version: 2.5.1+cu124
CUDA Available: True
GPU Name: NVIDIA A100 80GB PCIe


In [5]:
!pip install git+https://github.com/huggingface/transformers accelerate
!pip install qwen-vl-utils[decord]==0.0.8

Defaulting to user installation because normal site-packages is not writeable
Collecting git+https://github.com/huggingface/transformers
  Cloning https://github.com/huggingface/transformers to /tmp/gjenni-tmpdir-u7tWn9/pip-req-build-qf3bm4qq
  Running command git clone --filter=blob:none --quiet https://github.com/huggingface/transformers /tmp/gjenni-tmpdir-u7tWn9/pip-req-build-qf3bm4qq
  Resolved https://github.com/huggingface/transformers to commit 397a5ede33863d6f7137c771a68d40036cac0396
  Installing build dependencies ... [?25ldone
[?25h  Getting requirements to build wheel ... [?25ldone
[?25h  Preparing metadata (pyproject.toml) ... [?25ldone
Collecting huggingface-hub<1.0,>=0.30.0 (from transformers==4.52.0.dev0)
  Downloading huggingface_hub-0.30.2-py3-none-any.whl.metadata (13 kB)
Downloading huggingface_hub-0.30.2-py3-none-any.whl (481 kB)
Building wheels for collected packages: transformers
  Building wheel for transformers (pyproject.toml) ... [?25ldone
[?25h  Create

### Load the model and processor

In [1]:
import pandas as pd
import os
from transformers import Qwen2_5_VLForConditionalGeneration, AutoTokenizer, AutoProcessor
from qwen_vl_utils import process_vision_info
import torch

# Checkpoint name shared by the model weights and the processor.
MODEL_ID = "Qwen/Qwen2.5-VL-7B-Instruct"

# Load the model: float16 weights, automatic device placement.
model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.float16,
    device_map="auto",
)

# Load the matching processor (used below to tokenize prompts and decode outputs).
processor = AutoProcessor.from_pretrained(MODEL_ID)

print("Model and Processor Loaded Successfully!")

Loading checkpoint shards:   0%|          | 0/5 [00:00<?, ?it/s]

Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.48, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.


Model and Processor Loaded Successfully!


## AND Case

### Single Inference Testing

In [28]:
import pandas as pd
import torch

# 1. Load the gable-roof ground-truth table
csv_path = '/mnt/data/oe215/env/guna/tabular_image_inference/data/tabular_data/gable_table/attic_ground_truths_gable_3100.csv'
df = pd.read_csv(csv_path)

# 2. Use the row at position 0 as the single test sample
row = df.iloc[0]

# 3. Create the prompt
def create_improved_prompt(row):
    """Build the AND-rule prompt for one building.

    Args:
        row: Mapping-like record providing 'gid', 'slope_max' and
            'roof_height_max'.

    Returns:
        The prompt string: the three attributes, the AND decision rule and
        the instruction to answer with 'Yes' or 'No'.
    """
    building_id = row['gid']
    slope_max = row['slope_max']
    roof_height_max = row['roof_height_max']

    attribute_section = (
        f"Given the following building attributes:\n"
        f"- Building ID: {building_id}\n"
        f"- Maximum roof slope: {slope_max} degrees\n"
        f"- Maximum roof height: {roof_height_max} meters\n\n"
    )
    decision_rule = (
        "If the maximum roof slope is greater than or equal to 30 degrees AND the maximum roof height is greater than or equal to 4 meters, "
        "predict that the attic is used as a living space.\n\n"
    )
    answer_format = "**At the beginning of your response, output only 'Yes' or 'No' on a new line. Do not repeat the input. Do not explain.**"

    return attribute_section + decision_rule + answer_format


import re  # used only to pick the yes/no word out of the generated answer

prompt = create_improved_prompt(row)

print("Generated Prompt:\n")
print(prompt)

# 4. Perform inference
inputs = processor(text=prompt, return_tensors="pt").to(model.device)
# Number of prompt tokens; everything after this position is the model's answer.
prompt_len = inputs["input_ids"].shape[1]

with torch.no_grad():
    outputs = model.generate(**inputs, max_new_tokens=10)

# 5. Decode
# Full sequence (prompt echo + continuation): kept for debugging output only.
prediction = processor.batch_decode(outputs, skip_special_tokens=True)[0].strip()
# Continuation only: this is what gets parsed.
generated_answer = processor.batch_decode(
    outputs[:, prompt_len:], skip_special_tokens=True
)[0].strip()

# Print full model output to debug
print("\nFull Model Output:\n")
print(prediction)

# 6. Parse the answer
# The full text must not be parsed: the prompt itself contains "Do not ...",
# so a substring test for " no" would succeed for every input and the -1
# branch could never be reached.
final_text = generated_answer.lower()
answer_match = re.search(r"\b(yes|no)\b", final_text)

if answer_match is None:
    pred_value = -1  # neither word found in the generated answer
elif answer_match.group(1) == "yes":
    pred_value = 1
else:
    pred_value = 0

print("Model final prediction:", pred_value)





Generated Prompt:

Given the following building attributes:
- Building ID: 3581560.0
- Maximum roof slope: 51.01464222107296 degrees
- Maximum roof height: 3.984000000000009 meters

If the maximum roof slope is greater than or equal to 30 degrees AND the maximum roof height is greater than or equal to 4 meters, predict that the attic is used as a living space.

**At the beginning of your response, output only 'Yes' or 'No' on a new line. Do not repeat the input. Do not explain.**

Full Model Output:

Given the following building attributes:
- Building ID: 3581560.0
- Maximum roof slope: 51.01464222107296 degrees
- Maximum roof height: 3.984000000000009 meters

If the maximum roof slope is greater than or equal to 30 degrees AND the maximum roof height is greater than or equal to 4 meters, predict that the attic is used as a living space.

**At the beginning of your response, output only 'Yes' or 'No' on a new line. Do not repeat the input. Do not explain.** No
Model final prediction: 0

### Batch Inference Testing

In [29]:
import pandas as pd
import torch
from tqdm import tqdm  # for progress bar

# 1. Load the gable-roof ground-truth table
csv_path = '/mnt/data/oe215/env/guna/tabular_image_inference/data/tabular_data/gable_table/attic_ground_truths_gable_3100.csv'
df = pd.read_csv(csv_path)

# 2. Create the improved prompt function
def create_improved_prompt(row):
    """Build the AND-rule prompt for one building.

    Args:
        row: Mapping-like record providing 'gid', 'slope_max' and
            'roof_height_max'.

    Returns:
        The prompt string: the three attributes, the AND decision rule and
        the instruction to answer with 'Yes' or 'No'.
    """
    prompt_parts = [
        f"Given the following building attributes:\n",
        f"- Building ID: {row['gid']}\n",
        f"- Maximum roof slope: {row['slope_max']} degrees\n",
        f"- Maximum roof height: {row['roof_height_max']} meters\n\n",
        "If the maximum roof slope is greater than or equal to 30 degrees AND the maximum roof height is greater than or equal to 4 meters, ",
        "predict that the attic is used as a living space.\n\n",
        "**At the beginning of your response, output only 'Yes' or 'No' on a new line. Do not repeat the input. Do not explain.**",
    ]
    return "".join(prompt_parts)

# 3. Function to perform prediction for a given prompt
def predict_attic_use(prompt):
    """Run the model on one text prompt and map its answer to a label.

    Only the newly generated tokens are decoded and parsed. The sequence
    returned by ``generate`` starts with the prompt tokens, and the prompt
    text contains "Do not ...", so a substring test for " no" on the full
    decoded text would succeed for every input regardless of the answer.

    Args:
        prompt: Prompt text handed to the module-level ``processor``.

    Returns:
        1 if the first yes/no word in the generated answer is "yes",
        0 if it is "no", and -1 if the answer contains neither word.
    """
    import re  # local import: this cell does not import re itself

    inputs = processor(text=prompt, return_tensors="pt").to(model.device)
    # Number of prompt tokens; everything after this position is the answer.
    prompt_len = inputs["input_ids"].shape[1]

    with torch.no_grad():
        outputs = model.generate(**inputs, max_new_tokens=10)

    generated_ids = outputs[:, prompt_len:]
    answer = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]

    # Whole-word match so that e.g. "not" or "none" is not read as "no".
    answer_match = re.search(r"\b(yes|no)\b", answer.strip().lower())
    if answer_match is None:
        return -1  # unexpected case
    return 1 if answer_match.group(1) == "yes" else 0

# 4. Inference for all rows
# One prompt is built and scored per table row; tqdm reports progress.
row_iterator = tqdm(df.iterrows(), total=len(df), desc="Running inference on all rows")
predictions = []
for idx, row in row_iterator:
    predictions.append(predict_attic_use(create_improved_prompt(row)))

# 5. Attach the predictions as a new column
df['pred_AND'] = predictions

# 6. Write the augmented table to the predictions folder
output_dir = '/mnt/data/oe215/env/guna/tabular_image_inference/outputs/predictions'
output_path = os.path.join(output_dir, 'gable_predictions_AND.csv')
os.makedirs(output_dir, exist_ok=True)
df.to_csv(output_path, index=False)

print(f"\n✅ All predictions saved successfully to: {output_path}")


Running inference on all rows: 100%|██████████| 375/375 [01:04<00:00,  5.80it/s]


✅ All predictions saved successfully to: /mnt/data/oe215/env/guna/tabular_image_inference/outputs/predictions/gable_predictions_AND.csv





### Evaluation

In [34]:
import os

import pandas as pd
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import matplotlib.pyplot as plt
import seaborn as sns

# 1. Load the predictions CSV
df = pd.read_csv('/mnt/data/oe215/env/guna/tabular_image_inference/outputs/predictions/gable_predictions_AND.csv')

# 2. Define ground truth and model predictions
y_true = df['GT_AND']
y_pred = df['pred_AND']

# Class ids are pinned so the matrix is always 2x2 and lines up with the two
# display names, even if a class is absent or y_pred contains the -1 sentinel
# that the inference step uses for answers it could not parse.
class_ids = [0, 1]
class_names = ["Not Used", "Used"]

unparsed_count = int((y_pred == -1).sum())
if unparsed_count:
    print(f"\n⚠️ {unparsed_count} prediction(s) are -1 (unparsed): counted as errors in accuracy, excluded from the matrix.")

# 3. Calculate Accuracy
accuracy = accuracy_score(y_true, y_pred)
print(f"\n✅ Accuracy: {accuracy:.4f}")

# 4. Show Confusion Matrix
cm = confusion_matrix(y_true, y_pred, labels=class_ids)
print("\n🧮 Confusion Matrix (Numbers):")
print(cm)

# 5. Classification Report
report = classification_report(y_true, y_pred, labels=class_ids, target_names=class_names)
print("\n📄 Classification Report:")
print(report)

# 6. Create figure
fig, ax = plt.subplots(figsize=(6,5))

# 7. Plot Confusion Matrix
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=["Predicted Not Used", "Predicted Used"],
            yticklabels=["Actual Not Used", "Actual Used"],
            ax=ax)
# Plain-text title: the emoji in the earlier title is presumably what caused the
# matplotlib warnings on tight_layout()/savefig() left in this cell's output.
ax.set_title('Confusion Matrix for Attic Living Space Prediction')
ax.set_xlabel('Predicted Label')
ax.set_ylabel('True Label')

# 8. Tight Layout and Save
fig.tight_layout()

output_dir = '/mnt/data/oe215/env/guna/tabular_image_inference/outputs/evaluations/confusion_matrix'
os.makedirs(output_dir, exist_ok=True)

save_path = os.path.join(output_dir, 'conf_mat_gable_AND.png')
fig.savefig(save_path)

# 9. Close the figure AFTER saving
plt.close(fig)

print(f"\n✅ Confusion matrix plot saved successfully at: {save_path}")


✅ Accuracy: 1.0000

🧮 Confusion Matrix (Numbers):
[[282   0]
 [  0  93]]

📄 Classification Report:
              precision    recall  f1-score   support

    Not Used       1.00      1.00      1.00       282
        Used       1.00      1.00      1.00        93

    accuracy                           1.00       375
   macro avg       1.00      1.00      1.00       375
weighted avg       1.00      1.00      1.00       375


✅ Confusion matrix plot saved successfully at: /mnt/data/oe215/env/guna/tabular_image_inference/outputs/evaluations/confusion_matrix/conf_mat_gable_AND.png


  plt.tight_layout()
  plt.savefig(save_path)


## OR Case

### Single Inference Testing

In [39]:
import pandas as pd
import torch

# 1. Load the gable-roof ground-truth table
csv_path = '/mnt/data/oe215/env/guna/tabular_image_inference/data/tabular_data/gable_table/attic_ground_truths_gable_3100.csv'
df = pd.read_csv(csv_path)

# 2. Use the row at position 2 (the third row) as the single test sample
row = df.iloc[2]

# 3. Create the prompt
def create_improved_prompt(row):
    """Build the OR-rule prompt for one building.

    Args:
        row: Mapping-like record providing 'gid', 'slope_max' and
            'roof_height_max'.

    Returns:
        The prompt string: the three attributes, the OR decision rule and
        the instruction to answer with 'Yes' or 'No'.
    """
    building_id = row['gid']
    slope_max = row['slope_max']
    roof_height_max = row['roof_height_max']

    attribute_section = (
        f"Given the following building attributes:\n"
        f"- Building ID: {building_id}\n"
        f"- Maximum roof slope: {slope_max} degrees\n"
        f"- Maximum roof height: {roof_height_max} meters\n\n"
    )
    decision_rule = (
        "If the maximum roof slope is greater than or equal to 30 degrees OR the maximum roof height is greater than or equal to 4 meters, "
        "predict that the attic is used as a living space.\n\n"
    )
    answer_format = "**At the beginning of your response, output only 'Yes' or 'No' on a new line. Do not repeat the input. Do not explain.**"

    return attribute_section + decision_rule + answer_format


import re  # used only to pick the yes/no word out of the generated answer

prompt = create_improved_prompt(row)

print("Generated Prompt:\n")
print(prompt)

# 4. Perform inference
inputs = processor(text=prompt, return_tensors="pt").to(model.device)
# Number of prompt tokens; everything after this position is the model's answer.
prompt_len = inputs["input_ids"].shape[1]

with torch.no_grad():
    outputs = model.generate(**inputs, max_new_tokens=10)

# 5. Decode
# Full sequence (prompt echo + continuation): kept for debugging output only.
prediction = processor.batch_decode(outputs, skip_special_tokens=True)[0].strip()
# Continuation only: this is what gets parsed.
generated_answer = processor.batch_decode(
    outputs[:, prompt_len:], skip_special_tokens=True
)[0].strip()

# Print full model output to debug
print("\nFull Model Output:\n")
print(prediction)

# 6. Parse the answer
# The full text must not be parsed: the prompt itself contains "Do not ...",
# so a substring test for " no" would succeed for every input and the -1
# branch could never be reached.
final_text = generated_answer.lower()
answer_match = re.search(r"\b(yes|no)\b", final_text)

if answer_match is None:
    pred_value = -1  # neither word found in the generated answer
elif answer_match.group(1) == "yes":
    pred_value = 1
else:
    pred_value = 0

print("Model final prediction:", pred_value)





Generated Prompt:

Given the following building attributes:
- Building ID: 3581597.0
- Maximum roof slope: 26.21714480174904 degrees
- Maximum roof height: 2.562000000000012 meters

If the maximum roof slope is greater than or equal to 30 degrees OR the maximum roof height is greater than or equal to 4 meters, predict that the attic is used as a living space.

**At the beginning of your response, output only 'Yes' or 'No' on a new line. Do not repeat the input. Do not explain.**

Full Model Output:

Given the following building attributes:
- Building ID: 3581597.0
- Maximum roof slope: 26.21714480174904 degrees
- Maximum roof height: 2.562000000000012 meters

If the maximum roof slope is greater than or equal to 30 degrees OR the maximum roof height is greater than or equal to 4 meters, predict that the attic is used as a living space.

**At the beginning of your response, output only 'Yes' or 'No' on a new line. Do not repeat the input. Do not explain.** No
Model final prediction: 0


### Batch Inference Testing 

In [40]:
import pandas as pd
import torch
from tqdm import tqdm  # for progress bar

# 1. Load the gable-roof ground-truth table
csv_path = '/mnt/data/oe215/env/guna/tabular_image_inference/data/tabular_data/gable_table/attic_ground_truths_gable_3100.csv'
df = pd.read_csv(csv_path)

# 2. Create the improved prompt function
def create_improved_prompt(row):
    """Build the OR-rule prompt for one building.

    Args:
        row: Mapping-like record providing 'gid', 'slope_max' and
            'roof_height_max'.

    Returns:
        The prompt string: the three attributes, the OR decision rule and
        the instruction to answer with 'Yes' or 'No'.
    """
    prompt_parts = [
        f"Given the following building attributes:\n",
        f"- Building ID: {row['gid']}\n",
        f"- Maximum roof slope: {row['slope_max']} degrees\n",
        f"- Maximum roof height: {row['roof_height_max']} meters\n\n",
        "If the maximum roof slope is greater than or equal to 30 degrees OR the maximum roof height is greater than or equal to 4 meters, ",
        "predict that the attic is used as a living space.\n\n",
        "**At the beginning of your response, output only 'Yes' or 'No' on a new line. Do not repeat the input. Do not explain.**",
    ]
    return "".join(prompt_parts)

# 3. Function to perform prediction for a given prompt
def predict_attic_use(prompt):
    """Run the model on one text prompt and map its answer to a label.

    Only the newly generated tokens are decoded and parsed. The sequence
    returned by ``generate`` starts with the prompt tokens, and the prompt
    text contains "Do not ...", so a substring test for " no" on the full
    decoded text would succeed for every input regardless of the answer.

    Args:
        prompt: Prompt text handed to the module-level ``processor``.

    Returns:
        1 if the first yes/no word in the generated answer is "yes",
        0 if it is "no", and -1 if the answer contains neither word.
    """
    import re  # local import: this cell does not import re itself

    inputs = processor(text=prompt, return_tensors="pt").to(model.device)
    # Number of prompt tokens; everything after this position is the answer.
    prompt_len = inputs["input_ids"].shape[1]

    with torch.no_grad():
        outputs = model.generate(**inputs, max_new_tokens=10)

    generated_ids = outputs[:, prompt_len:]
    answer = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]

    # Whole-word match so that e.g. "not" or "none" is not read as "no".
    answer_match = re.search(r"\b(yes|no)\b", answer.strip().lower())
    if answer_match is None:
        return -1  # unexpected case
    return 1 if answer_match.group(1) == "yes" else 0

# 4. Inference for all rows
# One prompt is built and scored per table row; tqdm reports progress.
row_iterator = tqdm(df.iterrows(), total=len(df), desc="Running inference on all rows")
predictions = []
for idx, row in row_iterator:
    predictions.append(predict_attic_use(create_improved_prompt(row)))

# 5. Attach the predictions as a new column
df['pred_OR'] = predictions

# 6. Write the augmented table to the predictions folder
output_dir = '/mnt/data/oe215/env/guna/tabular_image_inference/outputs/predictions'
output_path = os.path.join(output_dir, 'gable_predictions_OR.csv')
os.makedirs(output_dir, exist_ok=True)
df.to_csv(output_path, index=False)

print(f"\n✅ All predictions saved successfully to: {output_path}")


Running inference on all rows: 100%|██████████| 375/375 [01:25<00:00,  4.40it/s]


✅ All predictions saved successfully to: /mnt/data/oe215/env/guna/tabular_image_inference/outputs/predictions/gable_predictions_OR.csv





### Evaluation

In [42]:
import os

import pandas as pd
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import matplotlib.pyplot as plt
import seaborn as sns

# 1. Load the predictions CSV
df = pd.read_csv('/mnt/data/oe215/env/guna/tabular_image_inference/outputs/predictions/gable_predictions_OR.csv')

# 2. Define ground truth and model predictions
y_true = df['GT_OR']
y_pred = df['pred_OR']

# Class ids are pinned so the matrix is always 2x2 and lines up with the two
# display names, even if a class is absent or y_pred contains the -1 sentinel
# that the inference step uses for answers it could not parse.
class_ids = [0, 1]
class_names = ["Not Used", "Used"]

unparsed_count = int((y_pred == -1).sum())
if unparsed_count:
    print(f"\n⚠️ {unparsed_count} prediction(s) are -1 (unparsed): counted as errors in accuracy, excluded from the matrix.")

# 3. Calculate Accuracy
accuracy = accuracy_score(y_true, y_pred)
print(f"\n✅ Accuracy: {accuracy:.4f}")

# 4. Show Confusion Matrix
cm = confusion_matrix(y_true, y_pred, labels=class_ids)
print("\n🧮 Confusion Matrix (Numbers):")
print(cm)

# 5. Classification Report
report = classification_report(y_true, y_pred, labels=class_ids, target_names=class_names)
print("\n📄 Classification Report:")
print(report)

# 6. Create figure
fig, ax = plt.subplots(figsize=(6,5))

# 7. Plot Confusion Matrix
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=["Predicted Not Used", "Predicted Used"],
            yticklabels=["Actual Not Used", "Actual Used"],
            ax=ax)
# Plain-text title: the emoji in the earlier title is presumably what caused the
# matplotlib warnings on tight_layout()/savefig() left in this cell's output.
ax.set_title('Confusion Matrix for Attic Living Space Prediction')
ax.set_xlabel('Predicted Label')
ax.set_ylabel('True Label')

# 8. Tight Layout and Save
fig.tight_layout()

output_dir = '/mnt/data/oe215/env/guna/tabular_image_inference/outputs/evaluations/confusion_matrix'
os.makedirs(output_dir, exist_ok=True)

save_path = os.path.join(output_dir, 'conf_mat_gable_OR.png')
fig.savefig(save_path)

# 9. Close the figure AFTER saving
plt.close(fig)

print(f"\n✅ Confusion matrix plot saved successfully at: {save_path}")


✅ Accuracy: 0.9040

🧮 Confusion Matrix (Numbers):
[[130   0]
 [ 36 209]]

📄 Classification Report:
              precision    recall  f1-score   support

    Not Used       0.78      1.00      0.88       130
        Used       1.00      0.85      0.92       245

    accuracy                           0.90       375
   macro avg       0.89      0.93      0.90       375
weighted avg       0.92      0.90      0.91       375


✅ Confusion matrix plot saved successfully at: /mnt/data/oe215/env/guna/tabular_image_inference/outputs/evaluations/confusion_matrix/conf_mat_gable_OR.png


  plt.tight_layout()
  plt.savefig(save_path)


## AND Case - Combined Image and Table Only Based Ground Truth

### Batch Inference

In [2]:
import pandas as pd
import torch
from tqdm import tqdm  # for progress bar

# 1. Load the gable-roof table that carries the GT_IMG ground-truth columns
csv_path = '/mnt/data/oe215/env/guna/tabular_image_inference/data/tabular_data/gable_table/attic_ground_truths_gable_3100_GT_IMG.csv'
df = pd.read_csv(csv_path)

# 2. Create the improved prompt function
def create_improved_prompt(row):
    """Build the AND-rule prompt for one building.

    Args:
        row: Mapping-like record providing 'gid', 'slope_max' and
            'roof_height_max'.

    Returns:
        The prompt string: the three attributes, the AND decision rule and
        the instruction to answer with 'Yes' or 'No'.
    """
    attribute_section = "".join((
        f"Given the following building attributes:\n",
        f"- Building ID: {row['gid']}\n",
        f"- Maximum roof slope: {row['slope_max']} degrees\n",
        f"- Maximum roof height: {row['roof_height_max']} meters\n\n",
    ))
    decision_rule = (
        "If the maximum roof slope is greater than or equal to 30 degrees AND the maximum roof height is greater than or equal to 4 meters, "
        "predict that the attic is used as a living space.\n\n"
    )
    answer_format = "**At the beginning of your response, output only 'Yes' or 'No' on a new line. Do not repeat the input. Do not explain.**"

    return f"{attribute_section}{decision_rule}{answer_format}"

# 3. Function to perform prediction for a given prompt
def predict_attic_use(prompt):
    """Run the model on one text prompt and map its answer to a label.

    Only the newly generated tokens are decoded and parsed. The sequence
    returned by ``generate`` starts with the prompt tokens, and the prompt
    text contains "Do not ...", so a substring test for " no" on the full
    decoded text would succeed for every input regardless of the answer.

    Args:
        prompt: Prompt text handed to the module-level ``processor``.

    Returns:
        1 if the first yes/no word in the generated answer is "yes",
        0 if it is "no", and -1 if the answer contains neither word.
    """
    import re  # local import: this cell does not import re itself

    inputs = processor(text=prompt, return_tensors="pt").to(model.device)
    # Number of prompt tokens; everything after this position is the answer.
    prompt_len = inputs["input_ids"].shape[1]

    with torch.no_grad():
        outputs = model.generate(**inputs, max_new_tokens=10)

    generated_ids = outputs[:, prompt_len:]
    answer = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]

    # Whole-word match so that e.g. "not" or "none" is not read as "no".
    answer_match = re.search(r"\b(yes|no)\b", answer.strip().lower())
    if answer_match is None:
        return -1  # unexpected case
    return 1 if answer_match.group(1) == "yes" else 0

# 4. Inference for all rows
# One prompt is built and scored per table row; tqdm reports progress.
row_iterator = tqdm(df.iterrows(), total=len(df), desc="Running inference on all rows")
predictions = []
for idx, row in row_iterator:
    predictions.append(predict_attic_use(create_improved_prompt(row)))

# 5. Attach the predictions as a new column
df['pred_AND'] = predictions

# 6. Write the augmented table to the predictions folder
output_dir = '/mnt/data/oe215/env/guna/tabular_image_inference/outputs/predictions'
output_path = os.path.join(output_dir, 'gable_predictions_table_real_AND.csv')
os.makedirs(output_dir, exist_ok=True)
df.to_csv(output_path, index=False)

print(f"\n✅ All predictions saved successfully to: {output_path}")


Running inference on all rows: 100%|██████████| 375/375 [00:29<00:00, 12.86it/s]


✅ All predictions saved successfully to: /mnt/data/oe215/env/guna/tabular_image_inference/outputs/predictions/gable_predictions_table_real_AND.csv





### Evaluation

In [6]:
import os

import pandas as pd
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import matplotlib.pyplot as plt
import seaborn as sns

# 1. Load the predictions CSV
df = pd.read_csv('/mnt/data/oe215/env/guna/tabular_image_inference/outputs/predictions/gable_predictions_table_real_AND.csv')

# 2. Define ground truth and model predictions
y_true = df['GT_IMG_AND']
y_pred = df['pred_AND']

# Class ids are pinned so the matrix is always 2x2 and lines up with the two
# display names, even if a class is absent or y_pred contains the -1 sentinel
# that the inference step uses for answers it could not parse.
class_ids = [0, 1]
class_names = ["Not Used", "Used"]

unparsed_count = int((y_pred == -1).sum())
if unparsed_count:
    print(f"\n⚠️ {unparsed_count} prediction(s) are -1 (unparsed): counted as errors in accuracy, excluded from the matrix.")

# 3. Calculate Accuracy
accuracy = accuracy_score(y_true, y_pred)
print(f"\n✅ Accuracy: {accuracy:.4f}")

# 4. Show Confusion Matrix
cm = confusion_matrix(y_true, y_pred, labels=class_ids)
print("\n🧮 Confusion Matrix (Numbers):")
print(cm)

# 5. Classification Report
report = classification_report(y_true, y_pred, labels=class_ids, target_names=class_names)
print("\n📄 Classification Report:")
print(report)

# 6. Create figure
fig, ax = plt.subplots(figsize=(6,5))

# 7. Plot Confusion Matrix
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=["Predicted Not Used", "Predicted Used"],
            yticklabels=["Actual Not Used", "Actual Used"],
            ax=ax)
# Plain-text title so the saved figure is identifiable on its own.
ax.set_title('Confusion Matrix for Attic Living Space Prediction')
ax.set_xlabel('Predicted Label')
ax.set_ylabel('True Label')

# 8. Tight Layout and Save
fig.tight_layout()

output_dir = '/mnt/data/oe215/env/guna/tabular_image_inference/outputs/evaluations/confusion_matrix'
os.makedirs(output_dir, exist_ok=True)

save_path = os.path.join(output_dir, 'conf_mat_gable_table_real_GT_AND.png')
fig.savefig(save_path)

# 9. Close the figure AFTER saving
plt.close(fig)

print(f"\n✅ Confusion matrix plot saved successfully at: {save_path}")


✅ Accuracy: 0.7360

🧮 Confusion Matrix (Numbers):
[[183   0]
 [ 99  93]]

📄 Classification Report:
              precision    recall  f1-score   support

    Not Used       0.65      1.00      0.79       183
        Used       1.00      0.48      0.65       192

    accuracy                           0.74       375
   macro avg       0.82      0.74      0.72       375
weighted avg       0.83      0.74      0.72       375


✅ Confusion matrix plot saved successfully at: /mnt/data/oe215/env/guna/tabular_image_inference/outputs/evaluations/confusion_matrix/conf_mat_gable_table_real_GT_AND.png


## OR Case - Combined Image and Table Only Based Ground Truth

### Batch Inference

In [4]:
import pandas as pd
import torch
from tqdm import tqdm  # for progress bar

# 1. Load the gable-roof table that carries the GT_IMG ground-truth columns
csv_path = '/mnt/data/oe215/env/guna/tabular_image_inference/data/tabular_data/gable_table/attic_ground_truths_gable_3100_GT_IMG.csv'
df = pd.read_csv(csv_path)

# 2. Create the improved prompt function
def create_improved_prompt(row):
    """Build the OR-rule prompt for one building.

    Args:
        row: Mapping-like record providing 'gid', 'slope_max' and
            'roof_height_max'.

    Returns:
        The prompt string: the three attributes, the OR decision rule and
        the instruction to answer with 'Yes' or 'No'.
    """
    attribute_section = "".join((
        f"Given the following building attributes:\n",
        f"- Building ID: {row['gid']}\n",
        f"- Maximum roof slope: {row['slope_max']} degrees\n",
        f"- Maximum roof height: {row['roof_height_max']} meters\n\n",
    ))
    decision_rule = (
        "If the maximum roof slope is greater than or equal to 30 degrees OR the maximum roof height is greater than or equal to 4 meters, "
        "predict that the attic is used as a living space.\n\n"
    )
    answer_format = "**At the beginning of your response, output only 'Yes' or 'No' on a new line. Do not repeat the input. Do not explain.**"

    return f"{attribute_section}{decision_rule}{answer_format}"

# 3. Function to perform prediction for a given prompt
def predict_attic_use(prompt):
    """Run the model on one text prompt and map its answer to a label.

    Only the newly generated tokens are decoded and parsed. The sequence
    returned by ``generate`` starts with the prompt tokens, and the prompt
    text contains "Do not ...", so a substring test for " no" on the full
    decoded text would succeed for every input regardless of the answer.

    Args:
        prompt: Prompt text handed to the module-level ``processor``.

    Returns:
        1 if the first yes/no word in the generated answer is "yes",
        0 if it is "no", and -1 if the answer contains neither word.
    """
    import re  # local import: this cell does not import re itself

    inputs = processor(text=prompt, return_tensors="pt").to(model.device)
    # Number of prompt tokens; everything after this position is the answer.
    prompt_len = inputs["input_ids"].shape[1]

    with torch.no_grad():
        outputs = model.generate(**inputs, max_new_tokens=10)

    generated_ids = outputs[:, prompt_len:]
    answer = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]

    # Whole-word match so that e.g. "not" or "none" is not read as "no".
    answer_match = re.search(r"\b(yes|no)\b", answer.strip().lower())
    if answer_match is None:
        return -1  # unexpected case
    return 1 if answer_match.group(1) == "yes" else 0

# 4. Inference for all rows
# One prompt is built and scored per table row; tqdm reports progress.
row_iterator = tqdm(df.iterrows(), total=len(df), desc="Running inference on all rows")
predictions = []
for idx, row in row_iterator:
    predictions.append(predict_attic_use(create_improved_prompt(row)))

# 5. Attach the predictions as a new column
df['pred_OR'] = predictions

# 6. Write the augmented table to the predictions folder
output_dir = '/mnt/data/oe215/env/guna/tabular_image_inference/outputs/predictions'
output_path = os.path.join(output_dir, 'gable_predictions_table_real_OR.csv')
os.makedirs(output_dir, exist_ok=True)
df.to_csv(output_path, index=False)

print(f"\n✅ All predictions saved successfully to: {output_path}")


Running inference on all rows: 100%|██████████| 375/375 [00:36<00:00, 10.17it/s]


✅ All predictions saved successfully to: /mnt/data/oe215/env/guna/tabular_image_inference/outputs/predictions/gable_predictions_table_real_OR.csv





### Evaluation

In [7]:
import os

import pandas as pd
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import matplotlib.pyplot as plt
import seaborn as sns

# 1. Load the predictions CSV
df = pd.read_csv('/mnt/data/oe215/env/guna/tabular_image_inference/outputs/predictions/gable_predictions_table_real_OR.csv')

# 2. Define ground truth and model predictions
y_true = df['GT_IMG_OR']
y_pred = df['pred_OR']

# Class ids are pinned so the matrix is always 2x2 and lines up with the two
# display names, even if a class is absent or y_pred contains the -1 sentinel
# that the inference step uses for answers it could not parse.
class_ids = [0, 1]
class_names = ["Not Used", "Used"]

unparsed_count = int((y_pred == -1).sum())
if unparsed_count:
    print(f"\n⚠️ {unparsed_count} prediction(s) are -1 (unparsed): counted as errors in accuracy, excluded from the matrix.")

# 3. Calculate Accuracy
accuracy = accuracy_score(y_true, y_pred)
print(f"\n✅ Accuracy: {accuracy:.4f}")

# 4. Show Confusion Matrix
cm = confusion_matrix(y_true, y_pred, labels=class_ids)
print("\n🧮 Confusion Matrix (Numbers):")
print(cm)

# 5. Classification Report
report = classification_report(y_true, y_pred, labels=class_ids, target_names=class_names)
print("\n📄 Classification Report:")
print(report)

# 6. Create figure
fig, ax = plt.subplots(figsize=(6,5))

# 7. Plot Confusion Matrix
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=["Predicted Not Used", "Predicted Used"],
            yticklabels=["Actual Not Used", "Actual Used"],
            ax=ax)
# Plain-text title so the saved figure is identifiable on its own.
ax.set_title('Confusion Matrix for Attic Living Space Prediction')
ax.set_xlabel('Predicted Label')
ax.set_ylabel('True Label')

# 8. Tight Layout and Save
fig.tight_layout()

output_dir = '/mnt/data/oe215/env/guna/tabular_image_inference/outputs/evaluations/confusion_matrix'
os.makedirs(output_dir, exist_ok=True)

save_path = os.path.join(output_dir, 'conf_mat_gable_table_real_GT_OR.png')
fig.savefig(save_path)

# 9. Close the figure AFTER saving
plt.close(fig)

print(f"\n✅ Confusion matrix plot saved successfully at: {save_path}")


✅ Accuracy: 0.8293

🧮 Confusion Matrix (Numbers):
[[104   1]
 [ 63 207]]

📄 Classification Report:
              precision    recall  f1-score   support

    Not Used       0.62      0.99      0.76       105
        Used       1.00      0.77      0.87       270

    accuracy                           0.83       375
   macro avg       0.81      0.88      0.82       375
weighted avg       0.89      0.83      0.84       375


✅ Confusion matrix plot saved successfully at: /mnt/data/oe215/env/guna/tabular_image_inference/outputs/evaluations/confusion_matrix/conf_mat_gable_table_real_GT_OR.png
