# Model Name: Qwen/Qwen2.5-VL-7B-Instruct
# Dataset: Hipped Tabular Data

In [1]:
!pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121

Defaulting to user installation because normal site-packages is not writeable
Looking in indexes: https://download.pytorch.org/whl/cu121


In [2]:
import torch
# Report the PyTorch build and whether a CUDA device is visible to it.
cuda_ok = torch.cuda.is_available()

print("PyTorch Version:", torch.__version__)
print("CUDA Available:", cuda_ok)

# Either name device 0 or say that no GPU was found.
gpu_message = ("GPU Name:", torch.cuda.get_device_name(0)) if cuda_ok else ("No GPU detected.",)
print(*gpu_message)

PyTorch Version: 2.5.1+cu124
CUDA Available: True
GPU Name: NVIDIA A100 80GB PCIe


In [3]:
!pip install git+https://github.com/huggingface/transformers accelerate
!pip install qwen-vl-utils[decord]==0.0.8

Defaulting to user installation because normal site-packages is not writeable
Collecting git+https://github.com/huggingface/transformers
  Cloning https://github.com/huggingface/transformers to /tmp/gjenni-tmpdir-LXqnIL/pip-req-build-9zudtvpf
  Running command git clone --filter=blob:none --quiet https://github.com/huggingface/transformers /tmp/gjenni-tmpdir-LXqnIL/pip-req-build-9zudtvpf
  Resolved https://github.com/huggingface/transformers to commit 2932f318a20d9e54cc7aea052e040164d85de7d6
  Installing build dependencies ... [?25ldone
[?25h  Getting requirements to build wheel ... [?25ldone
[?25h  Preparing metadata (pyproject.toml) ... [?25ldone
Defaulting to user installation because normal site-packages is not writeable


### Load the model and processor

In [1]:
import pandas as pd
import os
from transformers import Qwen2_5_VLForConditionalGeneration, AutoTokenizer, AutoProcessor
from qwen_vl_utils import process_vision_info
import torch

# Checkpoint name shared by the model weights and the processor.
model_id = "Qwen/Qwen2.5-VL-7B-Instruct"

# Load the model weights in float16 with automatic device placement.
model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
    model_id,
    torch_dtype=torch.float16,
    device_map="auto",
)

# Load the matching processor from the same checkpoint.
processor = AutoProcessor.from_pretrained(model_id)

print("Model and Processor Loaded Successfully!")

Loading checkpoint shards:   0%|          | 0/5 [00:00<?, ?it/s]

Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.48, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.


Model and Processor Loaded Successfully!


## AND Case

### Single Inference Testing

In [5]:
import pandas as pd
import torch

# 1. Read the ground-truth CSV
ground_truth_csv = '/mnt/data/oe215/env/guna/tabular_image_inference/data/tabular_data/hipped_table/attic_ground_truths_hipped_3200.csv'
df = pd.read_csv(ground_truth_csv)

# 2. Use the row at position 0 as the single test case
row = df.iloc[0]

# 3. Create the prompt
def create_improved_prompt(row):
    """Build the AND-rule Yes/No prompt for one building.

    `row` is indexed by the keys 'gid', 'slope_max' and 'roof_height_max'.
    Returns the prompt as a single string: attribute list, decision rule,
    and answer-format instruction, separated by blank lines.
    """
    attribute_section = "\n".join([
        "Given the following building attributes:",
        f"- Building ID: {row['gid']}",
        f"- Maximum roof slope: {row['slope_max']} degrees",
        f"- Maximum roof height: {row['roof_height_max']} meters",
    ])
    rule_section = (
        "If the maximum roof slope is greater than or equal to 30 degrees AND the maximum roof height is greater than or equal to 4 meters, "
        "predict that the attic is used as a living space."
    )
    format_section = "**At the beginning of your response, output only 'Yes' or 'No' on a new line. Do not repeat the input. Do not explain.**"
    return "\n\n".join([attribute_section, rule_section, format_section])


import re

# Build and show the prompt for the selected row.
prompt = create_improved_prompt(row)

print("Generated Prompt:\n")
print(prompt)

# 4. Perform inference
# NOTE(review): the prompt is tokenized as raw text, without the processor's chat
# template — confirm this is intended for an Instruct checkpoint.
inputs = processor(text=prompt, return_tensors="pt").to(model.device)

with torch.no_grad():
    outputs = model.generate(**inputs, max_new_tokens=10)

# 5. Decode the whole sequence (prompt + completion) for debugging only
prediction = processor.batch_decode(outputs, skip_special_tokens=True)[0]
prediction = prediction.strip()

print("\nFull Model Output:\n")
print(prediction)

# 6. Parse the answer from the newly generated tokens only.
# The decoded full sequence echoes the prompt, and the prompt itself contains
# "Do not", so a substring test for " no" on the full text always matches and the
# -1 branch could never be reached. Drop the prompt tokens before parsing.
prompt_len = inputs["input_ids"].shape[1]
completion = processor.batch_decode(outputs[:, prompt_len:], skip_special_tokens=True)[0]
final_text = completion.strip().lower()

# First whole-word "yes"/"no" in the completion decides the label; -1 means neither was found.
answer = re.search(r"\b(yes|no)\b", final_text)
if answer is None:
    pred_value = -1
elif answer.group(1) == "yes":
    pred_value = 1
else:
    pred_value = 0

print("Model final prediction:", pred_value)





Generated Prompt:

Given the following building attributes:
- Building ID: 3581635.0
- Maximum roof slope: 33.02212920656211 degrees
- Maximum roof height: 5.129999999999996 meters

If the maximum roof slope is greater than or equal to 30 degrees AND the maximum roof height is greater than or equal to 4 meters, predict that the attic is used as a living space.

**At the beginning of your response, output only 'Yes' or 'No' on a new line. Do not repeat the input. Do not explain.**

Full Model Output:

Given the following building attributes:
- Building ID: 3581635.0
- Maximum roof slope: 33.02212920656211 degrees
- Maximum roof height: 5.129999999999996 meters

If the maximum roof slope is greater than or equal to 30 degrees AND the maximum roof height is greater than or equal to 4 meters, predict that the attic is used as a living space.

**At the beginning of your response, output only 'Yes' or 'No' on a new line. Do not repeat the input. Do not explain.** Yes
Model final prediction: 

### Batch Inference Testing

In [6]:
import pandas as pd
import torch
from tqdm import tqdm  # for progress bar

# 1. Read the ground-truth CSV that the batch run iterates over
ground_truth_csv = '/mnt/data/oe215/env/guna/tabular_image_inference/data/tabular_data/hipped_table/attic_ground_truths_hipped_3200.csv'
df = pd.read_csv(ground_truth_csv)

# 2. Create the improved prompt function
def create_improved_prompt(row):
    """Build the AND-rule Yes/No prompt for one building.

    `row` is indexed by the keys 'gid', 'slope_max' and 'roof_height_max'.
    Returns the prompt as a single string: attribute list, decision rule,
    and answer-format instruction, separated by blank lines.
    """
    attribute_section = "\n".join([
        "Given the following building attributes:",
        f"- Building ID: {row['gid']}",
        f"- Maximum roof slope: {row['slope_max']} degrees",
        f"- Maximum roof height: {row['roof_height_max']} meters",
    ])
    rule_section = (
        "If the maximum roof slope is greater than or equal to 30 degrees AND the maximum roof height is greater than or equal to 4 meters, "
        "predict that the attic is used as a living space."
    )
    format_section = "**At the beginning of your response, output only 'Yes' or 'No' on a new line. Do not repeat the input. Do not explain.**"
    return "\n\n".join([attribute_section, rule_section, format_section])

# 3. Function to perform prediction for a given prompt
def predict_attic_use(prompt):
    """Run the model on `prompt` and map its Yes/No answer to a label.

    Uses the notebook-level `processor` and `model`.

    Returns:
        1 if the completion contains the word "yes" first,
        0 if it contains the word "no" first,
        -1 if neither word appears in the completion.
    """
    import re

    # NOTE(review): the prompt is tokenized as raw text, without the processor's chat
    # template — confirm this is intended for an Instruct checkpoint.
    inputs = processor(text=prompt, return_tensors="pt").to(model.device)

    with torch.no_grad():
        outputs = model.generate(**inputs, max_new_tokens=10)

    # The generated sequence starts with the prompt tokens. The prompt contains
    # "Do not", so searching the full decoded text for " no" always matched and the
    # -1 branch was unreachable. Decode and parse only the newly generated tokens.
    prompt_len = inputs["input_ids"].shape[1]
    completion = processor.batch_decode(outputs[:, prompt_len:], skip_special_tokens=True)[0]

    # First whole-word "yes"/"no" decides the label.
    answer = re.search(r"\b(yes|no)\b", completion.strip().lower())
    if answer is None:
        return -1  # unexpected case
    return 1 if answer.group(1) == "yes" else 0

# 4. Build one prompt per row and collect the model's label for each
predictions = [
    predict_attic_use(create_improved_prompt(record))
    for _, record in tqdm(df.iterrows(), total=len(df), desc="Running inference on all rows")
]

# 5. Store the labels as a new column
df['pred_AND'] = predictions

# 6. Write the augmented table, creating the output folder if it does not exist
output_dir = '/mnt/data/oe215/env/guna/tabular_image_inference/outputs/predictions'
os.makedirs(output_dir, exist_ok=True)
output_path = os.path.join(output_dir, 'hipped_predictions_AND.csv')
df.to_csv(output_path, index=False)

print(f"\n✅ All predictions saved successfully to: {output_path}")


Running inference on all rows: 100%|██████████| 370/370 [00:25<00:00, 14.77it/s]


✅ All predictions saved successfully to: /mnt/data/oe215/env/guna/tabular_image_inference/outputs/predictions/hipped_predictions_AND.csv





### Evaluation

In [7]:
import pandas as pd
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import matplotlib.pyplot as plt
import seaborn as sns

# 1. Load the predictions CSV
df = pd.read_csv('/mnt/data/oe215/env/guna/tabular_image_inference/outputs/predictions/hipped_predictions_AND.csv')

# 2. Define ground truth and model predictions
y_true = df['GT_AND']
y_pred = df['pred_AND']

# The inference step writes -1 when the answer could not be parsed. Pin the two real
# classes so such rows cannot turn the matrix into 3x3 or break the two target names.
class_labels = [0, 1]
n_unparsed = int((~y_pred.isin(class_labels)).sum())
if n_unparsed:
    print(f"\nWarning: {n_unparsed} prediction(s) are not 0/1; they count as errors in accuracy.")

# 3. Calculate Accuracy
accuracy = accuracy_score(y_true, y_pred)
print(f"\n✅ Accuracy: {accuracy:.4f}")

# 4. Show Confusion Matrix
cm = confusion_matrix(y_true, y_pred, labels=class_labels)
print("\n🧮 Confusion Matrix (Numbers):")
print(cm)

# 5. Classification Report
report = classification_report(
    y_true, y_pred, labels=class_labels, target_names=["Not Used", "Used"]
)
print("\n📄 Classification Report:")
print(report)

# 6. Create figure
fig, ax = plt.subplots(figsize=(6, 5))

# 7. Plot Confusion Matrix
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=["Predicted Not Used", "Predicted Used"],
            yticklabels=["Actual Not Used", "Actual Used"],
            ax=ax)
# Plain-text title: the emoji previously used here is the likely source of the
# warnings recorded for tight_layout()/savefig() (glyph not in the plotting font).
ax.set_title('Confusion Matrix for Attic Living Space Prediction')
ax.set_xlabel('Predicted Label')
ax.set_ylabel('True Label')

# 8. Tight Layout and Save
plt.tight_layout()

output_dir = '/mnt/data/oe215/env/guna/tabular_image_inference/outputs/evaluations/confusion_matrix'
os.makedirs(output_dir, exist_ok=True)

save_path = os.path.join(output_dir, 'conf_mat_hipped_AND.png')
plt.savefig(save_path)

# 9. Close the figure AFTER saving
plt.close(fig)

print(f"\n✅ Confusion matrix plot saved successfully at: {save_path}")


✅ Accuracy: 0.9838

🧮 Confusion Matrix (Numbers):
[[290   6]
 [  0  74]]

📄 Classification Report:
              precision    recall  f1-score   support

    Not Used       1.00      0.98      0.99       296
        Used       0.93      1.00      0.96        74

    accuracy                           0.98       370
   macro avg       0.96      0.99      0.98       370
weighted avg       0.98      0.98      0.98       370


✅ Confusion matrix plot saved successfully at: /mnt/data/oe215/env/guna/tabular_image_inference/outputs/evaluations/confusion_matrix/conf_mat_hipped_AND.png


  plt.tight_layout()
  plt.savefig(save_path)


## OR Case

### Single Inference Testing

In [13]:
import pandas as pd
import torch

# 1. Load the ground-truth CSV
df = pd.read_csv('/mnt/data/oe215/env/guna/tabular_image_inference/data/tabular_data/hipped_table/attic_ground_truths_hipped_3200.csv')

# 2. Select the row at position 28 (not the first row) as the single test case
row = df.iloc[28]

# 3. Create the prompt
def create_improved_prompt(row):
    """Build the OR-rule Yes/No prompt for one building.

    `row` is indexed by the keys 'gid', 'slope_max' and 'roof_height_max'.
    Returns the prompt as a single string: attribute list, decision rule,
    and answer-format instruction, separated by blank lines.
    """
    attribute_section = "\n".join([
        "Given the following building attributes:",
        f"- Building ID: {row['gid']}",
        f"- Maximum roof slope: {row['slope_max']} degrees",
        f"- Maximum roof height: {row['roof_height_max']} meters",
    ])
    rule_section = (
        "If the maximum roof slope is greater than or equal to 30 degrees OR the maximum roof height is greater than or equal to 4 meters, "
        "predict that the attic is used as a living space."
    )
    format_section = "**At the beginning of your response, output only 'Yes' or 'No' on a new line. Do not repeat the input. Do not explain.**"
    return "\n\n".join([attribute_section, rule_section, format_section])


import re

# Build and show the prompt for the selected row.
prompt = create_improved_prompt(row)

print("Generated Prompt:\n")
print(prompt)

# 4. Perform inference
# NOTE(review): the prompt is tokenized as raw text, without the processor's chat
# template — confirm this is intended for an Instruct checkpoint.
inputs = processor(text=prompt, return_tensors="pt").to(model.device)

with torch.no_grad():
    outputs = model.generate(**inputs, max_new_tokens=10)

# 5. Decode the whole sequence (prompt + completion) for debugging only
prediction = processor.batch_decode(outputs, skip_special_tokens=True)[0]
prediction = prediction.strip()

print("\nFull Model Output:\n")
print(prediction)

# 6. Parse the answer from the newly generated tokens only.
# The decoded full sequence echoes the prompt, and the prompt itself contains
# "Do not", so a substring test for " no" on the full text always matches and the
# -1 branch could never be reached. Drop the prompt tokens before parsing.
prompt_len = inputs["input_ids"].shape[1]
completion = processor.batch_decode(outputs[:, prompt_len:], skip_special_tokens=True)[0]
final_text = completion.strip().lower()

# First whole-word "yes"/"no" in the completion decides the label; -1 means neither was found.
answer = re.search(r"\b(yes|no)\b", final_text)
if answer is None:
    pred_value = -1
elif answer.group(1) == "yes":
    pred_value = 1
else:
    pred_value = 0

print("Model final prediction:", pred_value)





Generated Prompt:

Given the following building attributes:
- Building ID: 3585384.0
- Maximum roof slope: 21.206023565427174 degrees
- Maximum roof height: 5.239000000000004 meters

If the maximum roof slope is greater than or equal to 30 degrees OR the maximum roof height is greater than or equal to 4 meters, predict that the attic is used as a living space.

**At the beginning of your response, output only 'Yes' or 'No' on a new line. Do not repeat the input. Do not explain.**

Full Model Output:

Given the following building attributes:
- Building ID: 3585384.0
- Maximum roof slope: 21.206023565427174 degrees
- Maximum roof height: 5.239000000000004 meters

If the maximum roof slope is greater than or equal to 30 degrees OR the maximum roof height is greater than or equal to 4 meters, predict that the attic is used as a living space.

**At the beginning of your response, output only 'Yes' or 'No' on a new line. Do not repeat the input. Do not explain.** No

Based on the given attri

### Batch Inference Testing

In [11]:
import pandas as pd
import torch
from tqdm import tqdm  # for progress bar

# 1. Read the ground-truth CSV that the batch run iterates over
ground_truth_csv = '/mnt/data/oe215/env/guna/tabular_image_inference/data/tabular_data/hipped_table/attic_ground_truths_hipped_3200.csv'
df = pd.read_csv(ground_truth_csv)

# 2. Create the improved prompt function
def create_improved_prompt(row):
    """Build the OR-rule Yes/No prompt for one building.

    `row` is indexed by the keys 'gid', 'slope_max' and 'roof_height_max'.
    Returns the prompt as a single string: attribute list, decision rule,
    and answer-format instruction, separated by blank lines.
    """
    attribute_section = "\n".join([
        "Given the following building attributes:",
        f"- Building ID: {row['gid']}",
        f"- Maximum roof slope: {row['slope_max']} degrees",
        f"- Maximum roof height: {row['roof_height_max']} meters",
    ])
    rule_section = (
        "If the maximum roof slope is greater than or equal to 30 degrees OR the maximum roof height is greater than or equal to 4 meters, "
        "predict that the attic is used as a living space."
    )
    format_section = "**At the beginning of your response, output only 'Yes' or 'No' on a new line. Do not repeat the input. Do not explain.**"
    return "\n\n".join([attribute_section, rule_section, format_section])

# 3. Function to perform prediction for a given prompt
def predict_attic_use(prompt):
    """Run the model on `prompt` and map its Yes/No answer to a label.

    Uses the notebook-level `processor` and `model`.

    Returns:
        1 if the completion contains the word "yes" first,
        0 if it contains the word "no" first,
        -1 if neither word appears in the completion.
    """
    import re

    # NOTE(review): the prompt is tokenized as raw text, without the processor's chat
    # template — confirm this is intended for an Instruct checkpoint.
    inputs = processor(text=prompt, return_tensors="pt").to(model.device)

    with torch.no_grad():
        outputs = model.generate(**inputs, max_new_tokens=10)

    # The generated sequence starts with the prompt tokens. The prompt contains
    # "Do not", so searching the full decoded text for " no" always matched and the
    # -1 branch was unreachable. Decode and parse only the newly generated tokens.
    prompt_len = inputs["input_ids"].shape[1]
    completion = processor.batch_decode(outputs[:, prompt_len:], skip_special_tokens=True)[0]

    # First whole-word "yes"/"no" decides the label.
    answer = re.search(r"\b(yes|no)\b", completion.strip().lower())
    if answer is None:
        return -1  # unexpected case
    return 1 if answer.group(1) == "yes" else 0

# 4. Build one prompt per row and collect the model's label for each
predictions = [
    predict_attic_use(create_improved_prompt(record))
    for _, record in tqdm(df.iterrows(), total=len(df), desc="Running inference on all rows")
]

# 5. Store the labels as a new column
df['pred_OR'] = predictions

# 6. Write the augmented table, creating the output folder if it does not exist
output_dir = '/mnt/data/oe215/env/guna/tabular_image_inference/outputs/predictions'
os.makedirs(output_dir, exist_ok=True)
output_path = os.path.join(output_dir, 'hipped_predictions_OR.csv')
df.to_csv(output_path, index=False)

print(f"\n✅ All predictions saved successfully to: {output_path}")


Running inference on all rows: 100%|██████████| 370/370 [00:41<00:00,  8.87it/s]


✅ All predictions saved successfully to: /mnt/data/oe215/env/guna/tabular_image_inference/outputs/predictions/hipped_predictions_OR.csv





### Evaluation

In [12]:
import pandas as pd
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import matplotlib.pyplot as plt
import seaborn as sns

# 1. Load the predictions CSV
df = pd.read_csv('/mnt/data/oe215/env/guna/tabular_image_inference/outputs/predictions/hipped_predictions_OR.csv')

# 2. Define ground truth and model predictions
y_true = df['GT_OR']
y_pred = df['pred_OR']

# The inference step writes -1 when the answer could not be parsed. Pin the two real
# classes so such rows cannot turn the matrix into 3x3 or break the two target names.
class_labels = [0, 1]
n_unparsed = int((~y_pred.isin(class_labels)).sum())
if n_unparsed:
    print(f"\nWarning: {n_unparsed} prediction(s) are not 0/1; they count as errors in accuracy.")

# 3. Calculate Accuracy
accuracy = accuracy_score(y_true, y_pred)
print(f"\n✅ Accuracy: {accuracy:.4f}")

# 4. Show Confusion Matrix
cm = confusion_matrix(y_true, y_pred, labels=class_labels)
print("\n🧮 Confusion Matrix (Numbers):")
print(cm)

# 5. Classification Report
report = classification_report(
    y_true, y_pred, labels=class_labels, target_names=["Not Used", "Used"]
)
print("\n📄 Classification Report:")
print(report)

# 6. Create figure
fig, ax = plt.subplots(figsize=(6, 5))

# 7. Plot Confusion Matrix
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=["Predicted Not Used", "Predicted Used"],
            yticklabels=["Actual Not Used", "Actual Used"],
            ax=ax)
# Plain-text title: the emoji previously used here is the likely source of the
# warnings recorded for tight_layout()/savefig() (glyph not in the plotting font).
ax.set_title('Confusion Matrix for Attic Living Space Prediction')
ax.set_xlabel('Predicted Label')
ax.set_ylabel('True Label')

# 8. Tight Layout and Save
plt.tight_layout()

output_dir = '/mnt/data/oe215/env/guna/tabular_image_inference/outputs/evaluations/confusion_matrix'
os.makedirs(output_dir, exist_ok=True)

save_path = os.path.join(output_dir, 'conf_mat_hipped_OR.png')
plt.savefig(save_path)

# 9. Close the figure AFTER saving
plt.close(fig)

print(f"\n✅ Confusion matrix plot saved successfully at: {save_path}")


✅ Accuracy: 0.9405

🧮 Confusion Matrix (Numbers):
[[227   3]
 [ 19 121]]

📄 Classification Report:
              precision    recall  f1-score   support

    Not Used       0.92      0.99      0.95       230
        Used       0.98      0.86      0.92       140

    accuracy                           0.94       370
   macro avg       0.95      0.93      0.94       370
weighted avg       0.94      0.94      0.94       370


✅ Confusion matrix plot saved successfully at: /mnt/data/oe215/env/guna/tabular_image_inference/outputs/evaluations/confusion_matrix/conf_mat_hipped_OR.png


  plt.tight_layout()
  plt.savefig(save_path)


## AND Case - Combined Image and Table Only Based Ground Truth

### Batch Inference

In [4]:
import pandas as pd
import torch
from tqdm import tqdm  # for progress bar

# 1. Read the GT_IMG ground-truth CSV that the batch run iterates over
ground_truth_csv = '/mnt/data/oe215/env/guna/tabular_image_inference/data/tabular_data/hipped_table/attic_ground_truths_hipped_3200_GT_IMG.csv'
df = pd.read_csv(ground_truth_csv)

# 2. Create the improved prompt function
def create_improved_prompt(row):
    """Build the AND-rule Yes/No prompt for one building.

    `row` is indexed by the keys 'gid', 'slope_max' and 'roof_height_max'.
    Returns the prompt as a single string: attribute list, decision rule,
    and answer-format instruction, separated by blank lines.
    """
    attribute_section = "\n".join([
        "Given the following building attributes:",
        f"- Building ID: {row['gid']}",
        f"- Maximum roof slope: {row['slope_max']} degrees",
        f"- Maximum roof height: {row['roof_height_max']} meters",
    ])
    rule_section = (
        "If the maximum roof slope is greater than or equal to 30 degrees AND the maximum roof height is greater than or equal to 4 meters, "
        "predict that the attic is used as a living space."
    )
    format_section = "**At the beginning of your response, output only 'Yes' or 'No' on a new line. Do not repeat the input. Do not explain.**"
    return "\n\n".join([attribute_section, rule_section, format_section])

# 3. Function to perform prediction for a given prompt
def predict_attic_use(prompt):
    """Run the model on `prompt` and map its Yes/No answer to a label.

    Uses the notebook-level `processor` and `model`.

    Returns:
        1 if the completion contains the word "yes" first,
        0 if it contains the word "no" first,
        -1 if neither word appears in the completion.
    """
    import re

    # NOTE(review): the prompt is tokenized as raw text, without the processor's chat
    # template — confirm this is intended for an Instruct checkpoint.
    inputs = processor(text=prompt, return_tensors="pt").to(model.device)

    with torch.no_grad():
        outputs = model.generate(**inputs, max_new_tokens=10)

    # The generated sequence starts with the prompt tokens. The prompt contains
    # "Do not", so searching the full decoded text for " no" always matched and the
    # -1 branch was unreachable. Decode and parse only the newly generated tokens.
    prompt_len = inputs["input_ids"].shape[1]
    completion = processor.batch_decode(outputs[:, prompt_len:], skip_special_tokens=True)[0]

    # First whole-word "yes"/"no" decides the label.
    answer = re.search(r"\b(yes|no)\b", completion.strip().lower())
    if answer is None:
        return -1  # unexpected case
    return 1 if answer.group(1) == "yes" else 0

# 4. Build one prompt per row and collect the model's label for each
predictions = [
    predict_attic_use(create_improved_prompt(record))
    for _, record in tqdm(df.iterrows(), total=len(df), desc="Running inference on all rows")
]

# 5. Store the labels as a new column
df['pred_AND'] = predictions

# 6. Write the augmented table, creating the output folder if it does not exist
output_dir = '/mnt/data/oe215/env/guna/tabular_image_inference/outputs/predictions'
os.makedirs(output_dir, exist_ok=True)
output_path = os.path.join(output_dir, 'hipped_predictions_table_real_AND.csv')
df.to_csv(output_path, index=False)

print(f"\n✅ All predictions saved successfully to: {output_path}")


Running inference on all rows: 100%|██████████| 370/370 [00:25<00:00, 14.68it/s]


✅ All predictions saved successfully to: /mnt/data/oe215/env/guna/tabular_image_inference/outputs/predictions/hipped_predictions_table_real_AND.csv





### Evaluation

In [9]:
import pandas as pd
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import matplotlib.pyplot as plt
import seaborn as sns

# 1. Load the predictions CSV
df = pd.read_csv('/mnt/data/oe215/env/guna/tabular_image_inference/outputs/predictions/hipped_predictions_table_real_AND.csv')

# 2. Define ground truth and model predictions
y_true = df['GT_IMG_AND']
y_pred = df['pred_AND']

# The inference step writes -1 when the answer could not be parsed. Pin the two real
# classes so such rows cannot turn the matrix into 3x3 or break the two target names.
class_labels = [0, 1]
n_unparsed = int((~y_pred.isin(class_labels)).sum())
if n_unparsed:
    print(f"\nWarning: {n_unparsed} prediction(s) are not 0/1; they count as errors in accuracy.")

# 3. Calculate Accuracy
accuracy = accuracy_score(y_true, y_pred)
print(f"\n✅ Accuracy: {accuracy:.4f}")

# 4. Show Confusion Matrix
cm = confusion_matrix(y_true, y_pred, labels=class_labels)
print("\n🧮 Confusion Matrix (Numbers):")
print(cm)

# 5. Classification Report
report = classification_report(
    y_true, y_pred, labels=class_labels, target_names=["Not Used", "Used"]
)
print("\n📄 Classification Report:")
print(report)

# 6. Create figure
fig, ax = plt.subplots(figsize=(6, 5))

# 7. Plot Confusion Matrix
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=["Predicted Not Used", "Predicted Used"],
            yticklabels=["Actual Not Used", "Actual Used"],
            ax=ax)
# The saved figure had no title; use a plain-text one (no emoji) so it stands alone.
ax.set_title('Confusion Matrix for Attic Living Space Prediction')
ax.set_xlabel('Predicted Label')
ax.set_ylabel('True Label')

# 8. Tight Layout and Save
plt.tight_layout()

output_dir = '/mnt/data/oe215/env/guna/tabular_image_inference/outputs/evaluations/confusion_matrix'
os.makedirs(output_dir, exist_ok=True)

save_path = os.path.join(output_dir, 'conf_mat_hipped_table_real_GT_AND.png')
plt.savefig(save_path)

# 9. Close the figure AFTER saving
plt.close(fig)

print(f"\n✅ Confusion matrix plot saved successfully at: {save_path}")


✅ Accuracy: 0.7432

🧮 Confusion Matrix (Numbers):
[[197   1]
 [ 94  78]]

📄 Classification Report:
              precision    recall  f1-score   support

    Not Used       0.68      0.99      0.81       198
        Used       0.99      0.45      0.62       172

    accuracy                           0.74       370
   macro avg       0.83      0.72      0.71       370
weighted avg       0.82      0.74      0.72       370


✅ Confusion matrix plot saved successfully at: /mnt/data/oe215/env/guna/tabular_image_inference/outputs/evaluations/confusion_matrix/conf_mat_hipped_table_real_GT_AND.png


## OR Case - Combined Image and Table Only Based Ground Truth

### Batch Inference

In [6]:
import pandas as pd
import torch
from tqdm import tqdm  # for progress bar

# 1. Read the GT_IMG ground-truth CSV that the batch run iterates over
ground_truth_csv = '/mnt/data/oe215/env/guna/tabular_image_inference/data/tabular_data/hipped_table/attic_ground_truths_hipped_3200_GT_IMG.csv'
df = pd.read_csv(ground_truth_csv)

# 2. Create the improved prompt function
def create_improved_prompt(row):
    """Build the OR-rule Yes/No prompt for one building.

    `row` is indexed by the keys 'gid', 'slope_max' and 'roof_height_max'.
    Returns the prompt as a single string: attribute list, decision rule,
    and answer-format instruction, separated by blank lines.
    """
    attribute_section = "\n".join([
        "Given the following building attributes:",
        f"- Building ID: {row['gid']}",
        f"- Maximum roof slope: {row['slope_max']} degrees",
        f"- Maximum roof height: {row['roof_height_max']} meters",
    ])
    rule_section = (
        "If the maximum roof slope is greater than or equal to 30 degrees OR the maximum roof height is greater than or equal to 4 meters, "
        "predict that the attic is used as a living space."
    )
    format_section = "**At the beginning of your response, output only 'Yes' or 'No' on a new line. Do not repeat the input. Do not explain.**"
    return "\n\n".join([attribute_section, rule_section, format_section])

# 3. Function to perform prediction for a given prompt
def predict_attic_use(prompt):
    """Run the model on `prompt` and map its Yes/No answer to a label.

    Uses the notebook-level `processor` and `model`.

    Returns:
        1 if the completion contains the word "yes" first,
        0 if it contains the word "no" first,
        -1 if neither word appears in the completion.
    """
    import re

    # NOTE(review): the prompt is tokenized as raw text, without the processor's chat
    # template — confirm this is intended for an Instruct checkpoint.
    inputs = processor(text=prompt, return_tensors="pt").to(model.device)

    with torch.no_grad():
        outputs = model.generate(**inputs, max_new_tokens=10)

    # The generated sequence starts with the prompt tokens. The prompt contains
    # "Do not", so searching the full decoded text for " no" always matched and the
    # -1 branch was unreachable. Decode and parse only the newly generated tokens.
    prompt_len = inputs["input_ids"].shape[1]
    completion = processor.batch_decode(outputs[:, prompt_len:], skip_special_tokens=True)[0]

    # First whole-word "yes"/"no" decides the label.
    answer = re.search(r"\b(yes|no)\b", completion.strip().lower())
    if answer is None:
        return -1  # unexpected case
    return 1 if answer.group(1) == "yes" else 0

# 4. Build one prompt per row and collect the model's label for each
predictions = [
    predict_attic_use(create_improved_prompt(record))
    for _, record in tqdm(df.iterrows(), total=len(df), desc="Running inference on all rows")
]

# 5. Store the labels as a new column
df['pred_OR'] = predictions

# 6. Write the augmented table, creating the output folder if it does not exist
output_dir = '/mnt/data/oe215/env/guna/tabular_image_inference/outputs/predictions'
os.makedirs(output_dir, exist_ok=True)
output_path = os.path.join(output_dir, 'hipped_predictions_table_real_OR.csv')
df.to_csv(output_path, index=False)

print(f"\n✅ All predictions saved successfully to: {output_path}")


Running inference on all rows: 100%|██████████| 370/370 [00:41<00:00,  8.95it/s]


✅ All predictions saved successfully to: /mnt/data/oe215/env/guna/tabular_image_inference/outputs/predictions/hipped_predictions_table_real_OR.csv





### Evaluation

In [8]:
import pandas as pd
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import matplotlib.pyplot as plt
import seaborn as sns

# 1. Load the predictions CSV
df = pd.read_csv('/mnt/data/oe215/env/guna/tabular_image_inference/outputs/predictions/hipped_predictions_table_real_OR.csv')

# 2. Define ground truth and model predictions
y_true = df['GT_IMG_OR']
y_pred = df['pred_OR']

# The inference step writes -1 when the answer could not be parsed. Pin the two real
# classes so such rows cannot turn the matrix into 3x3 or break the two target names.
class_labels = [0, 1]
n_unparsed = int((~y_pred.isin(class_labels)).sum())
if n_unparsed:
    print(f"\nWarning: {n_unparsed} prediction(s) are not 0/1; they count as errors in accuracy.")

# 3. Calculate Accuracy
accuracy = accuracy_score(y_true, y_pred)
print(f"\n✅ Accuracy: {accuracy:.4f}")

# 4. Show Confusion Matrix
cm = confusion_matrix(y_true, y_pred, labels=class_labels)
print("\n🧮 Confusion Matrix (Numbers):")
print(cm)

# 5. Classification Report
report = classification_report(
    y_true, y_pred, labels=class_labels, target_names=["Not Used", "Used"]
)
print("\n📄 Classification Report:")
print(report)

# 6. Create figure
fig, ax = plt.subplots(figsize=(6, 5))

# 7. Plot Confusion Matrix
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=["Predicted Not Used", "Predicted Used"],
            yticklabels=["Actual Not Used", "Actual Used"],
            ax=ax)
# The saved figure had no title; use a plain-text one (no emoji) so it stands alone.
ax.set_title('Confusion Matrix for Attic Living Space Prediction')
ax.set_xlabel('Predicted Label')
ax.set_ylabel('True Label')

# 8. Tight Layout and Save
plt.tight_layout()

output_dir = '/mnt/data/oe215/env/guna/tabular_image_inference/outputs/evaluations/confusion_matrix'
os.makedirs(output_dir, exist_ok=True)

save_path = os.path.join(output_dir, 'conf_mat_hipped_table_real_GT_OR.png')
plt.savefig(save_path)

# 9. Close the figure AFTER saving
plt.close(fig)

print(f"\n✅ Confusion matrix plot saved successfully at: {save_path}")


✅ Accuracy: 0.7973

🧮 Confusion Matrix (Numbers):
[[171   1]
 [ 74 124]]

📄 Classification Report:
              precision    recall  f1-score   support

    Not Used       0.70      0.99      0.82       172
        Used       0.99      0.63      0.77       198

    accuracy                           0.80       370
   macro avg       0.84      0.81      0.79       370
weighted avg       0.86      0.80      0.79       370


✅ Confusion matrix plot saved successfully at: /mnt/data/oe215/env/guna/tabular_image_inference/outputs/evaluations/confusion_matrix/conf_mat_hipped_table_real_GT_OR.png
