In [None]:
# Switch into the cloned repository and launch stage-1 binary training as a module,
# passing the YAML config, the dataset root and the run output directory.
# NOTE(review): "{DATA_ROOT}" is interpolated by IPython from the kernel namespace; in the
# cells visible here DATA_ROOT is only assigned in the later "CELL 19" cell — confirm it is
# defined before this cell runs on a fresh kernel.
%cd /content/ai-competition-baselines
!python -m src.stage1_binary.train \
  --config configs/stage1_binary.yaml \
  --data_root "{DATA_ROOT}" \
  --out_dir runs/stage1_binary

In [None]:
# CELL 4: Convert the Roboflow folder-per-class export into images/<split>/ folders plus one CSV per split
import os
import shutil
import pandas as pd

# Source of the conversion: the location reported by the `dataset` object created
# earlier in the notebook (presumably the Roboflow download — confirm).
roboflow_root = dataset.location
# Destination of the conversion: images/<split>/ folders and <split>.csv files live here.
output_root = "/content/drive/MyDrive/MADE_Unmade/data_stage1"

# Pre-create one image folder per output split name ("valid" is stored as "val").
for split in ("train", "val", "test"):
    split_dir = os.path.join(output_root, "images", split)
    os.makedirs(split_dir, exist_ok=True)

def convert_split(split_name, label_map=None, src_root=None, dst_root=None):
    """Flatten one folder-per-class split into a single image folder plus a label CSV.

    Images are read from ``<src_root>/<split_name>/<class_name>/`` and copied into
    ``<dst_root>/images/<target_split>/``; ``<dst_root>/<target_split>.csv`` is written
    with the columns ``image_id`` (file name) and ``label`` (integer from ``label_map``).
    The split name ``"valid"`` is stored as ``"val"``; other names are kept as-is.

    Args:
        split_name: Name of the split folder under the source root.
        label_map: Mapping of class folder name -> integer label. Defaults to
            ``{"Made": 0, "Unmade": 1}``.
        src_root: Root of the folder-per-class export. Defaults to the notebook-level
            ``roboflow_root``.
        dst_root: Root of the converted dataset. Defaults to the notebook-level
            ``output_root``.

    Returns:
        pandas.DataFrame with one row per copied image (columns ``image_id``, ``label``).
        The columns are present even when no image was found.
    """
    # Build the default per call instead of sharing one mutable dict across calls.
    if label_map is None:
        label_map = {"Made": 0, "Unmade": 1}
    # The notebook globals are only looked up when the caller does not pass explicit roots.
    src_root = roboflow_root if src_root is None else src_root
    dst_root = output_root if dst_root is None else dst_root

    target_split = "val" if split_name == "valid" else split_name
    dst_dir = os.path.join(dst_root, "images", target_split)
    # Do not rely on another cell having created the destination folder.
    os.makedirs(dst_dir, exist_ok=True)

    rows = []
    copied_from = {}  # file name -> class it was first copied from
    for class_name, label in label_map.items():
        src_dir = os.path.join(src_root, split_name, class_name)
        if not os.path.isdir(src_dir):
            print(f"Warning: {src_dir} not found, skipping...")
            continue
        # Sorted so the CSV row order does not depend on the file system's listing order.
        img_files = sorted(
            f for f in os.listdir(src_dir)
            if f.lower().endswith((".jpg", ".jpeg", ".png"))
        )
        copied = 0
        for img_file in img_files:
            # All classes share one destination folder: a repeated file name would overwrite
            # the earlier image and leave two rows with conflicting labels for one image_id.
            if img_file in copied_from:
                print(
                    f"Warning: {img_file} in {class_name} duplicates a file already "
                    f"copied from {copied_from[img_file]}, skipping..."
                )
                continue
            shutil.copy2(os.path.join(src_dir, img_file), os.path.join(dst_dir, img_file))
            copied_from[img_file] = class_name
            rows.append({"image_id": img_file, "label": label})
            copied += 1
        print(f"  {class_name}: {copied} images")

    # Explicit columns keep the CSV header (and the frame schema) intact for an empty split.
    df = pd.DataFrame(rows, columns=["image_id", "label"])
    csv_name = f"{target_split}.csv"
    df.to_csv(os.path.join(dst_root, csv_name), index=False)
    print(f"Created {csv_name} with {len(df)} images\n")
    return df

# Convert the three source splits in order; the source validation folder is
# named "valid" and convert_split stores it as "val".
converted = []
for roboflow_split in ("train", "valid", "test"):
    print(f"Converting {roboflow_split} split...")
    converted.append(convert_split(roboflow_split))
train_df, val_df, test_df = converted

# Per-split image counts, padded so the numbers line up.
print("\nDataset Summary:")
for heading, frame in zip(("Train:", "Val:  ", "Test: "), converted):
    print(f"  {heading} {len(frame)} images")

In [None]:
# CELL 19: Run Inference on Test Set
# Runs the stage-1 inference module over the test image folder and previews the CSV it is
# asked to write. All paths are absolute Google Drive locations; the "{NAME}" placeholders in
# the `!` lines are filled in by IPython from the Python variables assigned below.
# DATA_ROOT is the same directory that CELL 4 uses as `output_root`.
DATA_ROOT = "/content/drive/MyDrive/MADE_Unmade/data_stage1"
TEST_DIR = f"{DATA_ROOT}/images/test"
CKPT_PATH = "/content/drive/MyDrive/project/runs/stage1_binary/best_stage1.pt"
OUT_PATH = "/content/drive/MyDrive/project/submissions/stage1_test.csv"
CONFIG_PATH = "/content/drive/MyDrive/project/configs/stage1_binary.yaml"

# NOTE(review): `-m src.stage1_binary.infer` is resolved relative to the current working
# directory — presumably the repository directory set by an earlier `%cd`; confirm.
!python -m src.stage1_binary.infer \
    --config "{CONFIG_PATH}" \
    --data_root "{DATA_ROOT}" \
    --ckpt "{CKPT_PATH}" \
    --test_dir "{TEST_DIR}" \
    --out "{OUT_PATH}"

print("\nTest predictions (first 10 rows):")
# `head -n 10` prints the first 10 lines of the file; if the CSV has a header line,
# that is the header plus 9 prediction rows.
!head -n 10 "{OUT_PATH}"

In [None]:
# CELL 20: Run Inference on Validation Set
# Same inference command as CELL 19, pointed at the validation images so the predictions
# can be scored against val.csv afterwards.
# Relies on DATA_ROOT from CELL 19 still being defined in the kernel.
VAL_DIR = f"{DATA_ROOT}/images/val"
# CKPT_PATH and CONFIG_PATH are re-assigned to the same values as in CELL 19;
# OUT_PATH is changed so the test-set predictions are not overwritten.
CKPT_PATH = "/content/drive/MyDrive/project/runs/stage1_binary/best_stage1.pt"
OUT_PATH = "/content/drive/MyDrive/project/submissions/stage1_val.csv"
CONFIG_PATH = "/content/drive/MyDrive/project/configs/stage1_binary.yaml"

# The validation folder is passed through the script's --test_dir option.
!python -m src.stage1_binary.infer \
    --config "{CONFIG_PATH}" \
    --data_root "{DATA_ROOT}" \
    --ckpt "{CKPT_PATH}" \
    --test_dir "{VAL_DIR}" \
    --out "{OUT_PATH}"

print("\nValidation predictions (first 10 rows):")
# `head -n 10` prints the first 10 lines of the file; if the CSV has a header line,
# that is the header plus 9 prediction rows.
!head -n 10 "{OUT_PATH}"

In [None]:
# CELL 21: Evaluate on Validation Set
# Scores the validation predictions against the ground-truth CSV with the stage-1 eval module.
# GT_PATH is the val.csv that CELL 4 writes under its output root; PRED_PATH is the
# file that CELL 20 passes as --out. Both are spelled out in full here rather than
# derived from DATA_ROOT / OUT_PATH, so keep them in sync if those cells change.
GT_PATH = "/content/drive/MyDrive/MADE_Unmade/data_stage1/val.csv"
PRED_PATH = "/content/drive/MyDrive/project/submissions/stage1_val.csv"

!python -m src.stage1_binary.eval \
    --gt "{GT_PATH}" \
    --pred "{PRED_PATH}"

In [None]:
# CELL 22: View Training Summary
import json

TRAIN_SUMMARY_PATH = "/content/drive/MyDrive/project/runs/stage1_binary/train_summary.json"

# Load the JSON summary; the code below reads its 'history' list and 'best_primary' value.
with open(TRAIN_SUMMARY_PATH, 'r') as summary_file:
    summary = json.load(summary_file)

separator = "-" * 80
# Per-epoch metrics, in the order they are printed.
metric_keys = ("train_loss", "val_loss", "macro_f1", "accuracy")

print("Training History:")
print(separator)
for record in summary['history']:
    epoch = record['epoch']
    metrics = ", ".join(f"{key}={record[key]:.4f}" for key in metric_keys)
    print(f"Epoch {epoch}: {metrics}")
print(separator)
print(f"Best macro_f1: {summary['best_primary']:.4f}")

In [None]:
# CELL 23: Visualize Sample Predictions
import pandas as pd
import matplotlib.pyplot as plt
from PIL import Image
import random

# Inputs: the ground-truth CSV (image_id, label) and the validation prediction CSV
# (image_id, pred_label, optionally confidence). Relies on DATA_ROOT from CELL 19.
VAL_CSV = f"{DATA_ROOT}/val.csv"
PRED_CSV = "/content/drive/MyDrive/project/submissions/stage1_val.csv"
SAVE_PATH = "/content/drive/MyDrive/project/sample_predictions.png"

pred_df = pd.read_csv(PRED_CSV)
val_df = pd.read_csv(VAL_CSV)

# Build the image_id -> ground-truth label lookup once instead of filtering the frame for
# every sample. drop_duplicates keeps the first row per image_id, i.e. the same row the
# previous `.values[0]` lookup selected.
gt_rows = val_df.drop_duplicates(subset='image_id')
gt_by_image = dict(zip(gt_rows['image_id'], gt_rows['label']))

# Fixed seed so the same (up to six) predictions are shown on every run.
sample_preds = pred_df.sample(n=min(6, len(pred_df)), random_state=42)

fig, axes = plt.subplots(2, 3, figsize=(15, 10))
axes = axes.ravel()

label_names = {0: 'Made', 1: 'Unmade'}

for idx, (_, row) in enumerate(sample_preds.iterrows()):
    ax = axes[idx]
    image_id = row['image_id']
    pred_label = row['pred_label']
    # Falls back to 1.0 when the prediction file has no 'confidence' column.
    confidence = row.get('confidence', 1.0)

    # Close the image file as soon as it has been handed to matplotlib.
    with Image.open(f"{DATA_ROOT}/images/val/{image_id}") as img:
        ax.imshow(img)

    # Unknown label values are shown verbatim rather than raising KeyError.
    pred_text = label_names.get(pred_label, str(pred_label))
    if image_id in gt_by_image:
        gt_label = gt_by_image[image_id]
        gt_text = label_names.get(gt_label, str(gt_label))
        color = 'green' if gt_label == pred_label else 'red'
    else:
        # A prediction without a ground-truth row is flagged instead of crashing the cell.
        gt_text = 'n/a'
        color = 'gray'

    title = f"GT: {gt_text} | Pred: {pred_text}\nConf: {confidence:.2f}"
    ax.set_title(title, color=color, fontweight='bold')
    ax.axis('off')

# Hide the grid slots that stay empty when fewer than six predictions are available.
for unused_ax in axes[len(sample_preds):]:
    unused_ax.axis('off')

plt.tight_layout()
plt.savefig(SAVE_PATH, dpi=150, bbox_inches='tight')
plt.show()