# Roman Numeral Recognition - Complete Solution
This notebook performs data cleaning, augmentation, and training in one go.

## Instructions:
1. Upload this notebook to Google Colab
2. Runtime → Change runtime type → Select GPU
3. Upload your dataset.zip file when prompted
4. Run all cells
5. Download the trained model weights

In [None]:
# Upload and extract dataset
from google.colab import files
import zipfile
import os
import shutil

print("Please upload your dataset folder as a zip file (containing train/ and val/ folders)")
print("If you don't have it zipped, you can zip the dataset folder first.\n")

uploaded = files.upload()

# Extract every uploaded archive into the current working directory
for filename in uploaded.keys():
    print(f"Extracting {filename}...")
    with zipfile.ZipFile(filename, 'r') as zip_ref:
        zip_ref.extractall('.')

# Later cells read dataset/train and dataset/val, so make sure that layout exists
if os.path.exists('dataset'):
    print("✓ Dataset extracted successfully!")
else:
    print("Creating dataset directory structure...")
    # Handle different zip structures: the archive may hold train/ and val/
    # at its top level instead of inside a dataset/ folder.
    os.makedirs('dataset', exist_ok=True)
    for split in ('train', 'val'):
        if os.path.isdir(split):
            shutil.move(split, os.path.join('dataset', split))

    # Report the problem here rather than failing later with an obscure path error
    missing = [split for split in ('train', 'val')
               if not os.path.isdir(os.path.join('dataset', split))]
    if missing:
        print(f"Warning: could not find {', '.join(missing)} folder(s) in the uploaded archive")

In [None]:
# Import required libraries
import shutil
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image, ImageEnhance, ImageFilter
import random
import tensorflow as tf
from tensorflow import keras

# Fix the seeds of the three random number generators used by this notebook
tf.random.set_seed(123)
np.random.seed(42)
random.seed(42)

# Show which TensorFlow build and which GPU devices are in use
print("TensorFlow version:", tf.__version__)
gpu_devices = tf.config.list_physical_devices('GPU')
print("GPU Available:", gpu_devices)

## Step 1: Analyze Original Dataset

In [None]:
def count_images(base_path):
    """Return a dict mapping each class folder name to its number of entries.

    Class folders are the non-hidden sub-directories of ``base_path``,
    visited in sorted order. Entries whose name starts with a dot are not
    counted.
    """
    per_class = {}
    for entry in sorted(os.listdir(base_path)):
        if entry.startswith('.'):
            continue
        folder = os.path.join(base_path, entry)
        if not os.path.isdir(folder):
            continue
        per_class[entry] = sum(
            1 for item in os.listdir(folder) if not item.startswith('.')
        )
    return per_class

train_counts = count_images('dataset/train')
val_counts = count_images('dataset/val')

# Print per-class totals for both splits; the training summary ends with a blank line
for heading, split_counts, trailer in (
    ("Original Training set:", train_counts, "\n"),
    ("Original Validation set:", val_counts, ""),
):
    print(heading)
    for class_name, count in split_counts.items():
        print(f"  {class_name}: {count}")
    print(f"  Total: {sum(split_counts.values())}{trailer}")

# Visualize: one bar chart per split, side by side
fig, axes = plt.subplots(1, 2, figsize=(15, 5))
panels = (
    (axes[0], train_counts, 'steelblue', 'Original Training Set Distribution'),
    (axes[1], val_counts, 'coral', 'Validation Set Distribution'),
)
for ax, split_counts, bar_color, title in panels:
    ax.bar(split_counts.keys(), split_counts.values(), color=bar_color)
    ax.set_title(title, fontsize=14, fontweight='bold')
    ax.set_xlabel('Class')
    ax.set_ylabel('Number of Images')
    ax.tick_params(axis='x', rotation=45)
plt.tight_layout()
plt.show()

## Step 2: Detect and Remove Outliers

In [None]:
def get_image_features(img_path):
    """Return simple intensity statistics for one image.

    Args:
        img_path: Path of the image file to read.

    Returns:
        A list ``[mean, std, min, max]`` computed over the pixel values of
        the image after conversion to PIL mode ``'L'`` (grayscale).
    """
    # Image.open keeps the underlying file open; the context manager releases
    # it once the pixels have been copied into the array.
    with Image.open(img_path) as img:
        img_array = np.array(img.convert('L'))
    return [
        np.mean(img_array),
        np.std(img_array),
        np.min(img_array),
        np.max(img_array),
    ]

def find_outliers(base_path):
    """Flag images whose intensity statistics are unusual for their class.

    For every non-hidden class folder under ``base_path`` the features from
    ``get_image_features`` are collected, and an image is flagged when any
    feature lies more than 1.5 * IQR below the first or above the third
    quartile of that class.

    Args:
        base_path: Directory containing one sub-directory per class.

    Returns:
        A dict mapping class name to the list of flagged image paths. A class
        without any readable image maps to an empty list.
    """
    outliers_info = {}

    for class_name in sorted(os.listdir(base_path)):
        class_path = os.path.join(base_path, class_name)
        if not os.path.isdir(class_path) or class_name.startswith('.'):
            continue

        images = [f for f in os.listdir(class_path) if not f.startswith('.')]
        features = []
        image_paths = []

        for img_name in images:
            img_path = os.path.join(class_path, img_name)
            try:
                feat = get_image_features(img_path)
            except Exception as exc:
                # Unreadable files are still skipped, but no longer silently,
                # and KeyboardInterrupt is no longer swallowed.
                print(f"  Skipping unreadable file {img_path}: {exc}")
                continue
            features.append(feat)
            image_paths.append(img_path)

        if not features:
            # Quartiles are undefined without data; report no outliers
            # instead of failing on an empty feature array.
            outliers_info[class_name] = []
            print(f"Class {class_name}: Found 0 outliers out of {len(images)} images")
            continue

        features = np.array(features)
        Q1 = np.percentile(features, 25, axis=0)
        Q3 = np.percentile(features, 75, axis=0)
        IQR = Q3 - Q1

        # A row is an outlier when at least one of its features is outside the fences
        below = features < (Q1 - 1.5 * IQR)
        above = features > (Q3 + 1.5 * IQR)
        outlier_indices = np.where(np.any(below | above, axis=1))[0]

        outliers_info[class_name] = [image_paths[i] for i in outlier_indices]
        print(f"Class {class_name}: Found {len(outlier_indices)} outliers out of {len(images)} images")

    return outliers_info

# Run outlier detection on the training split and report the overall count
print("Detecting outliers...")
outliers = find_outliers('dataset/train')
total_outliers = sum(map(len, outliers.values()))
print(f"\nTotal outliers detected: {total_outliers}")

In [None]:
def create_cleaned_dataset(base_path, outliers_dict, output_path, removal_rate=0.3):
    """Copy a class-per-folder dataset, leaving out a random share of outliers.

    Args:
        base_path: Directory containing one sub-directory per class.
        outliers_dict: Maps class name to the flagged image paths, spelled as
            ``os.path.join(base_path, class_name, file_name)``.
        output_path: Directory that receives the copied class folders.
        removal_rate: Probability with which each flagged image is left out.

    Returns:
        ``output_path``.
    """
    os.makedirs(output_path, exist_ok=True)
    removed_count = 0
    kept_count = 0

    for class_name in sorted(os.listdir(base_path)):
        class_path = os.path.join(base_path, class_name)
        if not os.path.isdir(class_path) or class_name.startswith('.'):
            continue

        output_class_path = os.path.join(output_path, class_name)
        os.makedirs(output_class_path, exist_ok=True)

        outlier_set = set(outliers_dict.get(class_name, []))

        # Sorted so that a seeded `random` drops the same images no matter
        # in which order the filesystem lists the directory.
        for img_name in sorted(os.listdir(class_path)):
            if img_name.startswith('.'):
                continue
            img_path = os.path.join(class_path, img_name)

            if img_path in outlier_set and random.random() < removal_rate:
                # An earlier run may already have copied this image; delete
                # that copy so the output matches the current selection.
                stale_copy = os.path.join(output_class_path, img_name)
                if os.path.isfile(stale_copy) and not os.path.samefile(stale_copy, img_path):
                    os.remove(stale_copy)
                removed_count += 1
                continue

            shutil.copy(img_path, output_class_path)
            kept_count += 1

    print(f"Cleaned dataset: Kept {kept_count}, Removed {removed_count}")
    return output_path

# Build dataset_cleaned/train from the training split and the detected outliers
print("Creating cleaned dataset...")
cleaned_train = create_cleaned_dataset(
    base_path='dataset/train',
    outliers_dict=outliers,
    output_path='dataset_cleaned/train',
    removal_rate=0.3,
)

## Step 3: Balance Classes with Data Augmentation

In [None]:
def augment_image(img, aug_type):
    """Return a randomly perturbed version of ``img``.

    Args:
        img: PIL image to augment.
        aug_type: One of ``'rotate'``, ``'brightness'``, ``'contrast'``,
            ``'blur'`` or ``'shift'``.

    Returns:
        The augmented image. Any other ``aug_type`` returns ``img`` itself,
        unchanged.
    """
    # Areas uncovered by a rotation or shift are filled with white. A colour
    # name is used instead of the integer 255 because Pillow documents integer
    # colours for single-band modes only; the name is resolved for the mode
    # of `img`. NOTE(review): for mode 'L' this is still 255 -- confirm the
    # dataset's image modes.
    background = "white"

    if aug_type == 'rotate':
        return img.rotate(random.randint(-15, 15), fillcolor=background)
    elif aug_type == 'brightness':
        return ImageEnhance.Brightness(img).enhance(random.uniform(0.7, 1.3))
    elif aug_type == 'contrast':
        return ImageEnhance.Contrast(img).enhance(random.uniform(0.8, 1.2))
    elif aug_type == 'blur':
        return img.filter(ImageFilter.GaussianBlur(radius=random.uniform(0.5, 1.5)))
    elif aug_type == 'shift':
        # Translate by up to 3 pixels along each axis via an affine transform
        shift_x = random.randint(-3, 3)
        shift_y = random.randint(-3, 3)
        return img.transform(
            img.size, Image.AFFINE, (1, 0, shift_x, 0, 1, shift_y), fillcolor=background
        )
    return img

def balance_and_augment(cleaned_path, output_path, target_per_class=280):
    """Copy every class and top it up with augmented images.

    All images of each non-hidden class folder in ``cleaned_path`` are copied
    to ``output_path``. A class with fewer than ``target_per_class`` images
    gets the difference as augmented copies of randomly chosen originals,
    saved as ``aug_<index>_<original name>``. Larger classes are copied
    unchanged.

    Args:
        cleaned_path: Directory containing one sub-directory per class.
        output_path: Directory that receives the balanced class folders.
        target_per_class: Minimum number of images per class after augmentation.
    """
    os.makedirs(output_path, exist_ok=True)
    augmentation_types = ['rotate', 'brightness', 'contrast', 'blur', 'shift']

    for class_name in sorted(os.listdir(cleaned_path)):
        class_path = os.path.join(cleaned_path, class_name)
        if not os.path.isdir(class_path) or class_name.startswith('.'):
            continue

        output_class_path = os.path.join(output_path, class_name)
        os.makedirs(output_class_path, exist_ok=True)

        # Sorted so that a seeded random.choice picks the same files no
        # matter in which order the filesystem lists the directory.
        images = sorted(f for f in os.listdir(class_path) if not f.startswith('.'))
        current_count = len(images)

        # Copy originals
        for img_name in images:
            shutil.copy(os.path.join(class_path, img_name), output_class_path)

        # Augment to reach target; classes already at or above it need nothing
        needed = max(target_per_class - current_count, 0)
        if needed and not images:
            # There is nothing to augment from; random.choice would raise
            print(f"Class {class_name}: no source images, skipping augmentation")
            continue

        for i in range(needed):
            img_name = random.choice(images)
            # Close the source file once the augmented copy has been written
            with Image.open(os.path.join(class_path, img_name)) as img:
                aug_img = augment_image(img, random.choice(augmentation_types))
                aug_img.save(os.path.join(output_class_path, f"aug_{i}_{img_name}"))

        # Report the count actually reached, which exceeds the target for large classes
        print(f"Class {class_name}: {current_count} → {current_count + needed} (+{needed} augmented)")

# Bring every class of the cleaned training split up to 280 images
print("Balancing and augmenting...")
balance_and_augment(
    cleaned_path=cleaned_train,
    output_path='dataset_augmented/train',
    target_per_class=280,
)

In [None]:
# Copy validation set (taken unchanged from the original dataset)
shutil.copytree('dataset/val', 'dataset_augmented/val', dirs_exist_ok=True)

# Create data_original for training, replacing any copy left by an earlier run
if os.path.exists('data_original'):
    shutil.rmtree('data_original')
shutil.copytree('dataset_augmented', 'data_original')

print("\n✓ Data preparation complete!")

# Final stats
final_train = count_images('data_original/train')
final_val = count_images('data_original/val')
train_total = sum(final_train.values())
val_total = sum(final_val.values())
print(f"\nFinal Training: {train_total} images")
print(f"Final Validation: {val_total} images")
print(f"Total: {train_total + val_total} images")

## Step 4: Train the Model

In [None]:
# Training setup (from train.py)
batch_size = 8
directory = "./data_original"

# Both splits are loaded with the same settings; only the sub-directory differs
loader_options = dict(
    labels="inferred",
    label_mode="categorical",
    class_names=["i", "ii", "iii", "iv", "v", "vi", "vii", "viii", "ix", "x"],
    shuffle=True,
    seed=123,
    batch_size=batch_size,
    image_size=(32, 32),
)

train = tf.keras.preprocessing.image_dataset_from_directory(
    f"{directory}/train", **loader_options
)
valid = tf.keras.preprocessing.image_dataset_from_directory(
    f"{directory}/val", **loader_options
)

print(f"Training batches: {train.cardinality().numpy()}")
print(f"Validation batches: {valid.cardinality().numpy()}")

In [None]:
# Build model (from train.py)
base_model = tf.keras.applications.ResNet50(
    input_shape=(32, 32, 3),
    include_top=False,
    weights=None,
)
# Keep only the part of ResNet50 up to the end of its conv2 stage. The output
# is given as a single tensor rather than a one-element list, so calling the
# truncated model returns a tensor and no `[0]` indexing is needed.
# NOTE(review): with a list output, whether the call returns a list or a bare
# tensor appears to depend on the Keras version; on a bare tensor `x[0]` would
# select the first batch element instead -- confirm.
base_model = tf.keras.Model(
    base_model.inputs, outputs=base_model.get_layer("conv2_block3_out").output
)

# Classification head: ResNet preprocessing -> truncated backbone ->
# global average pooling -> 10 logits (one per class, no softmax)
inputs = tf.keras.Input(shape=(32, 32, 3))
x = tf.keras.applications.resnet.preprocess_input(inputs)
x = base_model(x)
x = tf.keras.layers.GlobalAveragePooling2D()(x)
x = tf.keras.layers.Dense(10)(x)
model = tf.keras.Model(inputs, x)

# from_logits=True matches the Dense layer above, which has no activation
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001),
    loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True),
    metrics=["accuracy"],
)

print("\nModel architecture:")
model.summary()

In [None]:
# Train
# Keep only the weights of the epoch with the highest validation accuracy
checkpoint = tf.keras.callbacks.ModelCheckpoint(
    "best_model.weights.h5",
    monitor="val_accuracy",
    mode="max",
    save_best_only=True,
    save_weights_only=True,
    verbose=1
)

print("\nStarting training...\n")
history = model.fit(
    train,
    validation_data=valid,
    epochs=5,
    callbacks=[checkpoint],
)

# Load best weights
model.load_weights("best_model.weights.h5")

# Final evaluation
loss, acc = model.evaluate(valid)
print(f"\n{'='*60}")
print(f"FINAL VALIDATION ACCURACY: {acc*100:.2f}%")
print(f"{'='*60}")

if acc >= 0.93:
    print("\n🎉 EXCELLENT! Achieved >93% - Bonus points!")
elif acc >= 0.90:
    print("\n✓ SUCCESS! Achieved >90% accuracy!")
else:
    print(f"\n⚠ Close! Need {(0.90-acc)*100:.2f}% more to reach 90%")

# Save for submission
try:
    model.save_weights("task_61.h5")
except ValueError:
    # Keras 3 rejects weight file names that do not end in ".weights.h5".
    # The best checkpoint was just loaded into the model, so a copy of that
    # file holds the same weights.
    # NOTE(review): the copy is in the checkpoint's file format -- confirm
    # the evaluation side can read it.
    shutil.copyfile("best_model.weights.h5", "task_61.h5")
print("\nWeights saved to: task_61.h5 and best_model.weights.h5")

In [None]:
# Plot training history: accuracy on the left, loss on the right
metrics = history.history
fig, (acc_ax, loss_ax) = plt.subplots(1, 2, figsize=(12, 4))

# Accuracy curves with the two target levels drawn as dashed reference lines
acc_ax.plot(metrics['accuracy'], label='Train Accuracy')
acc_ax.plot(metrics['val_accuracy'], label='Val Accuracy')
acc_ax.axhline(y=0.90, color='r', linestyle='--', label='90% Target')
acc_ax.axhline(y=0.93, color='g', linestyle='--', label='93% Bonus')
acc_ax.set_title('Model Accuracy')
acc_ax.set_ylabel('Accuracy')
acc_ax.set_xlabel('Epoch')
acc_ax.legend()
acc_ax.grid(True, alpha=0.3)

# Loss curves
loss_ax.plot(metrics['loss'], label='Train Loss')
loss_ax.plot(metrics['val_loss'], label='Val Loss')
loss_ax.set_title('Model Loss')
loss_ax.set_ylabel('Loss')
loss_ax.set_xlabel('Epoch')
loss_ax.legend()
loss_ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

## Step 5: Download Model Weights

In [None]:
# Download the trained weights
from google.colab import files

print("Downloading model weights...")
for weights_file in ('best_model.weights.h5', 'task_61.h5'):
    # A missing file is reported and skipped so the other one still downloads
    if os.path.exists(weights_file):
        files.download(weights_file)
    else:
        print(f"Skipping {weights_file}: file not found")
print("\n✓ Download complete! Submit these weights for evaluation.")

## Next Steps:

1. If accuracy is **>90%**: Submit `best_model.weights.h5` or `task_61.h5`
2. If accuracy is **<90%**: Try adjusting:
   - Increase `removal_rate` in outlier removal (try 0.4-0.5)
   - Increase `target_per_class` for more augmentation (try 300-350)
   - Increase training epochs (try 8-10)
   - Manually review outliers and remove obvious mislabeled images

3. Re-run the relevant cells and retrain!