In [None]:
import os
import pandas as pd
import copy
from pdf2image import convert_from_path
from PIL import Image

import matplotlib.pyplot as plt
import random
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, random_split
import torchvision.transforms as T

In [None]:
# --- Paths -------------------------------------------------------------------
# CSV index with 'primary_genre' and 'pdf' columns.
CSV_FILE       = "/kaggle/input/musicsheets/preprocessed_images/dataset.csv"
# Folder holding the pre-rendered PNG pages. The name is historical: it is
# passed to the dataset as `png_folder`, no PDFs are read from it.
PDF_FOLDER     = "/kaggle/input/musicsheets/preprocessed_images/preprocessed_images/"

# --- Data / training hyper-parameters ----------------------------------------
IMG_SIZE       = (512, 512)
BATCH_SIZE     = 12
# NOTE(review): not referenced by the split cell, which uses literal
# 80/10/10 fractions -- confirm before relying on this value.
VAL_SPLIT      = 0.2
NUM_EPOCHS     = 10
LEARNING_RATE  = 1e-3
EARLY_STOP_PATIENCE = 3  # number of epochs with no improvement to stop

# --- Reproducibility ---------------------------------------------------------
RANDOM_SEED    = 42
# Seed every RNG the notebook imports, not only torch.
random.seed(RANDOM_SEED)
np.random.seed(RANDOM_SEED)
torch.manual_seed(RANDOM_SEED)

# Dedicated generator for the dataset split and DataLoader shuffling, seeded
# from the single RANDOM_SEED constant so the two cannot drift apart.
g = torch.Generator()
g.manual_seed(RANDOM_SEED)

## Data loading pipeline

In [None]:
# Load the dataset index (one row per sheet) used to discover the genre labels.
df = pd.read_csv(filepath_or_buffer=CSV_FILE)

In [None]:
# Distinct genre strings, in order of first appearance in the CSV.
# The order is deliberately NOT sorted: it defines the integer class ids.
unique_genres = df["primary_genre"].unique()

# Forward (string -> int) and reverse (int -> string) label lookups.
idx_to_genre = dict(enumerate(unique_genres))
genre_to_idx = {name: class_id for class_id, name in idx_to_genre.items()}

num_classes = len(genre_to_idx)
print("Discovered classes:", genre_to_idx)

### Dataset class creation

In [None]:
class SheetMusicImageDataset(Dataset):
    """
    Image dataset backed by a CSV with 'primary_genre' and 'pdf' columns.

    The 'pdf' column holds the original PDF path; the sample image is the PNG
    of the same base name (".pdf" replaced by ".png") inside `png_folder`.

    Args:
        csv_path: path to the CSV index.
        png_folder: directory containing the PNG files.
        transform: optional callable applied to the loaded RGB PIL image.
        label_map: optional dict mapping genre string -> integer label. When
            omitted, one is built from the CSV's 'primary_genre' column in
            order of first appearance.

    Each item is an `(image, label)` tuple.
    """
    def __init__(self, csv_path, png_folder, transform=None, label_map=None):
        self.df = pd.read_csv(csv_path)
        self.png_folder = png_folder
        self.transform = transform
        if label_map is None:
            # The default used to be unusable (None[...] raised TypeError on
            # the first item); derive the mapping from the data instead.
            label_map = {
                genre: idx
                for idx, genre in enumerate(self.df["primary_genre"].unique())
            }
        self.label_map = label_map

    def __len__(self):
        """Number of rows in the CSV index."""
        return len(self.df)

    def __getitem__(self, idx):
        """
        Load sample `idx`.

        Raises:
            KeyError: if the row's genre is missing from `label_map`.
            RuntimeError: if the PNG cannot be opened or decoded.
        """
        row = self.df.iloc[idx]

        # Integer class label for this row's genre.
        label = self.label_map[row["primary_genre"]]

        # Derive the PNG filename from the PDF path. Surrounding whitespace is
        # stripped because the PDF->PNG conversion step normalised filenames
        # the same way before saving.
        pdf_filename = os.path.basename(str(row["pdf"]).strip())
        png_path = os.path.join(self.png_folder, pdf_filename.replace(".pdf", ".png"))

        try:
            # Context manager closes the file handle deterministically;
            # convert() returns an independent, fully loaded image.
            with Image.open(png_path) as img:
                image = img.convert("RGB")
        except Exception as e:
            raise RuntimeError(f"Error reading PNG {png_path}: {e}") from e

        # Optional transformations (resize, tensor conversion, etc.)
        if self.transform:
            image = self.transform(image)

        return image, label


### Transformations for images

In [None]:
# Train and eval pipelines share the same stages; only training adds augmentation.
import torchvision.transforms as T


def _ensure_rgb(img):
    """Return `img` converted to 3-channel RGB."""
    return img.convert("RGB")


def _build_transform(*augmentations):
    """Compose RGB conversion -> resize -> optional augmentations -> tensor."""
    stages = [T.Lambda(_ensure_rgb), T.Resize(IMG_SIZE)]
    stages.extend(augmentations)
    stages.append(T.ToTensor())
    return T.Compose(stages)


# Training: small random rotation only.
train_transform = _build_transform(T.RandomRotation(degrees=5))

# Validation / test: deterministic, no augmentation.
val_transform = _build_transform()
test_transform = _build_transform()


### Dataset creation

In [None]:
# Un-transformed dataset over the whole CSV; per-split transforms are attached
# in the split cell.
full_dataset = SheetMusicImageDataset(
    CSV_FILE,
    PDF_FOLDER,
    transform=None,
    label_map=genre_to_idx,
)

### Split and loading

In [None]:
import copy

from torch.utils.data import DataLoader, Subset, random_split

# Split sizes: 80% train, 10% validation, remainder test.
dataset_length = len(full_dataset)
train_size = int(0.8 * dataset_length)
val_size = int(0.1 * dataset_length)
test_size = dataset_length - train_size - val_size

# Reproducible split driven by the seeded generator `g`.
train_split, val_split, test_split = random_split(
    full_dataset, [train_size, val_size, test_size], generator=g
)


def _subset_with_transform(split, transform):
    """Return a Subset over the same indices whose dataset uses `transform`.

    All subsets produced by random_split share ONE underlying dataset object,
    so assigning `split.dataset.transform` on each of them just overwrites the
    same attribute and the last assignment wins for every split. A shallow
    copy gives each split its own `transform` while sharing the DataFrame.
    """
    dataset_view = copy.copy(split.dataset)
    dataset_view.transform = transform
    return Subset(dataset_view, split.indices)


# Each split gets its own transform (augmentation on the training split only).
# `full_dataset` itself is left without a transform.
train_dataset = _subset_with_transform(train_split, train_transform)
val_dataset = _subset_with_transform(val_split, val_transform)
test_dataset = _subset_with_transform(test_split, test_transform)

# Create DataLoaders (only the training loader shuffles, using generator `g`)
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=4, generator=g)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=4)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=4)

# Print stats
print(f"Total samples: {dataset_length}, Train: {train_size}, Val: {val_size}, Test: {test_size}")
# TODO: use a stratified split so every genre keeps its proportion in each subset.


### Visualization to check loading (sanity check)

In [None]:
# # Pick a random index for a random sample
# random_idx = random.randint(0, len(full_dataset) - 1)

# # Retrieve the sample
# sample_img, sample_label = full_dataset[random_idx]

# # We apply val_transform for display if not applied, just to ensure consistent sizing
# if isinstance(sample_img, Image.Image):
#     sample_img = val_transform(sample_img)

# # Map label back to string
# genre_name = idx_to_genre[sample_label] if sample_label in idx_to_genre else sample_label

# # Plot
# plt.figure(figsize=(6,6))             
# plt.imshow(sample_img.squeeze(0), cmap='gray')
# plt.title(f"Random Loaded Sample\nLabel: {genre_name}", fontsize=14)
# plt.axis('off')                      
# plt.show()

In [None]:
# import torch
# import torch.nn as nn
# import torch.optim as optim
# import torchvision.models as models
# from tqdm import tqdm
# import os

# # Device setup
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# print(f"Using device: {device}")

# # Hyperparameters
# learning_rate = 0.0005
# epochs = 20
# BATCH_SIZE = 64
# NUM_CLASSES = len(genre_to_idx)

# # Model configurations
# model_names = ["resnet50", "efficientnet_b0", "mobilenet_v3_small"]
# model_constructors = {
#     "resnet50": models.resnet50,
#     "efficientnet_b0": models.efficientnet_b0,
#     "mobilenet_v3_small": models.mobilenet_v3_small
# }

# os.makedirs("saved_models", exist_ok=True)

# for model_name in model_names:
#     print(f"\nTraining {model_name}...")

#     model = model_constructors[model_name](pretrained=True)

#     for param in model.parameters():
#         param.requires_grad = False

#     # Adjust the output layer
#     if "resnet" in model_name:
#         model.fc = nn.Linear(model.fc.in_features, NUM_CLASSES)
#     elif "efficientnet" in model_name or "mobilenet" in model_name:
#         model.classifier[-1] = nn.Linear(model.classifier[-1].in_features, NUM_CLASSES)

#     model = model.to(device)

#     criterion = nn.CrossEntropyLoss()
#     optimizer = optim.Adam(model.parameters(), lr=learning_rate)

#     for epoch in range(epochs):
#         model.train()
#         running_loss = 0.0

#         with tqdm(total=len(train_loader), desc=f"{model_name} Epoch {epoch+1}/{epochs}") as pbar:
#             for images, labels in train_loader:
#                 images, labels = images.to(device), labels.to(device)
#                 optimizer.zero_grad()
#                 outputs = model(images)
#                 loss = criterion(outputs, labels)
#                 loss.backward()
#                 optimizer.step()

#                 running_loss += loss.item()
#                 pbar.update(1)

#         print(f"Epoch {epoch+1} Loss: {running_loss/len(train_loader):.4f}")

#     # Save trained model
#     save_path = f"saved_models/{model_name}_model.pth"
#     torch.save(model.state_dict(), save_path)
#     print(f"✅ Saved {model_name} to {save_path}")


In [None]:
import torch
import torch.nn as nn
import torchvision.models as models
from sklearn.metrics import precision_recall_fscore_support, accuracy_score
import matplotlib.pyplot as plt
import pandas as pd
import os

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Directory holding the "<model_name>_model.pth" checkpoints.
CHECKPOINT_DIR = "/kaggle/input/musicsheets"

model_names = ["resnet50", "efficientnet_b0", "mobilenet_v3_small"]
model_constructors = {
    "resnet50": models.resnet50,
    "efficientnet_b0": models.efficientnet_b0,
    "mobilenet_v3_small": models.mobilenet_v3_small
}

NUM_CLASSES = len(genre_to_idx)
criterion = nn.CrossEntropyLoss()


def build_model(model_name, num_classes):
    """Create an untrained backbone whose final layer outputs `num_classes` logits.

    The constructor is called without arguments (no pretrained weights are
    downloaded); this avoids the deprecated `pretrained=` keyword. The real
    weights are loaded from a checkpoint afterwards.

    Raises:
        ValueError: if `model_name` has no known classification head.
    """
    model = model_constructors[model_name]()

    if "resnet" in model_name:
        model.fc = nn.Linear(model.fc.in_features, num_classes)
    elif "efficientnet" in model_name or "mobilenet" in model_name:
        model.classifier[-1] = nn.Linear(model.classifier[-1].in_features, num_classes)
    else:
        raise ValueError(f"Don't know how to replace the head of '{model_name}'")
    return model


def evaluate_model(model, loader, criterion, device):
    """Run `model` over `loader` and return loss and macro-averaged metrics.

    The loss is weighted by batch size, so a smaller final batch does not skew
    the average (a plain mean of per-batch means would).

    Returns:
        dict with keys val_loss, val_precision, val_recall, val_f1, val_accuracy.

    Raises:
        ValueError: if `loader` yields no samples.
    """
    model.eval()
    total_loss, n_samples = 0.0, 0
    all_preds, all_labels = [], []

    with torch.no_grad():
        for images, labels in loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            batch_size = labels.size(0)
            total_loss += criterion(outputs, labels).item() * batch_size
            n_samples += batch_size
            all_preds.extend(outputs.argmax(dim=1).cpu().tolist())
            all_labels.extend(labels.cpu().tolist())

    if n_samples == 0:
        raise ValueError("Evaluation loader produced no samples")

    precision, recall, f1, _ = precision_recall_fscore_support(all_labels, all_preds, average="macro")
    return {
        "val_loss": total_loss / n_samples,
        "val_precision": precision,
        "val_recall": recall,
        "val_f1": f1,
        "val_accuracy": accuracy_score(all_labels, all_preds),
    }


all_results = {}

for model_name in model_names:
    model = build_model(model_name, NUM_CLASSES)

    # map_location lets checkpoints saved on a GPU load on a CPU-only machine.
    checkpoint_path = os.path.join(CHECKPOINT_DIR, f"{model_name}_model.pth")
    model.load_state_dict(torch.load(checkpoint_path, map_location=device))
    model = model.to(device)

    all_results[model_name] = evaluate_model(model, val_loader, criterion, device)

# Save results to CSV (one row per model)
results_df = pd.DataFrame(all_results).T
results_df.to_csv("final_model_evaluation.csv")
print(results_df)

# Plot Loss, F1 Score, and Accuracy side by side: (column, title, y-label, colour)
metric_panels = [
    ("val_loss", "Validation Loss", "Loss", "salmon"),
    ("val_f1", "F1 Score", "F1 Score", "skyblue"),
    ("val_accuracy", "Accuracy", "Accuracy", "lightgreen"),
]

fig, axes = plt.subplots(1, 3, figsize=(20, 5))
for ax, (column, title, ylabel, color) in zip(axes, metric_panels):
    results_df[column].plot(kind="bar", ax=ax, color=color)
    ax.set_title(title)
    ax.set_ylabel(ylabel)

plt.tight_layout()
plt.savefig("final_model_metrics.png")
plt.show()

print("✅ Final results and plots saved successfully.")


## Convert desired PDFs to PNG images

In [None]:
# import os
# from pdf2image import convert_from_path

# # Paths
# PDF_DIR = "archives/pdf/"  # Folder containing PDFs
# IMAGE_DIR = "preprocessed_images/"  # Folder to store preprocessed images

# import os
# import pandas as pd
# from pdf2image import convert_from_path

# # Paths
# PDF_DIR = "archives/pdf/"  # Folder containing PDFs
# IMAGE_DIR = "preprocessed_images/"  # Folder to store preprocessed images
# CSV_PATH = "dataset.csv"  # CSV file with PDF paths

# # Ensure output directory exists
# os.makedirs(IMAGE_DIR, exist_ok=True)

# # Load dataset CSV
# df = pd.read_csv(CSV_PATH)

# # ✅ Normalize filenames (strip spaces, remove paths)
# required_pdfs = set(df["pdf"].str.strip().apply(lambda x: os.path.basename(x)))  # Only filename (no path)

# print("Preprocessing PDFs into images...")

# for root, _, files in os.walk(PDF_DIR):
#     for pdf_file in files:
#         pdf_file_cleaned = pdf_file.strip()  # Ensure no extra spaces
#         if pdf_file_cleaned.endswith(".pdf") and pdf_file_cleaned in required_pdfs:  # ✅ Corrected filename comparison
#             pdf_path = os.path.join(root, pdf_file_cleaned)
#             images = convert_from_path(pdf_path)  # Convert PDF to images
            
#             # Save first page only
#             img_save_path = os.path.join(IMAGE_DIR, pdf_file_cleaned.replace(".pdf", ".png"))
#             images[0].save(img_save_path, "PNG")  # Save first page as image
#             print(f"✅ Saved: {img_save_path}")

# print("✅ PDF to PNG conversion complete!")


## Model Definition (change)

In [None]:
# class SimpleCNN(nn.Module): 
#     def __init__(self, num_classes):
#         super(SimpleCNN, self).__init__()
#         # Input shape: (1, 512, 512) if using Grayscale
#         self.net = nn.Sequential(
#             nn.Conv2d(1, 16, kernel_size=3, padding=1),
#             nn.ReLU(),
#             nn.MaxPool2d(2),  # -> (16, 256, 256)
            
#             nn.Conv2d(16, 32, kernel_size=3, padding=1),
#             nn.ReLU(),
#             nn.MaxPool2d(2),  # -> (32, 128, 128)
            
#             nn.Conv2d(32, 64, kernel_size=3, padding=1),
#             nn.ReLU(),
#             nn.MaxPool2d(2),  # -> (64, 64, 64)
#         )
        
#         # After 3 pool layers, each dimension is divided by 2^3 = 8
#         # So for a 512×512 input, we now have 64×64×64 features
#         # Flatten shape is 64 * 64 * 64 = 262144
#         self.fc = nn.Sequential(
#             nn.Flatten(),
#             nn.Linear(64 * (IMG_SIZE[0]//8) * (IMG_SIZE[1]//8), 128),
#             nn.ReLU(),
#             nn.Linear(128, num_classes)
#         )

#     def forward(self, x):
#         x = self.net(x)
#         x = self.fc(x)
#         return x

# model = SimpleCNN(num_classes=num_classes)


## Training

### Training setup

In [None]:
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# print('Using:', device)
# model.to(device)

# criterion = nn.CrossEntropyLoss()
# optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)


# # Use ReduceLROnPlateau to reduce LR if val_loss stops improving.
# scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=2)

# # Early stopping variables
# best_val_loss = float("inf")
# best_model_wts = copy.deepcopy(model.state_dict())
# no_improve_count = 0

# # Lists to track metrics for plotting
# train_losses = []
# val_losses   = []
# train_accs   = []
# val_accs     = []

### Training loop

In [None]:
# for epoch in range(NUM_EPOCHS):
#     # --- TRAIN ---
#     model.train()
#     running_loss, running_correct, total = 0.0, 0, 0
    
#     for images, labels in train_loader:
#         images, labels = images.to(device), labels.to(device)

#         optimizer.zero_grad()
#         outputs = model(images)
#         loss = criterion(outputs, labels)
#         loss.backward()
#         optimizer.step()

#         # stats
#         running_loss += loss.item() * images.size(0)
#         _, predicted = torch.max(outputs, 1)
#         running_correct += (predicted == labels).sum().item()
#         total += labels.size(0)
    
#     train_epoch_loss = running_loss / total
#     train_epoch_acc  = running_correct / total
#     train_losses.append(train_epoch_loss)
#     train_accs.append(train_epoch_acc)

#     # --- VALIDATION ---
#     model.eval()
#     val_running_loss, val_correct, val_total = 0.0, 0, 0
    
#     with torch.no_grad():
#         for images, labels in val_loader:
#             images, labels = images.to(device), labels.to(device)
#             outputs = model(images)
#             loss = criterion(outputs, labels)

#             val_running_loss += loss.item() * images.size(0)
#             _, predicted = torch.max(outputs, 1)
#             val_correct += (predicted == labels).sum().item()
#             val_total += labels.size(0)

#     val_epoch_loss = val_running_loss / val_total
#     val_epoch_acc  = val_correct / val_total
#     val_losses.append(val_epoch_loss)
#     val_accs.append(val_epoch_acc)

#     print(f"Epoch [{epoch+1}/{NUM_EPOCHS}] "
#           f"Train Loss: {train_epoch_loss:.4f} | Train Acc: {train_epoch_acc:.4f} | "
#           f"Val Loss: {val_epoch_loss:.4f}   | Val Acc: {val_epoch_acc:.4f}")

#     # We'll step the scheduler based on validation loss
#     scheduler.step(val_epoch_loss)

#     # --- EARLY STOPPING CHECK ---
#     if val_epoch_loss < best_val_loss:
#         best_val_loss = val_epoch_loss
#         best_model_wts = copy.deepcopy(model.state_dict())
#         no_improve_count = 0
#     else:
#         no_improve_count += 1
#         print(f"No improvement for {no_improve_count} epoch(s)")

#         if no_improve_count >= EARLY_STOP_PATIENCE:
#             print("Early stopping triggered!")
#             break

# print("Training complete!")

# # Load best weights
# model.load_state_dict(best_model_wts)

### Plot curves

In [None]:
# plt.figure(figsize=(12,5))

# # --- LOSS ---
# plt.subplot(1,2,1)
# plt.plot(train_losses, label='Train Loss', marker='o')
# plt.plot(val_losses,   label='Val Loss', marker='o')
# plt.title("Loss Over Epochs")
# plt.xlabel("Epoch")
# plt.ylabel("Loss")
# plt.legend()

# # --- ACCURACY ---
# plt.subplot(1,2,2)
# plt.plot(train_accs, label='Train Acc', marker='o')
# plt.plot(val_accs,   label='Val Acc', marker='o')
# plt.title("Accuracy Over Epochs")
# plt.xlabel("Epoch")
# plt.ylabel("Accuracy")
# plt.legend()

# plt.tight_layout()