In [None]:
# Mount Google Drive at /content/drive; the dataset path used later in this
# notebook lives under that mount point.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# --- dependencies ---
from pathlib import Path
import pandas as pd
from sklearn.model_selection import train_test_split
from datasets import Dataset, DatasetDict, Features, ClassLabel, Image as HFImage
from transformers import AutoImageProcessor, ConvNextV2ForImageClassification, TrainingArguments, Trainer
import torch
from torchvision import transforms as T
from PIL import Image
import numpy as np
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report

In [None]:
# Root folder of the preprocessed dataset on the mounted Drive. The discovery cell
# searches this folder for NLM_CLAHE sub-directories and writes its CSV file list here.
DATASET_PARENT = Path(
    r"/content/drive/MyDrive/Projects/6_Project Phoenix_Cervical Cancer Cell Classification/Preprocessed Datasets/Augmented Datasets/Augmented Dataset - Limited Enhancement"
)

In [None]:
# --- find all NLM_CLAHE directories (case-insensitive) ---
# A single recursive walk visits every depth below DATASET_PARENT, including the
# DATASET_PARENT/<class>/NLM_CLAHE layout, so a separate shallow pass over the
# immediate children would only repeat the same Drive listings.
nlm_dirs = {
    p.resolve()
    for p in DATASET_PARENT.rglob("*")
    if p.is_dir() and p.name.lower() == "nlm_clahe"
}

if not nlm_dirs:
    raise FileNotFoundError(
        "No 'NLM_CLAHE' directories found under DATASET_PARENT. "
        "Check folder names and capitalization."
    )

# --- collect BMP files from each NLM_CLAHE and map to class name (parent folder) ---
rows = []
seen_paths = set()   # dedupe absolute paths

for nlm in sorted(nlm_dirs, key=str):
    class_name = nlm.parent.name    # parent folder is the class label
    # Gather BMP files (case-insensitive) in sorted order. Directory iteration order
    # is filesystem-dependent, and the seeded shuffle/split further down is only
    # reproducible when the rows start out in a stable order.
    bmp_files = sorted(
        (p.resolve() for p in nlm.iterdir() if p.is_file() and p.suffix.lower() == ".bmp"),
        key=str,
    )
    if not bmp_files:
        # warn but continue
        print(f"Warning: no .bmp files found in: {nlm}  (class = '{class_name}')")
        continue
    for p in bmp_files:
        sp = str(p)
        if sp in seen_paths:
            continue
        seen_paths.add(sp)
        rows.append((sp, class_name))

# --- build DataFrame ---
df = pd.DataFrame(rows, columns=["image_path", "label_name"])
if df.empty:
    raise RuntimeError("No .bmp image files were found in any discovered NLM_CLAHE directories.")

# stable sorted class ordering -> map to integer labels
class_names = sorted(df["label_name"].unique().tolist())
label_to_id = {n: i for i, n in enumerate(class_names)}
df["label"] = df["label_name"].map(label_to_id)

# optional: shuffle rows (helps downstream splitting); seeded, so the result is
# deterministic given the sorted row order built above
df = df.sample(frac=1, random_state=42).reset_index(drop=True)

# summary prints
print("Dataset parent:", DATASET_PARENT)
print("Discovered NLM_CLAHE directories (count):", len(nlm_dirs))
for p in sorted(nlm_dirs):
    print("  -", p)
print("\nFound classes (alphabetical):", class_names)
print("Total images found:", len(df))
print("Counts per class:")
print(df.groupby("label_name").size().sort_values(ascending=False))

# save csv to dataset parent for convenience
csv_out = DATASET_PARENT / "sipakmed_file_list.csv"
df.to_csv(csv_out, index=False)
print(f"\nSaved file list to: {csv_out}")

Dataset parent: /content/drive/MyDrive/Projects/6_Project Phoenix_Cervical Cancer Cell Classification/Preprocessed Datasets/Augmented Datasets/Augmented Dataset - Limited Enhancement
Discovered NLM_CLAHE directories (count): 5
  - /content/drive/MyDrive/Projects/6_Project Phoenix_Cervical Cancer Cell Classification/Preprocessed Datasets/Augmented Datasets/Augmented Dataset - Limited Enhancement/im_Dyskeratotic/NLM_CLAHE
  - /content/drive/MyDrive/Projects/6_Project Phoenix_Cervical Cancer Cell Classification/Preprocessed Datasets/Augmented Datasets/Augmented Dataset - Limited Enhancement/im_Koilocytotic/NLM_CLAHE
  - /content/drive/MyDrive/Projects/6_Project Phoenix_Cervical Cancer Cell Classification/Preprocessed Datasets/Augmented Datasets/Augmented Dataset - Limited Enhancement/im_Metaplastic/NLM_CLAHE
  - /content/drive/MyDrive/Projects/6_Project Phoenix_Cervical Cancer Cell Classification/Preprocessed Datasets/Augmented Datasets/Augmented Dataset - Limited Enhancement/im_Parabasal

### Train, Test and Validation Split

In [None]:
# Stratified 80/10/10 split: hold out 20% of the rows, then cut that hold-out in
# half to obtain the validation and test sets. Both cuts stratify on the label.
# NOTE(review): the dataset path names an *augmented* dataset. If several files are
# augmented variants of one source image, a random row-level split can place
# variants of the same image in different splits - confirm whether that applies.
train_df, temp_df = train_test_split(
    df,
    test_size=0.2,
    random_state=42,
    stratify=df['label'],
)
val_df, test_df = train_test_split(
    temp_df,
    test_size=0.5,
    random_state=42,
    stratify=temp_df['label'],
)

for size_caption, split_frame in (
    ("Train size:", train_df),
    ("Validation size:", val_df),
    ("Test size:", test_df),
):
    print(size_caption, len(split_frame))

# Optional: check class distribution
for counts_caption, split_frame in (
    ("\nTrain class counts:\n", train_df),
    ("\nValidation class counts:\n", val_df),
    ("\nTest class counts:\n", test_df),
):
    print(counts_caption, split_frame['label_name'].value_counts())

Train size: 3239
Validation size: 405
Test size: 405

Train class counts:
 label_name
im_Superficial-Intermediate    665
im_Koilocytotic                660
im_Dyskeratotic                650
im_Metaplastic                 634
im_Parabasal                   630
Name: count, dtype: int64

Validation class counts:
 label_name
im_Superficial-Intermediate    83
im_Koilocytotic                83
im_Dyskeratotic                81
im_Metaplastic                 79
im_Parabasal                   79
Name: count, dtype: int64

Test class counts:
 label_name
im_Superficial-Intermediate    83
im_Koilocytotic                82
im_Dyskeratotic                82
im_Metaplastic                 80
im_Parabasal                   78
Name: count, dtype: int64


### Converting the dataset into a Hugging Face DatasetDict

In [None]:
# Schema shared by all three splits: an image column (lazy-loaded from the stored
# file path) and a ClassLabel column over the alphabetically sorted class names.
features = Features({
    "image": HFImage(),
    "label": ClassLabel(names=sorted(df['label_name'].unique())),
})


def df_to_ds(dframe):
    """Turn a split DataFrame into a Dataset with 'image' and 'label' columns.

    Reads the 'image_path' and 'label' columns of `dframe` and casts the result
    to the module-level `features` schema.
    """
    columns = {
        "image": dframe["image_path"].tolist(),
        "label": dframe["label"].tolist(),
    }
    return Dataset.from_dict(columns).cast(features)


split_frames = {"train": train_df, "validation": val_df, "test": test_df}
dataset = DatasetDict({
    split_name: df_to_ds(split_frame.reset_index(drop=True))
    for split_name, split_frame in split_frames.items()
})

# Quick check
print(dataset)
print(dataset['train'][0])

Casting the dataset:   0%|          | 0/3239 [00:00<?, ? examples/s]

Casting the dataset:   0%|          | 0/405 [00:00<?, ? examples/s]

Casting the dataset:   0%|          | 0/405 [00:00<?, ? examples/s]

DatasetDict({
    train: Dataset({
        features: ['image', 'label'],
        num_rows: 3239
    })
    validation: Dataset({
        features: ['image', 'label'],
        num_rows: 405
    })
    test: Dataset({
        features: ['image', 'label'],
        num_rows: 405
    })
})
{'image': <PIL.BmpImagePlugin.BmpImageFile image mode=RGB size=256x256 at 0x7C2DD8ABC440>, 'label': 2}


In [None]:
from transformers import ConvNextV2ForImageClassification, AutoImageProcessor

model_name = "facebook/convnextv2-tiny-22k-384"

processor = AutoImageProcessor.from_pretrained(model_name)

# Take the label set from the dataset itself so the classifier head size and the
# id<->label maps always agree with the data (instead of a hard-coded class count).
label_names = dataset['train'].features['label'].names

# The checkpoint's classifier head has a different number of outputs than this task;
# ignore_mismatched_sizes=True lets the remaining weights load while the head is
# newly initialised with len(label_names) outputs.
model = ConvNextV2ForImageClassification.from_pretrained(
    model_name,
    num_labels=len(label_names),
    id2label=dict(enumerate(label_names)),
    label2id={name: i for i, name in enumerate(label_names)},
    ignore_mismatched_sizes=True,
)

Fetching 1 files:   0%|          | 0/1 [00:00<?, ?it/s]

preprocessor_config.json:   0%|          | 0.00/352 [00:00<?, ?B/s]

Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.


config.json: 0.00B [00:00, ?B/s]

model.safetensors:   0%|          | 0.00/115M [00:00<?, ?B/s]

Some weights of ConvNextV2ForImageClassification were not initialized from the model checkpoint at facebook/convnextv2-tiny-22k-384 and are newly initialized because the shapes did not match:
- classifier.bias: found shape torch.Size([1000]) in the checkpoint and torch.Size([5]) in the model instantiated
- classifier.weight: found shape torch.Size([1000, 768]) in the checkpoint and torch.Size([5, 768]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [None]:
mean, std = processor.image_mean, processor.image_std

# NOTE(review): the checkpoint name ends in "-384" while inputs are resized to
# 224x224 here - confirm the lower training resolution is intentional.
image_size = (224, 224)

# Training pipeline: resize + light random augmentation + normalisation.
train_transform = T.Compose([
    T.Resize(image_size),
    T.RandomHorizontalFlip(),
    T.RandomRotation(10),
    T.ToTensor(),
    T.Normalize(mean=mean, std=std)
])

# Validation/test pipeline: deterministic resize + normalisation only.
val_transform = T.Compose([
    T.Resize(image_size),
    T.ToTensor(),
    T.Normalize(mean=mean, std=std)
])


def apply_train_transforms(examples):
    """Applies training transformations to a batch of images."""
    examples["pixel_values"] = [train_transform(image.convert("RGB")) for image in examples["image"]]
    return examples


def apply_val_transforms(examples):
    """Applies validation/testing transformations to a batch of images."""
    examples["pixel_values"] = [val_transform(image.convert("RGB")) for image in examples["image"]]
    return examples


class TransformedImageDataset(torch.utils.data.Dataset):
    """Torch dataset that applies an image transform every time an item is read.

    `Dataset.map` would run the transform once and store the result, so the random
    flip/rotation would be drawn a single time per image and replayed unchanged in
    every epoch (and every float tensor would be written to the Arrow cache).
    Transforming on access keeps the augmentation random per epoch.

    Args:
        hf_split: Indexable split whose items are dicts with an 'image' (PIL image)
            and a 'label' entry.
        transform: Callable mapping an RGB PIL image to a tensor.
    """

    def __init__(self, hf_split, transform):
        self.hf_split = hf_split
        self.transform = transform

    def __len__(self):
        return len(self.hf_split)

    def __getitem__(self, index):
        example = self.hf_split[int(index)]
        return {
            "pixel_values": self.transform(example["image"].convert("RGB")),
            "label": example["label"],
        }


# Same variable names as before, so the Trainer cells can use them unchanged.
train_ds = TransformedImageDataset(dataset["train"], train_transform)
val_ds = TransformedImageDataset(dataset["validation"], val_transform)
test_ds = TransformedImageDataset(dataset["test"], val_transform)

Map:   0%|          | 0/3239 [00:00<?, ? examples/s]

Map:   0%|          | 0/405 [00:00<?, ? examples/s]

Map:   0%|          | 0/405 [00:00<?, ? examples/s]

### Defining evaluation metrics

In [None]:
def compute_metrics(eval_pred):
    """Compute accuracy and weighted precision/recall/F1 for one evaluation pass.

    Args:
        eval_pred: Pair ``(predictions, labels)``. The predicted class is the
            argmax of ``predictions`` along axis 1.

    Returns:
        dict with the keys 'accuracy', 'precision', 'recall' and 'f1'.
    """
    scores, labels = eval_pred
    predicted_ids = np.argmax(scores, axis=1)

    results = {'accuracy': accuracy_score(labels, predicted_ids)}
    weighted_scorers = (
        ('precision', precision_score),
        ('recall', recall_score),
        ('f1', f1_score),
    )
    for metric_name, scorer in weighted_scorers:
        results[metric_name] = scorer(labels, predicted_ids, average='weighted')
    return results

### Setting training arguments

In [None]:
from transformers import TrainingArguments

# Evaluation and checkpointing run on the same 200-step cadence; loss is logged
# every 50 steps.
schedule_settings = dict(
    eval_strategy="steps",
    eval_steps=200,
    save_strategy="steps",
    save_steps=200,
    logging_steps=50,
)

# Optimisation hyper-parameters.
optimisation_settings = dict(
    num_train_epochs=5,
    learning_rate=5e-5,
    weight_decay=0.01,
    fp16=True,
)

training_args = TrainingArguments(
    output_dir="./convnextv2_cervical",
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    # Reload the checkpoint with the best validation accuracy when training ends.
    load_best_model_at_end=True,
    metric_for_best_model="accuracy",
    **schedule_settings,
    **optimisation_settings,
)

In [None]:
from transformers import Trainer

class CustomTrainer(Trainer):
    """Trainer whose loss step passes only 'pixel_values' and 'labels' to the model."""

    def compute_loss(self, model, inputs, return_outputs=False, num_items_in_batch=None):
        """
        Run the model on the whitelisted inputs and return its loss.

        Any key in `inputs` other than 'pixel_values' / 'labels' is dropped before
        the forward call. `num_items_in_batch` is accepted but not used here.

        Returns the loss, or ``(loss, outputs)`` when `return_outputs` is true.
        Raises ValueError when the model returns a dict without a "loss" entry.
        """
        accepted_keys = ['pixel_values', 'labels']
        model_inputs = {}
        for key, value in inputs.items():
            if key in accepted_keys:
                model_inputs[key] = value

        outputs = model(**model_inputs)

        if self.args.past_index >= 0:
            self._past = outputs[self.args.past_index]

        outputs_is_dict = isinstance(outputs, dict)
        if outputs_is_dict and "loss" not in outputs:
            raise ValueError(
                "The model did not return a loss from the inputs, only the following keys: "
                f"{','.join(outputs.keys())}. For reference, the inputs it received are {','.join(inputs.keys())}."
            )

        # Dict-like outputs expose the loss by key, tuple outputs by position.
        if outputs_is_dict:
            loss = outputs["loss"]
        else:
            loss = outputs[0]

        if return_outputs:
            return (loss, outputs)
        return loss

# Assemble the trainer from the model, the training arguments, the prepared
# train/validation splits and the metric function.
trainer = CustomTrainer(
    model=model,
    args=training_args,
    compute_metrics=compute_metrics,
    processing_class=processor,
    train_dataset=train_ds,   # training split with the training transforms
    eval_dataset=val_ds,      # validation split with the evaluation transforms
)

In [None]:
# Run fine-tuning; the value returned by train() is displayed as the cell result.
trainer.train()

  | |_| | '_ \/ _` / _` |  _/ -_)


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize?ref=models
wandb: Paste an API key from your profile and hit enter:

 ··········


[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mmeetbhatt2304[0m ([33mmeetbhatt2304-blink-analytics[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


Step,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
200,0.1876,0.217699,0.925926,0.927402,0.925926,0.924408
400,0.0968,0.143249,0.965432,0.966461,0.965432,0.965639
600,0.0354,0.240427,0.953086,0.954211,0.953086,0.952691
800,0.0064,0.22058,0.953086,0.953867,0.953086,0.952287
1000,0.0032,0.176038,0.967901,0.968163,0.967901,0.967942


TrainOutput(global_step=1015, training_loss=0.11772264032349394, metrics={'train_runtime': 2496.0396, 'train_samples_per_second': 6.488, 'train_steps_per_second': 0.407, 'total_flos': 4.0765406427196416e+17, 'train_loss': 0.11772264032349394, 'epoch': 5.0})

In [None]:
# Archive the whole training output directory (checkpoints and the runs/ event logs)
# into a single zip file under /content.
!zip -r /content/convnextv2_cervical.zip /content/convnextv2_cervical

  adding: content/convnextv2_cervical/ (stored 0%)
  adding: content/convnextv2_cervical/runs/ (stored 0%)
  adding: content/convnextv2_cervical/runs/Sep24_12-39-37_4a5d508c5135/ (stored 0%)
  adding: content/convnextv2_cervical/runs/Sep24_12-39-37_4a5d508c5135/events.out.tfevents.1758717581.4a5d508c5135.1170.0 (deflated 63%)
  adding: content/convnextv2_cervical/runs/Sep24_12-39-37_4a5d508c5135/events.out.tfevents.1758720120.4a5d508c5135.1170.1 (deflated 30%)
  adding: content/convnextv2_cervical/checkpoint-600/ (stored 0%)
  adding: content/convnextv2_cervical/checkpoint-600/scaler.pt (deflated 64%)
  adding: content/convnextv2_cervical/checkpoint-600/rng_state.pth (deflated 26%)
  adding: content/convnextv2_cervical/checkpoint-600/training_args.bin (deflated 53%)
  adding: content/convnextv2_cervical/checkpoint-600/scheduler.pt (deflated 61%)
  adding: content/convnextv2_cervical/checkpoint-600/model.safetensors (deflated 7%)
  adding: content/convnextv2_cervical/checkpoint-600/opti

In [None]:
# Evaluate on test set
print("\nEvaluating on test set...")
# Report under a "test_" prefix: with the default "eval_" prefix these numbers are
# indistinguishable from the validation metrics produced during training.
test_results = trainer.evaluate(test_ds, metric_key_prefix="test")
print("Test Results:", test_results)

# Save the model and processor side by side so both can be reloaded from one folder
model_save_path = "./saved_convnextv2_model"
trainer.save_model(model_save_path)
processor.save_pretrained(model_save_path)
print(f"Model and processor saved to {model_save_path}")


Evaluating on test set...


Test Results: {'eval_loss': 0.21494273841381073, 'eval_accuracy': 0.9629629629629629, 'eval_precision': 0.963248930682078, 'eval_recall': 0.9629629629629629, 'eval_f1': 0.963007560520247, 'eval_runtime': 42.9747, 'eval_samples_per_second': 9.424, 'eval_steps_per_second': 0.605, 'epoch': 5.0}
Model and processor saved to ./saved_convnextv2_model


### Load the saved model and processor

In [None]:
from transformers import ConvNextV2ForImageClassification, AutoImageProcessor
from PIL import Image

# Reload what the evaluation cell saved with trainer.save_model(): the model that
# was scored on the test set, together with its processor. The path used here
# before ("./convnextv2_cervical/checkpoint-1015") is the last training step,
# which is a different artifact from the one that was saved and tested.
model_save_path = "./saved_convnextv2_model"

# Load the processor
processor = AutoImageProcessor.from_pretrained(model_save_path)

# Load the model
model = ConvNextV2ForImageClassification.from_pretrained(model_save_path)

### Define a function to preprocess the image

In [None]:
def preprocess_image(image_path, processor, image_size=224):
    """
    Loads and preprocesses an image for the model.

    By default the image goes through the same steps as the validation/test
    transform used during training (resize to a square, convert to tensor,
    normalise with the processor's mean/std), so inference sees inputs prepared
    the same way as the ones the model was evaluated on.

    Args:
        image_path (str): Path to the image file.
        processor: The image processor object; supplies `image_mean` / `image_std`.
        image_size (int | None): Side length of the square resize. Defaults to 224,
            the size used by the training/validation transforms. Pass None to use
            the processor's own resize/normalise pipeline instead.

    Returns:
        torch.Tensor: The preprocessed image tensor with a leading batch dimension.
    """
    # Close the file handle promptly; convert() returns an independent in-memory copy.
    with Image.open(image_path) as raw_image:
        image = raw_image.convert("RGB")

    if image_size is None:
        inputs = processor(images=image, return_tensors="pt")
        return inputs.pixel_values

    # Imported here so the function also works when only the inference cells are run.
    from torchvision import transforms as T

    eval_pipeline = T.Compose([
        T.Resize((image_size, image_size)),
        T.ToTensor(),
        T.Normalize(mean=processor.image_mean, std=processor.image_std),
    ])
    # Add the batch dimension expected by the model.
    return eval_pipeline(image).unsqueeze(0)

### Define a function for inference

In [None]:
def classify_image(image_path, model, processor):
    """
    Classifies a single image using the loaded model.

    Args:
        image_path (str): Path to the image file.
        model: The loaded model.
        processor: The image processor.

    Returns:
        str: The predicted class label.
    """
    pixel_values = preprocess_image(image_path, processor)

    # Put the input on the same device as the model's weights; otherwise a model
    # that lives on the GPU fails with a device-mismatch error on a CPU tensor.
    model_device = next(model.parameters()).device
    pixel_values = pixel_values.to(model_device)

    # Perform inference without tracking gradients
    with torch.no_grad():
        outputs = model(pixel_values)

    # Highest-scoring logit -> class id -> human-readable label
    predicted_class_id = outputs.logits.argmax(-1).item()
    return model.config.id2label[predicted_class_id]

### Upload an image and classify it

In [None]:
from google.colab import files

# Ask for one or more files, then classify each uploaded file by its name.
uploaded = files.upload()

for filename in uploaded:
    print(f"Uploaded file: {filename}")
    predicted_label = classify_image(filename, model, processor)
    print(f"Predicted class for {filename}: {predicted_label}")