## Project Summary  

This project is part of the **Deep Learning Practice (DLP)** course, focusing on **Image Classification** using deep learning models. The dataset consists of images from the world of **flora and fauna**, and the goal is to build a model that classifies these images with the **best F1 score**.  

The final model is selected and evaluated using the macro-averaged F1 score, the same metric computed on the validation split during training.

# **Installation**

In [None]:
# Install the Hugging Face `evaluate` package, which provides the F1 metric loaded later in this notebook
!pip install evaluate

In [None]:
import os  # OS module for handling file paths and directories

import evaluate  # Library for evaluation metrics
import numpy as np  # Array operations (argmax over model logits)
import pandas as pd  # DataFrame construction and CSV I/O for the submission file
import torch  # PyTorch library for tensor computations and deep learning
import torch.nn as nn  # Neural network module for defining model architectures
import torch.nn.functional as F  # Functional interface for PyTorch operations
import torch.optim as optim  # Optimization algorithms for training models
import torchvision.transforms as transforms  # Transformations for image preprocessing
from datasets import load_dataset, ClassLabel  # Dataset utilities for handling and processing datasets
from PIL import Image  # Image handling and manipulation
from torch.utils.data import DataLoader  # DataLoader for batching and shuffling datasets
from torch.utils.data import DataLoader, random_split  # Additional data utilities for dataset splitting
from torchvision.datasets import ImageFolder  # Dataset loader for image classification
from torchvision.models import vgg19_bn, resnet18  # Pretrained models for feature extraction and classification
from transformers import AutoImageProcessor, AutoModelForImageClassification, TrainingArguments, Trainer  # Hugging Face Transformers for image classification

In [None]:
# Prefer the GPU when CUDA is usable; otherwise run everything on the CPU
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# **Data Pre-processing**

In [None]:
# Read the image folders from disk as 'train' and 'test' splits
# (adjust the path to match your environment)
dataset_path = "/kaggle/input/deep-learning-practice-week-9-image-c-lassifica"
dataset = load_dataset(
    "imagefolder",
    data_dir=dataset_path,
    split={"train": "train", "test": "test"},
)

In [None]:
# Pull the 'train' and 'test' entries out of the loaded dataset
train_dataset, test_dataset = dataset["train"], dataset["test"]

In [None]:
# Reload just the training folder so the class names can be read from its 'label' feature
dataset_path = "/kaggle/input/deep-learning-practice-week-9-image-c-lassifica/train"
dataset = load_dataset("imagefolder", data_dir=dataset_path)

# Copy the label names and put them in sorted order
class_names = list(dataset["train"].features["label"].names)
class_names.sort()

# ClassLabel built from the sorted names; used to convert a name back to an integer
class_label = ClassLabel(names=class_names)

# Re-encode each example's 'label' through the ClassLabel mapping
def add_labels(example):
    """Re-encode ``example["label"]`` via ``class_names`` and ``class_label``.

    The current integer label is used as an index into ``class_names``; the
    resulting name is converted back to an integer with
    ``class_label.str2int``. The example dict is updated in place and returned.

    NOTE(review): ``class_label`` is built from ``class_names``, so this
    presumably yields the index it started with - confirm the step is needed.
    """
    label_name = class_names[example["label"]]
    example["label"] = class_label.str2int(label_name)
    return example

In [None]:
# Re-encode the labels of the training split with 'add_labels'
train_dataset = dataset["train"].map(add_labels)

# Shuffle the training data with a fixed seed for reproducibility.
# `shuffle` returns a new dataset instead of reordering in place, so the
# result must be assigned back; otherwise the shuffle has no effect.
train_dataset = train_dataset.shuffle(seed=42)

# Display the first element from the 'test' dataset
test_dataset[0]

# **Model Initialization**

In [None]:
# Checkpoint of the pre-trained Vision Transformer (ViT) to fine-tune
model_checkpoint = "google/vit-base-patch16-224-in21k"  # Vision Transformer (ViT)

# Image processor that turns images into the tensor input the checkpoint expects
processor = AutoImageProcessor.from_pretrained(model_checkpoint)

# Index <-> class-name lookups derived from the dataset's class names, so the
# classification head size always matches the data instead of a hard-coded count
id2label = dict(enumerate(class_names))
label2id = {name: idx for idx, name in id2label.items()}

# Load the pre-trained weights with a classification head sized to the dataset,
# then move the model to the selected device (CPU/GPU)
model = AutoModelForImageClassification.from_pretrained(
    model_checkpoint,
    num_labels=len(class_names),
    id2label=id2label,
    label2id=label2id,
).to(device)

# **Data Transformation**

In [None]:
# Per-example preprocessing: image -> 'pixel_values' tensor
def transform_images(example):
    """Attach processor output for ``example["image"]`` as ``example["pixel_values"]``.

    Args:
        example: A dataset row with an ``"image"`` entry (a PIL image, or
            something ``Image.fromarray`` accepts).

    Returns:
        The same dict, with a ``"pixel_values"`` entry added.
    """
    img = example["image"]

    # Anything that is not already a PIL image is wrapped via Image.fromarray
    img = img if isinstance(img, Image.Image) else Image.fromarray(img)

    # Any non-RGB mode (e.g. grayscale) is converted to 3-channel RGB
    if img.mode != "RGB":
        img = img.convert("RGB")

    # The processor returns a batched tensor; squeeze(0) drops the leading batch axis
    encoded = processor(img, return_tensors="pt")
    example["pixel_values"] = encoded["pixel_values"].squeeze(0)
    return example

# Run 'transform_images' over the training data and drop the raw 'image' column afterwards
train_dataset = train_dataset.map(
    transform_images,
    remove_columns=["image"],
)

# **Data Splitting**

In [None]:
# Hold out 20% of the training data for validation (seeded so the split is repeatable)
train_test_split = train_dataset.train_test_split(test_size=0.2, seed=42)

# 'train' is the remaining 80%; 'test' is the held-out 20% used as validation data
train_dataset, val_dataset = train_test_split["train"], train_test_split["test"]


# **Model Evaluation**

In [None]:
# Load the F1 metric that 'compute_metrics' relies on
metric = evaluate.load("f1")

# Show a summary of the training dataset
print(train_dataset)

In [None]:
# Metric callback handed to the Trainer
def compute_metrics(eval_pred):
    """Return the macro-averaged F1 result for one evaluation pass.

    Args:
        eval_pred: A pair that unpacks into ``(logits, labels)``.

    Returns:
        The result of ``metric.compute`` for the predicted class indices.
    """
    raw_scores, true_labels = eval_pred

    # The index of the largest logit along the last axis is the predicted class
    predicted_ids = np.argmax(raw_scores, axis=-1)

    return metric.compute(
        predictions=predicted_ids,
        references=true_labels,
        average="macro",
    )

# **Model Training**

In [None]:
# Hyper-parameters and bookkeeping options for the Trainer
# NOTE(review): newer transformers releases reportedly rename 'evaluation_strategy'
# to 'eval_strategy' - confirm against the installed version.
training_args = TrainingArguments(
    # Where checkpoints and results are written
    output_dir="./results",
    # Optimisation settings
    num_train_epochs=2,
    learning_rate=5e-5,
    weight_decay=0.01,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    # Evaluate and checkpoint once per epoch
    evaluation_strategy="epoch",
    save_strategy="epoch",
    # Reload the checkpoint with the best "f1" once training finishes
    load_best_model_at_end=True,
    metric_for_best_model="f1",
    # No external experiment-tracking integrations
    report_to="none",
)

In [None]:
# Build the Trainer that drives fine-tuning and evaluation
# NOTE(review): the image processor is passed through the 'tokenizer' argument;
# newer transformers releases may expect 'processing_class' instead - confirm.
trainer = Trainer(
    args=training_args,  # Settings defined above (learning rate, batch size, ...)
    model=model,  # Model being fine-tuned
    tokenizer=processor,  # Image processor used for preprocessing
    compute_metrics=compute_metrics,  # Macro-F1 callback used at evaluation time
    train_dataset=train_dataset,  # Training split
    eval_dataset=val_dataset,  # Validation split
)

In [None]:
# Run fine-tuning with the Trainer configured above
trainer.train()

In [None]:
# Preprocess the test images the same way as the training images,
# dropping the raw 'image' column once 'pixel_values' has been added
test_dataset = test_dataset.map(
    transform_images,
    remove_columns=["image"],
)

In [None]:
# Run inference over the processed test data
predictions = trainer.predict(test_dataset)

# Turn the raw model outputs into class indices: the position of the
# largest value along the last axis is the predicted class
test_logits = predictions.predictions
test_preds = np.argmax(test_logits, axis=-1)

In [None]:
# Directory holding the test images
image_dir = "/kaggle/input/deep-learning-practice-week-9-image-c-lassifica/test"

# Collect the extension-less names of every ".jpg" file, in sorted filename order
# NOTE(review): this order is assumed to match the row order of the test dataset - confirm
test_image_ids = []
for file_name in sorted(os.listdir(image_dir)):
    if not file_name.endswith(".jpg"):
        continue
    stem, _ext = os.path.splitext(file_name)
    test_image_ids.append(stem)

# **Submission**

In [None]:
# Pair every image ID with its predicted label
submission_columns = {"Image_ID": test_image_ids, "Label": test_preds}
submission_df = pd.DataFrame(submission_columns)

# Write the table to disk as CSV, leaving out the index column
submission_file = "/kaggle/working/submission.csv"
submission_df.to_csv(submission_file, index=False)

# Report where the submission was written
print(f"Submission file saved: {submission_file}")

In [None]:
# Show the first rows of the submission DataFrame as a quick sanity check
submission_df.head()

In [None]:
# Read the saved submission back from disk to confirm it was written as expected
df = pd.read_csv("/kaggle/working/submission.csv")

# Show its first 10 rows
df.head(n=10)

In [None]:
# Print True if the submission CSV is present at the expected path, False otherwise
print(os.path.exists("/kaggle/working/submission.csv"))