<a href="https://colab.research.google.com/github/Nishthamaybeme/Butterfly-Species-Prediction/blob/main/vit_butterfly.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
 from google.colab import drive
# Mount Google Drive at /content/drive; the dataset and checkpoint paths used
# in this notebook all live under that mount point.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import os
from torch.utils.data import Dataset, DataLoader
from PIL import Image
from torchvision import transforms

class LeedsButterflyDataset(Dataset):
    """Leeds Butterfly images stored as PNG files under ``<root_dir>/images``.

    The class label is parsed from the file name: the first three characters
    are read as a 1-based category ID and shifted to a 0-based label.

    Args:
        root_dir: Directory that contains the ``images`` sub-folder.
        transform: Optional callable applied to each RGB PIL image before it
            is returned.
    """

    def __init__(self, root_dir, transform=None):
        self.root_dir = root_dir
        self.transform = transform
        self.image_dir = os.path.join(root_dir, 'images')
        # os.listdir() returns entries in arbitrary order; sorting keeps the
        # index -> file mapping stable across runs and machines.
        self.image_files = sorted(
            f for f in os.listdir(self.image_dir) if f.endswith('.png')
        )

    def __len__(self):
        """Return the number of PNG files found in the image directory."""
        return len(self.image_files)

    @staticmethod
    def _label_from_filename(filename):
        """Return the 0-based label encoded in the first three characters.

        Raises:
            ValueError: If ``filename`` does not start with a 3-digit category
                ID of at least 1 (which would otherwise yield a negative or
                meaningless label).
        """
        prefix = filename[:3]
        if len(prefix) != 3 or not prefix.isdigit():
            raise ValueError(
                f"Cannot parse a 3-digit category ID from file name {filename!r}"
            )
        category_id = int(prefix)
        if category_id < 1:
            raise ValueError(
                f"Category ID must be >= 1, got {category_id} in file name {filename!r}"
            )
        # Category IDs start from 1, but labels start from 0
        return category_id - 1

    def __getitem__(self, idx):
        """Return ``(image, label)`` for the file at position ``idx``."""
        filename = self.image_files[idx]
        img_path = os.path.join(self.image_dir, filename)

        # convert() produces an independent in-memory copy, so the underlying
        # file can be closed as soon as the conversion is done.
        with Image.open(img_path) as raw_image:
            image = raw_image.convert('RGB')

        label = self._label_from_filename(filename)

        if self.transform:
            image = self.transform(image)

        return image, label

# Location of the Leeds Butterfly dataset on the mounted Drive
DATASET_PATH = '/content/drive/MyDrive/crime game/leedsbutterfly'

# Preprocessing applied to every image: resize to the 224x224 ViT input size,
# convert to a tensor, then normalize each channel with mean 0.5 and std 0.5
transform = transforms.Compose([
    transforms.Resize(size=(224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
])

# Wrap the image folder in a dataset and serve it in shuffled batches of 32
dataset = LeedsButterflyDataset(root_dir=DATASET_PATH, transform=transform)
train_loader = DataLoader(dataset=dataset, batch_size=32, shuffle=True)

# Sanity check: report how many images were picked up
print(f"Dataset contains {len(dataset)} samples.")


Dataset contains 832 samples.


In [None]:
from transformers import ViTForImageClassification, ViTFeatureExtractor
import torch
from torch import nn
import torch.optim as optim

# Run on the GPU when CUDA is available, otherwise on the CPU
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Pre-trained ViT checkpoint with a 10-class classification head, placed on
# the selected device
model = ViTForImageClassification.from_pretrained(
    "google/vit-base-patch16-224-in21k",
    num_labels=10,
).to(device)

# Adam over all model parameters, trained against cross-entropy
optimizer = optim.Adam(params=model.parameters(), lr=1e-4)
criterion = nn.CrossEntropyLoss()

# Feature extractor published with the same checkpoint
feature_extractor = ViTFeatureExtractor.from_pretrained(
    "google/vit-base-patch16-224-in21k"
)


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.
Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [None]:
# Fine-tune the model and report the mean batch loss after every epoch
epochs = 10

for epoch in range(epochs):
    model.train()
    running_loss = 0.0

    for images, labels in train_loader:
        images = images.to(device)
        labels = labels.to(device)

        # Drop gradients left over from the previous step before computing
        # new ones
        optimizer.zero_grad()

        # Forward pass: the logits are the raw class scores
        outputs = model(images).logits
        loss = criterion(outputs, labels)

        # Backward pass and parameter update
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    print(f"Epoch [{epoch + 1}/{epochs}], Loss: {running_loss / len(train_loader)}")


Epoch [1/10], Loss: 1.4790197610855103
Epoch [2/10], Loss: 0.4723818382391563
Epoch [3/10], Loss: 0.23945709088673958
Epoch [4/10], Loss: 0.16730924982291
Epoch [5/10], Loss: 0.13031962886452675
Epoch [6/10], Loss: 0.1058353830415469
Epoch [7/10], Loss: 0.08791764195148762
Epoch [8/10], Loss: 0.0742521326129253
Epoch [9/10], Loss: 0.06351242644282487
Epoch [10/10], Loss: 0.054906462104274675


In [None]:
import os

# Directory on the mounted Drive that receives the checkpoint. It uses the
# same 'MyDrive' spelling as the dataset path and the cell that reloads the
# checkpoint, so the save and load locations are visibly the same folder.
save_directory = '/content/drive/MyDrive/new_directory'

# Create the directory if it does not exist
os.makedirs(save_directory, exist_ok=True)

# Full path of the checkpoint file
save_path = os.path.join(save_directory, 'vit_butterfly_model1.pth')

# Save only the weights (state dict), not the pickled model object
torch.save(model.state_dict(), save_path)


In [None]:
# Re-initialize the model (same architecture as before); the classification
# head is randomly initialized here and overwritten by the checkpoint below
model = ViTForImageClassification.from_pretrained('google/vit-base-patch16-224-in21k', num_labels=10)

# Load the saved weights.
# - map_location=device lets a checkpoint written on a GPU runtime load on a
#   CPU-only runtime (and vice versa).
# - weights_only=True restricts unpickling to tensors and plain containers,
#   which is all a state dict needs, and avoids executing arbitrary pickled code.
state_dict = torch.load(
    '/content/drive/MyDrive/new_directory/vit_butterfly_model1.pth',
    map_location=device,
    weights_only=True,
)
model.load_state_dict(state_dict)

# Move the model to the device (GPU or CPU)
model.to(device)

# Set the model to evaluation mode; the trailing semicolon keeps the notebook
# from printing the full module tree as the cell output
model.eval();


Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  model.load_state_dict(torch.load('//content/drive/MyDrive/new_directory/vit_butterfly_model1.pth'))


ViTForImageClassification(
  (vit): ViTModel(
    (embeddings): ViTEmbeddings(
      (patch_embeddings): ViTPatchEmbeddings(
        (projection): Conv2d(3, 768, kernel_size=(16, 16), stride=(16, 16))
      )
      (dropout): Dropout(p=0.0, inplace=False)
    )
    (encoder): ViTEncoder(
      (layer): ModuleList(
        (0-11): 12 x ViTLayer(
          (attention): ViTSdpaAttention(
            (attention): ViTSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
            (output): ViTSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
          )
          (intermediate): ViTIntermediate(
            (dense): Linear(in_fe

In [None]:
from PIL import Image
from torchvision import transforms
import torch

# Preprocessing for inference. This must match the training pipeline: the
# model was fine-tuned on images normalized with mean 0.5 / std 0.5 per
# channel, so normalizing with different statistics (e.g. the ImageNet ones)
# would feed it inputs from a distribution it never saw.
transform = transforms.Compose([
    transforms.Resize((224, 224)),   # Resize to the input size of ViT
    transforms.ToTensor(),           # Convert image to tensor
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
])

# Load a new image
image_path = '/content/drive/MyDrive/images.jpg'  # Replace with your image path

# Open inside a context manager so the file handle is released right after
# the image has been converted and transformed
with Image.open(image_path) as raw_image:
    # unsqueeze(0) adds the batch dimension the model expects
    image = transform(raw_image.convert('RGB')).unsqueeze(0)

# Move the image to the device
image = image.to(device)

# Make sure the model is in evaluation mode even if this cell is run right
# after training
model.eval()

# Make the prediction; gradients are not needed for inference
with torch.no_grad():
    outputs = model(image).logits
    predicted = outputs.argmax(dim=1)  # index of the highest logit

# Output the predicted class label
print(f"Predicted class: {predicted.item()}")


Predicted class: 0


In [None]:
# Butterfly species as (scientific name, common name); a species' position in
# this list is its class index
_species_by_index = [
    ('Danaus plexippus', 'Monarch Butterfly'),
    ('Heliconius charitonius', 'Zebra Longwing'),
    ('Heliconius erato', 'Red Postman'),
    ('Junonia coenia', 'Common Buckeye'),
    ('Lycaena phlaeas', 'Small Copper'),
    ('Nymphalis antiopa', 'Mourning Cloak'),
    ('Papilio cresphontes', 'Giant Swallowtail'),
    ('Pieris rapae', 'Cabbage White'),
    ('Vanessa atalanta', 'Red Admiral'),
    ('Vanessa cardui', 'Painted Lady'),
]

# Class index -> (scientific name, common name)
class_names = dict(enumerate(_species_by_index))

# Look up both names for the class index the model predicted
predicted_class_idx = predicted.item()
predicted_scientific_name, predicted_common_name = class_names[predicted_class_idx]

# Report the prediction, one line per field
print(
    "Predicted butterfly species:",
    f"Scientific Name: {predicted_scientific_name}",
    f"Common Name: {predicted_common_name}",
    sep="\n",
)


Predicted butterfly species:
Scientific Name: Danaus plexippus
Common Name: Monarch Butterfly


In [None]:
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt

# Collect true and predicted labels by running the model over `dataset`.
# NOTE: `dataset` is the data the model was trained on, so these numbers are
# optimistic; evaluate on a held-out split for an honest generalization
# estimate.
eval_loader = DataLoader(dataset, batch_size=32, shuffle=False)

y_true, y_pred = [], []
model.eval()
with torch.no_grad():
    for batch_images, batch_labels in eval_loader:
        batch_logits = model(batch_images.to(device)).logits
        y_pred.extend(batch_logits.argmax(dim=1).cpu().tolist())
        y_true.extend(batch_labels.tolist())

# Class indices and their display names (common names), in index order.
# Passing the full label list to the metrics keeps the report rows and the
# confusion-matrix axes aligned with the names even if a class never occurs.
label_ids = sorted(class_names)
species_classes = [class_names[label_id][1] for label_id in label_ids]

# Calculate Accuracy
accuracy = accuracy_score(y_true, y_pred)
print(f"Accuracy: {accuracy:.4f}")

# Classification report (includes precision, recall, and F1-score for each class)
print("\nClassification Report:")
print(
    classification_report(
        y_true,
        y_pred,
        labels=label_ids,
        target_names=species_classes,
        zero_division=0,  # report 0 instead of warning for classes never predicted
    )
)

# Confusion Matrix
cm = confusion_matrix(y_true, y_pred, labels=label_ids)
fig, ax = plt.subplots(figsize=(10, 8))
sns.heatmap(
    cm,
    annot=True,
    fmt="d",
    cmap="Blues",
    xticklabels=species_classes,
    yticklabels=species_classes,
    ax=ax,
)
ax.set_title("Confusion Matrix")
ax.set_xlabel("Predicted Labels")
ax.set_ylabel("True Labels")
plt.show()


NameError: name 'y_true' is not defined