In [None]:
# Mount Google Drive at /content/drive; every dataset and model path used by
# the cells below lives under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
import os
import random
import shutil

# Split the per-class image folders under `dataset_path` into train/ and val/
# copies under `output_path`. The split is deterministic, so re-running this
# cell reproduces the same assignment instead of leaking images across splits.

# Define dataset paths
dataset_path = "/content/drive/MyDrive/Vehicles_Datasets"  # Original dataset
output_path = "/content/drive/MyDrive/Vehicles_Datasets_Split"  # New split dataset
train_ratio = 0.8  # 80% train, 20% validation
split_seed = 42  # Fixed seed so the same images always land in the same split

# Create train and val directories
train_dir = os.path.join(output_path, "train")
val_dir = os.path.join(output_path, "val")
os.makedirs(train_dir, exist_ok=True)
os.makedirs(val_dir, exist_ok=True)

# Iterate over each category (class) in a stable order
for category in sorted(os.listdir(dataset_path)):
    category_path = os.path.join(dataset_path, category)
    # Only visible class folders are split; hidden entries such as
    # `.ipynb_checkpoints` are not classes.
    if category.startswith(".") or not os.path.isdir(category_path):
        continue

    # Keep regular, non-hidden files only (shutil.copy cannot copy a nested
    # directory). Sorting first removes the dependence on os.listdir's
    # arbitrary ordering, so the seeded shuffle below is reproducible.
    images = sorted(
        name
        for name in os.listdir(category_path)
        if not name.startswith(".")
        and os.path.isfile(os.path.join(category_path, name))
    )

    # A dedicated RNG per category keeps each class's split independent of
    # which other classes exist and of the global `random` state.
    random.Random(f"{split_seed}:{category}").shuffle(images)

    split_idx = int(len(images) * train_ratio)
    train_images = images[:split_idx]
    val_images = images[split_idx:]

    # Create class subdirectories in train and val folders
    train_category_dir = os.path.join(train_dir, category)
    val_category_dir = os.path.join(val_dir, category)
    os.makedirs(train_category_dir, exist_ok=True)
    os.makedirs(val_category_dir, exist_ok=True)

    # Copy images to train and val folders
    for img in train_images:
        shutil.copy(os.path.join(category_path, img), os.path.join(train_category_dir, img))
    for img in val_images:
        shutil.copy(os.path.join(category_path, img), os.path.join(val_category_dir, img))

    # Files left behind by an earlier run with a different split would sit in
    # both folders and inflate validation accuracy; report them, don't delete.
    leaked = set(os.listdir(train_category_dir)) & set(os.listdir(val_category_dir))
    if leaked:
        print(
            f"WARNING: {len(leaked)} file(s) of '{category}' exist in both train and val; "
            "delete the output folder and re-run to get a clean split."
        )

    print(f"Category '{category}' split completed: {len(train_images)} train, {len(val_images)} val")

print("Dataset splitting complete!")


In [None]:
import os
from PIL import Image
from torch.utils.data import Dataset

class VehicleDataset(Dataset):
    """Image dataset laid out as one sub-directory of ``root_dir`` per class.

    Class indices are assigned from the alphabetically sorted folder names, so
    two instances built over folders with the same class names (e.g. train/
    and val/) always agree on which index means which class.
    """

    def __init__(self, root_dir, transform=None):
        """
        Args:
            root_dir (string): Directory with all the images, organized into class folders.
            transform (callable, optional): Optional transform to be applied on an image.
        """
        self.root_dir = root_dir
        self.transform = transform
        # os.listdir gives no ordering guarantee; sort so label indices are
        # stable. Hidden entries (e.g. `.ipynb_checkpoints`) and stray files
        # in the root are not classes.
        self.classes = sorted(
            entry
            for entry in os.listdir(root_dir)
            if not entry.startswith(".")
            and os.path.isdir(os.path.join(root_dir, entry))
        )
        self.image_paths = []
        self.labels = []

        # Get all image paths and corresponding labels
        for label, category in enumerate(self.classes):
            category_path = os.path.join(root_dir, category)
            for image_name in sorted(os.listdir(category_path)):
                image_path = os.path.join(category_path, image_name)
                # Skip hidden files and nested directories; they are not images.
                if image_name.startswith(".") or not os.path.isfile(image_path):
                    continue
                self.image_paths.append(image_path)
                self.labels.append(label)

    def __len__(self):
        """Return the number of images found under ``root_dir``."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Return ``{"pixel_values": image, "label": int}`` for sample ``idx``.

        The image is loaded as RGB and passed through ``transform`` when one
        was supplied; otherwise the PIL image itself is returned.
        """
        image_path = self.image_paths[idx]
        label = self.labels[idx]
        # The context manager closes the underlying file as soon as the pixel
        # data has been converted, instead of waiting for garbage collection.
        with Image.open(image_path) as raw_image:
            image = raw_image.convert("RGB")

        if self.transform:
            image = self.transform(image)

        return {"pixel_values": image, "label": label}


In [None]:
from torchvision import transforms

# Preprocessing pipeline applied to every image of the train and val datasets.
transform = transforms.Compose(
    [
        # ViT expects a fixed 224x224 input
        transforms.Resize(size=(224, 224)),
        # PIL image -> tensor
        transforms.ToTensor(),
        # Per-channel (x - 0.5) / 0.5, i.e. normalize to [-1, 1]
        transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
    ]
)


In [None]:
from torch.utils.data import DataLoader

dataset_path = "/content/drive/MyDrive/Vehicles_Datasets_Split"

# One dataset per split folder, both using the same preprocessing transform.
train_dataset, val_dataset = (
    VehicleDataset(root_dir=f"{dataset_path}/{split_name}", transform=transform)
    for split_name in ("train", "val")
)

# Training batches are reshuffled every epoch; validation keeps a fixed order.
train_loader = DataLoader(dataset=train_dataset, shuffle=True, batch_size=32)
val_loader = DataLoader(dataset=val_dataset, shuffle=False, batch_size=32)


# Loading ViT Model

In [None]:
from transformers import ViTForImageClassification
import torch

# Map label indices to class names exactly as the training dataset assigned
# them. Storing this mapping in the model config means a checkpoint saved with
# save_pretrained carries its own class names, so inference code does not need
# a hand-maintained list that can drift out of order.
id2label = dict(enumerate(train_dataset.classes))
label2id = {name: idx for idx, name in id2label.items()}

# Load ViT model with a fresh classification head sized for this dataset
model = ViTForImageClassification.from_pretrained(
    "google/vit-base-patch16-224-in21k",
    num_labels=len(train_dataset.classes),  # Number of classes in your dataset
    id2label=id2label,
    label2id=label2id,
)

device = "cuda" if torch.cuda.is_available() else "cpu"
# Assigning the result keeps the cell from printing the full module repr.
model = model.to(device)


# Model Training

In [None]:
import torch
from torch.optim import AdamW
from tqdm import tqdm

# Fine-tune the whole model with AdamW and cross-entropy on the logits.
optimizer = AdamW(model.parameters(), lr=1e-4)
loss_fn = torch.nn.CrossEntropyLoss()

epochs = 10

for epoch in range(epochs):
    model.train()
    total_loss = 0.0  # Sum of per-sample losses over the epoch
    correct = 0
    seen = 0  # Number of samples processed this epoch

    for batch in tqdm(train_loader, desc=f"Epoch {epoch + 1}/{epochs}"):
        inputs = batch["pixel_values"].to(device)
        # The DataLoader's default collate already turns the integer labels
        # into a tensor; wrapping it in torch.tensor() again only triggers a
        # copy-construct warning.
        labels = batch["label"].to(device)

        optimizer.zero_grad()
        outputs = model(inputs).logits
        loss = loss_fn(outputs, labels)
        loss.backward()
        optimizer.step()

        batch_size = labels.size(0)
        # loss is the batch mean; weight it by batch size so the last,
        # possibly smaller, batch is not over-counted in the epoch average.
        total_loss += loss.item() * batch_size
        correct += (outputs.argmax(dim=1) == labels).sum().item()
        seen += batch_size

    # max(..., 1) avoids a ZeroDivisionError when the training set is empty.
    avg_loss = total_loss / max(seen, 1)
    accuracy = correct / max(seen, 1)
    print(f"Epoch {epoch + 1}: Avg Loss = {avg_loss:.4f}, Accuracy = {accuracy:.4f}")


# Evaluation 

In [None]:
# Measure top-1 accuracy on the validation split without tracking gradients.
model.eval()
correct = 0
evaluated = 0  # Number of validation samples actually scored

with torch.no_grad():
    for batch in tqdm(val_loader, desc="Evaluating"):
        inputs = batch["pixel_values"].to(device)
        # Labels arrive from the DataLoader already collated into a tensor, so
        # move them directly instead of re-wrapping with torch.tensor().
        labels = batch["label"].to(device)

        outputs = model(inputs).logits
        correct += (outputs.argmax(dim=1) == labels).sum().item()
        evaluated += labels.size(0)

# max(..., 1) avoids a ZeroDivisionError when the validation set is empty.
accuracy = correct / max(evaluated, 1)
print(f"Validation Accuracy: {accuracy:.4f}")


# Save the ViT Model to Drive

In [None]:
from transformers import AutoImageProcessor

# Persist everything inference needs into one directory: the fine-tuned model
# (weights + config) and the image processor.
save_dir = "/content/drive/MyDrive/Trained_ViT_Model"

# Load the base checkpoint's image processor.
# NOTE(review): training used the manual torchvision transform (224x224,
# mean/std 0.5); this assumes the checkpoint's processor applies the same
# preprocessing. Confirm against the saved preprocessor_config.json.
processor = AutoImageProcessor.from_pretrained("google/vit-base-patch16-224-in21k")

# Save the fine-tuned model. Without this the directory holds only the
# processor, and ViTForImageClassification.from_pretrained(save_dir) has no
# trained weights to load.
model.save_pretrained(save_dir)

# Save the processor to the same model directory
processor.save_pretrained(save_dir)

# Load the Saved Model and Test Inference

In [1]:
from transformers import ViTForImageClassification, AutoImageProcessor
import torch
from PIL import Image, ImageDraw, ImageFont
import os
from IPython.display import display

# Classify a single image with the saved model, label it "unknown" when the
# top probability is below a threshold, and save/display an annotated copy.

# Load the trained model and processor
model_path = "/content/drive/MyDrive/Trained_ViT_Model"
model = ViTForImageClassification.from_pretrained(model_path)
processor = AutoImageProcessor.from_pretrained(model_path)

# Move the model to device
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)

# Set the threshold for "unknown" classification
confidence_threshold = 0.5  # Adjust based on your dataset and use case

# Load and preprocess the image
# image_path = "/content/drive/MyDrive/Testing_Images/bike.jpg"
# image_path = "/content/drive/MyDrive/Testing_Images/motorbike.jpg"
# image_path = "/content/drive/MyDrive/Testing_Images/car.jpg"
# image_path = "/content/drive/MyDrive/Testing_Images/airplane.jpg"
# image_path = "/content/drive/MyDrive/Testing_Images/ship.jpg"
image_path = "/content/drive/MyDrive/Testing_Images/human.jpg"
if not os.path.exists(image_path):
    raise FileNotFoundError(f"Image file not found: {image_path}")

image = Image.open(image_path).convert("RGB")

# Preprocess with the processor stored next to the model
inputs = processor(images=image, return_tensors="pt")
inputs = {k: v.to(device) for k, v in inputs.items()}

# Perform inference
model.eval()
with torch.no_grad():
    outputs = model(**inputs)

print('\n==========================[ Vehicles Classification using ViT model ]==========================\n')

# Get the raw logits (model outputs before applying softmax)
logits = outputs.logits
print("Logits:", logits)  # Print raw logits

# Apply softmax to get probabilities
probabilities = torch.softmax(logits, dim=1)
max_prob, top_index = torch.max(probabilities, dim=1)

# Resolve class names. Prefer the names stored in the checkpoint's config,
# because they are tied to the label indices used in training. A config that
# only has the auto-generated "LABEL_<n>" placeholders carries no real names,
# so fall back to the hand-maintained list (its order must match training).
config_labels = getattr(model.config, "id2label", None) or {}
has_named_labels = bool(config_labels) and not all(
    str(name).startswith("LABEL_") for name in config_labels.values()
)
if has_named_labels:
    class_names = [config_labels[idx] for idx in sorted(config_labels)]
else:
    class_names = ["cars", "ships", "motorbikes", "airplane", "bicycles"]

# Check confidence threshold
if max_prob.item() < confidence_threshold:
    predicted_label = "unknown"
    predicted_class_index = -1  # Assign -1 for unknown classes
else:
    predicted_class_index = top_index.item()
    if predicted_class_index >= len(class_names):
        # The model has more outputs than we have names for; any name we
        # printed would be a guess.
        raise ValueError(
            f"Model predicted class index {predicted_class_index} but only "
            f"{len(class_names)} class names are known"
        )
    predicted_label = class_names[predicted_class_index]

# Print the result
print(f"Predicted class index: {predicted_class_index}")
print(f"Predicted class name: {predicted_label}")
print(f"Confidence score: {max_prob.item()}")

# Draw the prediction onto a copy so the saved file really is a labeled image
# (the original `image` stays untouched).
labeled_image = image.copy()
font_size = max(16, labeled_image.width // 25)
try:
    font = ImageFont.load_default(size=font_size)
except TypeError:
    # Older Pillow releases do not accept `size`; use the small built-in font
    # and shrink the banner accordingly.
    font = ImageFont.load_default()
    font_size = 12
padding = 6
draw = ImageDraw.Draw(labeled_image)
# Solid strip behind the caption keeps it readable on any background.
draw.rectangle([0, 0, labeled_image.width, font_size + 2 * padding], fill="black")
draw.text(
    (padding, padding),
    f"{predicted_label} ({max_prob.item():.2f})",
    fill="white",
    font=font,
)

# Save and display the labeled image
output_image_path = "/content/drive/MyDrive/Testing_Images/predicted_images.png"
labeled_image.save(output_image_path)
print(f"Labeled image saved at: {output_image_path}")

# Resize the image (for example, to 600x600)
resized_image = labeled_image.resize((600, 600))

# Display the image inline (for Jupyter or Colab)
display(resized_image)


ModuleNotFoundError: No module named 'transformers'