In [None]:
# Mount Google Drive at /content/drive so the dataset folders stored under it
# (read by the cells below) are reachable from this runtime.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

In [None]:
import os

# Root of the training split; each sub-folder is listed by the preview below.
TRAIN_DIR = '/content/drive/MyDrive/Pollens/train'


def preview_class_folders(root):
    """Print every sub-folder of ``root`` followed by one sample file name.

    Entries of ``root`` that are not directories (stray files) are skipped
    instead of being passed to ``os.listdir``, and both folders and files are
    visited in sorted order so the preview is the same on every run.

    Args:
        root: Path of the directory whose sub-folders should be previewed.

    Returns:
        A list of ``(folder_name, first_file_name)`` tuples in sorted folder
        order; ``first_file_name`` is ``None`` for an empty folder.
    """
    preview = []
    for folder in sorted(os.listdir(root)):
        folder_path = os.path.join(root, folder)
        if not os.path.isdir(folder_path):
            # A plain file next to the class folders would make os.listdir raise.
            continue
        print(folder)
        first_file = next(iter(sorted(os.listdir(folder_path))), None)
        if first_file is not None:
            print(first_file)
        preview.append((folder, first_file))
    return preview


# Only run the preview when the dataset folder is actually present, so a
# missing mount produces a readable hint rather than a traceback.
if os.path.isdir(TRAIN_DIR):
    preview_class_folders(TRAIN_DIR)
else:
    print(f'Training directory not found: {TRAIN_DIR} (is Google Drive mounted?)')


In [None]:

import os
import random
import shutil
import pandas as pd
import torchvision
from torchvision.transforms import ToTensor
import torchvision.transforms as T
import os
import tqdm
import PIL
import torch




In [None]:
import torch
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
from tqdm import tqdm

# Input geometry and normalisation statistics.
# The checkpoint loaded further down (google/vit-base-patch16-224-in21k) was
# pre-trained on images normalised with mean 0.5 / std 0.5 per channel (see its
# model card), so the same statistics are used here instead of the ImageNet ones.
IMAGE_SIZE = (224, 224)
NORM_MEAN = [0.5, 0.5, 0.5]
NORM_STD = [0.5, 0.5, 0.5]
BATCH_SIZE = 32
SPLITS = ['train', 'test']

# Define transforms for the training and testing data.
# Only the training split gets random augmentation (horizontal flip).
data_transforms = {
    'train': transforms.Compose([
        transforms.Resize(IMAGE_SIZE),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(NORM_MEAN, NORM_STD)
    ]),
    'test': transforms.Compose([
        transforms.Resize(IMAGE_SIZE),
        transforms.ToTensor(),
        transforms.Normalize(NORM_MEAN, NORM_STD)
    ]),
}

# Load data: one ImageFolder dataset per split, read from <data_dir>/<split>.
data_dir = '/content/drive/MyDrive/Pollens/'
image_datasets = {
    split: datasets.ImageFolder(root=os.path.join(data_dir, split), transform=data_transforms[split])
    for split in SPLITS
}

# Never request more worker processes than the runtime has CPUs.
num_workers = min(4, os.cpu_count() or 1)

# Shuffle only the training split; evaluation metrics do not depend on order,
# and a fixed order keeps test-time iteration reproducible.
dataloaders = {
    split: DataLoader(
        image_datasets[split],
        batch_size=BATCH_SIZE,
        shuffle=(split == 'train'),
        num_workers=num_workers,
    )
    for split in SPLITS
}
dataset_sizes = {split: len(image_datasets[split]) for split in SPLITS}
class_names = image_datasets['train'].classes

# Use the first CUDA device when one is available, otherwise fall back to CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")


import torch.nn as nn
import torch.optim as optim
from transformers import ViTForImageClassification

# Load the pre-trained ViT-Base (patch 16, 224 px) checkpoint with `num_labels`
# set to the number of classes found in the training folder, then move it to
# the selected device.
CHECKPOINT_NAME = 'google/vit-base-patch16-224-in21k'
num_classes = len(class_names)
model = ViTForImageClassification.from_pretrained(CHECKPOINT_NAME, num_labels=num_classes)
model = model.to(device)

# Cross-entropy loss on the logits; Adam over all model parameters.
LEARNING_RATE = 1e-4
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=LEARNING_RATE)

# Training function
def _model_device(model):
    """Return the device of the model's first parameter (CPU if it has none)."""
    first_param = next(model.parameters(), None)
    return first_param.device if first_param is not None else torch.device('cpu')


def _count_correct(model, loader, device):
    """Run ``model`` in eval mode over ``loader`` and return ``(num_correct, num_samples)``."""
    correct = 0
    seen = 0
    model.eval()
    # One no_grad context for the whole pass instead of re-entering it per batch.
    with torch.no_grad():
        for inputs, labels in loader:
            inputs = inputs.to(device)
            labels = labels.to(device)
            outputs = model(inputs).logits
            _, preds = torch.max(outputs, 1)
            correct += (preds == labels).sum().item()
            seen += labels.size(0)
    return correct, seen


def train_model(model, criterion, optimizer, dataloaders, num_epochs=25):
    """Train ``model`` and evaluate it on the test split after every epoch.

    The function is self-contained: batches are moved to the device the model
    lives on, and all averages are taken over the number of samples actually
    yielded by the given loaders (not over a module-level size table), so it
    works with any ``dataloaders`` mapping passed in.

    Args:
        model: Module whose forward pass returns an object with a ``.logits``
            attribute of per-class scores.
        criterion: Loss called as ``criterion(logits, labels)``.
        optimizer: Optimizer already bound to ``model``'s parameters.
        dataloaders: Mapping with ``'train'`` and ``'test'`` iterables that
            yield ``(inputs, labels)`` batches.
        num_epochs: Number of passes over the training split.

    Returns:
        ``(model, train_accuracies, test_accuracies, train_losses)`` where the
        three lists hold one float per epoch. A split that yields no samples
        contributes ``nan`` for that epoch instead of raising.
    """
    train_accuracies = []
    test_accuracies = []
    train_losses = []
    device = _model_device(model)

    for epoch in range(num_epochs):
        model.train()

        running_loss = 0.0
        running_corrects = 0
        samples_seen = 0

        for inputs, labels in tqdm(dataloaders['train']):
            inputs = inputs.to(device)
            labels = labels.to(device)

            optimizer.zero_grad()

            outputs = model(inputs).logits
            _, preds = torch.max(outputs, 1)
            loss = criterion(outputs, labels)

            loss.backward()
            optimizer.step()

            batch_size = inputs.size(0)
            # criterion returns a batch mean, so weight it by the batch size
            # to get a correct per-sample average when the last batch is short.
            running_loss += loss.item() * batch_size
            running_corrects += (preds == labels).sum().item()
            samples_seen += batch_size

        epoch_loss = running_loss / samples_seen if samples_seen else float('nan')
        epoch_acc = running_corrects / samples_seen if samples_seen else float('nan')

        train_accuracies.append(epoch_acc)
        print(f'Epoch {epoch}/{num_epochs-1} Train Loss: {epoch_loss:.4f} Train Acc: {epoch_acc:.4f}')
        train_losses.append(epoch_loss)

        # Evaluate on test data
        test_corrects, test_seen = _count_correct(model, dataloaders['test'], device)
        test_acc = test_corrects / test_seen if test_seen else float('nan')
        test_accuracies.append(test_acc)
        print(f'Epoch {epoch}/{num_epochs-1} Test Acc: {test_acc:.4f}')

    return model, train_accuracies, test_accuracies, train_losses

# Fine-tune for a fixed number of epochs and keep the per-epoch accuracy and
# loss histories for the plots further down.
NUM_EPOCHS = 10
model, train_accuracies, test_accuracies, train_loss = train_model(
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    dataloaders=dataloaders,
    num_epochs=NUM_EPOCHS,
)



In [None]:

from sklearn.metrics import classification_report, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns

# Evaluation on test set: collect the true and predicted class index of every
# test image, then summarise them as a per-class report and a confusion matrix.
model.eval()
all_labels = []
all_preds = []

# Gradients are not needed for inference, so disable them for the whole pass.
with torch.no_grad():
    for inputs, labels in dataloaders['test']:
        outputs = model(inputs.to(device)).logits
        _, preds = torch.max(outputs, 1)

        # Labels are only compared on the host, so they are never moved to the device.
        all_labels.extend(labels.cpu().tolist())
        all_preds.extend(preds.cpu().tolist())

# Pass the full list of class indices explicitly. Without it, scikit-learn
# infers the classes from the values present in the data, so a class that is
# missing from both labels and predictions would make `target_names` mismatch
# (ValueError) and would shrink the confusion matrix below len(class_names),
# mislabelling the heatmap ticks.
class_indices = list(range(len(class_names)))

# Classification report
print('Classification Report:')
print(classification_report(all_labels, all_preds, labels=class_indices, target_names=class_names))

# Confusion matrix (rows: true class, columns: predicted class)
conf_matrix = confusion_matrix(all_labels, all_preds, labels=class_indices)

# Plot confusion matrix
plt.figure(figsize=(10, 8))
sns.heatmap(conf_matrix, annot=True, fmt="d", xticklabels=class_names, yticklabels=class_names)
plt.xlabel('Predicted')
plt.ylabel('True')
plt.title('Confusion Matrix')
plt.show()


# Per-epoch accuracy curves for the training and test splits on one set of axes
acc_fig, acc_ax = plt.subplots(figsize=(10, 5))
acc_ax.plot(train_accuracies, label='Train Accuracy')
acc_ax.plot(test_accuracies, label='Test Accuracy')
acc_ax.set(xlabel='Epoch', ylabel='Accuracy', title='Training and Testing Accuracies')
acc_ax.legend()
plt.show()


In [None]:
# Per-epoch training loss curve
loss_fig, loss_ax = plt.subplots(figsize=(10, 5))
loss_ax.plot(train_loss, label='Train Loss')
loss_ax.set(xlabel='Epoch', ylabel='Loss', title='Training Loss')
loss_ax.legend()
plt.show()