In [1]:
import matplotlib.pyplot as plt
import torch
import torchvision
from torch import nn

from torch import nn
from torchvision import transforms

# Run on the GPU when PyTorch reports one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
device

'cpu'

In [2]:
# Default pretrained weight set for ViT-B/16.
pretrained_vit_weights = torchvision.models.ViT_B_16_Weights.DEFAULT

# Build the network from those weights, then place it on the selected device.
pretrained_vit = torchvision.models.vit_b_16(weights=pretrained_vit_weights)
pretrained_vit = pretrained_vit.to(device)

# Freeze every parameter that came with the pretrained network.
for param in pretrained_vit.parameters():
    param.requires_grad = False

# One output per disease class. The new head is created after the freeze
# above, so its parameters keep the default requires_grad=True.
class_names = ['Bacterialblight','Blast','Brownspot','Tungro']
new_head = nn.Linear(in_features=768, out_features=len(class_names))
pretrained_vit.heads = new_head.to(device)
pretrained_vit

VisionTransformer(
  (conv_proj): Conv2d(3, 768, kernel_size=(16, 16), stride=(16, 16))
  (encoder): Encoder(
    (dropout): Dropout(p=0.0, inplace=False)
    (layers): Sequential(
      (encoder_layer_0): EncoderBlock(
        (ln_1): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
        (self_attention): MultiheadAttention(
          (out_proj): NonDynamicallyQuantizableLinear(in_features=768, out_features=768, bias=True)
        )
        (dropout): Dropout(p=0.0, inplace=False)
        (ln_2): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
        (mlp): MLPBlock(
          (0): Linear(in_features=768, out_features=3072, bias=True)
          (1): GELU(approximate='none')
          (2): Dropout(p=0.0, inplace=False)
          (3): Linear(in_features=3072, out_features=768, bias=True)
          (4): Dropout(p=0.0, inplace=False)
        )
      )
      (encoder_layer_1): EncoderBlock(
        (ln_1): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
        (self_a

In [3]:
# Preprocessing pipeline that ships with the pretrained weights; the bare
# name on the last line displays its configuration as the cell output.
pretrained_vit_transforms = pretrained_vit_weights.transforms()
pretrained_vit_transforms


ImageClassification(
    crop_size=[224]
    resize_size=[256]
    mean=[0.485, 0.456, 0.406]
    std=[0.229, 0.224, 0.225]
    interpolation=InterpolationMode.BILINEAR
)

In [4]:
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from torchvision.datasets import ImageFolder
# Training-time pipeline: random flip and rotation (up to 30 degrees) first,
# then the preprocessing bundled with the pretrained ViT weights.
augmentation_steps = [
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(30),
    pretrained_vit_weights.transforms(),
]
transform = transforms.Compose(augmentation_steps)

In [5]:
import os
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, random_split

# Training images get augmentation plus the ViT preprocessing; validation and
# test images get only the deterministic ViT preprocessing.
train_transforms = transform
test_transforms = pretrained_vit_transforms

# Path to your dataset
data_dir = 'data'

# Untransformed view of the data, used only to size and draw the split.
full_dataset = datasets.ImageFolder(data_dir)

# One ImageFolder per preprocessing pipeline. An ImageFolder applies its single
# `transform` to every sample it serves and subsets only hold a reference to
# their parent, so train and eval subsets must not share one instance --
# otherwise whichever transform is assigned last applies to all of them.
# Both instances read the same directory, so an index refers to the same file
# in each of them.
train_source = datasets.ImageFolder(data_dir, transform=train_transforms)
eval_source = datasets.ImageFolder(data_dir, transform=test_transforms)

# 80% train / 10% validation / 10% test, drawn in a single seeded split so the
# partition is the same on every Restart & Run All.
train_size = int(0.8 * len(full_dataset))
holdout_size = len(full_dataset) - train_size
valid_size = int(0.5 * holdout_size)
test_size = holdout_size - valid_size
split_generator = torch.Generator().manual_seed(42)
train_split, valid_split, test_split = random_split(
    full_dataset, [train_size, valid_size, test_size], generator=split_generator
)

# Re-express each split against the dataset that carries the right transform.
train_dataset = torch.utils.data.Subset(train_source, train_split.indices)
valid_dataset = torch.utils.data.Subset(eval_source, valid_split.indices)
test_dataset = torch.utils.data.Subset(eval_source, test_split.indices)

# Evaluation metrics do not depend on sample order, so no shuffling is needed.
valid_dataloader = DataLoader(valid_dataset, batch_size=32, shuffle=False)
test_dataloader = DataLoader(test_dataset, batch_size=32, shuffle=False)


In [6]:
# Number of batches the test loader yields per pass.
len(test_dataloader)

19

In [7]:
import copy

# The two training views index the same samples but need different
# preprocessing. A dataset object has a single `transform`, and a DataLoader
# only keeps a reference to its dataset, so re-assigning the transform on one
# shared object would leave both loaders (and every other subset of that
# object) using whichever value was assigned last. The augmented view
# therefore gets its own shallow copy of the base dataset.
base_dataset = train_dataset.dataset

# Un-augmented view: deterministic preprocessing on the base dataset.
base_dataset.transform = test_transforms
no_dataloader = DataLoader(train_dataset, batch_size=32, shuffle=True)

# Augmented view: the shallow copy shares the sample list with the base
# dataset but carries its own `transform` attribute.
augmented_base = copy.copy(base_dataset)
augmented_base.transform = train_transforms
augmented_train_dataset = torch.utils.data.Subset(augmented_base, train_dataset.indices)
yes_dataloader = DataLoader(augmented_train_dataset, batch_size=32, shuffle=True)

In [8]:
from torch.utils.data import ConcatDataset
# Pull out the datasets behind the two training loaders.
dataset1, dataset2 = no_dataloader.dataset, yes_dataloader.dataset

# Chain them end to end so one pass visits every item of both.
combined_dataset = ConcatDataset((dataset1, dataset2))

# Shuffled loader over the concatenation, 32 samples per batch.
new_dataloader = DataLoader(dataset=combined_dataset, batch_size=32, shuffle=True)

In [9]:
# Number of batches per pass over the combined training loader.
len(new_dataloader)

297

In [10]:
import torch
import torch.nn as nn
import torch.optim as optim
from tqdm.auto import tqdm

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
pretrained_vit.to(device)

loss_fn = nn.CrossEntropyLoss()
# Hand the optimizer only the parameters that still require gradients; frozen
# parameters never receive a gradient, so tracking them adds nothing.
trainable_params = [p for p in pretrained_vit.parameters() if p.requires_grad]
optimizer = optim.Adam(trainable_params, lr=1e-3)

# Number of epochs
epochs = 5

# Training and validation loop
for epoch in range(epochs):
    # Training phase
    pretrained_vit.train()
    train_loss, train_correct = 0, 0
    for images, labels in tqdm(new_dataloader):
        images, labels = images.to(device), labels.to(device)

        # Forward pass
        outputs = pretrained_vit(images)
        loss = loss_fn(outputs, labels)

        # Backward pass
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # The loss is a per-batch mean, so weight it by the batch size to
        # obtain a per-sample average once divided by the dataset size below.
        train_loss += loss.item() * images.size(0)
        _, predicted = torch.max(outputs, 1)
        train_correct += (predicted == labels).sum().item()

    train_loss /= len(new_dataloader.dataset)
    train_acc = train_correct / len(new_dataloader.dataset)

    # Validation phase
    pretrained_vit.eval()
    valid_loss, valid_correct = 0, 0
    with torch.no_grad():
        for images, labels in valid_dataloader:
            images, labels = images.to(device), labels.to(device)

            # Forward pass
            outputs = pretrained_vit(images)
            loss = loss_fn(outputs, labels)

            # Track accuracy and loss
            valid_loss += loss.item() * images.size(0)
            _, predicted = torch.max(outputs, 1)
            valid_correct += (predicted == labels).sum().item()

    valid_loss /= len(valid_dataloader.dataset)
    valid_acc = valid_correct / len(valid_dataloader.dataset)

    # Print statistics. These numbers come from valid_dataloader, so they are
    # labelled as validation metrics rather than test metrics.
    print(f'Epoch {epoch+1}/{epochs}:')
    print(f'Train Loss: {train_loss:.4f} | Train Accuracy: {train_acc*100:.2f}%')
    print(f'Valid Loss: {valid_loss:.4f} | Valid Accuracy: {valid_acc*100:.2f}%\n')

# Save the trained model's parameters (state_dict only)
torch.save(pretrained_vit.state_dict(), 'vit_model.pth')


  from .autonotebook import tqdm as notebook_tqdm
100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████| 297/297 [1:23:43<00:00, 16.92s/it]


Epoch 1/5:
Train Loss: 0.2710 | Train Accuracy: 92.21%
Test Loss: 0.1200 | Test Accuracy: 97.30%



100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████| 297/297 [1:19:42<00:00, 16.10s/it]


Epoch 2/5:
Train Loss: 0.0859 | Train Accuracy: 98.23%
Test Loss: 0.0678 | Test Accuracy: 98.82%



100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████| 297/297 [1:40:18<00:00, 20.27s/it]


Epoch 3/5:
Train Loss: 0.0529 | Train Accuracy: 99.17%
Test Loss: 0.0465 | Test Accuracy: 99.16%



100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████| 297/297 [1:39:30<00:00, 20.10s/it]


Epoch 4/5:
Train Loss: 0.0377 | Train Accuracy: 99.46%
Test Loss: 0.0337 | Test Accuracy: 99.66%



100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████| 297/297 [1:54:28<00:00, 23.13s/it]


Epoch 5/5:
Train Loss: 0.0290 | Train Accuracy: 99.59%
Test Loss: 0.0262 | Test Accuracy: 99.66%



In [28]:
!pip install scikit-learn

ERROR: Could not find a version that satisfies the requirement scikit-learn (from versions: none)
ERROR: No matching distribution found for scikit-learn


In [27]:
from sklearn.metrics import f1_score, recall_score, precision_score

# Evaluate the fine-tuned ViT on the test loader.
pretrained_vit.eval()

all_labels, all_predictions = [], []
test_loss = 0
test_correct = 0

# No gradients are needed while scoring.
with torch.no_grad():
    for images, labels in test_dataloader:
        images = images.to(device)
        labels = labels.to(device)

        outputs = pretrained_vit(images)
        loss = loss_fn(outputs, labels)

        # Weight the batch loss by the batch size so the final division by
        # the dataset size yields a per-sample average.
        test_loss += loss.item() * images.size(0)
        predicted = outputs.max(dim=1).indices
        test_correct += (predicted == labels).sum().item()

        # Keep every prediction/label pair for the sklearn metrics below.
        all_predictions.extend(predicted.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())

# Per-sample loss and accuracy over the whole test set.
num_test_samples = len(test_dataloader.dataset)
test_loss /= num_test_samples
test_acc = test_correct / num_test_samples

# Support-weighted F1, recall and precision.
metric_options = dict(average='weighted')
f1 = f1_score(all_labels, all_predictions, **metric_options)
recall = recall_score(all_labels, all_predictions, **metric_options)
precision = precision_score(all_labels, all_predictions, **metric_options)

# Report
print(f'Test Loss: {test_loss:.4f} | Test Accuracy: {test_acc*100:.2f}%')
print(f'F1 Score: {f1:.4f} | Recall: {recall:.4f} | Precision: {precision:.4f}')


ModuleNotFoundError: No module named 'sklearn'

In [13]:
# Serialise the whole module object (not just its state_dict) to disk.
torch.save(obj=pretrained_vit, f='new_model.pth')

In [24]:
import torch
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
from PIL import Image
import os

def load_image_from_folder(image_path):
    """Load one image file and preprocess it into a single-item batch.

    Args:
        image_path: Path of the image file to open.

    Returns:
        The output of ``test_transforms`` for the image, with a leading
        batch dimension added via ``unsqueeze(0)``.
    """
    # Open inside a context manager so the file handle is released promptly;
    # convert() returns a loaded copy that stays usable after the file closes.
    with Image.open(image_path) as image_file:
        # Force three channels: grayscale, palette or RGBA files would
        # otherwise not match the three-channel mean/std normalisation.
        image = image_file.convert('RGB')
    transformed_image = test_transforms(image).unsqueeze(0)  # Add batch dimension
    return transformed_image
def predict_class(image_tensor):
    """Return the index of the highest-scoring class for one image.

    Args:
        image_tensor: Batched input for ``pretrained_vit``. The result is
            extracted with ``.item()``, so the batch is expected to hold a
            single image.

    Returns:
        The predicted class index as a Python number.
    """
    # Move the tensor to the appropriate device
    image_tensor = image_tensor.to(device)

    # Make the prediction independent of whatever mode a previous cell left
    # the model in (training cells switch it to train mode).
    pretrained_vit.eval()

    # Pass through model and get predictions
    with torch.no_grad():
        outputs = pretrained_vit(image_tensor)
        _, predicted = torch.max(outputs, 1)

    return predicted.item()
# Classify one sample image from the dataset folder and print its class name.
sample_image_path = 'data/Tungro/TUNGRO2_029.jpg'
image_tensor = load_image_from_folder(sample_image_path)

# Index into class_names with the predicted class id.
predicted_class = predict_class(image_tensor)

print(f"Predicted class: {class_names[predicted_class]}")

Predicted class: Tungro


In [30]:
# Number of batches per pass over the combined training loader.
len(new_dataloader)

297

In [11]:
# Loading and fine-tuning a ResNet-50 model
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import models

# Load ResNet-50 through the `weights=` API, as the ViT cell does.
# IMAGENET1K_V1 is the checkpoint the deprecated `pretrained=True` flag
# selected, so the starting weights are unchanged.
resnet_model = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V1)

# Freeze the weights of all layers except the final layer
for param in resnet_model.parameters():
    param.requires_grad = False

# Replace the final fully connected layer (fc) for the new task
# Assuming you're classifying into 'num_classes' categories
num_classes = 4  # Change this to your number of output classes
resnet_model.fc = nn.Linear(resnet_model.fc.in_features, num_classes)

# Move the model to GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
resnet_model = resnet_model.to(device)

# Define loss function and optimizer (only optimizing the final layer)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(resnet_model.fc.parameters(), lr=0.001)

# Training data comes from `new_dataloader`, built in an earlier cell.

# Training Loop
num_epochs = 5  # You can adjust this

for epoch in range(num_epochs):
    # NOTE(review): train() also switches the BatchNorm layers to training
    # mode, so their running statistics keep updating even though the
    # parameters are frozen -- confirm this is intended.
    resnet_model.train()  # Set model to training mode
    running_loss = 0.0

    for inputs, labels in new_dataloader:
        # Move inputs and labels to the device (GPU or CPU)
        inputs, labels = inputs.to(device), labels.to(device)

        # Zero the parameter gradients
        optimizer.zero_grad()

        # Forward pass
        outputs = resnet_model(inputs)
        loss = criterion(outputs, labels)

        # Backward pass and optimize only the final layer
        loss.backward()
        optimizer.step()

        # Track loss (sum of per-batch means; averaged over batches below)
        running_loss += loss.item()

    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(new_dataloader):.4f}')

print("Training complete!")


Epoch [1/5], Loss: 0.4561
Epoch [2/5], Loss: 0.2331
Epoch [3/5], Loss: 0.1877
Epoch [4/5], Loss: 0.1672
Epoch [5/5], Loss: 0.1533
Training complete!


In [12]:
# Serialise the whole module object (not just its state_dict) to disk.
torch.save(obj=resnet_model, f='resnet_model.pth')

In [14]:
from sklearn.metrics import f1_score, recall_score, precision_score

# Evaluate the fine-tuned ResNet on the test loader.
resnet_model.eval()

loss_fn = nn.CrossEntropyLoss()
all_labels, all_predictions = [], []
test_loss = 0
test_correct = 0

# No gradients are needed while scoring.
with torch.no_grad():
    for images, labels in test_dataloader:
        images = images.to(device)
        labels = labels.to(device)

        outputs = resnet_model(images)
        loss = loss_fn(outputs, labels)

        # Weight the batch-mean loss by the batch size so the final division
        # by the dataset size yields a per-sample average.
        test_loss += loss.item() * images.size(0)
        predicted = outputs.max(dim=1).indices
        test_correct += (predicted == labels).sum().item()

        # Keep every prediction/label pair for the sklearn metrics below.
        all_predictions.extend(predicted.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())

# Per-sample loss and accuracy over the whole test set.
num_test_samples = len(test_dataloader.dataset)
test_loss /= num_test_samples
test_acc = test_correct / num_test_samples

# Support-weighted F1, recall and precision.
metric_options = dict(average='weighted')
f1 = f1_score(all_labels, all_predictions, **metric_options)
recall = recall_score(all_labels, all_predictions, **metric_options)
precision = precision_score(all_labels, all_predictions, **metric_options)

# Report
print(f'Test Loss: {test_loss:.4f} | Test Accuracy: {test_acc*100:.2f}%')
print(f'F1 Score: {f1:.4f} | Recall: {recall:.4f} | Precision: {precision:.4f}')


Test Loss: 0.0961 | Test Accuracy: 96.30%
F1 Score: 0.9630 | Recall: 0.9630 | Precision: 0.9635


In [16]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import models
from tqdm import tqdm  # tqdm for progress bar

# Load both backbones through the `weights=` API. IMAGENET1K_V1 is the
# checkpoint the deprecated `pretrained=True` flag selected for each model,
# so the starting weights are unchanged.
vit_model = models.vit_b_16(weights=models.ViT_B_16_Weights.IMAGENET1K_V1)

# NOTE: this rebinds `resnet_model`; the ResNet fine-tuned in the earlier cell
# is no longer reachable under that name after this cell runs.
resnet_model = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V1)

# Freeze the weights of all layers in both models
for param in vit_model.parameters():
    param.requires_grad = False

for param in resnet_model.parameters():
    param.requires_grad = False

# Strip the classification layers so both models return feature vectors
num_classes = 4  # Adjust to your number of classes
vit_model.heads = nn.Identity()  # Remove classification head from ViT
resnet_model.fc = nn.Identity()  # Remove fully connected layer from ResNet

# Combined Model with Feedforward Network
class CombinedModel(nn.Module):
    """Classifier over the concatenated outputs of two feature extractors.

    The same input batch is passed through ``vit`` and ``resnet``; their
    outputs are concatenated along dimension 1 and fed to a two-layer
    feed-forward head.

    Args:
        vit: Module producing the first feature vector per sample.
        resnet: Module producing the second feature vector per sample.
        out_features: Number of output classes. When ``None`` (default) the
            notebook-level ``num_classes`` variable is used, which is what
            the class did before this parameter existed.
        vit_features: Width of the ``vit`` output (default 768).
        resnet_features: Width of the ``resnet`` output (default 2048).
        hidden_features: Width of the hidden layer in the head (default 512).
    """

    def __init__(self, vit, resnet, out_features=None,
                 vit_features=768, resnet_features=2048, hidden_features=512):
        super().__init__()
        if out_features is None:
            # Backward-compatible fallback to the notebook-level setting.
            out_features = num_classes
        self.vit = vit
        self.resnet = resnet
        # Fully connected head applied to the concatenated features
        self.fc = nn.Sequential(
            nn.Linear(vit_features + resnet_features, hidden_features),
            nn.ReLU(),
            nn.Linear(hidden_features, out_features),  # Output layer for classification
        )

    def forward(self, x):
        """Return class scores for the batch ``x``."""
        # Extract features from both backbones
        vit_features = self.vit(x)
        resnet_features = self.resnet(x)

        # Concatenate per sample (dimension 1), then classify
        combined_features = torch.cat((vit_features, resnet_features), dim=1)
        output = self.fc(combined_features)
        return output

# Build the two-backbone classifier and place it on the available device.
combined_model = CombinedModel(vit_model, resnet_model)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
combined_model = combined_model.to(device)

# Cross-entropy objective; Adam updates only the feed-forward head (`fc`).
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(combined_model.fc.parameters(), lr=0.001)

# Number of passes over the training loader
num_epochs = 5  # Adjust to your preference

for epoch in range(num_epochs):
    combined_model.train()  # training mode for this epoch
    running_loss, total_correct, total_samples = 0.0, 0, 0

    # Manually driven progress bar: one tick per batch.
    progress_bar = tqdm(total=len(new_dataloader), desc=f'Epoch {epoch+1}/{num_epochs}', unit='batch')
    with progress_bar as pbar:
        for batch_idx, (inputs, labels) in enumerate(new_dataloader):
            inputs = inputs.to(device)
            labels = labels.to(device)

            # Clear old gradients, run the forward pass, compute the loss
            optimizer.zero_grad()
            outputs = combined_model(inputs)
            loss = criterion(outputs, labels)

            # Backpropagate and update the head
            loss.backward()
            optimizer.step()

            # Running totals for the epoch summary
            running_loss += loss.item()
            predicted = outputs.max(dim=1).indices
            total_correct += (predicted == labels).sum().item()
            total_samples += labels.size(0)

            # Advance the bar and show the latest loss / cumulative accuracy
            pbar.update(1)
            pbar.set_postfix({'loss': loss.item(), 'accuracy': total_correct/total_samples})

    # Epoch summary: mean of the per-batch losses and overall accuracy
    epoch_loss = running_loss / len(new_dataloader)
    epoch_accuracy = total_correct / total_samples
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}, Accuracy: {epoch_accuracy:.4f}')

print("Training complete!")


Epoch 1/5: 100%|███████████████████████████████████████████████████████████████| 297/297 [2:55:48<00:00, 35.52s/batch, loss=0.0401, accuracy=0.955]


Epoch [1/5], Loss: 0.1320, Accuracy: 0.9552


Epoch 2/5: 100%|██████████████████████████████████████████████████████████████| 297/297 [1:58:35<00:00, 23.96s/batch, loss=0.00033, accuracy=0.991]


Epoch [2/5], Loss: 0.0264, Accuracy: 0.9909


Epoch 3/5: 100%|██████████████████████████████████████████████████████████████| 297/297 [1:56:30<00:00, 23.54s/batch, loss=0.00102, accuracy=0.996]


Epoch [3/5], Loss: 0.0101, Accuracy: 0.9963


Epoch 4/5: 100%|████████████████████████████████████████████████████████████████| 297/297 [1:56:14<00:00, 23.48s/batch, loss=0.343, accuracy=0.989]


Epoch [4/5], Loss: 0.0274, Accuracy: 0.9890


Epoch 5/5: 100%|██████████████████████████████████████████████████████████████| 297/297 [1:56:11<00:00, 23.47s/batch, loss=0.00862, accuracy=0.996]

Epoch [5/5], Loss: 0.0111, Accuracy: 0.9964
Training complete!





In [18]:
# Serialise the whole module object (not just its state_dict) to disk.
torch.save(obj=combined_model, f='combined_model.pth')

In [19]:
from sklearn.metrics import f1_score, recall_score, precision_score

# Evaluate the combined ViT + ResNet model on the test loader.
combined_model.eval()

loss_fn = nn.CrossEntropyLoss()
all_labels, all_predictions = [], []
test_loss = 0
test_correct = 0

# No gradients are needed while scoring.
with torch.no_grad():
    for images, labels in test_dataloader:
        images = images.to(device)
        labels = labels.to(device)

        outputs = combined_model(images)
        loss = loss_fn(outputs, labels)

        # Weight the batch-mean loss by the batch size so the final division
        # by the dataset size yields a per-sample average.
        test_loss += loss.item() * images.size(0)
        predicted = outputs.max(dim=1).indices
        test_correct += (predicted == labels).sum().item()

        # Keep every prediction/label pair for the sklearn metrics below.
        all_predictions.extend(predicted.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())

# Per-sample loss and accuracy over the whole test set.
num_test_samples = len(test_dataloader.dataset)
test_loss /= num_test_samples
test_acc = test_correct / num_test_samples

# Support-weighted F1, recall and precision.
metric_options = dict(average='weighted')
f1 = f1_score(all_labels, all_predictions, **metric_options)
recall = recall_score(all_labels, all_predictions, **metric_options)
precision = precision_score(all_labels, all_predictions, **metric_options)

# Report
print(f'Test Loss: {test_loss:.4f} | Test Accuracy: {test_acc*100:.2f}%')
print(f'F1 Score: {f1:.4f} | Recall: {recall:.4f} | Precision: {precision:.4f}')


Test Loss: 0.0006 | Test Accuracy: 100.00%
F1 Score: 1.0000 | Recall: 1.0000 | Precision: 1.0000


In [20]:
from sklearn.metrics import f1_score, recall_score, precision_score

# Evaluate the combined model on the validation loader.
combined_model.eval()

# The accumulator names (`test_loss`, `test_acc`, ...) are kept as they were
# so the notebook namespace is unchanged, but the values computed here are
# validation metrics and are printed as such.
test_loss, test_correct = 0, 0
all_labels = []
all_predictions = []
loss_fn = nn.CrossEntropyLoss()
# Disable gradient calculations for validation/testing
with torch.no_grad():
    for images, labels in valid_dataloader:
        images, labels = images.to(device), labels.to(device)

        # Forward pass
        outputs = combined_model(images)
        loss = loss_fn(outputs, labels)

        # Weight the batch-mean loss by the batch size so the division by the
        # dataset size below yields a per-sample average.
        test_loss += loss.item() * images.size(0)
        _, predicted = torch.max(outputs, 1)
        test_correct += (predicted == labels).sum().item()

        # Store all predictions and labels
        all_predictions.extend(predicted.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())

# Calculate validation loss and accuracy
test_loss /= len(valid_dataloader.dataset)
test_acc = test_correct / len(valid_dataloader.dataset)

# Calculate F1 score, recall, and precision
f1 = f1_score(all_labels, all_predictions, average='weighted')
recall = recall_score(all_labels, all_predictions, average='weighted')
precision = precision_score(all_labels, all_predictions, average='weighted')

# Output the results, labelled by the split that was actually evaluated
print(f'Valid Loss: {test_loss:.4f} | Valid Accuracy: {test_acc*100:.2f}%')
print(f'F1 Score: {f1:.4f} | Recall: {recall:.4f} | Precision: {precision:.4f}')


Test Loss: 0.0009 | Test Accuracy: 100.00%
F1 Score: 1.0000 | Recall: 1.0000 | Precision: 1.0000
