In [None]:
import torch
print(torch.cuda.device_count())  # Should print 2
print(torch.cuda.get_device_name(0))  
print(torch.cuda.get_device_name(1))

# cuda is a technology from NVIDIA that allows you to use your graphics card (GPU

2
Tesla T4
Tesla T4


In [None]:
import kagglehub     # dataset ko download krne mein help krta hai 
import torch         # used to tain and build a AI model
import torch.nn as nn    #to build structure of AI model like input layer,output layer, hidden layer
import torch.optim as optim      #
import torchvision.transforms as transforms    #helps you preprocess your images 
import torchvision.models as models            #gives you access to pre-trained AI models for image recognition, like ResNet
from torchvision.datasets import ImageFolder   #This helps you load images from folders 
from torch.utils.data import DataLoader        #organize your dataset into smaller batches so you can feed it to your model in chunks.
from einops import rearrange       # smart reshapeing tools
import os         #helps to acces files from system

In [3]:
# Download latest dataset version
path = kagglehub.dataset_download("vipoooool/new-plant-diseases-dataset")
print("Path to dataset files:", path)

Path to dataset files: /kaggle/input/new-plant-diseases-dataset


In [4]:
# Set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)


Using device: cuda


In [5]:
# Dataset paths
dataset_path = "/kaggle/input/new-plant-diseases-dataset/New Plant Diseases Dataset(Augmented)/New Plant Diseases Dataset(Augmented)"
print("Contents of dataset:", os.listdir(dataset_path))

Contents of dataset: ['valid', 'train']


In [None]:
# Define transformations
transform = transforms.Compose([
    transforms.Resize((224, 224)),    # convert all the image in same size
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])    # adjust color values of the image 
])


In [None]:

# Load datasets
train_dataset = ImageFolder(root=f"{dataset_path}/train", transform=transform)
val_dataset = ImageFolder(root=f"{dataset_path}/valid", transform=transform)
# dataset ko load krta hai file se aur usee clean krta hai useing transform. 

In [None]:

# Data loaders (optimized)
train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True, num_workers=4, pin_memory=True)
val_loader = DataLoader(val_dataset, batch_size=8, shuffle=False, num_workers=4, pin_memory=True)

# dataset ko load krta hai aur fir batch size fix8 mtlb per turn pe model 8 photos ko pic krege for taring
# dataset jldi load hoo jaye uske liye 4 CPU threads  use krr rhe hai 
#pin memory train ko speed krr rha hai 

In [None]:

num_classes = len(train_dataset.classes)
print("Number of classes:", num_classes)

#to detect the number of classes in training dataset we can use len function 

Number of classes: 38


In [None]:
# CNN Feature Extractor (ResNet50)
class CNNFeatureExtractor(nn.Module):
    def __init__(self):
        super(CNNFeatureExtractor, self).__init__()
        base_model = models.resnet50(pretrained=True)
        self.feature_extractor = nn.Sequential(*list(base_model.children())[:-2])
    
    def forward(self, x):
        return self.feature_extractor(x)  # Output: (batch_size, 2048, 7, 7)
    
    # ResNet50 pretrained model use krr rhe hai for feature extraction.
    # defining class for feature extraction 
    # _int_ constructor use krr rhe hai 
    # super function model ko proper neural network ki tarah behave krne mein help krr rha hai 
    # yaha sirf hamm CNN ka feature extration part use krr rhe esliye -2 kiye hai 

In [None]:
# Transformer Encoder Block
class TransformerEncoderBlock(nn.Module):
    def __init__(self, dim, heads=8, ff_dim=2048, dropout=0.1):
        super(TransformerEncoderBlock, self).__init__()
        self.self_attn = nn.MultiheadAttention(embed_dim=dim, num_heads=heads, dropout=dropout)
        self.norm1 = nn.LayerNorm(dim)
        self.ffn = nn.Sequential(
            nn.Linear(dim, ff_dim),
            nn.ReLU(),
            nn.Linear(ff_dim, dim)
        )
        self.norm2 = nn.LayerNorm(dim)    #•	Socho: Yeh input vector ko le raha hai aur usse ek badi feature space mein map kar raha hai — 
                                            #      jahan pe model zyada complex patterns samajh sakta hai.

    def forward(self, x):
        attn_out, _ = self.self_attn(x, x, x)
        x = self.norm1(x + attn_out)
        ffn_out = self.ffn(x)   
        return self.norm2(x + ffn_out)         #Maan lo tumhare paas ek photo hai. Tum us photo ka 
                                            #zoom-in version dekhte ho (expand karte ho), Fir 
                                            #important features mark karte ho (ReLU), 
                                            # Aur fir zoom-out karke sirf important cheezein lekar aage bhejte ho (compress back).
    

    #	What is a Transformer Encoder Block?
#Looks at all parts of the input at once
#Figures out which parts are important and how they relate to each other
	#Processes those relationships to better understand the image or data

    # dim= size of each input vector
    # heads = model kitne different types se data ko rad krr rhe hai 
    # ff_dim= size of inner feedforward layer
    #	self.norm1 = nn.LayerNorm(dim)
#This normalizes the data (makes it consistent) after attention. Helps the model stay stable and learn better.





In [12]:
# CNN + Transformer Hybrid Model
class CNNTransformerModel(nn.Module):
    def __init__(self, num_classes):
        super(CNNTransformerModel, self).__init__()
        self.cnn = CNNFeatureExtractor()
        self.patch_dim = 2048
        self.seq_length = 7 * 7
        self.transformer = TransformerEncoderBlock(dim=self.patch_dim, heads=8, ff_dim=2048)
        self.fc = nn.Linear(self.patch_dim, num_classes)

    def forward(self, x):
        cnn_features = self.cnn(x)
        patches = rearrange(cnn_features, "b c h w -> (h w) b c")
        transformer_out = self.transformer(patches)
        output = self.fc(transformer_out.mean(dim=0))
        return output

In [15]:
# Initialize model with multi-GPU support
model = CNNTransformerModel(num_classes)
if torch.cuda.device_count() > 1:
    model = nn.DataParallel(model)
model.to(device)


Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /root/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth
100%|██████████| 97.8M/97.8M [00:00<00:00, 187MB/s] 


DataParallel(
  (module): CNNTransformerModel(
    (cnn): CNNFeatureExtractor(
      (feature_extractor): Sequential(
        (0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
        (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): ReLU(inplace=True)
        (3): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
        (4): Sequential(
          (0): Bottleneck(
            (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
            (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
            (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
            (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
            (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
            (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, af

In [16]:
# Loss function & optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.0001)

In [17]:
def train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs=7):
    scaler = torch.amp.GradScaler("cuda")  # Updated AMP syntax
    
    for epoch in range(num_epochs):
        model.train()
        total_loss = 0

        for images, labels in train_loader:
            images, labels = images.to(device, non_blocking=True), labels.to(device, non_blocking=True)
            optimizer.zero_grad()
            
            with torch.amp.autocast("cuda"):  # Updated AMP syntax
                outputs = model(images)
                loss = criterion(outputs, labels)
            
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()
            total_loss += loss.item()
        
        print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {total_loss/len(train_loader):.4f}")

        # Validation
        model.eval()
        correct, total = 0, 0
        with torch.no_grad():
            for images, labels in val_loader:
                images, labels = images.to(device, non_blocking=True), labels.to(device, non_blocking=True)
                with torch.amp.autocast("cuda",enabled=False):  # Added AMP for validation
                    outputs = model(images)
                _, predicted = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

        print(f"Validation Accuracy: {100 * correct / total:.2f}%")
        

In [18]:
# Train model
train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs=6)

Epoch [1/6], Loss: 0.2874
Validation Accuracy: 96.63%
Epoch [2/6], Loss: 0.1225
Validation Accuracy: 98.00%
Epoch [3/6], Loss: 0.0886
Validation Accuracy: 97.18%
Epoch [4/6], Loss: 0.0713
Validation Accuracy: 98.32%
Epoch [5/6], Loss: 0.0595
Validation Accuracy: 98.41%
Epoch [6/6], Loss: 0.0515
Validation Accuracy: 98.08%


In [19]:
torch.save(model.state_dict(), "cnn_transformer_model.pth")


In [20]:
torch.save(model, "cnn_transformer_full_model.pth")


In [23]:
!zip -r model.zip cnn_transformer_full_model.pth

  adding: cnn_transformer_full_model.pth (deflated 7%)


In [24]:
from IPython.display import FileLink
FileLink(r'model.zip')


In [25]:
torch.save(model.state_dict(), "cnn_transformer_model.pth")

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import models
from einops import rearrange
import os

# Number of classes (as per your dataset)
num_classes = 38

# CNN Feature Extractor (ResNet50)
class CNNFeatureExtractor(nn.Module):
    def __init__(self):
        super(CNNFeatureExtractor, self).__init__()
        base_model = models.resnet50(pretrained=True)
        self.feature_extractor = nn.Sequential(*list(base_model.children())[:-2])
    
    def forward(self, x):
        return self.feature_extractor(x)  # Output: (batch_size, 2048, 7, 7)

# Transformer Encoder Block
class TransformerEncoderBlock(nn.Module):
    def __init__(self, dim, heads=8, ff_dim=2048, dropout=0.1):
        super(TransformerEncoderBlock, self).__init__()
        self.self_attn = nn.MultiheadAttention(embed_dim=dim, num_heads=heads, dropout=dropout)
        self.norm1 = nn.LayerNorm(dim)
        self.ffn = nn.Sequential(
            nn.Linear(dim, ff_dim),
            nn.ReLU(),
            nn.Linear(ff_dim, dim)
        )
        self.norm2 = nn.LayerNorm(dim)

    def forward(self, x):
        attn_out, _ = self.self_attn(x, x, x)
        x = self.norm1(x + attn_out)
        ffn_out = self.ffn(x)
        return self.norm2(x + ffn_out)

# CNN + Transformer Hybrid Model
class CNNTransformerModel(nn.Module):
    def __init__(self, num_classes):
        super(CNNTransformerModel, self).__init__()
        self.cnn = CNNFeatureExtractor()
        self.patch_dim = 2048
        self.seq_length = 7 * 7
        self.transformer = TransformerEncoderBlock(dim=self.patch_dim, heads=8, ff_dim=2048)
        self.fc = nn.Linear(self.patch_dim, num_classes)

    def forward(self, x):
        cnn_features = self.cnn(x)
        patches = rearrange(cnn_features, "b c h w -> (h w) b c")
        transformer_out = self.transformer(patches)
        output = self.fc(transformer_out.mean(dim=0))
        return output

# Device setting
device = torch.device("cpu")  # Your laptop does not have CUDA support, so using CPU
print("Using device:", device)

# Step 1: Initialize model
model = CNNTransformerModel(num_classes)

# Step 2: Load model's state_dict (if available) with strict=False in case of missing keys
try:
    model.load_state_dict(torch.load("cnn_transformer_model.pth", map_location=device), strict=False)
    print("Model loaded successfully")
except RuntimeError as e:
    print("Error loading model:", e)

# Step 3: Set the model to evaluation mode
model.to(device)
model.eval()

# Optional: You can now run evaluations using the model
# Example code to make predictions:
# outputs = model(inputs)  # inputs should be a batch of data in tensor format


ModuleNotFoundError: No module named 'einops'