In [6]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms, models

In [7]:
# Directories holding the training and validation images (relative paths).
train_data_path, val_data_path = "training_data", "validation_data"

In [8]:
# Preprocessing pipelines.
# Training: random crop + horizontal flip for augmentation.
# Validation: deterministic resize followed by a center crop.
# Both finish with tensor conversion and the same per-channel normalization
# (presumably the ImageNet channel statistics -- confirm if the data differs).
train_transform = transforms.Compose(transforms=[
    transforms.RandomResizedCrop(size=224),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    ),
])

val_transform = transforms.Compose(transforms=[
    transforms.Resize(size=256),
    transforms.CenterCrop(size=224),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    ),
])


In [9]:
# One ImageFolder dataset per split, each paired with its own transform pipeline.
train_dataset = datasets.ImageFolder(
    root=train_data_path,
    transform=train_transform,
)
val_dataset = datasets.ImageFolder(
    root=val_data_path,
    transform=val_transform,
)


In [10]:
# Batch the datasets. Only the training loader shuffles; validation keeps a
# fixed order.
train_loader = torch.utils.data.DataLoader(
    dataset=train_dataset,
    batch_size=32,
    shuffle=True,
    num_workers=4,
)
val_loader = torch.utils.data.DataLoader(
    dataset=val_dataset,
    batch_size=32,
    shuffle=False,
    num_workers=4,
)


In [12]:
# Initialize a randomly-initialized ResNet-34 and replace its classification
# head with one sized for this task.
# `weights=None` is the documented way to request no pretrained weights;
# `weights=False` is not a valid value for the `weights` argument.
num_classes = 3
model = models.resnet34(weights=None)
num_ftrs = model.fc.in_features  # input width of the original head
model.fc = nn.Linear(num_ftrs, num_classes)


In [13]:
# Cross-entropy loss on the class logits, minimized with SGD + momentum over
# every model parameter.
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(
    params=model.parameters(),
    lr=0.001,
    momentum=0.9,
)


In [14]:

# Train for a fixed number of epochs and report validation accuracy after each.
num_epochs = 5
for epoch in range(num_epochs):
    # ---- optimization pass over the training set ----
    model.train()
    for inputs, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

    # ---- evaluation pass over the validation set (no gradients needed) ----
    model.eval()
    total_correct, total_samples = 0, 0
    with torch.no_grad():
        for inputs, labels in val_loader:
            outputs = model(inputs)
            # Index of the highest logit per sample is the predicted class.
            predicted = torch.max(outputs, dim=1).indices
            total_correct += int((predicted == labels).sum())
            total_samples += len(labels)

    accuracy = total_correct / total_samples
    print(f'Epoch {epoch + 1}/{num_epochs}, Validation Accuracy: {accuracy:.4f}')


Epoch 1/5, Validation Accuracy: 0.4646
Epoch 2/5, Validation Accuracy: 0.6212
Epoch 3/5, Validation Accuracy: 0.7273
Epoch 4/5, Validation Accuracy: 0.7626
Epoch 5/5, Validation Accuracy: 0.7172


In [15]:

# Persist the learned parameters (the state_dict) to disk.
torch.save(obj=model.state_dict(), f='your_trained_model.pth')

In [38]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms, models

# Directories holding the training and validation images (relative paths).
train_data_path, val_data_path = "training_data", "validation_data"

# Training pipeline: random crop, flip, small rotation and colour jitter for
# augmentation, then tensor conversion and per-channel normalization.
train_transform = transforms.Compose(transforms=[
    transforms.RandomResizedCrop(size=224),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(degrees=15),
    transforms.ColorJitter(brightness=0.1, contrast=0.1, saturation=0.1, hue=0.1),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    ),
])

# Validation pipeline: deterministic resize + center crop, same normalization.
val_transform = transforms.Compose(transforms=[
    transforms.Resize(size=256),
    transforms.CenterCrop(size=224),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    ),
])

# One ImageFolder dataset per split.
train_dataset = datasets.ImageFolder(
    root=train_data_path,
    transform=train_transform,
)
val_dataset = datasets.ImageFolder(
    root=val_data_path,
    transform=val_transform,
)

# Batched loaders; only the training loader shuffles.
train_loader = torch.utils.data.DataLoader(
    dataset=train_dataset,
    batch_size=32,
    shuffle=True,
    num_workers=4,
)
val_loader = torch.utils.data.DataLoader(
    dataset=val_dataset,
    batch_size=32,
    shuffle=False,
    num_workers=4,
)

# Initialize the model with ImageNet-pretrained weights.
# `pretrained=True` is deprecated in favour of the `weights=` API already used
# elsewhere in this notebook. IMAGENET1K_V1 is the checkpoint `pretrained=True`
# resolved to (resnet34-b627a593.pth in the download log below).
model = models.resnet34(weights=models.ResNet34_Weights.IMAGENET1K_V1)

# Freeze the whole backbone first...
model.requires_grad_(False)

# ...then unfreeze only the last residual stage for fine-tuning.
model.layer4.requires_grad_(True)

# Replace the final fully connected layer with a new one. A freshly created
# nn.Linear has requires_grad=True, so the new head is trainable.
num_ftrs = model.fc.in_features
model.fc = nn.Linear(num_ftrs, 3)  # Assuming 3 classes

# Loss and optimizer. Only the two trainable parts are handed to SGD, each in
# its own parameter group so the new head can use a larger learning rate than
# the fine-tuned backbone stage.
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(
    [
        dict(params=model.layer4.parameters(), lr=0.001),
        dict(params=model.fc.parameters(), lr=0.01),
    ],
    momentum=0.9,
)

# Step decay: multiply every group's learning rate by 0.1 every 7 scheduler steps.
scheduler = optim.lr_scheduler.StepLR(
    optimizer,
    step_size=7,
    gamma=0.1,
)

# Training loop: one optimization pass and one validation pass per epoch.
num_epochs = 10
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for inputs, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    # Mean per-batch training loss for this epoch
    print(f'Epoch {epoch + 1}/{num_epochs}, Training Loss: {running_loss/len(train_loader):.4f}')

    # Decay the learning rate once per epoch, after the optimizer updates
    scheduler.step()

    # Validation loop
    model.eval()
    total_correct = 0
    total_samples = 0
    # Evaluation needs no gradients. Without no_grad() autograd would record a
    # graph for every validation batch, costing memory and time for nothing.
    with torch.no_grad():
        for inputs, labels in val_loader:
            outputs = model(inputs)
            _, predicted = torch.max(outputs, 1)
            total_samples += labels.size(0)
            total_correct += (predicted == labels).sum().item()

    accuracy = total_correct / total_samples
    print(f'Epoch {epoch + 1}/{num_epochs}, Validation Accuracy: {accuracy:.4f}')

# Persist the fine-tuned parameters (the state_dict) to disk.
torch.save(obj=model.state_dict(), f='your_trained_model.pth')


Downloading: "https://download.pytorch.org/models/resnet34-b627a593.pth" to C:\Users\Admin/.cache\torch\hub\checkpoints\resnet34-b627a593.pth
100%|██████████████████████████████████████████████████████████████████████████████| 83.3M/83.3M [04:55<00:00, 296kB/s]


Epoch 1/10, Training Loss: 0.5678
Epoch 1/10, Validation Accuracy: 0.9141
Epoch 2/10, Training Loss: 0.3870
Epoch 2/10, Validation Accuracy: 0.8434
Epoch 3/10, Training Loss: 0.3928
Epoch 3/10, Validation Accuracy: 0.9242
Epoch 4/10, Training Loss: 0.3673
Epoch 4/10, Validation Accuracy: 0.9444
Epoch 5/10, Training Loss: 0.4350
Epoch 5/10, Validation Accuracy: 0.9192
Epoch 6/10, Training Loss: 0.3529
Epoch 6/10, Validation Accuracy: 0.8990
Epoch 7/10, Training Loss: 0.3168
Epoch 7/10, Validation Accuracy: 0.9242
Epoch 8/10, Training Loss: 0.1968
Epoch 8/10, Validation Accuracy: 0.9293
Epoch 9/10, Training Loss: 0.2067
Epoch 9/10, Validation Accuracy: 0.9394
Epoch 10/10, Training Loss: 0.1815
Epoch 10/10, Validation Accuracy: 0.9343


## Grad-CAM: visualizing which image regions drive the model's prediction

In [51]:
import torch
import torch.nn as nn
from torchvision import models, transforms
from PIL import Image
import cv2
import numpy as np

# Rebuild the architecture the checkpoint was saved from: ResNet-34 with the
# final layer replaced by a `num_classes`-way linear head.
num_classes = 3
model = models.resnet34(weights=None)
num_ftrs = model.fc.in_features
model.fc = nn.Linear(in_features=num_ftrs, out_features=num_classes)

# Restore the trained parameters on CPU and switch to inference mode.
model.load_state_dict(
    torch.load('your_trained_model.pth', map_location=torch.device('cpu'))
)
model.eval()


class GradCAM:
    """Grad-CAM heatmap generator for one layer of a model.

    Hooks on ``target_layer`` capture its output activations during the forward
    pass and the gradient with respect to that output during the backward pass.
    Calling the instance on an input returns the normalized class-activation
    map of the first sample for that sample's top-scoring class.

    Assumes ``target_layer`` produces ``(N, C, H, W)`` activations, as a 2-D
    convolution does.
    """

    def __init__(self, model, target_layer):
        """Store the model and layer, then attach the capture hooks."""
        self.model = model
        self.target_layer = target_layer
        self.feature = None    # activations captured by the forward hook
        self.gradient = None   # gradient captured by the backward hook
        self._hook_handles = []
        self.register_hooks()

    def hook_fn_forward(self, module, input, output):
        """Forward hook: remember the target layer's output."""
        self.feature = output

    def hook_fn_backward(self, module, grad_in, grad_out):
        """Backward hook: remember the gradient w.r.t. the layer's output."""
        self.gradient = grad_out[0]

    def register_hooks(self):
        """Attach both hooks, keeping the handles so they can be removed later."""
        self._hook_handles.append(
            self.target_layer.register_forward_hook(self.hook_fn_forward)
        )
        self._hook_handles.append(
            self.target_layer.register_full_backward_hook(self.hook_fn_backward)
        )

    def remove_hooks(self):
        """Detach the hooks from ``target_layer``.

        Call this when the instance is no longer needed; otherwise the hooks
        (and the tensors they capture) stay attached to the model.
        """
        for handle in self._hook_handles:
            handle.remove()
        self._hook_handles = []

    def __call__(self, input_image):
        """Compute the Grad-CAM map for the first sample of ``input_image``.

        Args:
            input_image: Input batch accepted by ``self.model``. Gradient
                tracking must be enabled when this is called.

        Returns:
            numpy.ndarray: 2-D map with values in ``[0, 1]``. An all-zero map
            is returned when the activation map is flat, instead of NaNs.
        """
        # Forward pass
        self.model.zero_grad()
        output = self.model(input_image)

        # Back-propagate the score of the first sample's top class. Indexing
        # the first row avoids argmax over the flattened batch and keeps the
        # backward target a scalar.
        target = torch.argmax(output[0])
        output[0, target].backward()

        gradient = self.gradient.detach()
        feature = self.feature.detach()

        # Global average pooling of the gradients over the spatial dims (H, W)
        # gives one importance weight per channel: shape (N, C, 1, 1).
        weights = torch.mean(gradient, dim=(2, 3), keepdim=True)

        # Channel-weighted sum of the feature maps; keep the first sample.
        cam = torch.sum(weights * feature, dim=1)[0]

        # Convert to numpy and apply ReLU (only positive evidence is kept)
        cam = cam.cpu().numpy()
        cam = np.maximum(cam, 0)

        # Normalize to [0, 1]; skip the division for a flat map to avoid 0/0.
        cam = cam - np.min(cam)
        peak = np.max(cam)
        if peak > 0:
            cam = cam / peak

        return cam

def get_grad_cam_image(model, image_path, target_layer, save_path):
    """Overlay a Grad-CAM heatmap on an image, save the result, and return it.

    Args:
        model: Model passed to ``GradCAM`` and run on the image.
        image_path: Path of the image to explain.
        target_layer: Layer of ``model`` whose activations are visualized.
        save_path: Where the blended image is written.

    Returns:
        PIL.Image.Image: The 224x224 RGB image blended with the heatmap.
    """
    # Load and preprocess the image. Force 3-channel RGB so grayscale or RGBA
    # files match the 3-channel Normalize and the RGB heatmap in the overlay.
    image = Image.open(image_path).convert('RGB')
    preprocess = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])
    input_image = preprocess(image).unsqueeze(0)

    # Create Grad-CAM instance and generate CAM
    grad_cam = GradCAM(model=model, target_layer=target_layer)
    try:
        # Grad-CAM needs a backward pass, so make sure autograd is on here
        # regardless of the caller's global grad mode.
        with torch.enable_grad():
            cam = grad_cam(input_image)
    finally:
        # Detach the hooks when the GradCAM implementation offers a way to do
        # so; otherwise each call leaves another pair of hooks on target_layer.
        remove_hooks = getattr(grad_cam, 'remove_hooks', None)
        if callable(remove_hooks):
            remove_hooks()

    # Normalize the CAM to [0, 1] (skipping the division for a flat map to
    # avoid 0/0) and convert to 8-bit format
    cam = cam - np.min(cam)
    peak = np.max(cam)
    if peak > 0:
        cam = cam / peak
    cam = np.uint8(255 * cam)

    # Resize the CAM to the image size and convert it to an RGB heatmap
    # (applyColorMap returns BGR).
    original_image = np.array(image.resize((224, 224)))
    cam_resized = cv2.resize(cam, (original_image.shape[1], original_image.shape[0]))
    heatmap = cv2.applyColorMap(cam_resized, cv2.COLORMAP_JET)
    heatmap = cv2.cvtColor(heatmap, cv2.COLOR_BGR2RGB)

    # Overlay the heatmap on the original image (40% heatmap, 60% image)
    result = heatmap * 0.4 + original_image * 0.6
    result = result.astype('uint8')

    # Convert to PIL image and save
    result_pil = Image.fromarray(result)
    result_pil.save(save_path)

    return result_pil

# Input image to explain and destination of the blended result
image_path, save_path = 'vadai_image033.png', 'grad_cam_result3.png'

# Layer whose activations are visualized: the second conv of the last block
# in layer4 (you can experiment with different layers)
target_layer = model.layer4[-1].conv2

# Grad-CAM needs a backward pass, so make sure gradient tracking is on
torch.set_grad_enabled(True)

# Compute the Grad-CAM overlay, save it, and keep the resulting image
result_image = get_grad_cam_image(
    model=model,
    image_path=image_path,
    target_layer=target_layer,
    save_path=save_path,
)

# Show the image
result_image.show()



In [46]:
import torch
from torchvision import models, transforms
from PIL import Image

# Image to classify and checkpoint to load (change both to suit your files)
image_path, model_path = "image004.png", 'your_trained_model.pth'

# Same preprocessing as the validation pipeline: resize, center crop,
# tensor conversion, per-channel normalization
transform = transforms.Compose(transforms=[
    transforms.Resize(size=256),
    transforms.CenterCrop(size=224),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    ),
])

# Define a function for image prediction
def predict_image(image_path, model_path):
    """Classify one image with a saved ResNet-34 checkpoint.

    Uses the module-level ``transform`` pipeline for preprocessing.

    Args:
        image_path: Path of the image to classify.
        model_path: Path of the saved ``state_dict`` for a ResNet-34 whose
            final layer has one output per entry in ``class_names``.

    Returns:
        str: The predicted class name.
    """
    # Index -> name mapping. The order must match the class indices used
    # during training (presumably ImageFolder's sorted folder names -- verify
    # against train_dataset.classes).
    class_names = ['dosa', 'idly', 'vada']  # Replace with your actual class names

    # Load the image as 3-channel RGB so grayscale/RGBA files match the
    # 3-channel Normalize, then add the batch dimension.
    image = Image.open(image_path).convert('RGB')
    image = transform(image).unsqueeze(0)

    # Rebuild the architecture and restore the trained parameters.
    # `torch.nn` is reached through `torch` because this cell does not import
    # `nn` itself; `weights=None` replaces the deprecated `pretrained=False`.
    model = models.resnet34(weights=None)
    num_ftrs = model.fc.in_features
    model.fc = torch.nn.Linear(num_ftrs, len(class_names))
    model.load_state_dict(torch.load(model_path, map_location=torch.device('cpu')))
    model.eval()

    # Predict the class: index of the highest logit
    with torch.no_grad():
        outputs = model(image)
        _, predicted = torch.max(outputs, 1)

    # .item() turns the 0-d tensor into a plain int before indexing the list
    predicted_class = class_names[predicted[0].item()]

    return predicted_class

# Classify the configured image and report the predicted class
predicted_class = predict_image(image_path=image_path, model_path=model_path)
print(f"The model predicts this image as: {predicted_class}")


The model predicts this image as: idly
