# Batch1-Coding Solution

# Question 1: Image Preprocessing for Inference (PyTorch)
# Write a function to load an image and preprocess it for inference. 

In [2]:
pip install torchvision

Collecting torchvision
  Downloading torchvision-0.17.2-cp311-cp311-macosx_10_13_x86_64.whl.metadata (6.6 kB)
Collecting torch==2.2.2 (from torchvision)
  Downloading torch-2.2.2-cp311-none-macosx_10_9_x86_64.whl.metadata (25 kB)
Collecting filelock (from torch==2.2.2->torchvision)
  Using cached filelock-3.20.2-py3-none-any.whl.metadata (2.1 kB)
Collecting sympy (from torch==2.2.2->torchvision)
  Using cached sympy-1.14.0-py3-none-any.whl.metadata (12 kB)
Collecting networkx (from torch==2.2.2->torchvision)
  Downloading networkx-3.6.1-py3-none-any.whl.metadata (6.8 kB)
Collecting fsspec (from torch==2.2.2->torchvision)
  Using cached fsspec-2025.12.0-py3-none-any.whl.metadata (10 kB)
Collecting mpmath<1.4,>=1.1.0 (from sympy->torch==2.2.2->torchvision)
  Using cached mpmath-1.3.0-py3-none-any.whl.metadata (8.6 kB)
Downloading torchvision-0.17.2-cp311-cp311-macosx_10_13_x86_64.whl (1.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.7/1.7 MB[0m [31m11.9 MB/s[0m 

In [3]:
from PIL import Image
from torchvision import transforms

def preprocess_image(image_path):
    image = Image.open(image_path).convert("RGB")
    transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize([0.485,0.456,0.406], [0.229,0.224,0.225])
    ])
    return transform(image).unsqueeze(0)  # Add batch dimension


# Question 2: Predict on New Image with a Trained Model
# Perform prediction and get the class label.

In [6]:
import torch
import torchvision.models as models

# Load pretrained model
model = models.resnet18(pretrained=True)
model.eval()

input_image = preprocess_image("test.jpg")

with torch.no_grad():
    output = model(input_image)
    predicted_class = output.argmax(1).item()

print("Predicted Class:", predicted_class)


Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /Users/harikrishnan/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
IOPub message rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_msg_rate_limit`.

Current values:
ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
ServerApp.rate_limit_window=3.0 (secs)

100.0%


FileNotFoundError: [Errno 2] No such file or directory: 'test.jpg'

# Question 3: Build a CNN to classify CIFAR-10 images (PyTorch)
# Create a CNN model that classifies images from the CIFAR-10 dataset with accuracy above 60%.

In [5]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms

# Preprocessing
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=64, shuffle=True)
testloader = torch.utils.data.DataLoader(testset, batch_size=64, shuffle=False)

# CNN model
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv = nn.Sequential(
            nn.Conv2d(3, 32, 3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Conv2d(32, 64, 3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2)
        )
        self.fc = nn.Sequential(
            nn.Linear(64*8*8, 128),
            nn.ReLU(),
            nn.Linear(128, 10)
        )

    def forward(self, x):
        x = self.conv(x)
        x = x.view(x.size(0), -1)
        return self.fc(x)

net = Net()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr=0.001)

# Training (1 epoch shown for simplicity)
for epoch in range(1):
    for images, labels in trainloader:
        outputs = net(images)
        loss = criterion(outputs, labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()


Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


100.0%


Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified


# Question 4: Identify Overfitting from Training Logs and Solve It
# Problem: You notice the training accuracy increases but validation accuracy stagnates. Modify the model using dropout and early stopping(use mnist dataset)

In [7]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Dropout
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.datasets import mnist
from tensorflow.keras.utils import to_categorical

(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0
y_train, y_test = to_categorical(y_train), to_categorical(y_test)

model = Sequential([
    Flatten(input_shape=(28, 28)),
    Dense(256, activation='relu'),
    Dropout(0.3),
    Dense(128, activation='relu'),
    Dropout(0.3),
    Dense(10, activation='softmax')
])

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
early_stop = EarlyStopping(patience=3, restore_best_weights=True)

model.fit(x_train, y_train, validation_data=(x_test, y_test), epochs=30, callbacks=[early_stop])


2026-01-06 11:05:51.716804: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
  if not hasattr(np, "object"):
  super().__init__(**kwargs)


Epoch 1/30
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 4ms/step - accuracy: 0.9118 - loss: 0.2922 - val_accuracy: 0.9645 - val_loss: 0.1138
Epoch 2/30
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 5ms/step - accuracy: 0.9570 - loss: 0.1435 - val_accuracy: 0.9715 - val_loss: 0.0916
Epoch 3/30
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 4ms/step - accuracy: 0.9648 - loss: 0.1168 - val_accuracy: 0.9758 - val_loss: 0.0825
Epoch 4/30
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3ms/step - accuracy: 0.9699 - loss: 0.0980 - val_accuracy: 0.9769 - val_loss: 0.0744
Epoch 5/30
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 4ms/step - accuracy: 0.9737 - loss: 0.0850 - val_accuracy: 0.9764 - val_loss: 0.0752
Epoch 6/30
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 4ms/step - accuracy: 0.9767 - loss: 0.0763 - val_accuracy: 0.9784 - val_loss: 0.0717
Epoch 7/30
[1m1

<keras.src.callbacks.history.History at 0x123511d50>

# Question 5: Transfer Learning with Pretrained VGG16 (Cats vs Dogs)
# Problem: Use VGG16 for binary classification with fine-tuning

In [8]:
from tensorflow.keras.applications import VGG16
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Flatten, Dropout

base_model = VGG16(include_top=False, input_shape=(224, 224, 3), weights='imagenet')
for layer in base_model.layers:
    layer.trainable = False

x = base_model.output
x = Flatten()(x)
x = Dropout(0.5)(x)
x = Dense(128, activation='relu')(x)
output = Dense(1, activation='sigmoid')(x)

model = Model(inputs=base_model.input, outputs=output)
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m58889256/58889256[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 0us/step
