# **1. Initializations and Dataset Download**

In [None]:
!pip install opendatasets --quiet
import opendatasets as od
od.download("https://www.kaggle.com/datasets/andrewmvd/animal-faces")

Please provide your Kaggle credentials to download this dataset. Learn more: http://bit.ly/kaggle-creds
Your Kaggle username: mukeshchahar
Your Kaggle Key: ··········
Dataset URL: https://www.kaggle.com/datasets/andrewmvd/animal-faces
Downloading animal-faces.zip to ./animal-faces


100%|██████████| 696M/696M [00:07<00:00, 98.5MB/s]





# **2. Imports**

In [None]:
import torch # Main PyTorch Library
from torch import nn # Used for creating the layers and loss function
from torch.optim import Adam # Adam Optimizer
import torchvision.transforms as transforms # Transform function used to modify and preprocess all the images
import torchvision.models as models # Import models module from torchvision
from torch.utils.data import Dataset, DataLoader # Dataset class and DataLoader for creating the objects
from sklearn.preprocessing import LabelEncoder # Label Encoder to encode the classes from strings to numbers
import matplotlib.pyplot as plt # Used for visualizing the images and plotting the training progress
from PIL import Image # Used to read the images from the directory
import pandas as pd # Used to read/create dataframes (csv) and process tabular data
import numpy as np # preprocessing and numerical/mathematical operations
import os # Used to read the images path from the directory
from sklearn.metrics import classification_report, confusion_matrix

# import torchvision.models as models # This import is moved to Zcdk3pY4pncn
from torchsummary import summary

import seaborn as sns
from tqdm import tqdm

# Prefer the GPU when PyTorch can see one, otherwise fall back to the CPU.
# (On a Mac, "mps" can be substituted for "cuda".)
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print("Device available: ", device)

Device available:  cpu


# **3. Reading Data Paths**

In [None]:
# Root of the extracted dataset; expected layout is <root>/<split>/<label>/<image file>.
DATA_ROOT = "/content/animal-faces/afhq"

image_path = [] # Paths of every image found under DATA_ROOT
labels = [] # Class label (the parent folder name) of each image, aligned with image_path

# Directory listings are sorted because os.listdir() returns entries in arbitrary
# order; a stable row order keeps the seeded train/val/test split reproducible.
for split in sorted(os.listdir(DATA_ROOT)): # split folders (train / val)
  split_dir = os.path.join(DATA_ROOT, split)
  if not os.path.isdir(split_dir): # skip stray files such as hidden OS metadata
    continue
  for label in sorted(os.listdir(split_dir)): # one folder per class
    label_dir = os.path.join(split_dir, label)
    if not os.path.isdir(label_dir):
      continue
    for image in sorted(os.listdir(label_dir)): # every image of this class
      labels.append(label)
      image_path.append(os.path.join(label_dir, image))

# One row per image: its path and its string label
data_df = pd.DataFrame({'image_paths': image_path, 'labels': labels})
data_df.head() # Visualizing the first 5 rows

Unnamed: 0,image_paths,labels
0,/content/animal-faces/afhq/train/cat/pixabay_c...,cat
1,/content/animal-faces/afhq/train/cat/pixabay_c...,cat
2,/content/animal-faces/afhq/train/cat/pixabay_c...,cat
3,/content/animal-faces/afhq/train/cat/flickr_ca...,cat
4,/content/animal-faces/afhq/train/cat/pixabay_c...,cat


# **4. Data Inspection**

In [None]:
# Show the distinct class names followed by the number of images per class.
print(
    "Classes are: ",
    data_df["labels"].unique(),
    "",
    "Classes ditrubution are: ",
    data_df["labels"].value_counts(),
    sep="\n",
)

Classes are: 
['cat' 'dog' 'wild']

Classes ditrubution are: 
labels
cat     5653
dog     5239
wild    5238
Name: count, dtype: int64


# **5. Data Split**

In [None]:
# 70 / 15 / 15 split with a fixed seed.
train = data_df.sample(frac=0.7, random_state=7)  # 70% of all rows for training
holdout = data_df.drop(train.index)  # the remaining 30%

val = holdout.sample(frac=0.5, random_state=7)  # half of the holdout -> 15% of all rows
test = holdout.drop(val.index)  # the other half of the holdout -> 15% of all rows

# **6. Preprocessing Objects**

In [None]:
label_encoder = LabelEncoder() # Encode the string classes to numeric ids
label_encoder.fit(data_df['labels']) # Fit the label encoder on our own data

# Channel statistics of ImageNet, the dataset the pretrained ResNet50 weights were
# trained on; inputs should be normalised the same way for transfer learning.
IMAGENET_MEAN = [0.485, 0.456, 0.406]
IMAGENET_STD = [0.229, 0.224, 0.225]

transform = transforms.Compose([
    transforms.Resize((128, 128)), # One size for all images
    transforms.ToTensor(), # PIL image -> float32 tensor scaled to [0, 1] (so no extra dtype conversion is needed)
    transforms.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD) # Match the pretrained backbone's expected input distribution
    ]) # Transform all images into one clear format (preprocess all images to same properties)

# **7. Custom Dataset Class**

In [None]:
class CustomImageDataset(Dataset):
    """Dataset that loads images lazily from paths stored in a dataframe.

    Args:
        dataframe: Frame whose first column holds the image path and which has a
            'labels' column of string class names.
        transform: Optional callable applied to the loaded RGB PIL image.

    Samples are returned on the CPU. The training / validation / test loops move
    each batch to the target device themselves, and keeping the dataset
    device-agnostic lets it work with DataLoader worker processes and pinned memory.
    """

    def __init__(self, dataframe, transform=None):
        self.dataframe = dataframe
        self.transform = transform
        # Encode all labels once up front; int64 is the dtype nn.CrossEntropyLoss expects
        # for class-index targets.
        self.labels = torch.tensor(label_encoder.transform(dataframe['labels']), dtype=torch.long)

    def __len__(self):
        """Number of samples (rows of the dataframe)."""
        return self.dataframe.shape[0]

    def __getitem__(self, idx):
        """Return ``(image, label)`` for the row at position ``idx``."""
        img_path = self.dataframe.iloc[idx, 0]  # column 0 is the image path
        label = self.labels[idx]
        # Context manager guarantees the file handle is released once pixels are read.
        with Image.open(img_path) as raw_image:
            image = raw_image.convert('RGB')
        if self.transform:
            image = self.transform(image)

        return image, label

# **7. Create Dataset Objects**

In [None]:
# Wrap each split in a dataset that shares the same preprocessing pipeline.
train_dataset, val_dataset, test_dataset = (
    CustomImageDataset(dataframe=split_df, transform=transform)
    for split_df in (train, val, test)
)

In [None]:
len(train_dataset)  # number of training samples

In [None]:
train_dataset[1]  # (image tensor, encoded label) of the second training sample

In [None]:
# Map the encoded class id 0 back to its original string label
label_encoder.inverse_transform([0])

# **8. Visualize Images**

In [None]:
# Show a grid of randomly drawn images from the whole dataset.
n_rows = 3
n_cols = 3
f, axarr = plt.subplots(n_rows, n_cols)
for ax in axarr.flat:  # row-major walk over the grid of axes
    sample_path = data_df.sample(n = 1)['image_paths'].iloc[0]  # one random image path
    image = Image.open(sample_path).convert("RGB")
    ax.imshow(image)
    ax.axis('off')

plt.show()

# **9. Hyperparameters**

In [None]:
EPOCHS = 10      # number of passes of the training loop
BATCH_SIZE = 16  # samples per batch in every DataLoader
LR = 0.0001      # learning rate handed to the optimizer

# **10. DataLoaders**

In [None]:
# Only the training data needs shuffling (to decorrelate consecutive batches).
# Validation and test are pure evaluation passes, so a fixed order keeps them
# deterministic and avoids needless work.
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

In [None]:
# Pick the compute device as a torch.device: GPU when available, CPU otherwise
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print("Using device: {}".format(device))

In [None]:
# Load pretrained ResNet50 and modify for transfer learning
def create_resnet50_model(num_classes=3, freeze_layers=True):
    """
    Create an ImageNet-pretrained ResNet50 with a new classification head.

    Args:
        num_classes: Number of output classes (3 for cat, dog, wild)
        freeze_layers: If True, every pretrained parameter is frozen except
            those of ``layer4``. The replacement head is created afterwards
            and is therefore always trainable.

    Returns:
        The modified ``torchvision`` ResNet50 module.
    """
    # Newer torchvision releases replaced `pretrained=True` with an explicit
    # `weights=` enum; IMAGENET1K_V1 is the checkpoint `pretrained=True` used to load.
    # Fall back to the legacy argument on old torchvision versions.
    weights_enum = getattr(models, "ResNet50_Weights", None)
    if weights_enum is not None:
        model = models.resnet50(weights=weights_enum.IMAGENET1K_V1)
    else:
        model = models.resnet50(pretrained=True)

    if freeze_layers:
        # Freeze the whole pretrained backbone ...
        for param in model.parameters():
            param.requires_grad = False

        # ... then re-enable gradients for the last residual stage so it can be fine-tuned
        for param in model.layer4.parameters():
            param.requires_grad = True

    # Replace the final fully connected layer with a small dropout-regularised MLP head
    num_features = model.fc.in_features
    model.fc = nn.Sequential(
        nn.Dropout(0.5),
        nn.Linear(num_features, 512),
        nn.ReLU(),
        nn.Dropout(0.3),
        nn.Linear(512, num_classes)
    )

    return model

In [None]:
# Build the 3-class ResNet50 (backbone frozen except layer4) and place it on the selected device
model = create_resnet50_model(num_classes=3, freeze_layers=True).to(device)

In [None]:
# Print model summary
print("="*50)
print("MODEL SUMMARY")
print("="*50)
try:
    summary(model, (3, 128, 128))
except Exception as exc:
    # Catch only ordinary errors (not KeyboardInterrupt/SystemExit) and report why
    # torchsummary failed before falling back to plain parameter counts.
    print(f"torchsummary failed ({exc!r}); showing parameter counts instead.")
    total_params = sum(p.numel() for p in model.parameters())
    trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
    print(f"Total parameters: {total_params:,}")
    print(f"Trainable parameters: {trainable_params:,}")
    print(f"Non-trainable parameters: {total_params - trainable_params:,}")

In [None]:
# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
# `Adam` is imported directly from torch.optim at the top of the notebook; the bare
# name `optim` was never imported, so referencing it raised NameError.
optimizer = Adam(model.parameters(), lr=LR, weight_decay=1e-4)

# Learning rate scheduler: multiply the learning rate by 0.1 every 7 scheduler steps
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

In [None]:
# Training function
def train_epoch(model, train_loader, criterion, optimizer, device):
    """Run one optimisation pass over ``train_loader``.

    Args:
        model: Network to train (switched to train mode here).
        train_loader: Iterable yielding ``(data, target)`` batches.
        criterion: Loss callable applied to ``(output, target)``.
        optimizer: Optimizer stepped once per batch.
        device: Device each batch is moved to before the forward pass.

    Returns:
        ``(epoch_loss, epoch_acc)``: the mean of the per-batch losses and the
        accuracy in percent over all samples seen.
    """
    model.train()
    loss_sum = 0.0
    n_correct = 0
    n_seen = 0

    progress = tqdm(train_loader, desc='Training')
    for step, (inputs, labels) in enumerate(progress, start=1):
        inputs = inputs.to(device)
        labels = labels.to(device)

        # Standard step: clear old gradients, forward, backward, update
        optimizer.zero_grad()
        logits = model(inputs)
        batch_loss = criterion(logits, labels)
        batch_loss.backward()
        optimizer.step()

        loss_sum += batch_loss.item()
        predicted = torch.max(logits.data, 1)[1]  # index of the highest score per sample
        n_seen += labels.size(0)
        n_correct += (predicted == labels).sum().item()

        # Running averages over the batches processed so far
        progress.set_postfix({
            'Loss': f'{loss_sum/step:.4f}',
            'Acc': f'{100.*n_correct/n_seen:.2f}%'
        })

    return loss_sum / len(train_loader), 100. * n_correct / n_seen

In [None]:
# Validation function
def validate_epoch(model, val_loader, criterion, device):
    """Evaluate ``model`` on ``val_loader`` without updating any weights.

    Args:
        model: Network to evaluate (switched to eval mode here).
        val_loader: Iterable yielding ``(data, target)`` batches.
        criterion: Loss callable applied to ``(output, target)``.
        device: Device each batch is moved to before the forward pass.

    Returns:
        ``(val_loss, val_acc)``: the mean of the per-batch losses and the
        accuracy in percent over all samples seen.
    """
    model.eval()
    val_loss = 0.0
    correct = 0
    total = 0

    with torch.no_grad():
        val_bar = tqdm(val_loader, desc='Validation')
        for batch_idx, (data, target) in enumerate(val_bar):
            data, target = data.to(device), target.to(device)
            output = model(data)
            val_loss += criterion(output, target).item()
            _, predicted = torch.max(output, 1)
            total += target.size(0)
            correct += (predicted == target).sum().item()

            # Divide by the number of batches processed so far (as train_epoch does);
            # dividing by len(val_loader) under-reported the loss until the last batch.
            val_bar.set_postfix({
                'Loss': f'{val_loss/(batch_idx+1):.4f}',
                'Acc': f'{100.*correct/total:.2f}%'
            })

    val_loss /= len(val_loader)
    val_acc = 100. * correct / total
    return val_loss, val_acc

In [None]:
# Training loop banner
print("\n" + "="*50, "STARTING TRAINING", "="*50, sep="\n")

In [None]:
# Per-epoch history, used later for the training curves
train_losses = []
train_accuracies = []
val_losses = []
val_accuracies = []

# Best validation accuracy seen so far and the weights that produced it
best_val_acc = 0.0
best_model_state = None

for epoch in range(EPOCHS):
    print(f"\nEpoch {epoch+1}/{EPOCHS}")
    print("-" * 30)

    # Train
    train_loss, train_acc = train_epoch(model, train_loader, criterion, optimizer, device)

    # Validate
    val_loss, val_acc = validate_epoch(model, val_loader, criterion, device)

    # Update scheduler
    scheduler.step()

    # Save best model.
    # state_dict() returns references to the live tensors, and dict.copy() is shallow,
    # so later optimizer steps would silently overwrite the "best" snapshot.
    # Clone every tensor to take a real, independent copy.
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        best_model_state = {
            name: tensor.detach().clone()
            for name, tensor in model.state_dict().items()
        }

    # Store metrics
    train_losses.append(train_loss)
    train_accuracies.append(train_acc)
    val_losses.append(val_loss)
    val_accuracies.append(val_acc)

    print(f"Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.2f}%")
    print(f"Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.2f}%")
    print(f"Learning Rate: {scheduler.get_last_lr()[0]:.6f}")

# Put the best-performing weights back into the model so that everything run after
# training (testing, inference) uses them instead of the last epoch's weights.
if best_model_state is not None:
    model.load_state_dict(best_model_state)
    print(f"\nRestored best model weights (Val Acc: {best_val_acc:.2f}%)")

In [None]:
# Test function
def test_model(model, test_loader, device):
    """Evaluate ``model`` on ``test_loader`` and collect its predictions.

    Args:
        model: Network to evaluate (switched to eval mode here).
        test_loader: Iterable yielding ``(data, target)`` batches.
        device: Device each batch is moved to before the forward pass.

    Returns:
        ``(test_acc, all_predicted, all_targets)``: accuracy in percent plus two
        aligned lists holding the predicted and the true class index of every sample.
    """
    model.eval()
    n_correct, n_seen = 0, 0
    all_predicted, all_targets = [], []

    with torch.no_grad():
        progress = tqdm(test_loader, desc='Testing')
        for inputs, labels in progress:
            inputs = inputs.to(device)
            labels = labels.to(device)

            predicted = torch.max(model(inputs), 1)[1]  # index of the highest score per sample
            n_seen += labels.size(0)
            n_correct += (predicted == labels).sum().item()

            # Bring results back to the CPU so they can be stored as NumPy scalars
            all_predicted.extend(predicted.cpu().numpy())
            all_targets.extend(labels.cpu().numpy())

            progress.set_postfix({'Acc': f'{100.*n_correct/n_seen:.2f}%'})

    return 100. * n_correct / n_seen, all_predicted, all_targets

In [None]:
# Evaluate on the held-out test split and report the accuracy
print("\n" + "="*50, "TESTING MODEL", "="*50, sep="\n")

test_acc, predictions, targets = test_model(model, test_loader, device)
print("Test Accuracy: {:.2f}%".format(test_acc))

In [None]:
# Get class names: the original string labels stored on the fitted label encoder,
# used to label the classification report and the confusion-matrix axes
class_names = label_encoder.classes_

In [None]:
# Per-class precision / recall / F1 on the test predictions
print("\n" + "="*50, "CLASSIFICATION REPORT", "="*50, sep="\n")
report_text = classification_report(targets, predictions, target_names=class_names)
print(report_text)

In [None]:
# Plotting Training Progress
# Opens a wide (15 x 5 inch) figure intended to hold the three panels drawn below.
# NOTE(review): with the notebook inline backend a figure is usually rendered and
# closed at the end of its own cell, so the panels in the following cells may not
# land on this canvas; confirm, and consider merging the plotting cells.
plt.figure(figsize=(15, 5))

In [None]:
# Plot 1: Training and Validation Loss
# Derive the x-axis from the number of epochs actually recorded rather than from
# EPOCHS, so the plot still works if training was stopped early or EPOCHS was
# edited after training (a length mismatch makes plt.plot raise).
epochs_ran = range(1, len(train_losses) + 1)
plt.subplot(1, 3, 1)
plt.plot(epochs_ran, train_losses, 'b-', label='Training Loss')
plt.plot(epochs_ran, val_losses, 'r-', label='Validation Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.title('Training and Validation Loss')
plt.legend()
plt.grid(True)

In [None]:
# Plot 2: Training and Validation Accuracy
# Derive the x-axis from the number of epochs actually recorded rather than from
# EPOCHS, so the plot still works if training was stopped early or EPOCHS was
# edited after training (a length mismatch makes plt.plot raise).
epochs_ran = range(1, len(train_accuracies) + 1)
plt.subplot(1, 3, 2)
plt.plot(epochs_ran, train_accuracies, 'b-', label='Training Accuracy')
plt.plot(epochs_ran, val_accuracies, 'r-', label='Validation Accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy (%)')
plt.title('Training and Validation Accuracy')
plt.legend()
plt.grid(True)

In [None]:
# Plot 3: Confusion Matrix (rows = actual class, columns = predicted class)
cm = confusion_matrix(targets, predictions)
cm_ax = plt.subplot(1, 3, 3)
sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=class_names,
    yticklabels=class_names,
    ax=cm_ax,
)
cm_ax.set_xlabel('Predicted')
cm_ax.set_ylabel('Actual')
cm_ax.set_title('Confusion Matrix')

plt.tight_layout()
plt.show()

In [None]:
# Inference function for single images
def predict_single_image(model, image_path, transform, device, label_encoder):
    """
    Classify one image file.

    Args:
        model: Trained model (switched to eval mode here)
        image_path: Path to the image
        transform: Preprocessing applied to the loaded RGB image
        device: Device to run inference on
        label_encoder: Encoder used to turn the predicted index into a class name

    Returns:
        Tuple ``(predicted_class, confidence, probabilities)``: the decoded class
        name, the softmax probability of that class in percent, and the NumPy
        array of softmax probabilities for all classes.
    """
    model.eval()

    # Load the image, preprocess it and add a leading batch dimension of size 1
    rgb_image = Image.open(image_path).convert('RGB')
    batch = transform(rgb_image).unsqueeze(0).to(device)

    with torch.no_grad():
        logits = model(batch)
        probs = torch.nn.functional.softmax(logits, dim=1)
        top_idx = torch.max(logits, 1)[1]  # index of the highest score

    class_id = top_idx.cpu().numpy()[0]
    predicted_class = label_encoder.inverse_transform([class_id])[0]
    confidence = probs[0][top_idx].item() * 100

    return predicted_class, confidence, probs.cpu().numpy()[0]

In [None]:
# Use the first row of the test split as the inference example
sample_row = test.iloc[0]
sample_image_path = sample_row['image_paths']
true_label = sample_row['labels']

predicted_class, confidence, all_probs = predict_single_image(
    model, sample_image_path, transform, device, label_encoder
)

print("Image: {}".format(sample_image_path))
print("True Label: {}".format(true_label))
print("Predicted: {}".format(predicted_class))
print("Confidence: {:.2f}%".format(confidence))
print("All class probabilities:")
for class_name, class_prob in zip(class_names, all_probs):
    print("  {}: {:.2f}%".format(class_name, class_prob*100))


In [None]:
# Show the example image, titled with its true label and the model's prediction
img = Image.open(sample_image_path)
fig, ax = plt.subplots(figsize=(8, 6))
ax.imshow(img)
ax.axis('off')
ax.set_title(f'True: {true_label} | Predicted: {predicted_class} ({confidence:.1f}%)')
plt.show()