# Fine-tuning MobileNetV3 for Pet Emotion Classification

This notebook fine-tunes a pretrained MobileNetV3 model on two pet emotion datasets to classify emotions: **happy**, **sad**, and **angry**.

## Datasets:
1. Dog Emotion Dataset (4000 images)
2. Pet's Facial Expression Dataset (1000 images)

We'll combine both datasets and focus only on the three target emotions.

In [None]:
# Install required packages
!pip install timm torch torchvision pandas pillow scikit-learn matplotlib seaborn

In [None]:
# Import required libraries
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from PIL import Image
import shutil
from pathlib import Path

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, random_split
import torchvision.transforms as transforms
from torchvision.datasets import ImageFolder
import timm

from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split

import warnings

warnings.filterwarnings("ignore")

# Select the compute device: CUDA when torch reports one, otherwise the CPU
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")

## Data Loading and Preparation

First, let's load both datasets and combine them for training.

In [None]:
# Locations of the two source datasets (adjust these based on your Kaggle
# input paths)
DOG_EMOTION_PATH = "/kaggle/input/dog-emotion/Dog Emotion"
PET_EXPRESSION_PATH = "/kaggle/input/pets-facial-expression-image-dataset"

# Root folder of the merged dataset
COMBINED_DATA_PATH = "/kaggle/working/combined_pet_emotions"

# The three target emotions; each one gets its own sub-folder under the root
emotions = ["happy", "sad", "angry"]

os.makedirs(COMBINED_DATA_PATH, exist_ok=True)
for emotion in emotions:
    Path(COMBINED_DATA_PATH, emotion).mkdir(exist_ok=True)

print("Created directories for combined dataset")

In [None]:
# Function to copy images to combined dataset
_IMAGE_EXTENSIONS = (".jpg", ".jpeg", ".png")


def _find_class_dir(root, class_name):
    """Return the sub-folder of `root` named `class_name`, ignoring case, or None."""
    exact = os.path.join(root, class_name)
    if os.path.isdir(exact):
        return exact
    if not os.path.isdir(root):
        return None
    wanted = class_name.lower()
    for entry in sorted(os.listdir(root)):
        candidate = os.path.join(root, entry)
        if entry.lower() == wanted and os.path.isdir(candidate):
            return candidate
    return None


def _copy_class_images(source_dir, target_dir, prefix):
    """Copy every image file in `source_dir` to `target_dir`, prefixing its name."""
    copied = 0
    for img_file in sorted(os.listdir(source_dir)):
        if img_file.lower().endswith(_IMAGE_EXTENSIONS):
            shutil.copy2(
                os.path.join(source_dir, img_file),
                os.path.join(target_dir, f"{prefix}{img_file}"),
            )
            copied += 1
    return copied


def copy_images_to_combined_dataset(
    dog_root=None, pet_root=None, combined_root=None, target_emotions=None
):
    """Copy the target-emotion images of both source datasets into one tree.

    For each target emotion, files ending in .jpg/.jpeg/.png are copied from
    the matching class folder of each source dataset into
    ``<combined_root>/<emotion>``. File names are prefixed with ``dog_`` or
    ``pet_`` so that identically named files from the two datasets cannot
    overwrite each other.

    Class folders are matched case-insensitively, because the source datasets
    do not capitalise their folder names consistently (the previous hard-coded
    mapping used "happy", "Sad" and "Angry"). A class folder that cannot be
    found is reported with a warning instead of being skipped silently.

    Args:
        dog_root: Root of the Dog Emotion dataset. Defaults to
            ``DOG_EMOTION_PATH``.
        pet_root: Root of the Pet Expression dataset. Defaults to
            ``PET_EXPRESSION_PATH``.
        combined_root: Destination root. Defaults to ``COMBINED_DATA_PATH``.
        target_emotions: Emotion names to collect. Defaults to ``emotions``.

    Returns:
        The total number of images copied.
    """
    # Defaults are resolved at call time so the notebook-level settings are
    # honoured even if they are edited after this cell has run.
    if dog_root is None:
        dog_root = DOG_EMOTION_PATH
    if pet_root is None:
        pet_root = PET_EXPRESSION_PATH
    if combined_root is None:
        combined_root = COMBINED_DATA_PATH
    if target_emotions is None:
        target_emotions = emotions

    sources = (
        ("Dog Emotion", dog_root, "dog_"),
        ("Pet Expression", pet_root, "pet_"),
    )

    image_count = 0
    for dataset_name, dataset_root, prefix in sources:
        print(f"Processing {dataset_name} dataset...")
        for emotion in target_emotions:
            target_dir = os.path.join(combined_root, emotion)
            # Make sure the destination exists even when this cell is run
            # without the directory-creation cell.
            os.makedirs(target_dir, exist_ok=True)

            source_dir = _find_class_dir(dataset_root, emotion)
            if source_dir is None:
                print(
                    f"Warning: no '{emotion}' folder found under "
                    f"{dataset_root}; skipping"
                )
                continue
            image_count += _copy_class_images(source_dir, target_dir, prefix)

    return image_count


# Run the copy step and report how many files it copied
total_images = copy_images_to_combined_dataset()
print(f"Total images copied: {total_images}")

# Per-class image counts in the combined dataset folders
for emotion in emotions:
    emotion_dir = os.path.join(COMBINED_DATA_PATH, emotion)
    count = sum(
        1
        for name in os.listdir(emotion_dir)
        if name.lower().endswith((".jpg", ".jpeg", ".png"))
    )
    print(f"{emotion}: {count} images")

## Data Transforms and Dataset Setup

Define data augmentation and normalization transforms for training and validation.

In [None]:
# Define transforms
# Training pipeline: resize plus random flip / rotation / colour jitter,
# followed by tensor conversion and normalisation.
train_transforms = transforms.Compose(
    [
        transforms.Resize((224, 224)),
        transforms.RandomHorizontalFlip(p=0.5),
        transforms.RandomRotation(degrees=15),
        transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]
)

# Validation pipeline: same resize and normalisation, no random augmentation.
val_transforms = transforms.Compose(
    [
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]
)

# Create datasets
# Two views over the same folder, one per transform pipeline. Both subsets
# returned by random_split reference the SAME underlying dataset object, so
# assigning `val_dataset.dataset.transform` would also replace the training
# transforms and silently disable augmentation. A second ImageFolder keeps the
# two pipelines independent.
full_dataset = ImageFolder(root=COMBINED_DATA_PATH, transform=train_transforms)
val_source_dataset = ImageFolder(root=COMBINED_DATA_PATH, transform=val_transforms)
print(f"Total dataset size: {len(full_dataset)}")
print(f"Classes: {full_dataset.classes}")
print(f"Class to index mapping: {full_dataset.class_to_idx}")

# Split dataset (80% train, 20% validation)
train_size = int(0.8 * len(full_dataset))
val_size = len(full_dataset) - train_size

# Seeded generator so the split is the same every time the cell is re-run.
split_generator = torch.Generator().manual_seed(42)
train_dataset, val_split = random_split(
    full_dataset, [train_size, val_size], generator=split_generator
)

# Reuse the validation indices on the un-augmented view. Both ImageFolder
# instances were built from the same root, so an index is expected to refer to
# the same file in each.
val_dataset = torch.utils.data.Subset(val_source_dataset, val_split.indices)

print(f"Train dataset size: {len(train_dataset)}")
print(f"Validation dataset size: {len(val_dataset)}")

In [None]:
# Create data loaders
batch_size = 32

# Options common to both loaders; only the shuffle flag differs between them
loader_options = dict(batch_size=batch_size, num_workers=2, pin_memory=True)

train_loader = DataLoader(train_dataset, shuffle=True, **loader_options)
val_loader = DataLoader(val_dataset, shuffle=False, **loader_options)

print(f"Number of training batches: {len(train_loader)}")
print(f"Number of validation batches: {len(val_loader)}")

## Model Setup

Load the pretrained MobileNetV3 model and modify it for our 3-class classification task.

In [None]:
# Build the pretrained MobileNetV3 with a 3-class head and place it on the
# selected device in one step
model = timm.create_model(
    "mobilenetv3_large_100", pretrained=True, num_classes=3
).to(device)

# Confirm where the weights live
print("Model loaded successfully!")
print(f"Model device: {next(model.parameters()).device}")

# Tally all parameters and the subset that will receive gradients
total_params = 0
trainable_params = 0
for param in model.parameters():
    total_params += param.numel()
    if param.requires_grad:
        trainable_params += param.numel()
print(f"Total parameters: {total_params:,}")
print(f"Trainable parameters: {trainable_params:,}")

## Training Setup

Define loss function, optimizer, and training configuration.

In [None]:
# Hyperparameters for the fine-tuning run
num_epochs = 20
learning_rate = 0.001
weight_decay = 1e-4

# Cross-entropy loss, optimised with AdamW over all model parameters
criterion = nn.CrossEntropyLoss()
optimizer = optim.AdamW(
    model.parameters(),
    lr=learning_rate,
    weight_decay=weight_decay,
)

# Multiply the learning rate by 0.1 every 7 scheduler steps
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

print("Training setup complete!")
for setting_name, setting_value in (
    ("Number of epochs", num_epochs),
    ("Learning rate", learning_rate),
    ("Batch size", batch_size),
):
    print(f"{setting_name}: {setting_value}")

## Training Loop

Train the model with validation monitoring.

In [None]:
# Training function
def _run_epoch(model, loader, criterion, device, optimizer=None, log_every=50):
    """Run one pass over `loader`; train when `optimizer` is given, else evaluate.

    Returns:
        (mean loss per sample, accuracy in percent). Both are 0.0 when the
        loader yields no samples.
    """
    is_training = optimizer is not None
    model.train(is_training)

    loss_sum = 0.0
    correct = 0
    seen = 0

    # Gradients are only tracked while training.
    with torch.set_grad_enabled(is_training):
        for batch_idx, (images, labels) in enumerate(loader):
            images, labels = images.to(device), labels.to(device)

            if is_training:
                optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            if is_training:
                loss.backward()
                optimizer.step()

            batch_count = labels.size(0)
            # Weight each batch by its size so a smaller final batch does not
            # skew the epoch average.
            loss_sum += loss.item() * batch_count
            correct += (outputs.argmax(dim=1) == labels).sum().item()
            seen += batch_count

            if is_training and (batch_idx + 1) % log_every == 0:
                print(
                    f"Batch [{batch_idx + 1}/{len(loader)}], Loss: {loss.item():.4f}"
                )

    if seen == 0:
        return 0.0, 0.0
    return loss_sum / seen, 100 * correct / seen


def train_model(
    model, train_loader, val_loader, criterion, optimizer, scheduler, num_epochs
):
    """Train `model` for `num_epochs` epochs, validating after each one.

    Args:
        model: Module to train; batches are moved to the device of its first
            parameter (CPU if it has none).
        train_loader: Iterable of (images, labels) batches used for training.
        val_loader: Iterable of (images, labels) batches used for validation.
        criterion: Loss callable taking (outputs, labels).
        optimizer: Optimizer stepped once per training batch.
        scheduler: Learning-rate scheduler stepped once per epoch.
        num_epochs: Number of epochs to run.

    Returns:
        A dict with per-epoch lists ``train_losses``, ``train_accuracies``,
        ``val_losses`` and ``val_accuracies`` (accuracies in percent), plus
        ``best_model_state`` (an independent copy of the state dict from the
        epoch with the highest validation accuracy, or None when
        ``num_epochs`` is 0) and ``best_val_acc``.
    """
    import copy

    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")

    train_losses = []
    train_accuracies = []
    val_losses = []
    val_accuracies = []

    best_val_acc = 0.0
    best_model_state = None

    for epoch in range(num_epochs):
        print(f"\nEpoch {epoch + 1}/{num_epochs}")
        print("-" * 60)

        train_loss, train_acc = _run_epoch(
            model, train_loader, criterion, device, optimizer=optimizer
        )
        val_loss, val_acc = _run_epoch(model, val_loader, criterion, device)

        # Store metrics
        train_losses.append(train_loss)
        train_accuracies.append(train_acc)
        val_losses.append(val_loss)
        val_accuracies.append(val_acc)

        # Save best model. The `is None` check guarantees a state is captured
        # even if validation accuracy never rises above 0.
        if best_model_state is None or val_acc > best_val_acc:
            best_val_acc = val_acc
            # state_dict() hands back tensors that share storage with the live
            # parameters, and dict.copy() is shallow; a deep copy is needed so
            # later epochs cannot overwrite the snapshot.
            best_model_state = copy.deepcopy(model.state_dict())

        print(f"Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.2f}%")
        print(f"Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.2f}%")
        print(f"Best Val Acc: {best_val_acc:.2f}%")

        scheduler.step()

    return {
        "train_losses": train_losses,
        "train_accuracies": train_accuracies,
        "val_losses": val_losses,
        "val_accuracies": val_accuracies,
        "best_model_state": best_model_state,
        "best_val_acc": best_val_acc,
    }


# Kick off the training run and keep the metrics it returns
print("Starting training...")
training_results = train_model(
    model=model,
    train_loader=train_loader,
    val_loader=val_loader,
    criterion=criterion,
    optimizer=optimizer,
    scheduler=scheduler,
    num_epochs=num_epochs,
)
print(
    "\nTraining completed! Best validation accuracy: "
    f"{training_results['best_val_acc']:.2f}%"
)

## Training Results Visualization

Plot training and validation metrics.

In [None]:
# Two side-by-side panels: loss on the left, accuracy on the right
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 5))

# Each panel draws the train curve in blue and the validation curve in red
for axis, metric, key_suffix, y_label in (
    (ax1, "Loss", "losses", "Loss"),
    (ax2, "Accuracy", "accuracies", "Accuracy (%)"),
):
    axis.plot(
        training_results[f"train_{key_suffix}"],
        label=f"Train {metric}",
        color="blue",
    )
    axis.plot(
        training_results[f"val_{key_suffix}"],
        label=f"Validation {metric}",
        color="red",
    )
    axis.set_title(f"Training and Validation {metric}")
    axis.set_xlabel("Epoch")
    axis.set_ylabel(y_label)
    axis.legend()
    axis.grid(True)

plt.tight_layout()
plt.show()

# Last-epoch metrics next to the best validation accuracy seen during training
final_train_acc = training_results["train_accuracies"][-1]
final_val_acc = training_results["val_accuracies"][-1]
print(f"Final Train Accuracy: {final_train_acc:.2f}%")
print(f"Final Validation Accuracy: {final_val_acc:.2f}%")
print(f"Best Validation Accuracy: {training_results['best_val_acc']:.2f}%")

## Model Evaluation

Load the best model and evaluate it on the validation set with detailed metrics.

In [None]:
# Load best model
model.load_state_dict(training_results["best_model_state"])
model.eval()

# Evaluate on validation set
all_predictions = []
all_labels = []

with torch.no_grad():
    for images, labels in val_loader:
        images = images.to(device)
        predicted = model(images).argmax(dim=1)

        all_predictions.extend(predicted.cpu().tolist())
        all_labels.extend(labels.tolist())

# Convert to numpy arrays
all_predictions = np.array(all_predictions)
all_labels = np.array(all_labels)

# Take the class names from the dataset so their order always follows the
# label indices the model was trained on, instead of a hand-typed list.
class_names = list(full_dataset.classes)
# Passing the label ids explicitly keeps the report and the matrix at full
# size even when a class is missing from the validation split; otherwise
# `target_names` and the tick labels would no longer line up.
label_ids = list(range(len(class_names)))

# Classification report
print("Classification Report:")
print(
    classification_report(
        all_labels, all_predictions, labels=label_ids, target_names=class_names
    )
)

# Confusion matrix
cm = confusion_matrix(all_labels, all_predictions, labels=label_ids)
plt.figure(figsize=(8, 6))
sns.heatmap(
    cm,
    annot=True,
    fmt="d",
    cmap="Blues",
    xticklabels=class_names,
    yticklabels=class_names,
)
plt.title("Confusion Matrix")
plt.xlabel("Predicted")
plt.ylabel("Actual")
plt.show()

## Save the Model

Save the trained model for future use.

In [None]:
# Output folder for the checkpoint and its companion text file
model_dir = "/kaggle/working/pet_emotion_model"
os.makedirs(model_dir, exist_ok=True)

# Checkpoint payload: best weights plus the metadata needed to rebuild and
# interpret the model (transforms are stored as their string representation)
model_path = os.path.join(model_dir, "mobilenetv3_pet_emotion_classifier.pth")
checkpoint_payload = {
    "model_state_dict": training_results["best_model_state"],
    "model_architecture": "mobilenetv3_large_100",
    "num_classes": 3,
    "class_names": class_names,
    "class_to_idx": full_dataset.class_to_idx,
    "best_val_acc": training_results["best_val_acc"],
    "train_transforms": str(train_transforms),
    "val_transforms": str(val_transforms),
}
torch.save(checkpoint_payload, model_path)

# Human-readable summary of the run, one setting per line
config_path = os.path.join(model_dir, "model_config.txt")
config_lines = [
    "Model: MobileNetV3 Large 100",
    "Number of classes: 3",
    f"Class names: {class_names}",
    f"Class to index mapping: {full_dataset.class_to_idx}",
    f"Best validation accuracy: {training_results['best_val_acc']:.2f}%",
    f"Total training images: {len(train_dataset)}",
    f"Total validation images: {len(val_dataset)}",
    f"Training epochs: {num_epochs}",
    f"Batch size: {batch_size}",
    f"Learning rate: {learning_rate}",
]
with open(config_path, "w") as f:
    f.write("\n".join(config_lines) + "\n")

print(f"Model saved successfully to: {model_path}")
print(f"Model configuration saved to: {config_path}")

# List what was written, with sizes converted from bytes to MB
print("\nSaved files:")
for file in os.listdir(model_dir):
    file_size = os.path.getsize(os.path.join(model_dir, file)) / (1024 * 1024)
    print(f"  {file}: {file_size:.2f} MB")

## Model Loading Example

Example of how to load and use the saved model for inference.

In [None]:
# Example: Load the saved model for inference
def load_trained_model(model_path):
    """Load a saved checkpoint and rebuild the classifier for inference.

    The architecture name and class count are read from the checkpoint's
    "model_architecture" and "num_classes" entries, so the rebuilt network
    matches what was saved. Checkpoints without those entries fall back to
    "mobilenetv3_large_100" with 3 classes.

    Args:
        model_path: Path to a checkpoint file holding a dict with at least a
            "model_state_dict" entry.

    Returns:
        A tuple ``(model, checkpoint)``: the model with the saved weights
        loaded, moved to ``device`` and switched to eval mode, and the
        checkpoint dict as loaded.
    """
    checkpoint = torch.load(model_path, map_location=device)

    # Create model architecture from the stored metadata
    architecture = checkpoint.get("model_architecture", "mobilenetv3_large_100")
    num_classes = checkpoint.get("num_classes", 3)
    model = timm.create_model(architecture, pretrained=False, num_classes=num_classes)
    model.load_state_dict(checkpoint["model_state_dict"])
    model.to(device)
    model.eval()

    return model, checkpoint


# Restore the classifier from disk and echo the metadata stored alongside it
loaded_model, checkpoint = load_trained_model(model_path)
print(
    "Model loaded successfully!",
    f"Best validation accuracy: {checkpoint['best_val_acc']:.2f}%",
    f"Class names: {checkpoint['class_names']}",
    sep="\n",
)


# Example inference function
def predict_emotion(model, image_path, transform):
    """Classify one image file and return its class index and probabilities.

    The image is opened, converted to RGB, passed through `transform`, given a
    leading batch dimension and moved to `device` before the forward pass.

    Returns:
        A tuple ``(predicted_class, probabilities)``: the integer index of the
        highest-scoring class and a NumPy array with the softmax of the model
        output for this image.
    """
    rgb_image = Image.open(image_path).convert("RGB")
    batch = transform(rgb_image).unsqueeze(0).to(device)

    with torch.no_grad():
        logits = model(batch)
        # The batch holds a single image, so row 0 carries its class scores
        class_scores = torch.softmax(logits[0], dim=0)
        top_index = logits.argmax(dim=1).item()

    return top_index, class_scores.cpu().numpy()


# Closing hint for the reader of the notebook
print(
    "\nModel is ready for inference!",
    "Use the predict_emotion function to classify new pet images.",
    sep="\n",
)

## Summary

### Training Results:
- **Model**: MobileNetV3 Large 100
- **Classes**: angry, happy, sad
- **Best Validation Accuracy**: printed by the training cell above (`training_results['best_val_acc']`)
- **Total Images**: Combined from Dog Emotion and Pet Expression datasets
- **Training Strategy**: Transfer learning with data augmentation

### Files Saved:
- `mobilenetv3_pet_emotion_classifier.pth`: Complete model checkpoint
- `model_config.txt`: Model configuration and metadata

The model is now ready for deployment and can classify pet emotions into three categories: angry, happy, and sad.