# Transfer learning practice

In [78]:
import os
import numpy as np
import pandas as pd
import torch
from torch import nn
from torch.optim import Adam
from torch.utils.data import DataLoader, Dataset
from torchvision import models, transforms
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt
from tqdm import tqdm
from sklearn.model_selection import train_test_split
from PIL import Image
import shutil
import torchvision.ops as ops


In [79]:
data_dir = ''
train_dir = os.path.join(data_dir, 'train')
validation_dir = os.path.join(data_dir, 'test')


In [80]:
train_info = pd.read_csv('train.csv')

X = train_info[['filename', 'xmin', 'ymin', 'xmax', 'ymax']]
y = train_info['class_id']

data = pd.concat([X, y], axis=1)	

X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, stratify=y, random_state=42)

train_data = pd.concat([X_train, y_train], axis=1)
validation_data = pd.concat([X_val, y_val], axis=1)

In [81]:
root_dir = 'data'
train_dir = os.path.join(root_dir, 'train')
validation_dir = os.path.join(root_dir, 'validation')

In [82]:
def create_train_test_img_folder(train_df, test_df):
    splits = {'train': train_df, 'test': test_df}
    for split, df in splits.items():
        for i, row in df.iterrows():
            source_file = f'images/{row["filename"]}'
            destination_folder = f'data/{split}'
            os.makedirs(destination_folder, exist_ok=True)
            destination_file = os.path.join(destination_folder)
            shutil.copy(source_file, destination_file)


create_train_test_img_folder(train_data, validation_data)

In [83]:
class BoundingBoxDataset(Dataset):
    def __init__(self, data, root_dir, transform=None):
        self.data = data
        self.root_dir = root_dir
        self.transform = transform

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        row = self.data.iloc[idx]
        img_name = os.path.join(self.root_dir, row['filename'])
        image = plt.imread(img_name)
        
        # Apply transformations if any
        if self.transform:
            image = self.transform(image)

        # Extract bounding box coordinates
        bbox = torch.tensor([row['xmin'], row['ymin'], row['xmax'], row['ymax']])
        
        label = torch.tensor(row['class_id'] - 1)  # Subtract 1 to make labels 0-based

        return image, label, bbox


In [84]:
train_transforms = transforms.Compose([
    transforms.ToPILImage(),
    transforms.RandomHorizontalFlip(),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
    transforms.RandomRotation(10),
    transforms.RandomResizedCrop((255, 255), scale=(0.8, 1.0)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

validation_transforms = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize((255, 255)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])


# Load train and test data
train_dataset = BoundingBoxDataset(train_data, root_dir=train_dir, transform=train_transforms)
validation_dataset = BoundingBoxDataset(validation_data, root_dir=validation_dir, transform=validation_transforms)

# Data loaders
batch_size = 16
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
validation_loader = DataLoader(validation_dataset, batch_size=batch_size, shuffle=False)


## Device and model

In [85]:
# Loss functions
classification_criterion = nn.CrossEntropyLoss()
regression_criterion = nn.SmoothL1Loss()

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [86]:
class MultiTaskVGG16(nn.Module):
    def __init__(self, base_model):
        super(MultiTaskVGG16, self).__init__()
        
        # Base feature extractor
        self.features = base_model.features
        self.avgpool = base_model.avgpool
        self.flatten = nn.Flatten()
        
        # Shared fully connected layers
        self.shared_fc = nn.Sequential(
            nn.Linear(512 * 7 * 7, 4096),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(4096, 4096),
            nn.ReLU(),
            nn.Dropout(0.5)
        )
        
        # Classification head
        self.classifier = nn.Linear(4096, 2)  # For 2 classes: penguin and turtle
        
        # Bounding box regression head
        self.regressor = nn.Linear(4096, 4)  # For 4 coordinates: xmin, ymin, xmax, ymax

    def forward(self, x):
        x = self.features(x)
        x = self.avgpool(x)
        x = self.flatten(x)
        x = self.shared_fc(x)
        
        class_output = self.classifier(x)
        bbox_output = self.regressor(x)
        
        return class_output, bbox_output




In [87]:
class MultiTaskInceptionV3(nn.Module):
    def __init__(self):
        super(MultiTaskInceptionV3, self).__init__()
        
        # Load the Inception_v3 model
        base_model = models.inception_v3(pretrained=True, aux_logits=True)
        
        # Use the model's Conv2d layers up to the last pooling layer as the feature extractor
        self.features = nn.Sequential(
            base_model.Conv2d_1a_3x3,
            base_model.Conv2d_2a_3x3,
            base_model.Conv2d_2b_3x3,
            base_model.maxpool1,
            base_model.Conv2d_3b_1x1,
            base_model.Conv2d_4a_3x3,
            base_model.maxpool2,
            base_model.Mixed_5b,
            base_model.Mixed_5c,
            base_model.Mixed_5d,
            base_model.Mixed_6a,
            base_model.Mixed_6b,
            base_model.Mixed_6c,
            base_model.Mixed_6d,
            base_model.Mixed_6e,
            base_model.Mixed_7a,
            base_model.Mixed_7b,
            base_model.Mixed_7c,
        )
        
        # Average pooling and flatten
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.flatten = nn.Flatten()
        
        # Fully connected shared layers
        self.shared_fc = nn.Sequential(
            nn.Linear(2048, 4096),  # Inception outputs 2048 feature maps
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(4096, 4096),
            nn.ReLU(),
            nn.Dropout(0.5)
        )
        
        # Classification head
        self.classifier = nn.Linear(4096, 2)  # For 2 classes: penguin and turtle
        
        # Bounding box regression head
        self.regressor = nn.Linear(4096, 4)  # For 4 coordinates: xmin, ymin, xmax, ymax

    def forward(self, x):
        x = self.features(x)
        x = self.avgpool(x)
        x = self.flatten(x)
        x = self.shared_fc(x)
        
        class_output = self.classifier(x)
        bbox_output = self.regressor(x)
        
        return class_output, bbox_output

In [88]:
def train(model, train_loader, optimizer, device):
    model.train()
    running_loss = 0.0
    for images, labels, bboxes in tqdm(train_loader):
        images, labels, bboxes = images.to(device), labels.to(device), bboxes.to(device)
        
        optimizer.zero_grad()
        class_outputs, bbox_outputs = model(images)
        
        # Compute losses
        class_loss = classification_criterion(class_outputs, labels)
        bbox_loss = regression_criterion(bbox_outputs, bboxes.float())
        loss = class_loss + bbox_loss  # Combine losses
        
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item() * images.size(0)
    
    epoch_loss = running_loss / len(train_loader.dataset)
    return epoch_loss



def calculate_iou(pred_boxes, true_boxes):
    return ops.box_iou(pred_boxes, true_boxes).diag().mean().item()

def evaluate(model, test_loader, device):
    model.eval()
    running_loss = 0.0
    all_preds, all_labels = [], []
    iou_scores = []  # Store IoU scores for bounding boxes
    
    with torch.no_grad():
        for images, labels, bboxes in tqdm(test_loader):
            images, labels, bboxes = images.to(device), labels.to(device), bboxes.to(device)
            
            # Forward pass
            class_outputs, bbox_outputs = model(images)
            
            # Compute losses
            class_loss = classification_criterion(class_outputs, labels)
            bbox_loss = regression_criterion(bbox_outputs, bboxes.float())
            loss = class_loss + bbox_loss
            running_loss += loss.item() * images.size(0)
            
            # Store predictions and labels for accuracy calculation
            preds = class_outputs.argmax(dim=1)
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())
            
            # Calculate IoU and append to list
            iou = calculate_iou(bbox_outputs, bboxes)
            iou_scores.append(iou)
    
    # Calculate metrics
    accuracy = accuracy_score(all_labels, all_preds)  # Classification accuracy
    average_iou = sum(iou_scores) / len(iou_scores)  # Average IoU across the dataset
    epoch_loss = running_loss / len(test_loader.dataset)
    
    return epoch_loss, accuracy, average_iou



In [89]:
base_model = models.vgg16(pretrained=True)

# Freeze the base model layers
for param in base_model.parameters():
    param.requires_grad = False

# Initialize model
model = MultiTaskVGG16(base_model).to(device)
optimizer = Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=0.0001)



In [None]:
import torch

def train_model(model, train_loader, validation_loader, optimizer, device, num_epochs=10):
    best_accuracy = 0.0
    best_IoU = 1.0
    
    for epoch in range(num_epochs):
        print(f"Epoch {epoch + 1}/{num_epochs}")
        
        train_loss = train(model, train_loader, optimizer, device)
        val_loss, val_accuracy, test_iou = evaluate(model, validation_loader, device)
        
        print(f"Train Loss: {train_loss:.4f}")
        print(f"Validation Loss: {val_loss:.4f}")
        print(f"Validation Accuracy: {val_accuracy:.4f}")
        print(f"Test Average IoU: {test_iou:.4f}")
        
        # Save the model if it has the best accuracy so far
        if val_accuracy > best_accuracy and test_iou < best_IoU:
            best_accuracy = val_accuracy
            best_IoU = test_iou
            # torch.save(model.state_dict(), "best_model_full.pth")
            # print("Saved Best Model!")

In [91]:
# train_model(model, train_loader, validation_loader, optimizer, device, num_epochs=10)

In [92]:
# Initialize the modified InceptionV3 model
model = MultiTaskInceptionV3().to(device)

# Freeze the base model layers
for param in model.features.parameters():
    param.requires_grad = False

# Optimizer (only fine-tuning the final layers)
optimizer = Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=0.0001)

# Train the model
train_model(model, train_loader, validation_loader, optimizer, device, num_epochs=10)




Epoch 1/10


100%|██████████| 24/24 [00:49<00:00,  2.04s/it]
100%|██████████| 6/6 [00:09<00:00,  1.63s/it]


Train Loss: 222.5013
Validation Loss: 183.5184
Validation Accuracy: 0.9684
Test Average IoU: 0.0000
Saved Best Model!
Epoch 2/10


100%|██████████| 24/24 [00:49<00:00,  2.06s/it]
100%|██████████| 6/6 [00:09<00:00,  1.65s/it]


Train Loss: 124.1427
Validation Loss: 111.5600
Validation Accuracy: 0.4947
Test Average IoU: 0.0221
Epoch 3/10


100%|██████████| 24/24 [00:50<00:00,  2.08s/it]
 50%|█████     | 3/6 [00:06<00:06,  2.23s/it]


KeyboardInterrupt: 

In [None]:
# model.load_state_dict(torch.load("best_model_full.pth"))

# # Evaluate on the test set
# validation_loss, validation_accuracy, val_iou = evaluate(model, validation_loader, device)

# print(f"Test Loss: {validation_loss:.4f}")
# print(f"Test Accuracy: {validation_accuracy:.4f}")
# print(f"Test Average IoU: {val_iou:.4f}")


  model.load_state_dict(torch.load("best_model_full.pth"))
100%|██████████| 6/6 [00:15<00:00,  2.51s/it]

Test Loss: 53.6148
Test Accuracy: 1.0000
Test Average IoU: 0.1496





In [None]:
# # Inference and saving to CSV
# def predict_and_save(model, validation_loader, device, output_file="submission.csv"):
#     model.eval()
#     predictions = []
#     seen_filenames = set()  # Track unique filenames to avoid duplicates
    
#     with torch.no_grad():
#         for images, labels, bboxes in validation_loader:  # Accepts images, labels, and bboxes
#             images = images.to(device)
#             class_outputs, bbox_outputs = model(images)
            
#             # Get predicted class (1-based indexing)
#             class_preds = class_outputs.argmax(dim=1).cpu().numpy() + 1  
#             # Get bounding box predictions
#             bbox_preds = bbox_outputs.cpu().numpy()
            
#             # Assuming filenames are stored as part of the dataset's DataFrame
#             for filename, class_id, bbox_pred, label, bbox_true in zip(
#                 validation_loader.dataset.data['filename'], class_preds, bbox_preds, labels.cpu().numpy(), bboxes.cpu().numpy()
#             ):
#                 # Only add unique filenames
#                 if filename not in seen_filenames:
#                     predictions.append([filename, class_id, *bbox_pred, label, *bbox_true])
#                     seen_filenames.add(filename)  # Mark this filename as seen
    
#     # Save predictions to CSV
#     submission_df = pd.DataFrame(predictions, columns=[
#         "filename", "pred_class_id", "pred_xmin", "pred_ymin", "pred_xmax", "pred_ymax",
#         "true_class_id", "true_xmin", "true_ymin", "true_xmax", "true_ymax"
#     ])
#     submission_df.to_csv(output_file, index=False)
    
#     print(f"Saved predictions to {output_file}")

# # Run the prediction and save function
# predict_and_save(model, train_loader, device, output_file="submission_full_test_2.csv")



# Test

In [None]:


# # Custom dataset for validation/test set without labels
# class TestDataset(Dataset):
#     def __init__(self, csv_file, img_dir, transform=None):
#         self.data = pd.read_csv(csv_file)
#         self.img_dir = img_dir
#         self.transform = transform

#     def __len__(self):
#         return len(self.data)

#     def __getitem__(self, idx):
#         filename = self.data.iloc[idx]['filename']
#         img_path = os.path.join(self.img_dir, filename)
#         image = Image.open(img_path).convert("RGB")
        
#         if self.transform:
#             image = self.transform(image)
        
#         return image, filename  # Only return image and filename


In [None]:
# # Define transformations
# data_transforms = transforms.Compose([
#     transforms.Resize((255, 255)),
#     transforms.ToTensor(),
#     transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
# ])

# # Paths
# csv_file = 'test.csv'
# img_dir = 'images'

# # Create the dataset and loader
# test_dataset = TestDataset(csv_file, img_dir, transform=data_transforms)
# test_loader = DataLoader(test_dataset, batch_size=16, shuffle=False)

In [None]:
# # Inference and saving to CSV for unlabeled validation/test set
# def predict_and_save_unlabeled(model, loader, device, output_file="test_predictions.csv"):
#     model.eval()
#     predictions = []
#     seen_filenames = set()  # Track unique filenames to avoid duplicates
    
#     with torch.no_grad():
#         for images, filenames in loader:  # Accepts images and filenames only
#             images = images.to(device)
#             class_outputs, bbox_outputs = model(images)
            
#             # Get predicted class (1-based indexing)
#             class_preds = class_outputs.argmax(dim=1).cpu().numpy() + 1  
#             # Get bounding box predictions
#             bbox_preds = bbox_outputs.cpu().numpy()
            
#             # Save predictions for each file
#             for filename, class_id, bbox_pred in zip(filenames, class_preds, bbox_preds):
#                 # Only add unique filenames
#                 if filename not in seen_filenames:
#                     predictions.append([filename, class_id, *bbox_pred])
#                     seen_filenames.add(filename)  # Mark this filename as seen
    
#     # Save predictions to CSV
#     submission_df = pd.DataFrame(predictions, columns=[
#         "filename", "pred_class_id", "pred_xmin", "pred_ymin", "pred_xmax", "pred_ymax"
#     ])
#     submission_df.to_csv(output_file, index=False)
    
#     print(f"Saved predictions to {output_file}")

# # Run the prediction and save function for validation/test data
# predict_and_save_unlabeled(model, test_loader, device, output_file="test_predictions_perfect.csv")


Saved predictions to test_predictions_perfect.csv
