# Transfer learning practice

In [20]:
import os
import numpy as np
import pandas as pd
import torch
from torch import nn
from torch.optim import Adam
from torch.utils.data import DataLoader
from torchvision import datasets, models, transforms
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt
from tqdm import tqdm
from sklearn.model_selection import train_test_split
import shutil
import os
import numpy as np
import pandas as pd
import torch
from torch import nn
from torch.optim import Adam
from torch.utils.data import DataLoader, Dataset
from torchvision import models, transforms
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt
from tqdm import tqdm


In [21]:
# Define directory paths
data_dir = 'AF_dataset'
train_dir = os.path.join(data_dir, 'train')
test_dir = os.path.join(data_dir, 'test')


In [22]:
path = 'AF_dataset'
train_info = pd.read_csv(f'{path}/train.csv')

X = train_info[['filename', 'xmin', 'ymin', 'xmax', 'ymax']]
y = train_info['class_id']

data = pd.concat([X, y], axis=1)	

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y, random_state=42)

train_data = pd.concat([X_train, y_train], axis=1)
test_data = pd.concat([X_test, y_test], axis=1)

In [23]:
def create_train_test_img_folder(train_df, test_df):
    splits = {'train': train_df, 'test': test_df}
    for split, df in splits.items():
        for i, row in df.iterrows():
            source_file = f'{path}/images/{row["filename"]}'
            class_name = 'pinguin' if row['class_id'] == 1 else 'turtle'
            destination_folder = f'data/{split}'
            os.makedirs(destination_folder, exist_ok=True)
            destination_file = os.path.join(destination_folder)
            shutil.copy(source_file, destination_file)


create_train_test_img_folder(train_data, test_data)

In [24]:
root_dir = 'data'
train_dir = os.path.join(root_dir, 'train')
test_dir = os.path.join(root_dir, 'test')

In [25]:
class BoundingBoxDataset(Dataset):
    def __init__(self, data, root_dir, transform=None):
        self.data = data
        self.root_dir = root_dir
        self.transform = transform

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        row = self.data.iloc[idx]
        img_name = os.path.join(self.root_dir, row['filename'])
        image = plt.imread(img_name)
        
        # Apply transformations if any
        if self.transform:
            image = self.transform(image)

        # Extract bounding box coordinates
        bbox = torch.tensor([row['xmin'], row['ymin'], row['xmax'], row['ymax']])
        
        # Extract class label
        label = torch.tensor(row['class_id'] - 1)  # Subtract 1 to make labels 0-based

        return image, label, bbox


In [26]:
# Data transformations
data_transforms = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize((255, 255)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

# Load train and test data
train_dataset = BoundingBoxDataset(train_data, root_dir=train_dir, transform=data_transforms)
test_dataset = BoundingBoxDataset(test_data, root_dir=test_dir, transform=data_transforms)

# Data loaders
batch_size = 16
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)


In [27]:
# Load pretrained VGG-16 model
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
base_model = models.vgg16(pretrained=True)

# Freeze the base model layers
for param in base_model.parameters():
    param.requires_grad = False

# Modify the classifier for dual outputs
class MultiTaskVGG16(nn.Module):
    def __init__(self):
        super(MultiTaskVGG16, self).__init__()
        
        # Base feature extractor
        self.features = base_model.features
        self.avgpool = base_model.avgpool
        self.flatten = nn.Flatten()
        
        # Shared fully connected layers
        self.shared_fc = nn.Sequential(
            nn.Linear(512 * 7 * 7, 4096),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(4096, 4096),
            nn.ReLU(),
            nn.Dropout(0.5)
        )
        
        # Classification head
        self.classifier = nn.Linear(4096, 2)  # For 2 classes: penguin and turtle
        
        # Bounding box regression head
        self.regressor = nn.Linear(4096, 4)  # For 4 coordinates: xmin, ymin, xmax, ymax

    def forward(self, x):
        x = self.features(x)
        x = self.avgpool(x)
        x = self.flatten(x)
        x = self.shared_fc(x)
        
        class_output = self.classifier(x)
        bbox_output = self.regressor(x)
        
        return class_output, bbox_output

# Initialize model
model = MultiTaskVGG16().to(device)




In [28]:
# Loss functions
classification_criterion = nn.CrossEntropyLoss()
regression_criterion = nn.MSELoss()

# Optimizer (only fine-tuning the final layers)
optimizer = Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=0.001)


In [29]:
# Training function
def train(model, train_loader, optimizer, device):
    model.train()
    running_loss = 0.0
    for images, labels, bboxes in tqdm(train_loader):
        images, labels, bboxes = images.to(device), labels.to(device), bboxes.to(device)
        
        optimizer.zero_grad()
        class_outputs, bbox_outputs = model(images)
        
        # Compute losses
        class_loss = classification_criterion(class_outputs, labels)
        bbox_loss = regression_criterion(bbox_outputs, bboxes.float())
        loss = class_loss + bbox_loss  # Combine losses
        
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item() * images.size(0)
    
    epoch_loss = running_loss / len(train_loader.dataset)
    return epoch_loss

# Evaluation function
def evaluate(model, test_loader, device):
    model.eval()
    running_loss = 0.0
    all_preds, all_labels = [], []
    
    with torch.no_grad():
        for images, labels, bboxes in tqdm(test_loader):
            images, labels, bboxes = images.to(device), labels.to(device), bboxes.to(device)
            
            class_outputs, bbox_outputs = model(images)
            
            # Compute losses
            class_loss = classification_criterion(class_outputs, labels)
            bbox_loss = regression_criterion(bbox_outputs, bboxes.float())
            loss = class_loss + bbox_loss
            
            running_loss += loss.item() * images.size(0)
            
            # Store predictions and labels for accuracy calculation
            preds = class_outputs.argmax(dim=1)
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())
    
    accuracy = accuracy_score(all_labels, all_preds)
    epoch_loss = running_loss / len(test_loader.dataset)
    return epoch_loss, accuracy


In [None]:
# Training loop
num_epochs = 12
best_accuracy = 0.0
for epoch in range(num_epochs):
    print(f"Epoch {epoch + 1}/{num_epochs}")
    
    train_loss = train(model, train_loader, optimizer, device)
    val_loss, val_accuracy = evaluate(model, test_loader, device)
    
    print(f"Train Loss: {train_loss:.4f}")
    print(f"Validation Loss: {val_loss:.4f}")
    print(f"Validation Accuracy: {val_accuracy:.4f}")
    # Save the model if it has the best accuracy so far
    if val_accuracy > best_accuracy:
        best_accuracy = val_accuracy
        torch.save(model.state_dict(), "best_model_full.pth")
        print("Saved Best Model!")



Epoch 1/12


100%|██████████| 24/24 [01:03<00:00,  2.67s/it]
100%|██████████| 6/6 [00:14<00:00,  2.38s/it]


Train Loss: 29775.4597
Validation Loss: 13989.8376
Validation Accuracy: 0.7684
Saved Best Model!
Epoch 2/12


100%|██████████| 24/24 [01:04<00:00,  2.69s/it]
100%|██████████| 6/6 [00:14<00:00,  2.37s/it]


Train Loss: 8899.8194
Validation Loss: 6521.0091
Validation Accuracy: 0.8737
Saved Best Model!
Epoch 3/12


100%|██████████| 24/24 [01:04<00:00,  2.70s/it]
100%|██████████| 6/6 [00:13<00:00,  2.33s/it]


Train Loss: 3498.8880
Validation Loss: 5411.6317
Validation Accuracy: 0.7789
Epoch 4/12


100%|██████████| 24/24 [01:04<00:00,  2.68s/it]
100%|██████████| 6/6 [00:14<00:00,  2.38s/it]


Train Loss: 2302.1399
Validation Loss: 4987.1106
Validation Accuracy: 0.8947
Saved Best Model!
Epoch 5/12


100%|██████████| 24/24 [01:04<00:00,  2.67s/it]
100%|██████████| 6/6 [00:14<00:00,  2.34s/it]


Train Loss: 2006.2944
Validation Loss: 4753.4367
Validation Accuracy: 0.8632
Epoch 6/12


100%|██████████| 24/24 [01:04<00:00,  2.69s/it]
100%|██████████| 6/6 [00:14<00:00,  2.35s/it]


Train Loss: 1909.3367
Validation Loss: 4863.8355
Validation Accuracy: 0.8211
Epoch 7/12


100%|██████████| 24/24 [01:04<00:00,  2.69s/it]
100%|██████████| 6/6 [00:14<00:00,  2.35s/it]


Train Loss: 1775.9570
Validation Loss: 4556.9384
Validation Accuracy: 0.8421
Epoch 8/12


100%|██████████| 24/24 [01:04<00:00,  2.69s/it]
100%|██████████| 6/6 [00:14<00:00,  2.41s/it]


Train Loss: 1899.5427
Validation Loss: 4235.0560
Validation Accuracy: 0.9789
Saved Best Model!
Epoch 9/12


100%|██████████| 24/24 [01:05<00:00,  2.71s/it]
100%|██████████| 6/6 [00:14<00:00,  2.34s/it]


Train Loss: 2057.3741
Validation Loss: 4674.9051
Validation Accuracy: 0.8737
Epoch 10/12


100%|██████████| 24/24 [01:04<00:00,  2.69s/it]
100%|██████████| 6/6 [00:14<00:00,  2.36s/it]


Train Loss: 2084.9178
Validation Loss: 5241.3605
Validation Accuracy: 0.9684
Epoch 11/12


100%|██████████| 24/24 [01:04<00:00,  2.70s/it]
100%|██████████| 6/6 [00:14<00:00,  2.34s/it]


Train Loss: 1653.3189
Validation Loss: 4915.6958
Validation Accuracy: 0.9158
Epoch 12/12


100%|██████████| 24/24 [01:04<00:00,  2.71s/it]
100%|██████████| 6/6 [00:14<00:00,  2.35s/it]

Train Loss: 1660.5214
Validation Loss: 4389.2209
Validation Accuracy: 0.9579





In [31]:
model.load_state_dict(torch.load("best_model_full.pth"))

# Evaluate on the test set
test_loss, test_accuracy = evaluate(model, test_loader, device)

print(f"Test Loss: {test_loss:.4f}")
print(f"Test Accuracy: {test_accuracy:.4f}")


  model.load_state_dict(torch.load("best_model_full.pth"))
100%|██████████| 6/6 [00:14<00:00,  2.48s/it]

Test Loss: 4235.0560
Test Accuracy: 0.9789





In [None]:
# Inference and saving to CSV
def predict_and_save(model, test_loader, device, output_file="submission.csv"):
    model.eval()
    predictions = []
    seen_filenames = set()  # Track unique filenames to avoid duplicates
    
    with torch.no_grad():
        for images, labels, bboxes in test_loader:  # Accepts images, labels, and bboxes
            images = images.to(device)
            class_outputs, bbox_outputs = model(images)
            
            # Get predicted class (1-based indexing)
            class_preds = class_outputs.argmax(dim=1).cpu().numpy() + 1  
            # Get bounding box predictions
            bbox_preds = bbox_outputs.cpu().numpy()
            
            # Assuming filenames are stored as part of the dataset's DataFrame
            for filename, class_id, bbox_pred, label, bbox_true in zip(
                test_loader.dataset.data['filename'], class_preds, bbox_preds, labels.cpu().numpy(), bboxes.cpu().numpy()
            ):
                # Only add unique filenames
                if filename not in seen_filenames:
                    predictions.append([filename, class_id, *bbox_pred, label, *bbox_true])
                    seen_filenames.add(filename)  # Mark this filename as seen
    
    # Save predictions to CSV
    submission_df = pd.DataFrame(predictions, columns=[
        "filename", "pred_class_id", "pred_xmin", "pred_ymin", "pred_xmax", "pred_ymax",
        "true_class_id", "true_xmin", "true_ymin", "true_xmax", "true_ymax"
    ])
    submission_df.to_csv(output_file, index=False)
    
    print(f"Saved predictions to {output_file}")

# Run the prediction and save function
predict_and_save(model, test_loader, device, output_file="submission_full_test.csv")



Saved predictions to submission_full_test.csv


In [None]:
for images in test_loader:
    print(images)

[tensor([[[[2.2489, 2.2489, 2.2489,  ..., 2.2489, 2.2489, 2.2489],
          [2.2489, 2.2489, 2.2489,  ..., 2.2489, 2.2489, 2.2489],
          [2.2489, 2.2489, 2.2489,  ..., 2.2489, 2.2489, 2.2489],
          ...,
          [2.2489, 2.2489, 2.2489,  ..., 2.2489, 2.2489, 2.2489],
          [2.2489, 2.2489, 2.2489,  ..., 2.2489, 2.2489, 2.2489],
          [2.2489, 2.2489, 2.2489,  ..., 2.2489, 2.2489, 2.2489]],

         [[2.4286, 2.4286, 2.4286,  ..., 2.4286, 2.4286, 2.4286],
          [2.4286, 2.4286, 2.4286,  ..., 2.4286, 2.4286, 2.4286],
          [2.4286, 2.4286, 2.4286,  ..., 2.4286, 2.4286, 2.4286],
          ...,
          [2.4286, 2.4286, 2.4286,  ..., 2.4286, 2.4286, 2.4286],
          [2.4286, 2.4286, 2.4286,  ..., 2.4286, 2.4286, 2.4286],
          [2.4286, 2.4286, 2.4286,  ..., 2.4286, 2.4286, 2.4286]],

         [[2.6400, 2.6400, 2.6400,  ..., 2.6400, 2.6400, 2.6400],
          [2.6400, 2.6400, 2.6400,  ..., 2.6400, 2.6400, 2.6400],
          [2.6400, 2.6400, 2.6400,  ..., 