# 1 Data Download

In [1]:
# Download Dataset
import requests
import zipfile
from pathlib import Path
import gdown

# Setup paths and folders names and urls
data_path = Path("data/")
image_path = data_path / "rotated_cropped/"

dataset_url = 'https://drive.google.com/uc?id=1M_UZ1tfAyssGO5_L1K2nMa-YFS4c-xMT'
train_url = 'https://drive.google.com/uc?id=1bI6yuNlP6-NqMup_fWEouIonRfcQq9u5'


# If the image folder doesn't exist, download it
if image_path.is_dir():
    print(f"{image_path} directory exists.")
else:
    print(f"Did not find {image_path} directory, creating one...")
    data_path.mkdir(parents=True, exist_ok=True)

    print("Downloading train.csv...")
    gdown.download(train_url, str(data_path / "train.csv"), quiet=False)

    print("Downloading Dataset...")
    zip_path = str(data_path / "rotated_cropped.zip")
    gdown.download(dataset_url, zip_path, quiet=False)

    # Unzip data
    with zipfile.ZipFile(zip_path, "r") as zip_ref:
        print("Unzipping dataset data...")
        zip_ref.extractall(image_path)

Did not find data/rotated_cropped directory, creating one...
Downloading train.csv...


Downloading...
From: https://drive.google.com/uc?id=1bI6yuNlP6-NqMup_fWEouIonRfcQq9u5
To: /content/data/train.csv
100%|██████████| 104k/104k [00:00<00:00, 96.7MB/s]


Downloading Dataset...


Downloading...
From: https://drive.google.com/uc?id=1M_UZ1tfAyssGO5_L1K2nMa-YFS4c-xMT
To: /content/data/rotated_cropped.zip
100%|██████████| 288M/288M [00:02<00:00, 101MB/s]


Unzipping dataset data...


## 2 Data Import

In [2]:
import pandas as pd
import numpy as np
import io
import os
from PIL import Image

from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from torchvision import transforms

In [3]:
# Path to the image directory and CSV file
train_dir = "./data/rotated_cropped"
csv_file = "./data/train.csv"

# Reading the CSV file using Pandas
data = pd.read_csv(csv_file)
data.head()

Unnamed: 0,Image,whaleID
0,w_7812.jpg,whale_48813
1,w_4598.jpg,whale_09913
2,w_3828.jpg,whale_45062
3,w_8734.jpg,whale_74162
4,w_3251.jpg,whale_99558


In [4]:
# Build a whale id lookup dict with the respective probability tensors
n = len(data['whaleID'].unique())
whale_id_dict = {}

for idx, whale_id in enumerate(sorted(data['whaleID'].unique())):
    tensor = torch.zeros(n, dtype=torch.float32)
    tensor[idx] = 1
    whale_id_dict[whale_id] = tensor

def load_image(img_name, folder):
    img_path = os.path.join(folder, img_name)
    if not os.path.exists(img_path):
        return None
    return Image.open(img_path).convert('RGB')

# Sparse Tensor with only one 1 set
whale_id_dict['whale_06967'].shape

# Clean train data to ensure that the corresponding
data_clean = data[data['Image'].isin(os.listdir(train_dir))]
data_clean.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 4206 entries, 0 to 4543
Data columns (total 2 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   Image    4206 non-null   object
 1   whaleID  4206 non-null   object
dtypes: object(2)
memory usage: 98.6+ KB


## 3 Dataset and Initialisation

In [5]:
# Define the Dataset class
class WhaleDataset:
    def __init__(self, dataframe, img_folder, whale_tensor_dict, transform=None):
        self.dataframe = dataframe
        self.img_folder = img_folder
        self.whale_tensor_dict = whale_tensor_dict
        self.transform = transform

    def __len__(self):
        return len(self.dataframe)

    def __getitem__(self, idx):
        row = self.dataframe.iloc[idx]
        image_name = row['Image']
        whale_id = row['whaleID']

        try:
            img_path = os.path.join(self.img_folder, image_name)
            img = Image.open(img_path).convert('RGB')

            if self.transform:
                img = self.transform(img)

            label = self.whale_tensor_dict[whale_id]
            return img, label
        except Exception as e:
            print(e)
            print(whale_id, image_name)


class VGG19(nn.Module):
    def __init__(self, num_classes=1000):
        super(VGG19, self).__init__()

        self.features = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(64, 64, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),

            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(128, 128, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),

            nn.Conv2d(128, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),

            nn.Conv2d(256, 512, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(512, 512, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(512, 512, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(512, 512, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),

            nn.Conv2d(512, 512, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(512, 512, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(512, 512, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(512, 512, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )

        self.avgpool = nn.AdaptiveAvgPool2d((7, 7))

        self.classifier = nn.Sequential(
            nn.Linear(512 * 7 * 7, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(4096, num_classes)
        )

    def forward(self, x):
        x = self.features(x)
        x = self.avgpool(x)
        x = x.view(x.size(0), -1)
        logits = self.classifier(x)
        probas = nn.functional.softmax(logits, dim=1)
        return logits, probas

In [6]:
num_classes = 447
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
num_epochs = 100
batch_size = 64
learning_rate = 0.01

model = VGG19(num_classes).to(device)

# Define transformations
transform = transforms.Compose([
        transforms.Resize((227,227)),
        transforms.ToTensor()
])

# Create dataset and dataloaders
train_dataset = WhaleDataset(data_clean, train_dir, whale_id_dict, transform)
test_dataset = WhaleDataset(data_clean.sample(n=1000, random_state=1), train_dir, whale_id_dict, transform)
train_loader = DataLoader(train_dataset, batch_size=batch_size)
test_loader = DataLoader(test_dataset, batch_size=batch_size)

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Train the model
total_step = len(train_loader)

In [8]:
total_step = len(train_loader)

for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        # Move tensors to the configured device
        images = images.to(device)
        labels = labels.to(device)

        # Forward pass
        logits, probas = model(images)
        loss = criterion(logits, labels)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'.format(epoch+1, num_epochs, i+1, total_step, loss.item()))

Epoch [1/100], Step [66/66], Loss: 6.0070
Epoch [2/100], Step [66/66], Loss: 5.8604
Epoch [3/100], Step [66/66], Loss: 5.7862
Epoch [4/100], Step [66/66], Loss: 5.7407
Epoch [5/100], Step [66/66], Loss: 5.7109
Epoch [6/100], Step [66/66], Loss: 5.6909
Epoch [7/100], Step [66/66], Loss: 5.6776
Epoch [8/100], Step [66/66], Loss: 5.6676
Epoch [9/100], Step [66/66], Loss: 5.6607
Epoch [10/100], Step [66/66], Loss: 5.6557
Epoch [11/100], Step [66/66], Loss: 5.6522
Epoch [12/100], Step [66/66], Loss: 5.6497
Epoch [13/100], Step [66/66], Loss: 5.6478
Epoch [14/100], Step [66/66], Loss: 5.6465
Epoch [15/100], Step [66/66], Loss: 5.6456
Epoch [16/100], Step [66/66], Loss: 5.6449
Epoch [17/100], Step [66/66], Loss: 5.6444
Epoch [18/100], Step [66/66], Loss: 5.6441
Epoch [19/100], Step [66/66], Loss: 5.6438
Epoch [20/100], Step [66/66], Loss: 5.6435
Epoch [21/100], Step [66/66], Loss: 5.6433
Epoch [22/100], Step [66/66], Loss: 5.6432
Epoch [23/100], Step [66/66], Loss: 5.6431
Epoch [24/100], Step

In [9]:
def check_correct(probas, labels):
    # Create a new tensor with zeros
    one_hot_max = torch.zeros_like(probas)
    # Find the index of the maximum value along dimension 1
    max_index = torch.argmax(probas, dim=1)
    # Set the maximum value to 1
    one_hot_vector = one_hot_max.scatter_(1, max_index.unsqueeze(1), 1)
    return (torch.all(one_hot_vector == labels, dim=1)).sum().item()

In [11]:
with torch.no_grad():
    correct = 0
    total = 0
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)
        logits, probas = model(images)
        total += labels.size(0)
        correct += check_correct(probas, labels)
        del images, labels, probas

    print('Accuracy of the network on the {} test images: {} %'.format(total, 100 * correct / total))

Accuracy of the network on the 1000 test images: 1.3 %


In [12]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [13]:
!ls "/content/drive/MyDrive/AI Project"

3_TrainDetection.ipynb  data  vgg.pt


In [14]:
model_save_name = 'vgg19_2811.pt'
path = f"/content/drive/MyDrive/AI Project/{model_save_name}"
torch.save(model.state_dict(), path)

In [None]:
model_save_name = 'vgg19_2811.pt'
path = f"/content/drive/MyDrive/AI Project/{model_save_name}"
model = VGG19(num_classes)
model.load_state_dict(torch.load(path))
model = model.to(device)
model.eval()