In [1]:
import pandas as pd
from PIL import Image
import numpy as np
import torch
import torch.nn as nn
from torchvision import transforms
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader, Dataset
from torchvision.models import vgg16
import os

In [2]:
# Seed the PyTorch and NumPy global random number generators with a fixed value.
torch.manual_seed(0)
np.random.seed(0)

## Reading Data

In [3]:
# Load the training metadata CSV and print a summary of its columns.
data = pd.read_csv("../input/happy-whale-train-csv/train.csv")
data.info()

In [4]:
# Fix some typos in the species labels.
# The result is assigned back to the column: `data.species.replace(..., inplace=True)`
# is a chained in-place call on a temporary Series, which leaves `data` untouched
# when pandas Copy-on-Write is active.
data['species'] = data['species'].replace({"globis": "short_finned_pilot_whale",
                                           "pilot_whale": "short_finned_pilot_whale",
                                           "kiler_whale": "killer_whale",
                                           "bottlenose_dolpin": "bottlenose_dolphin"})

In [5]:
# Number of rows per species label.
data['species'].value_counts()

In [6]:
def data_clean_and_load(data):
    """Keep the six target species and load their images as a channels-first array.

    Parameters
    ----------
    data : pandas.DataFrame
        Metadata frame; the 'image' (file name) and 'species' columns are read.

    Returns
    -------
    X : numpy.ndarray
        Images stacked as (n_samples, 3, height, width). The files read here are
        expected to be 128x128, giving (n_samples, 3, 128, 128).
    Y : numpy.ndarray
        1-D array with the species label of every kept row, aligned with X.
    """
    # Select the 6 species
    # Dolphin: Bottlenose Dolphin, Dusky Dolphin, Spinner Dolphin
    # Whales: Beluga, Blue Whale, Killer Whale
    species_columns = ['species_bottlenose_dolphin',
                       'species_beluga',
                       'species_blue_whale',
                       'species_killer_whale',
                       'species_dusky_dolphin',
                       'species_spinner_dolphin']
    image_dir = '../input/jpeg-happywhale-128x128/train_images-128-128/train_images-128-128/'

    data_cleaned = pd.get_dummies(data[['image', 'species']], columns = ['species'])[['image'] + species_columns]
    # Drop the instances that don't belong to the 6 species
    drop_index = data_cleaned[data_cleaned.drop(columns = 'image').sum(axis = 1) != 1].index
    data_cleaned = data_cleaned.drop(drop_index)

    # Read all images and convert them into np.array
    X = []
    for img_name in data_cleaned['image']:
        # The context manager closes the file handle as soon as the pixels are copied;
        # convert('RGB') guarantees three channels so every array has the same shape.
        with Image.open(image_dir + img_name) as image:
            X.append(np.asarray(image.convert('RGB')))
    # PIL yields height x width x channel arrays. Moving the channel axis with
    # transpose keeps each image intact; a plain reshape to (-1, 3, 128, 128)
    # would only reinterpret the memory and scramble the pixels.
    X = np.ascontiguousarray(np.stack(X, axis = 0).transpose(0, 3, 1, 2))

    # Target: species label of the kept rows, selected by column name so the
    # result does not depend on which other columns the frame carries.
    Y = data.loc[data_cleaned.index, 'species'].to_numpy()
    return X, Y
    
# Build the image array X and the matching species-label array Y.
X, Y = data_clean_and_load(data)

In [None]:
# Shape of the image array returned by data_clean_and_load.
X.shape

In [7]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples for validation; random_state fixes the split.
X_train, X_val, Y_train, Y_val = train_test_split(X, Y, test_size=0.2, random_state=0)

In [8]:
from imblearn.over_sampling import SMOTE
from collections import Counter
from sklearn.preprocessing import OneHotEncoder

In [9]:
# One-hot encode the species labels.
# The encoder is fitted on the training labels only and then reused for the
# validation labels, so both matrices share the same column order. Re-fitting on
# the validation labels could produce a different layout if a class were missing.
enc2 = OneHotEncoder()
Y_train = enc2.fit_transform(Y_train[:,np.newaxis]).toarray()
Y_val = enc2.transform(Y_val[:,np.newaxis]).toarray()

In [None]:
# Split the validation arrays (X_val, Y_val) once more into a smaller
# train/validation pair; the *_ss arrays feed the baseline-model cells.
X_train_ss, X_val_ss, Y_train_ss, Y_val_ss = train_test_split(X_val, Y_val, test_size=0.2, random_state=0)

## Oversample

In [None]:
# SMOTE operates on 2-D feature matrices: flatten every image to one row,
# resample, then restore the per-image dimensions recorded in `original`.
original = X_train.shape
flat_train = X_train.reshape(original[0], -1)

sampler = SMOTE(random_state=0)
X_resampled, Y_resampled = sampler.fit_resample(flat_train, Y_train)
X_resampled = X_resampled.reshape(-1, *original[1:])

In [None]:
def count_imb(Y):
    """Print the count and percentage share of every distinct label in ``Y``.

    ``Y`` is any sized iterable of hashable labels; classes are printed in the
    order in which they are first encountered.
    """
    total = len(Y)
    for label, n_label in Counter(Y).items():
        share = n_label / total * 100
        print('Class=%s, n=%d (%.3f%%)' % (label, n_label, share))

In [None]:
# Class distribution of the full label array Y (before the train/validation split).
count_imb(Y)

In [None]:
# Encode only when the resampled targets are still a 1-D label vector.
# If `Y_resampled` is already a 2-D one-hot matrix, `[:, np.newaxis]` would turn
# it into a 3-D array, which OneHotEncoder rejects.
enc = OneHotEncoder()
if np.ndim(Y_resampled) == 1:
    Y_resampled = enc.fit_transform(Y_resampled[:,np.newaxis]).toarray()

## Baseline Models

In [None]:
from sklearn.model_selection import cross_val_score

def model_train_and_score(model, X, Y, X_val, Y_val):
    """Return the mean cross-validation score of ``model`` on ``(X, Y)``.

    ``X_val`` and ``Y_val`` are accepted but not used; scoring comes solely from
    ``cross_val_score`` with its default settings.
    """
    fold_scores = cross_val_score(model, X, Y)
    return np.mean(fold_scores)

In [None]:
# KNN
from sklearn.neighbors import KNeighborsClassifier
from sklearn import linear_model as LM
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier

In [None]:
# Number of features per flattened image.
dim = 128*128*3

# Flatten the images to rows and turn the one-hot targets back into class indices.
train_x_ss, val_x_ss = (images.reshape(-1, dim) for images in (X_train_ss, X_val_ss))
train_y_ss, val_y_ss = (np.argmax(one_hot, axis=1) for one_hot in (Y_train_ss, Y_val_ss))

# Oversampled copy of the baseline training set.
sampler = SMOTE(random_state=0)
X_ss_resampled, Y_ss_resampled = sampler.fit_resample(train_x_ss, train_y_ss)

In [None]:
# Class distribution of the baseline training labels before SMOTE resampling.
count_imb(train_y_ss)


          Without SMOTE        With SMOTE
KNN:    0.6761318681318681  0.5780756114505837
 
LR:     0.7373289007910857  0.5664851619786406

DT:     0.6213721180780004  0.5246923831236833


In [None]:
# The three baseline classifiers that are compared.
KNN = KNeighborsClassifier(n_neighbors=5)
LogisticReg = LM.LogisticRegression(
    penalty='none',
    tol=1e-3,
    solver='lbfgs',
    max_iter=50,
)
DT = DecisionTreeClassifier(max_depth=10, random_state=0)

# (header, features, labels) for each variant of the training data.
training_variants = (
    ('Without SMOTE:', train_x_ss, train_y_ss),
    ('\nWith SMOTE:', X_ss_resampled, Y_ss_resampled),
)

for model in (KNN, LogisticReg, DT):
    print('\n =================================')
    for header, features, targets in training_variants:
        print(header)
        print(model_train_and_score(model, features, targets, val_x_ss, val_y_ss))
    print('\n')

## Data Loader

In [10]:
class whale_dolphin(Dataset):
    """Dataset of channels-first images with integer class labels.

    Parameters
    ----------
    X : array-like
        Images indexed along the first axis; each item is a channels-first
        array (3, height, width), e.g. (3, 128, 128).
    Y : array-like
        One-hot label matrix of shape (n_samples, n_classes); it is reduced to
        class indices with argmax.
    """
    def __init__(self, X, Y):
        self.imgs = X
        self.labels = np.argmax(Y, axis = 1)

    def __len__(self):
        return len(self.imgs)

    def __getitem__(self, index):
        """Return ``(image, label)``: a float32 CHW tensor and the class index."""
        # The arrays are already channels-first, so they are converted directly.
        # torchvision's ToTensor treats a 3-D ndarray as H x W x C and permutes
        # it, which would scramble a (3, H, W) array.
        img_data = torch.as_tensor(np.ascontiguousarray(self.imgs[index]))
        if img_data.dtype == torch.uint8:
            # Same scaling ToTensor applies to 8-bit images: [0, 255] -> [0, 1].
            img_data = img_data.to(torch.float32).div(255)
        else:
            img_data = img_data.to(torch.float32)
        img_label = self.labels[index]
        return img_data, img_label

In [11]:
# Wrap the training and validation arrays in whale_dolphin datasets.
train_dataset = whale_dolphin(X_train, Y_train)
# (disabled) dataset built from the SMOTE-resampled training arrays:
#train_dataset_rs = whale_dolphin(X_resampled, Y_resampled)
val_dataset = whale_dolphin(X_val, Y_val)

In [12]:
# Shuffle only the training batches. Validation is evaluated in a fixed order so
# the per-epoch validation loss (a mean over batches) is computed on the same
# batch composition every time.
train_dataloader = DataLoader(train_dataset, batch_size = 64, shuffle = True)
val_dataloader = DataLoader(val_dataset, batch_size = 64, shuffle = False)

In [None]:
# Sanity check: print the tensor shape and the labels of the first validation batch.
# The loop variables use their own names so the metadata DataFrame `data` is not
# overwritten by a tensor.
for batch_images, batch_labels in val_dataloader:
    print(batch_images.shape)
    print(batch_labels)
    break

## NN training

In [13]:
def train(model, device, n_epochs, optimizer, criterion, train_dataloader, val_dataloader, out_dir=None):
    """Train ``model`` and evaluate it on the validation loader after every epoch.

    Parameters
    ----------
    model : torch.nn.Module
        Network to optimise; it is moved to ``device``.
    device : str or torch.device
        Device on which the model and the batches are placed.
    n_epochs : int
        Number of passes over ``train_dataloader``.
    optimizer : torch.optim.Optimizer
        Optimizer already bound to the parameters that should be updated.
    criterion : callable
        Loss taking ``(outputs, labels)`` and returning a scalar tensor.
    train_dataloader, val_dataloader : iterable
        Loaders yielding ``(images, labels)`` batches; they must support ``len``.
    out_dir : str, optional
        When given, ``last.pt`` is written after every epoch and ``best.pt``
        whenever the validation accuracy matches or beats the best so far.
        The default ``None`` writes nothing.

    Returns
    -------
    (train_loss, val_loss) : tuple of list of float
        Per-epoch mean of the batch losses on the training and validation loaders.
    """
    model.to(device)
    train_loss = []
    val_loss = []
    val_acc_list = []
    best_acc = float('-inf')
    if out_dir is not None:
        os.makedirs(out_dir, exist_ok=True)

    for epoch in range(n_epochs):
        print(f"\nEpoch {epoch + 1}")

        # ---- training pass ----
        model.train()
        sum_loss = 0.0
        for images, labels in train_dataloader:
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(images)  # torch.size([batch_size, num_class])
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            sum_loss += loss.item()
        train_loss.append(sum_loss / len(train_dataloader))

        # ---- validation pass ----
        print("Waiting Val...")
        model.eval()  # switched once per epoch; model.train() above switches back
        v_loss_sum = 0.0
        correct = 0
        total = 0
        with torch.no_grad():
            for images, labels in val_dataloader:
                images, labels = images.to(device), labels.to(device)
                outputs = model(images)
                _, predicted = torch.max(outputs, dim=1)
                v_loss_sum += criterion(outputs, labels).item()
                total += labels.size(0)
                # .item() keeps the running count a plain Python number
                correct += (predicted == labels).sum().item()

        val_loss.append(v_loss_sum / len(val_dataloader))
        acc_val = 100. * correct / total
        print('Val\'s ac is: %.3f%%' % acc_val)
        val_acc_list.append(acc_val)

        if out_dir is not None:
            torch.save(model.state_dict(), os.path.join(out_dir, "last.pt"))
            if acc_val >= best_acc:
                best_acc = acc_val
                torch.save(model.state_dict(), os.path.join(out_dir, "best.pt"))
                print(f"save epoch {epoch} model")

    return train_loss, val_loss

## ResNet (custom implementation)

In [None]:
class BottleNeck(nn.Module):
    """
    Residual bottleneck block used by the ResNet-50 below.

    Main path: 1x1 conv -> 3x3 conv -> 1x1 conv, each followed by batch
    normalisation; the last convolution widens the output to
    ``out_channels * 4``. The shortcut (optionally passed through
    ``identity_downsample``) is added before the final ReLU.
    """
    def __init__(self, in_channels, out_channels, identity_downsample=None, stride=1):
        super().__init__()
        self.expansion = 4
        widened = out_channels * self.expansion

        # 1x1 convolution: spatial size unchanged
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=1, padding=0)
        self.bn1 = nn.BatchNorm2d(out_channels)
        # 3x3 convolution: halves the spatial size when stride=2, keeps it when stride=1
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=stride, padding=1)
        self.bn2 = nn.BatchNorm2d(out_channels)
        # 1x1 convolution: spatial size unchanged, channels widened
        self.conv3 = nn.Conv2d(out_channels, widened, kernel_size=1, stride=1, padding=0)
        self.bn3 = nn.BatchNorm2d(widened)
        self.relu = nn.ReLU()
        self.identity_downsample = identity_downsample

    def forward(self, x):
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))

        # identity shortcut, projected when a downsample module was supplied
        shortcut = x if self.identity_downsample is None else self.identity_downsample(x)
        return self.relu(out + shortcut)

In [None]:
class ResNet50(nn.Module):
    """ResNet-style classifier assembled from bottleneck blocks.

    Parameters
    ----------
    bottleneck : callable
        Block constructor called as
        ``bottleneck(in_channels, out_channels, identity_downsample, stride=stride)``;
        its output must have ``out_channels * 4`` channels.
    layers : sequence of int
        Number of blocks in each of the four stages (e.g. ``[3, 4, 6, 3]``).
    image_channels : int
        Number of channels of the input images.
    class_nums : int
        Number of output classes (size of the final linear layer).
    """
    def __init__(self, bottleneck, layers, image_channels, class_nums):
        super().__init__()
        # initialize the in_channels after the first max pool layer
        self.in_channels = 64

        # conv1
        self.conv1 = nn.Conv2d(image_channels, 64, kernel_size=7, stride=2, padding=3)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU()

        # Res net bottleneck layers conv2, conv3, conv4, conv5
        # conv2
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.conv2 = self._make_layer(bottleneck, layers[0], out_channels=64, stride=1)
        # conv3
        self.conv3 = self._make_layer(bottleneck, layers[1], out_channels=128, stride=2)
        # conv4
        self.conv4 = self._make_layer(bottleneck, layers[2], out_channels=256, stride=2)
        # conv5
        self.conv5 = self._make_layer(bottleneck, layers[3], out_channels=512, stride=2)

        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512 * 4, class_nums)

    def forward(self, x):
        """Map a batch of images to class logits of shape (batch, class_nums)."""
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)

        x = self.maxpool(x)

        x = self.conv2(x)
        x = self.conv3(x)
        x = self.conv4(x)
        x = self.conv5(x)

        x = self.avgpool(x)
        x = x.reshape(x.shape[0], -1)  # reshape the four-d tensor to 2-d matrix
        x = self.fc(x)

        return x

    def _make_layer(self, bottleneck, block_nums, out_channels, stride):
        """Build one stage made of exactly ``block_nums`` bottleneck blocks.

        The first block receives ``stride`` and, when the spatial size or the
        channel count changes, a 1x1 conv + batch-norm projection for its
        shortcut. The remaining ``block_nums - 1`` blocks keep shape and stride 1.
        ``self.in_channels`` is updated to the stage's output width.
        """
        expanded_channels = out_channels * 4
        identity_downsample = None
        if stride != 1 or self.in_channels != expanded_channels:
            identity_downsample = nn.Sequential(
                nn.Conv2d(self.in_channels, expanded_channels, kernel_size=1, stride=stride),
                nn.BatchNorm2d(expanded_channels)
            )
        layers = [bottleneck(self.in_channels, out_channels, identity_downsample, stride=stride)]
        self.in_channels = expanded_channels

        # One block already exists, so add block_nums - 1 more whether or not a
        # projection shortcut was needed. Tying the count to the projection
        # would give block_nums + 1 blocks when no projection is needed.
        for _ in range(block_nums - 1):
            layers.append(bottleneck(self.in_channels, out_channels))

        return nn.Sequential(*layers)


In [None]:
# Custom ResNet50 ([3, 4, 6, 3] bottleneck blocks per stage, 3 input channels,
# 6 classes) trained for 20 epochs with Adam and cross-entropy loss.
learning_rate = 1e-4
criterion = nn.CrossEntropyLoss()
model_1 = ResNet50(bottleneck=BottleNeck, layers=[3, 4, 6, 3], image_channels=3, class_nums=6)
optimizer = torch.optim.Adam(params=model_1.parameters(), lr=learning_rate)
train(
    model=model_1,
    device='cuda',
    n_epochs=20,
    optimizer=optimizer,
    criterion=criterion,
    train_dataloader=train_dataloader,
    val_dataloader=val_dataloader,
)

In [None]:
# ResNet50 Without SMOTE
# Same architecture and hyper-parameters as above, trained on the
# non-resampled training loader.
learning_rate = 1e-4
criterion = nn.CrossEntropyLoss()
resnet = ResNet50(bottleneck=BottleNeck, layers=[3, 4, 6, 3], image_channels=3, class_nums=6)
optimizer = torch.optim.Adam(params=resnet.parameters(), lr=learning_rate)
train(
    model=resnet,
    device='cuda',
    n_epochs=20,
    optimizer=optimizer,
    criterion=criterion,
    train_dataloader=train_dataloader,
    val_dataloader=val_dataloader,
)

## VGG16

In [None]:
# VGG16 Without SMOTE
# Load the pretrained network and freeze every existing parameter.
vgg_model = vgg16(pretrained=True)
for param in vgg_model.parameters():
    param.requires_grad = False

# Append a new 1000 -> 6 linear layer to the classifier. It is created after the
# freeze, so it is the only part of the model with trainable parameters.
species_head = nn.Linear(in_features=1000, out_features=6)
vgg_model.classifier.add_module(name='7', module=species_head)
print(vgg_model)

In [None]:
# Optimise only the parameters that still require gradients, for 50 epochs.
learning_rate = 1e-4
criterion = nn.CrossEntropyLoss()
trainable_params = [p for p in vgg_model.parameters() if p.requires_grad]
optimizer = torch.optim.Adam(trainable_params, lr=learning_rate)
train_loss_vgg, val_loss_vgg = train(
    model=vgg_model,
    device='cuda',
    n_epochs=50,
    optimizer=optimizer,
    criterion=criterion,
    train_dataloader=train_dataloader,
    val_dataloader=val_dataloader,
)

In [None]:
from matplotlib import pyplot

# The x-axis is derived from the recorded history, so the plot stays valid when
# the number of training epochs changes.
epochs_vgg = np.arange(len(train_loss_vgg))
pyplot.plot(epochs_vgg, train_loss_vgg, color = 'blue', label = 'train')
pyplot.plot(epochs_vgg, val_loss_vgg, color = 'red', label = 'validation')
pyplot.xlabel('Epoch')
pyplot.ylabel('Loss')
pyplot.title('VGG16: training vs. validation loss')
pyplot.legend();

## AlexNet

In [None]:
from torchvision.models import alexnet

# Pretrained AlexNet with every existing parameter frozen.
alex_model = alexnet(pretrained=True)
for param in alex_model.parameters():
    param.requires_grad = False

# New 1000 -> 6 linear layer appended to the classifier; created after the
# freeze, so it is the only trainable part.
species_head = nn.Linear(in_features=1000, out_features=6)
alex_model.classifier.add_module(name='7', module=species_head)
print(alex_model)

learning_rate = 1e-4
criterion = nn.CrossEntropyLoss()
trainable_params = [p for p in alex_model.parameters() if p.requires_grad]
optimizer = torch.optim.Adam(trainable_params, lr=learning_rate)
train_loss_alex, val_loss_alex = train(
    model=alex_model,
    device='cuda',
    n_epochs=50,
    optimizer=optimizer,
    criterion=criterion,
    train_dataloader=train_dataloader,
    val_dataloader=val_dataloader,
)

In [16]:
from matplotlib import pyplot

# The x-axis must have exactly one entry per recorded epoch. A hard-coded
# np.arange(37) does not match the 50-entry history returned by
# train(..., 50, ...) and makes pyplot.plot raise a shape-mismatch error.
epochs_alex = np.arange(len(train_loss_alex))
pyplot.plot(epochs_alex, train_loss_alex, color = 'blue', label = 'train')
pyplot.plot(epochs_alex, val_loss_alex, color = 'red', label = 'validation')
pyplot.xlabel('Epoch')
pyplot.ylabel('Loss')
pyplot.title('AlexNet: training vs. validation loss')
pyplot.legend();

In [15]:
from torchvision.models import alexnet

# AlexNet without pretrained weights; all parameters are trained.
alex_model = alexnet(pretrained=False)

# New 1000 -> 6 linear layer appended to the classifier.
species_head = nn.Linear(in_features=1000, out_features=6)
alex_model.classifier.add_module(name='7', module=species_head)
print(alex_model)

learning_rate = 1e-4
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=alex_model.parameters(), lr=learning_rate)
train_loss_alex, val_loss_alex = train(
    model=alex_model,
    device='cuda',
    n_epochs=50,
    optimizer=optimizer,
    criterion=criterion,
    train_dataloader=train_dataloader,
    val_dataloader=val_dataloader,
)

## ResNet (torchvision)

In [None]:
from torchvision.models import resnet50

# Pretrained torchvision ResNet-50 with every existing parameter frozen.
resnet_model = resnet50(pretrained=True)
for param in resnet_model.parameters():
    param.requires_grad = False

# Replace the final fully connected layer with a new 2048 -> 6 layer; it is
# created after the freeze, so it is the only trainable part.
resnet_model.fc = nn.Linear(in_features=2048, out_features=6)

print(resnet_model)

learning_rate = 1e-4
criterion = nn.CrossEntropyLoss()
trainable_params = [p for p in resnet_model.parameters() if p.requires_grad]
optimizer = torch.optim.Adam(trainable_params, lr=learning_rate)
train_loss_res, val_loss_res = train(
    model=resnet_model,
    device='cuda',
    n_epochs=50,
    optimizer=optimizer,
    criterion=criterion,
    train_dataloader=train_dataloader,
    val_dataloader=val_dataloader,
)

In [None]:
# The x-axis is derived from the recorded history, so the plot stays valid when
# the number of training epochs changes.
epochs_res = np.arange(len(train_loss_res))
pyplot.plot(epochs_res, train_loss_res, color = 'blue', label = 'train')
pyplot.plot(epochs_res, val_loss_res, color = 'red', label = 'validation')
pyplot.xlabel('Epoch')
pyplot.ylabel('Loss')
pyplot.title('ResNet-50: training vs. validation loss')
pyplot.legend();

In [17]:
from torchvision.models import resnet50

# torchvision ResNet-50 without pretrained weights; nothing is frozen, so the
# requires_grad filter below keeps every parameter.
resnet_model = resnet50(pretrained=False)

# Replace the final fully connected layer with a 2048 -> 6 layer.
resnet_model.fc = nn.Linear(in_features=2048, out_features=6)

print(resnet_model)

learning_rate = 1e-4
criterion = nn.CrossEntropyLoss()
trainable_params = [p for p in resnet_model.parameters() if p.requires_grad]
optimizer = torch.optim.Adam(trainable_params, lr=learning_rate)
train_loss_res, val_loss_res = train(
    model=resnet_model,
    device='cuda',
    n_epochs=50,
    optimizer=optimizer,
    criterion=criterion,
    train_dataloader=train_dataloader,
    val_dataloader=val_dataloader,
)