In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import matplotlib.pyplot as plt # graphs

import torch # tensors
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Pick the compute device once: the first CUDA GPU when available, else the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print(device)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Directory where train() writes its per-epoch checkpoint files.
checkpoints = '/kaggle/working/checkpoints/'
# exist_ok=True makes re-running this cell safe and removes the
# check-then-create race of a separate os.path.exists() test.
os.makedirs(checkpoints, exist_ok=True)
# !pip install timm

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
def get_bird_data(augmentation=0):
    """Build the bird-classification data loaders and label lookup tables.

    Args:
        augmentation: ``-1`` disables training-time augmentation, so training
            images go through the same deterministic transform as test images.
            Any other value (default ``0``) uses the random-crop and
            horizontal-flip training transform.

    Returns:
        dict with keys:
            ``'train'``: shuffled DataLoader over the training ImageFolder
                (batch size 128).
            ``'test'``: unshuffled DataLoader over the test ImageFolder
                (batch size 1).
            ``'to_class'``: ImageFolder class index -> integer class id parsed
                from the training folder name.
            ``'to_name'``: ImageFolder class index -> name taken from the
                matching line of ``names.txt``.
    """
    # The usual ImageNet channel mean/std; shared by both pipelines.
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])

    transform_train = transforms.Compose([
        transforms.Resize(299),
        transforms.RandomCrop(299, padding=8, padding_mode='edge'),  # Take 299x299 random crops
        transforms.RandomHorizontalFlip(),    # 50% of time flip image along y-axis
        transforms.ToTensor(),
        normalize,
    ])

    transform_test = transforms.Compose([
        transforms.Resize(299),
        transforms.ToTensor(),
        normalize,
    ])

    if augmentation == -1:
        # Previously misspelled as `transorm_test`, which raised NameError.
        # NOTE(review): transform_test has no crop, so images that are not
        # square may keep different sizes; with batch_size=128 the default
        # collate may then fail to stack them -- confirm against the dataset.
        transform_train = transform_test

    # NOTE: no validation split is made; the whole training folder is used for training.
    trainset = torchvision.datasets.ImageFolder(root='/kaggle/input/birds23sp/birds/train', transform=transform_train)
    trainloader = torch.utils.data.DataLoader(trainset, batch_size=128, shuffle=True, num_workers=2)

    testset = torchvision.datasets.ImageFolder(root='/kaggle/input/birds23sp/birds/test', transform=transform_test)
    testloader = torch.utils.data.DataLoader(testset, batch_size=1, shuffle=False, num_workers=2)

    # Use a context manager so the names file is always closed.
    with open("/kaggle/input/birds23sp/birds/names.txt") as names_file:
        classes = names_file.read().strip().split("\n")

    # ImageFolder assigns its own indices to folder names; the folder names
    # themselves are parsed as the integer class ids.
    class_to_idx = trainset.class_to_idx
    idx_to_class = {int(v): int(k) for k, v in class_to_idx.items()}
    idx_to_name = {k: classes[v] for k, v in idx_to_class.items()}
    return {'train': trainloader, 'test': testloader, 'to_class': idx_to_class, 'to_name': idx_to_name}

# Default loaders: training images use the random-crop / flip augmentation.
data = get_bird_data(augmentation=0)

In [None]:
class EnsembleModel(nn.Module):
    """Combine two classifiers by feeding their concatenated logits to a linear layer.

    Args:
        modelA: first sub-model. May return a plain ``(batch, num_classes)``
            tensor or a tuple whose first element is the main logits (as
            Inception v3 does in training mode).
        modelB: second sub-model, same output convention.
        num_classes: number of logits produced by each sub-model and by the
            ensemble. Defaults to 555.
    """

    def __init__(self, modelA, modelB, num_classes=555):
        super().__init__()
        self.modelA = modelA
        self.modelB = modelB
        self.classifier = nn.Linear(num_classes * 2, num_classes)

    @staticmethod
    def _main_logits(output):
        """Return the primary logits from an output that may be a tuple."""
        # Indexing a plain tensor with [0] would select the first *sample*,
        # so only unwrap genuine tuple outputs (namedtuples included).
        return output[0] if isinstance(output, tuple) else output

    def forward(self, x):
        x1 = self._main_logits(self.modelA(x))
        x2 = self._main_logits(self.modelB(x))
        combined = torch.cat((x1, x2), dim=1)
        return self.classifier(combined)

In [None]:
def train(net, dataloader, epochs=1, start_epoch=0, lr=0.01, momentum=0.9, decay=0.0005,
          verbose=1, print_every=10, state=None, schedule=None, checkpoint_path=None):
    """Train ``net`` with SGD and cross-entropy loss, optionally resuming from a checkpoint.

    Args:
        net: model to train; moved to the module-level ``device``. Its forward
            pass may return a plain logits tensor or a tuple whose first
            element is the main logits (Inception v3 in training mode).
        dataloader: iterable of batches where ``batch[0]`` holds the inputs and
            ``batch[1]`` the labels.
        epochs: training runs for epoch indices ``start_epoch .. epochs - 1``.
        start_epoch: first epoch index; replaced by ``state['epoch']`` when
            ``state`` is given.
        lr: initial SGD learning rate.
        momentum: SGD momentum.
        decay: SGD weight decay.
        verbose: when truthy, print the mean loss of each ``print_every`` window.
        print_every: number of batches per progress window.
        state: optional checkpoint dict with keys ``'net'``, ``'optimizer'``,
            ``'epoch'`` and ``'losses'`` (the format this function saves).
        schedule: optional ``{epoch_index: learning_rate}`` mapping applied at
            the start of the given epochs. ``None`` means no schedule.
        checkpoint_path: optional string prefix; after every epoch a checkpoint
            is saved to ``checkpoint_path + 'checkpoint-<epoch>.pkl'``.

    Returns:
        list of per-batch loss values, including any restored from ``state``.
    """
    if schedule is None:
        schedule = {}

    net.to(device)
    net.train()
    losses = []
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(net.parameters(), lr=lr, momentum=momentum, weight_decay=decay)

    def apply_scheduled_lr(epoch):
        """Set every param group's lr to the value scheduled for ``epoch``, if any."""
        if epoch in schedule:
            print ("Learning rate: %f"% schedule[epoch])
            for g in optimizer.param_groups:
                g['lr'] = schedule[epoch]

    # Load previous training state
    if state:
        net.load_state_dict(state['net'])
        optimizer.load_state_dict(state['optimizer'])
        start_epoch = state['epoch']
        losses = state['losses']

    # Fast forward lr schedule through already trained epochs
    for epoch in range(start_epoch):
        apply_scheduled_lr(epoch)

    min_ep_loss = float('inf')
    min_ep = 0
    for epoch in range(start_epoch, epochs):
        ep_loss = 0.0      # sum of every batch loss in this epoch
        sum_loss = 0.0     # sum of batch losses in the current print window
        num_batches = 0

        # Update learning rate when scheduled
        apply_scheduled_lr(epoch)

        for i, batch in enumerate(dataloader, 0):
            inputs, labels = batch[0].to(device), batch[1].to(device)

            optimizer.zero_grad()

            outputs = net(inputs)
            # Tuple outputs carry the main logits first (the auxiliary tensor
            # is ignored); plain tensors are used as they are.
            logits = outputs[0] if isinstance(outputs, tuple) else outputs
            loss = criterion(logits, labels)
            loss.backward()  # autograd magic, computes all the partial derivatives
            optimizer.step() # takes a step in gradient direction

            batch_loss = loss.item()
            losses.append(batch_loss)
            sum_loss += batch_loss
            # Accumulate per batch so trailing batches that do not fill a
            # whole print window still count toward the epoch loss.
            ep_loss += batch_loss
            num_batches += 1

            if i % print_every == print_every-1:    # print once per window of mini-batches
                if verbose:
                    print('[%d, %5d] loss: %.3f' % (epoch, i + 1, sum_loss / print_every))
                sum_loss = 0.0

        if checkpoint_path:
            # Separate name so the `state` argument is not overwritten.
            checkpoint = {'epoch': epoch+1, 'net': net.state_dict(), 'optimizer': optimizer.state_dict(), 'losses': losses}
            torch.save(checkpoint, checkpoint_path + 'checkpoint-%d.pkl'%(epoch+1))

        # An epoch with no batches has no meaningful loss; skip it.
        if num_batches and ep_loss < min_ep_loss:
            min_ep_loss = ep_loss
            min_ep = epoch+1
    print("Min loss at epoch (1 indexed): " + str(min_ep) + " with loss: " + str(min_ep_loss))
    return losses

In [None]:
# Actual training of the model: start from a pretrained Inception v3.
inceptionnet = torch.hub.load('pytorch/vision:v0.10.0', 'inception_v3', pretrained=True)

# Replace the stock classifier with a two-layer head ending in 555 outputs.
inceptionnet.fc = nn.Sequential(nn.Linear(2048, 1000), nn.ReLU(), nn.Linear(1000, 555))

# An Inception + MEAL ensemble was also tried, but MEAL was too much RAM:
# MEALnet = torch.hub.load('szq0214/MEAL-V2','meal_v2', 'mealv2_resnest50', pretrained=True)
# MEALnet.fc = nn.Linear(2048, 555)
# ensemble = EnsembleModel(inceptionnet, MEALnet)

# Seven epochs, lowering the learning rate at epochs 3 and 6;
# a checkpoint is written after every epoch.
losses = train(
    inceptionnet,
    data['train'],
    epochs=7,
    lr=.01,
    schedule={0: .01, 3: .001, 6: .0001},
    print_every=50,
    checkpoint_path=checkpoints,
)

In [None]:
# Resume training from the epoch-7 checkpoint and continue up to epoch 15.
# The model is rebuilt with the same architecture so the saved weights fit.
inceptionnet = torch.hub.load('pytorch/vision:v0.10.0', 'inception_v3', pretrained=True)
inceptionnet.fc = nn.Sequential(
    nn.Linear(2048, 1000),
    nn.ReLU(),
    nn.Linear(1000, 555),
)
# MEALnet = torch.hub.load('szq0214/MEAL-V2','meal_v2', 'mealv2_resnest50_cutmix', pretrained=True)
# ensemble = EnsembleModel(inceptionnet, MEALnet)
data = get_bird_data(1)
# map_location lets a checkpoint written on a GPU be restored onto whichever
# device this session is using (e.g. a CPU-only kernel).
state = torch.load(checkpoints + 'checkpoint-7.pkl', map_location=device)

# The schedule repeats the earlier entries so train() can fast-forward the
# learning rate through the epochs that were already completed.
losses = train(inceptionnet, data['train'], epochs=15, schedule={0:.01, 3:.001, 6:.0001, 10:.00002}, lr=.01, print_every=50,
               checkpoint_path=checkpoints, state = state)

In [None]:
def predict(net, dataloader, ofname, idx_to_class=None):
    """Write a ``path,class`` CSV with one prediction per sample in ``dataloader``.

    Args:
        net: trained model; moved to the module-level ``device`` and switched
            to eval mode.
        dataloader: unshuffled loader whose ``dataset.samples`` lists
            ``(path, label)`` pairs in the same order the loader yields them.
            Any batch size is supported.
        ofname: path of the CSV file to write.
        idx_to_class: optional mapping from predicted index to the class id
            written to the CSV. Defaults to the module-level
            ``data['to_class']``.
    """
    if idx_to_class is None:
        idx_to_class = data['to_class']

    net.to(device)
    net.eval()
    samples = dataloader.dataset.samples
    offset = 0  # position in `samples` of the first image of the current batch

    # The context manager closes the file even if inference raises part-way.
    with open(ofname, 'w') as out, torch.no_grad():
        out.write("path,class\n")
        for i, (images, _) in enumerate(dataloader, 0):
            if i%100 == 0:
                print(i)
            outputs = net(images.to(device))
            if isinstance(outputs, tuple):
                # Keep only the main logits when the model returns a tuple.
                outputs = outputs[0]
            _, predicted = torch.max(outputs, 1)

            # Walk the batch so batch sizes other than 1 map to the right files.
            for j, class_idx in enumerate(predicted.tolist()):
                fname, _ = samples[offset + j]
                out.write("test/{},{}\n".format(fname.split('/')[-1], idx_to_class[class_idx]))
            offset += len(predicted)
    
# NOTE: accuracy() is very slow for some reason, so unfortunately it is not practical to run.
def accuracy(net, dataloader):
    """Return the top-1 accuracy of ``net`` over ``dataloader`` as a percentage.

    Args:
        net: model to evaluate; moved to the module-level ``device`` and
            switched to eval mode.
        dataloader: iterable yielding ``(images, labels)`` batches.

    Returns:
        float in ``[0, 100]``; ``0.0`` when the loader yields no samples.
    """
    net.to(device)
    net.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for i, (images, labels) in enumerate(dataloader, 0):
            if i%100 == 0:
                print(i)
            images, labels = images.to(device), labels.to(device)
            outputs = net(images)
            if isinstance(outputs, tuple):
                # Keep only the main logits when the model returns a tuple.
                outputs = outputs[0]
            _, predicted = torch.max(outputs, 1)

            correct += (predicted == labels).sum().item()
            total += len(labels)

    # Avoid ZeroDivisionError on an empty loader.
    if total == 0:
        return 0.0
    return correct / total * 100

In [None]:
# Evaluation section: rebuild the loaders with augmentation disabled, print the
# accuracy on the training images, then write test predictions to submission.csv.
data = get_bird_data(augmentation=-1)
print(accuracy(net=inceptionnet, dataloader=data['train']))
predict(net=inceptionnet, dataloader=data['test'], ofname="submission.csv")