In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import matplotlib.pyplot as plt # graphs

import torch # tensors
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Run on the first CUDA GPU when one is visible, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

# Input data files are available in the read-only "../input/" directory.
# Training checkpoints are written below /kaggle/working/ instead.

import os
checkpoints = '/kaggle/working/checkpoints/'
# exist_ok=True makes the cell safe to re-run and avoids the check-then-create race.
os.makedirs(checkpoints, exist_ok=True)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

cuda:0


In [2]:
def get_bird_data(augmentation=0, data_root='/kaggle/input/birds23sp/birds', batch_size=128, num_workers=2):
    """Build the train/test data loaders and label lookup tables for the birds dataset.

    Args:
        augmentation: Currently unused; kept so existing callers that pass it keep working.
        data_root: Directory containing the `train/` and `test/` image folders and `names.txt`.
        batch_size: Batch size of the (shuffled) training loader.
        num_workers: Worker processes used by each DataLoader.

    Returns:
        dict with keys
            'train':    shuffled DataLoader over `<data_root>/train`,
            'test':     unshuffled DataLoader over `<data_root>/test` (batch size 1),
            'to_class': ImageFolder index -> integer value of the class folder name,
            'to_name':  ImageFolder index -> line of names.txt selected by that integer.
    """
    transform_train = transforms.Compose([
        transforms.Resize(299),
        transforms.RandomCrop(299, padding=8, padding_mode='edge'), # Take 299x299 crops from padded images
        transforms.RandomHorizontalFlip(),    # 50% of time flip image along y-axis
        transforms.ToTensor(),
    ])

    transform_test = transforms.Compose([
        transforms.Resize(299),
        transforms.ToTensor(),
    ])

    trainset = torchvision.datasets.ImageFolder(root=f'{data_root}/train', transform=transform_train)
    trainloader = torch.utils.data.DataLoader(
        trainset, batch_size=batch_size, shuffle=True, num_workers=num_workers)
    # TODO: hold out ~10% of trainset (torch.utils.data.random_split with a fixed seed) as a
    # validation loader so the learning rate can be tuned against held-out accuracy.

    testset = torchvision.datasets.ImageFolder(root=f'{data_root}/test', transform=transform_test)
    # Test images are only resized (never cropped), so they are served one at a time and in
    # dataset order; shuffle=False keeps batches aligned with testset.samples.
    testloader = torch.utils.data.DataLoader(testset, batch_size=1, shuffle=False, num_workers=num_workers)

    # Read the class names with a context manager so the file handle is always closed.
    with open(f'{data_root}/names.txt') as names_file:
        classes = names_file.read().strip().split("\n")

    # ImageFolder assigns its own contiguous indices to the class folders; map them back to the
    # integer folder names and from there to the entries of names.txt.
    class_to_idx = trainset.class_to_idx
    idx_to_class = {int(idx): int(folder) for folder, idx in class_to_idx.items()}
    idx_to_name = {idx: classes[class_id] for idx, class_id in idx_to_class.items()}
    return {'train': trainloader, 'test': testloader, 'to_class': idx_to_class, 'to_name': idx_to_name}

# Build the loaders and lookup tables once; later cells read data['train'], data['test'] and data['to_class'].
data = get_bird_data()

In [3]:
def _apply_lr(optimizer, new_lr):
    """Announce `new_lr` and set it on every parameter group of `optimizer`."""
    print ("Learning rate: %f"% new_lr)
    for group in optimizer.param_groups:
        group['lr'] = new_lr


def train(net, dataloader, epochs=1, start_epoch=0, lr=0.01, momentum=0.9, decay=0.0005,
          verbose=1, print_every=10, state=None, schedule=None, checkpoint_path=None):
    """Train `net` with SGD and cross-entropy loss, optionally resuming from a checkpoint.

    Args:
        net: Model to train. Its forward pass may return a tensor of logits or a tuple whose
            first element is the logits (e.g. Inception v3 in training mode, which also
            returns auxiliary logits).
        dataloader: Iterable yielding (inputs, labels) batches.
        epochs: Epoch index at which training stops (exclusive).
        start_epoch: First epoch index to run; overridden by state['epoch'] when resuming.
        lr, momentum, decay: SGD learning rate, momentum and weight decay.
        verbose: When truthy, print the mean loss every `print_every` mini-batches.
        print_every: Number of mini-batches per progress report.
        state: Optional checkpoint dict with keys 'net', 'optimizer', 'epoch' and 'losses',
            as written by this function.
        schedule: Optional mapping epoch index -> learning rate applied from that epoch on.
        checkpoint_path: Optional path prefix; when set, 'checkpoint-<epoch>.pkl' is saved
            under it after every epoch.

    Returns:
        List with the loss of every mini-batch (including those restored from `state`).
    """
    # A None default avoids sharing one mutable dict between calls.
    if schedule is None:
        schedule = {}

    net.to(device)
    net.train()
    losses = []
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(net.parameters(), lr=lr, momentum=momentum, weight_decay=decay)

    # Load previous training state
    if state:
        net.load_state_dict(state['net'])
        optimizer.load_state_dict(state['optimizer'])
        start_epoch = state['epoch']
        # Copy so that appending new losses does not mutate the caller's checkpoint dict.
        losses = list(state['losses'])

    # Fast forward lr schedule through already trained epochs
    for epoch in range(start_epoch):
        if epoch in schedule:
            _apply_lr(optimizer, schedule[epoch])

    for epoch in range(start_epoch, epochs):
        sum_loss = 0.0

        # Update learning rate when scheduled
        if epoch in schedule:
            _apply_lr(optimizer, schedule[epoch])

        for i, batch in enumerate(dataloader, 0):
            inputs, labels = batch[0].to(device), batch[1].to(device)

            optimizer.zero_grad()

            outputs = net(inputs)
            # Models such as Inception v3 return (logits, aux_logits) while training; only the
            # main logits feed the loss. Plain tensor outputs are used as they are.
            logits = outputs[0] if isinstance(outputs, tuple) else outputs
            loss = criterion(logits, labels)
            loss.backward()  # autograd computes all the partial derivatives
            optimizer.step() # takes a step in gradient direction

            batch_loss = loss.item()
            losses.append(batch_loss)
            sum_loss += batch_loss

            if i % print_every == print_every-1:    # report every `print_every` mini-batches
                if verbose:
                    print('[%d, %5d] loss: %.3f' % (epoch, i + 1, sum_loss / print_every))
                sum_loss = 0.0

        if checkpoint_path:
            # 'epoch' stores the next epoch to run, so resuming continues where this left off.
            checkpoint = {'epoch': epoch+1, 'net': net.state_dict(),
                          'optimizer': optimizer.state_dict(), 'losses': losses}
            torch.save(checkpoint, checkpoint_path + 'checkpoint-%d.pkl'%(epoch+1))
    return losses

In [4]:
# actual training of the model
# Load the ImageNet-pretrained Inception v3 through torch.hub (torchvision v0.10.0 tag).
inceptionnet = torch.hub.load('pytorch/vision:v0.10.0', 'inception_v3', pretrained=True)
# Replace the classifier head with a 555-way linear layer (presumably one output per bird class).
inceptionnet.fc = nn.Linear(2048, 555) # This will reinitialize the layer as well
# To resume from a saved checkpoint, uncomment the next line and the `state = state` argument below.
# state = torch.load(checkpoints + 'checkpoint-5.pkl')

# schedule maps epoch index -> learning rate: 0.01 from epoch 0, 0.001 from epoch 3, 0.0001 from epoch 6.
losses = train(inceptionnet, data['train'], epochs=7, schedule={0:.01, 3:.001, 6:.0001}, lr=.01, print_every=10,
               checkpoint_path=checkpoints)#, state = state)

Downloading: "https://github.com/pytorch/vision/zipball/v0.10.0" to /root/.cache/torch/hub/v0.10.0.zip
Downloading: "https://download.pytorch.org/models/inception_v3_google-0cc3c7bd.pth" to /root/.cache/torch/hub/checkpoints/inception_v3_google-0cc3c7bd.pth
100%|██████████| 104M/104M [00:02<00:00, 44.5MB/s]


Learning rate: 0.010000
[0,    10] loss: 6.345
[0,    20] loss: 6.282
[0,    30] loss: 6.187
[0,    40] loss: 6.061
[0,    50] loss: 5.971
[0,    60] loss: 5.826
[0,    70] loss: 5.642
[0,    80] loss: 5.467
[0,    90] loss: 5.262
[0,   100] loss: 5.140
[0,   110] loss: 4.845
[0,   120] loss: 4.692
[0,   130] loss: 4.564
[0,   140] loss: 4.371
[0,   150] loss: 4.202
[0,   160] loss: 4.026
[0,   170] loss: 3.948
[0,   180] loss: 3.800
[0,   190] loss: 3.682
[0,   200] loss: 3.591
[0,   210] loss: 3.442
[0,   220] loss: 3.355
[0,   230] loss: 3.314
[0,   240] loss: 3.204
[0,   250] loss: 3.211
[0,   260] loss: 3.066
[0,   270] loss: 3.009
[0,   280] loss: 2.885
[0,   290] loss: 2.852
[0,   300] loss: 2.809
[1,    10] loss: 2.521
[1,    20] loss: 2.583
[1,    30] loss: 2.500
[1,    40] loss: 2.453
[1,    50] loss: 2.407
[1,    60] loss: 2.373
[1,    70] loss: 2.319
[1,    80] loss: 2.308
[1,    90] loss: 2.236
[1,   100] loss: 2.237
[1,   110] loss: 2.214
[1,   120] loss: 2.131
[1,   130]

In [5]:
# Rebuild the network with the same architecture so the checkpoint weights can be loaded into it.
inceptionnet = torch.hub.load('pytorch/vision:v0.10.0', 'inception_v3', pretrained=True)
inceptionnet.fc = nn.Linear(2048, 555) # This will reinitialize the layer as well
# NOTE(review): get_bird_data does not use its `augmentation` argument, so passing 1 builds the
# same pipeline as get_bird_data().
data = get_bird_data(1)
# Checkpoint written after the 7th epoch by the previous training cell.
state = torch.load(checkpoints + 'checkpoint-7.pkl')

# Resume at the epoch stored in the checkpoint and train up to (not including) epoch index 8.
losses = train(inceptionnet, data['train'], epochs=8, schedule={0:.01, 3:.001, 6:.0001}, lr=.01, print_every=10,
               checkpoint_path=checkpoints, state = state)

Using cache found in /root/.cache/torch/hub/pytorch_vision_v0.10.0


Learning rate: 0.010000
Learning rate: 0.001000
Learning rate: 0.000100
[7,    10] loss: 0.666
[7,    20] loss: 0.682
[7,    30] loss: 0.696
[7,    40] loss: 0.691
[7,    50] loss: 0.702
[7,    60] loss: 0.656
[7,    70] loss: 0.632
[7,    80] loss: 0.716
[7,    90] loss: 0.662
[7,   100] loss: 0.728
[7,   110] loss: 0.670
[7,   120] loss: 0.671
[7,   130] loss: 0.645
[7,   140] loss: 0.675
[7,   150] loss: 0.718
[7,   160] loss: 0.666
[7,   170] loss: 0.689
[7,   180] loss: 0.748
[7,   190] loss: 0.670
[7,   200] loss: 0.711
[7,   210] loss: 0.672
[7,   220] loss: 0.671
[7,   230] loss: 0.713
[7,   240] loss: 0.616
[7,   250] loss: 0.702
[7,   260] loss: 0.725
[7,   270] loss: 0.701
[7,   280] loss: 0.710
[7,   290] loss: 0.715
[7,   300] loss: 0.703


In [6]:
def predict(net, dataloader, ofname, idx_to_class=None):
    """Write the predicted class of every sample in `dataloader` to a CSV submission file.

    Args:
        net: Trained model; run in eval mode, its output is treated as per-class scores.
        dataloader: Loader over a dataset exposing `.samples` (list of (path, label) pairs, as
            ImageFolder does). It must iterate in dataset order (shuffle=False) so that
            predictions line up with `.samples`; any batch size is supported.
        ofname: Path of the CSV file to write; it gets a "path,class" header followed by one
            "test/<file name>,<class>" row per sample.
        idx_to_class: Mapping from predicted index to the class id to write. Defaults to the
            notebook-level `data['to_class']`.
    """
    if idx_to_class is None:
        idx_to_class = data['to_class']

    samples = dataloader.dataset.samples
    net.to(device)
    net.eval()

    sample_pos = 0  # index into `samples` of the next prediction to write
    # The context manager closes the file even if inference fails part-way through.
    with open(ofname, 'w') as out, torch.no_grad():
        out.write("path,class\n")
        for i, (images, _labels) in enumerate(dataloader, 0):
            if i%100 == 0:
                print(i)
            outputs = net(images.to(device))
            predicted = outputs.argmax(dim=1)

            # One row per image in the batch, not just one row per batch.
            for class_idx in predicted.tolist():
                fname, _ = samples[sample_pos]
                out.write("test/{},{}\n".format(os.path.basename(fname), idx_to_class[class_idx]))
                sample_pos += 1
    

def accuracy(net, dataloader):
    """Return the top-1 accuracy of `net` over `dataloader`, as a percentage.

    Args:
        net: Model to evaluate; run in eval mode, its output is treated as per-class scores.
        dataloader: Iterable yielding (images, labels) batches.

    Returns:
        Python float in [0, 100]; 0.0 when the loader yields no samples.
    """
    net.to(device)
    net.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for i, (images, labels) in enumerate(dataloader, 0):
            if i%100 == 0:
                print(i)
            images, labels = images.to(device), labels.to(device)
            predicted = net(images).argmax(dim=1)

            # Compare on the tensors' own device; .item() turns the count into a plain int.
            correct += (predicted == labels).sum().item()
            total += labels.size(0)

    # Guard against an empty loader instead of dividing by zero.
    if total == 0:
        return 0.0
    return correct / total * 100

In [7]:

# evaluation section
# Optional sanity check: accuracy on the training loader (left disabled).
# print(accuracy(inceptionnet, data['train']))
# Write one predicted class per test image to submission.csv in the working directory.
predict(inceptionnet, data['test'], "submission.csv")

0
100
200
300
400
500
600
700
800
900
1000
1100
1200
1300
1400
1500
1600
1700
1800
1900
2000
2100
2200
2300
2400
2500
2600
2700
2800
2900
3000
3100
3200
3300
3400
3500
3600
3700
3800
3900
4000
4100
4200
4300
4400
4500
4600
4700
4800
4900
5000
5100
5200
5300
5400
5500
5600
5700
5800
5900
6000
6100
6200
6300
6400
6500
6600
6700
6800
6900
7000
7100
7200
7300
7400
7500
7600
7700
7800
7900
8000
8100
8200
8300
8400
8500
8600
8700
8800
8900
9000
9100
9200
9300
9400
9500
9600
9700
9800
9900
