In [1]:
import numpy as np
import matplotlib.pyplot as plt

import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import pandas as pd
from sklearn.model_selection import KFold
import h5py

import os

# Run on the first CUDA GPU when one is visible, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

# Training configuration shared by the functions below.
EPOCHS = 7              # passed as the `epochs` bound to train() / train_validate()
BATCH_SIZE = 128        # mini-batch size of the training DataLoaders
IMG_SIZE = 224          # target size used by Resize / RandomCrop / CenterCrop
K_FOLDS = 5             # number of splits used by kfold_train()
VALIDATION_PERC = 0.2   # fraction of indices held out for validation in split_train()
# Image-folder layout read through torchvision's ImageFolder.
TRAIN_DATA_PATH = '../input/birds21wi/birds/train'
TEST_DATA_PATH = '../input/birds21wi/birds/test'
# HDF5 files read by BirdH5Dataset ('images' and 'labels' datasets).
# NOTE(review): file names suggest these hold pre-resized 224px data - confirm.
H5_TRAIN_PATH = '../input/224-compressed-train/birds21wi_train.h5'
H5_VALID_PATH = '../input/224-compressed-valid/birds21wi_valid.h5'

# K-fold cross-validation is run on the training data only.
# The final CNN should simply be trained on the full training set.

# Training-time augmentation pipeline.
transform_train = transforms.Compose([
        transforms.Resize(IMG_SIZE),
        transforms.RandomCrop(IMG_SIZE, padding=8, padding_mode='edge'), # Take IMG_SIZE x IMG_SIZE crops from images padded by 8 px (edge-replicated)
        transforms.RandomHorizontalFlip(),    # 50% of the time mirror the image left-to-right
        transforms.ToTensor(),
    ])
    
# Evaluation-time pipeline: no randomness, just resize and a centered crop.
transform_test = transforms.Compose([
        transforms.Resize(IMG_SIZE),
        transforms.CenterCrop(IMG_SIZE),
        transforms.ToTensor(),
    ])

class BirdH5Dataset(torch.utils.data.Dataset):
    """Map-style dataset reading samples from an HDF5 file.

    The file is expected to contain two datasets, 'images' and 'labels',
    indexed in lockstep along their first axis.

    Args:
        in_file: Path of the HDF5 file to read.
    """

    def __init__(self, in_file):
        self.in_file = in_file
        # Number of samples, read lazily on the first len() call and then
        # reused so that len() does not reopen the file every time.
        # Assumes the file is not modified while the dataset is in use.
        self._length = None

    def __len__(self):
        """Return the number of entries in the 'labels' dataset."""
        if self._length is None:
            with h5py.File(self.in_file, 'r') as file:
                self._length = file['labels'].shape[0]
        return self._length

    def __getitem__(self, idx):
        """Return the (image, label) pair at position `idx` as tensors."""
        # The file is opened per access, so no open handle is ever stored on
        # the dataset object (and none is shared with DataLoader workers).
        with h5py.File(self.in_file, 'r') as file:
            image = file['images'][idx]
            label = file['labels'][idx]
        return (torch.tensor(image), torch.tensor(label))
        

def get_bird_dataloader():
    """Build image-folder DataLoaders for the train and test directories.

    Returns:
        dict with keys:
            'train':    shuffled DataLoader over TRAIN_DATA_PATH (BATCH_SIZE).
            'test':     unshuffled DataLoader over TEST_DATA_PATH (batch size 1).
            'to_class': ImageFolder index -> integer folder name.
            'to_name':  ImageFolder index -> name from names.txt.
    """
    trainset = torchvision.datasets.ImageFolder(root=TRAIN_DATA_PATH, transform=transform_train)
    trainloader = torch.utils.data.DataLoader(trainset, batch_size=BATCH_SIZE, shuffle=True, num_workers=4)

    testset = torchvision.datasets.ImageFolder(root=TEST_DATA_PATH, transform=transform_test)
    testloader = torch.utils.data.DataLoader(testset, batch_size=1, shuffle=False, num_workers=4)

    # Context manager so the file handle is closed deterministically.
    with open("/kaggle/input/birds21wi/birds/names.txt") as names_file:
        classes = names_file.read().strip().split("\n")

    # Backward mapping to original class ids (from folder names) and species name (from names.txt)
    class_to_idx = trainset.class_to_idx
    idx_to_class = {int(v): int(k) for k, v in class_to_idx.items()}
    idx_to_name = {k: classes[v] for k, v in idx_to_class.items()}

    return {'train': trainloader, 'test': testloader, 'to_class': idx_to_class, 'to_name': idx_to_name}

def get_kfold_bird_dataset():
    """Build the HDF5-backed train/validation datasets plus the class names.

    Returns:
        dict with keys:
            'train':      BirdH5Dataset over H5_TRAIN_PATH.
            'validation': BirdH5Dataset over H5_VALID_PATH.
            'classes':    list of lines read from names.txt.
    """
    trainset = BirdH5Dataset(H5_TRAIN_PATH)
    validset = BirdH5Dataset(H5_VALID_PATH)

    # Context manager so the file handle is closed deterministically.
    with open("/kaggle/input/birds21wi/birds/names.txt") as names_file:
        classes = names_file.read().strip().split("\n")

    return {'train' : trainset, 'validation' : validset, 'classes' : classes}

def get_kfold_bird_dataset_old():
    """Build image-folder datasets for a train/validation split (pre-HDF5 variant).

    Both datasets read the same TRAIN_DATA_PATH directory; they differ only in
    the transform applied (augmenting vs. deterministic).

    Returns:
        dict with keys:
            'train':      ImageFolder with transform_train.
            'validation': ImageFolder with transform_test.
            'to_class':   ImageFolder index -> integer folder name.
            'to_name':    ImageFolder index -> name from names.txt.
    """
    trainset = torchvision.datasets.ImageFolder(root=TRAIN_DATA_PATH, transform=transform_train)
    validset = torchvision.datasets.ImageFolder(root=TRAIN_DATA_PATH, transform=transform_test)

    # Context manager so the file handle is closed deterministically.
    with open("/kaggle/input/birds21wi/birds/names.txt") as names_file:
        classes = names_file.read().strip().split("\n")

    # Backward mapping to original class ids (from folder names) and species name (from names.txt)
    class_to_idx = trainset.class_to_idx
    idx_to_class = {int(v): int(k) for k, v in class_to_idx.items()}
    idx_to_name = {k: classes[v] for k, v in idx_to_class.items()}
    return {'train': trainset, 'validation' : validset, 'to_class': idx_to_class, 'to_name': idx_to_name}

def imshow(img):
    """Display a tensor image with matplotlib.

    The tensor's axes are permuted (0, 1, 2) -> (1, 2, 0) before plotting,
    i.e. the leading axis is moved to the end.
    """
    pixels = img.numpy().transpose((1, 2, 0))
    plt.imshow(pixels)
    plt.show()
    
def split_train(trainset, validset):
    """Hold out VALIDATION_PERC of the data, resume a ResNet-50 from a checkpoint and report validation accuracy.

    One shuffled index list (sized from `validset`) is split in two: the first
    VALIDATION_PERC share is sampled from `validset` for validation, the rest
    is sampled from `trainset` for training.

    Args:
        trainset: Dataset used for training batches.
        validset: Dataset used for validation.
            NOTE(review): both datasets are indexed with the same index list,
            so they are presumably the same samples in the same order - confirm.

    Side effects:
        Reseeds NumPy's global RNG, may download the torchvision hub model,
        reads the checkpoint file, writes checkpoints to './' for every epoch
        that train() runs, and prints the validation accuracy.
    """
    np.random.seed(42)
    validset_size = len(validset)
    indices = list(range(validset_size))
    split = int(np.floor(VALIDATION_PERC * validset_size))
    np.random.shuffle(indices)
    train_sampler = torch.utils.data.SubsetRandomSampler(indices[split:])
    valid_sampler = torch.utils.data.SubsetRandomSampler(indices[:split])
    trainloader = torch.utils.data.DataLoader(trainset, batch_size=BATCH_SIZE, sampler=train_sampler, num_workers=4)
    validloader = torch.utils.data.DataLoader(validset, batch_size=1, sampler=valid_sampler, num_workers=4)

    resnet = torch.hub.load('pytorch/vision:v0.6.0', 'resnet50', pretrained=True)
    # Replace the classification head with a 555-way output layer.
    resnet.fc = nn.Linear(2048, 555)

    # map_location keeps the load working when the checkpoint was saved from a
    # GPU but this run fell back to the CPU.
    state = torch.load('../input/resnet50/checkpoint-7.pkl', map_location=device)
    # train() resumes at state['epoch']; if that is already >= EPOCHS it only
    # restores the weights and runs no further epochs.
    train(resnet, trainloader, epochs=EPOCHS, schedule={0:.01, 5:.001, 10:.0001}, print_every=10, state=state, checkpoint_path='./')
    valid_accu = accuracy(resnet, validloader)
    print("Validation accuracy: %f" % valid_accu)
    
    
def kfold_train(trainset, validset):
    """Run K_FOLDS-fold cross-validation of a fine-tuned ResNet-50.

    For each fold a fresh pretrained ResNet-50 gets a 555-way head, is trained
    on the fold's training indices (sampled from `trainset`), saved to
    './model-fold-<fold>.pth' and scored on the fold's held-out indices
    (sampled from `validset`). Per-fold and average accuracies are printed.

    Args:
        trainset: Dataset used for training batches.
        validset: Dataset used for validation; its length defines the index
            range that is split into folds.
            NOTE(review): both datasets are indexed with the same indices, so
            they are presumably the same samples in the same order - confirm.
    """
    valid_results = {}  # fold number -> validation accuracy in percent
    torch.manual_seed(42)  # fix random seed for torch (samplers, layer init)

    # torch.manual_seed does not affect scikit-learn: without random_state the
    # shuffled folds would differ from run to run.
    kfold = KFold(n_splits=K_FOLDS, shuffle=True, random_state=42)

    validset_size = len(validset)
    indices = list(range(validset_size))
    for fold, (train_idx, valid_idx) in enumerate(kfold.split(indices)):
        print('Fold', fold)
        train_sampler = torch.utils.data.SubsetRandomSampler(train_idx)
        valid_sampler = torch.utils.data.SubsetRandomSampler(valid_idx)
        trainloader = torch.utils.data.DataLoader(trainset, batch_size=BATCH_SIZE, sampler=train_sampler, num_workers=0)
        validloader = torch.utils.data.DataLoader(validset, batch_size=1, sampler=valid_sampler, num_workers=0)

        # Start every fold from the same pretrained weights.
        resnet = torch.hub.load('pytorch/vision:v0.6.0', 'resnet50', pretrained=True)
        resnet.fc = nn.Linear(2048, 555)

        train(resnet, trainloader, epochs=EPOCHS, schedule={0:.01, 6:.001}, print_every=10, checkpoint_path='./')

        torch.save(resnet.state_dict(), f'./model-fold-{fold}.pth')

        valid_accu = accuracy(resnet, validloader)
        print('Validation Accuracy for fold %d: %d %%' % (fold, 100.0 * valid_accu))
        valid_results[fold] = 100 * valid_accu

    print(f'K-FOLD CROSS VALIDATION RESULTS FOR {K_FOLDS} FOLDS')
    print('--------------------------------')
    # Accumulate under a name that does not shadow the builtin sum().
    total = 0.0
    for key, value in valid_results.items():
        print(f'Fold {key}: {value} %')
        total += value
    print(f'Validation Average: {total/len(valid_results)} %')
    
def train_validate(net, dataloader, validloader, epochs=1, start_epoch=0, lr=0.01, momentum=0.9, decay=0.0005,
          verbose=1, state=None, schedule=None, checkpoint_path=None):
    """Train `net` with SGD and report validation accuracy after every epoch.

    Args:
        net: Model to train; moved to the module-level `device`.
        dataloader: Iterable of training batches; element 0 is the inputs,
            element 1 the labels.
        validloader: Loader handed to accuracy() after each epoch.
        epochs: Exclusive upper bound of the epoch counter.
        start_epoch: First epoch to run (overridden by `state['epoch']`).
        lr, momentum, decay: SGD learning rate, momentum and weight decay.
        verbose: Accepted for signature parity with train(); not used here.
        state: Optional checkpoint dict with keys 'net', 'optimizer', 'epoch'
            and 'losses', as written by this function.
        schedule: Optional dict mapping epoch number -> learning rate to
            switch to at the start of that epoch. None means no schedule.
        checkpoint_path: If truthy, string prefix to which
            'checkpoint-<epoch>.pkl' is appended after each epoch (so a
            directory must end with a path separator).

    Returns:
        List with the loss of every mini-batch (including those restored from
        `state`).
    """
    # None stands in for "no schedule" so no mutable default is shared between calls.
    if schedule is None:
        schedule = {}

    net.to(device)
    net.train()
    losses = []
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(net.parameters(), lr=lr, momentum=momentum, weight_decay=decay)

    # Load previous training state
    if state:
        net.load_state_dict(state['net'])
        optimizer.load_state_dict(state['optimizer'])
        start_epoch = state['epoch']
        losses = state['losses']

    # Fast forward lr schedule through already trained epochs
    for epoch in range(start_epoch):
        if epoch in schedule:
            print ("Learning rate: %f"% schedule[epoch])
            for g in optimizer.param_groups:
                g['lr'] = schedule[epoch]

    for epoch in range(start_epoch, epochs):
        # accuracy() at the end of the previous epoch switched the model to
        # eval mode; switch back so every epoch really trains in train mode.
        net.train()
        sum_loss = 0.0

        # Update learning rate when scheduled
        if epoch in schedule:
            print ("Learning rate: %f"% schedule[epoch])
            for g in optimizer.param_groups:
                g['lr'] = schedule[epoch]

        count = 0
        for batch in dataloader:
            count += 1
            inputs, labels = batch[0].to(device), batch[1].to(device)

            optimizer.zero_grad()

            outputs = net(inputs)
            loss = criterion(outputs, labels)
            loss.backward()  # computes the gradients of the loss
            optimizer.step() # takes a step in gradient direction

            losses.append(loss.item())
            sum_loss += loss.item()
        if checkpoint_path:
            state = {'epoch': epoch+1, 'net': net.state_dict(), 'optimizer': optimizer.state_dict(), 'losses': losses}
            torch.save(state, checkpoint_path + 'checkpoint-%d.pkl'%(epoch+1))
        print('epoch', epoch)
        # An empty loader yields no batches; report NaN instead of dividing by zero.
        print('loss', sum_loss / count if count else float('nan'))
        valid_accu = accuracy(net, validloader)
        print("Validation accuracy: %f" % valid_accu)
        print()
    return losses
    
def train(net, dataloader, epochs=1, start_epoch=0, lr=0.01, momentum=0.9, decay=0.0005,
          verbose=1, print_every=10, state=None, schedule=None, checkpoint_path=None):
    """Train `net` with SGD and cross-entropy loss.

    Args:
        net: Model to train; moved to the module-level `device`.
        dataloader: Iterable of training batches; element 0 is the inputs,
            element 1 the labels.
        epochs: Exclusive upper bound of the epoch counter.
        start_epoch: First epoch to run (overridden by `state['epoch']`).
        lr, momentum, decay: SGD learning rate, momentum and weight decay.
        verbose: If truthy, print the running mean loss every `print_every`
            mini-batches.
        print_every: Number of mini-batches between progress reports.
        state: Optional checkpoint dict with keys 'net', 'optimizer', 'epoch'
            and 'losses', as written by this function.
        schedule: Optional dict mapping epoch number -> learning rate to
            switch to at the start of that epoch. None means no schedule.
        checkpoint_path: If truthy, string prefix to which
            'checkpoint-<epoch>.pkl' is appended after each epoch (so a
            directory must end with a path separator).

    Returns:
        List with the loss of every mini-batch (including those restored from
        `state`). If the restored epoch is already >= `epochs`, no training
        step is run and the restored losses are returned unchanged.
    """
    # None stands in for "no schedule" so no mutable default is shared between calls.
    if schedule is None:
        schedule = {}

    net.to(device)
    net.train()
    losses = []
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(net.parameters(), lr=lr, momentum=momentum, weight_decay=decay)

    # Load previous training state
    if state:
        net.load_state_dict(state['net'])
        optimizer.load_state_dict(state['optimizer'])
        start_epoch = state['epoch']
        losses = state['losses']

    # Fast forward lr schedule through already trained epochs
    for epoch in range(start_epoch):
        if epoch in schedule:
            print ("Learning rate: %f"% schedule[epoch])
            for g in optimizer.param_groups:
                g['lr'] = schedule[epoch]

    for epoch in range(start_epoch, epochs):
        sum_loss = 0.0

        # Update learning rate when scheduled
        if epoch in schedule:
            print ("Learning rate: %f"% schedule[epoch])
            for g in optimizer.param_groups:
                g['lr'] = schedule[epoch]

        for i, batch in enumerate(dataloader, 0):
            inputs, labels = batch[0].to(device), batch[1].to(device)

            optimizer.zero_grad()

            outputs = net(inputs)
            loss = criterion(outputs, labels)
            loss.backward()  # computes the gradients of the loss
            optimizer.step() # takes a step in gradient direction

            losses.append(loss.item())
            sum_loss += loss.item()

            # Report (and reset) the running loss every `print_every` mini-batches.
            if i % print_every == print_every-1:
                if verbose:
                    print('[%d, %5d] loss: %.3f' % (epoch, i + 1, sum_loss / print_every))
                sum_loss = 0.0
        if checkpoint_path:
            state = {'epoch': epoch+1, 'net': net.state_dict(), 'optimizer': optimizer.state_dict(), 'losses': losses}
            torch.save(state, checkpoint_path + 'checkpoint-%d.pkl'%(epoch+1))
    return losses

def smooth(x, size):
    """Return the moving average of `x` over windows of `size` samples.

    Only fully overlapping windows are kept ('valid' convolution), so for
    len(x) >= size the result has len(x) - size + 1 entries.
    """
    window = np.ones(size) / size
    averaged = np.convolve(x, window, mode='valid')
    return averaged

def predict(net, dataloader, ofname, to_class=None):
    """Write the model's predictions for every sample to a CSV file.

    The CSV has a 'path,class' header followed by one
    'test/<file name>,<class>' row per sample.

    Args:
        net: Model to evaluate; moved to the module-level `device` and put in
            eval mode.
        dataloader: Loader whose dataset exposes `samples`, a sequence of
            (path, ...) pairs. It must iterate in dataset order (no shuffling)
            because file names are looked up by running position. Any batch
            size is supported.
        ofname: Path of the CSV file to write.
        to_class: Optional mapping from predicted index to the class id that
            should be written. When None the raw predicted index is written.
    """
    net.to(device)
    net.eval()
    samples = dataloader.dataset.samples
    offset = 0  # position in `samples` of the first item of the current batch
    # The context manager closes the file even if inference raises.
    with open(ofname, 'w') as out, torch.no_grad():
        out.write("path,class\n")
        for i, (images, _) in enumerate(dataloader, 0):
            if i%100 == 0:
                print(i)
            outputs = net(images.to(device))
            _, predicted = torch.max(outputs.data, 1)
            batch_predictions = predicted.tolist()
            for j, prediction in enumerate(batch_predictions):
                fname, _ = samples[offset + j]
                if to_class is not None:
                    prediction = to_class[prediction]
                out.write("test/{},{}\n".format(fname.split('/')[-1], prediction))
            offset += len(batch_predictions)

def accuracy(net, dataloader):
    """Return the fraction of samples in `dataloader` that `net` classifies correctly.

    The model is moved to the module-level `device` and left in eval mode.
    The batch index is printed every 100 batches as a progress indicator.
    """
    print('Start accuracy tests')
    net.to(device)
    net.eval()
    n_seen = 0
    n_right = 0
    with torch.no_grad():
        for batch_no, (images, labels) in enumerate(dataloader):
            if batch_no % 100 == 0:
                print(batch_no)
            images = images.to(device)
            labels = labels.to(device)
            # Index of the largest score along dim 1 is the predicted class.
            predicted = torch.max(net(images).data, 1)[1]
            n_right += (predicted == labels).sum().item()
            n_seen += labels.size(0)
    return n_right / n_seen

def write_accuracy(train_accu, valid_accu, output_filename):
    """Write the training and validation accuracy to a text file.

    Args:
        train_accu: Training accuracy, formatted with '%f'.
        valid_accu: Validation accuracy, formatted with '%f'.
        output_filename: Path of the file to (over)write.
    """
    # The context manager closes the file even if formatting or writing raises.
    with open(output_filename, 'w') as out:
        out.write("Training accuracy: %f\n" % train_accu)
        out.write("Validation accuracy: %f" % valid_accu)
    
def predict_given_state():
    """Restore a ResNet-50 from a saved checkpoint and write test predictions to 'preds.csv'.

    Side effects:
        May download the torchvision hub model, reads the checkpoint file,
        writes checkpoints to './' for every epoch that train() runs, and
        writes 'preds.csv'.
    """
    data = get_bird_dataloader()
    resnet = torch.hub.load('pytorch/vision:v0.6.0', 'resnet50', pretrained=True)
    # Replace the classification head with a 555-way output layer.
    resnet.fc = nn.Linear(2048, 555)
    # map_location keeps the load working when the checkpoint was saved from a
    # GPU but this run fell back to the CPU.
    state = torch.load('../input/resnet50/checkpoint-7.pkl', map_location=device)
    # train() resumes at state['epoch']; if that is already >= EPOCHS it only
    # restores the weights and runs no further epochs.
    train(resnet, data['train'], epochs=EPOCHS, schedule={0:.01, 5:.001, 10:.0001}, state=state, print_every=10, checkpoint_path='./')
    predict(resnet, data['test'], "preds.csv")
    
    
# Script entry point: everything below runs when the cell is executed.
# (Earlier variant, disabled; no get_bird_data function is defined in this cell.)
#data = get_bird_data()
# 1) Load the HDF5-backed datasets and evaluate the checkpointed model on a
#    held-out VALIDATION_PERC split.
data = get_kfold_bird_dataset()
split_train(trainset=data['train'], validset=data['validation'])
# 2) Restore the checkpoint again and write the test-set predictions to preds.csv.
predict_given_state()

#data = get_bird_dataloader()
#resnet = torch.hub.load('pytorch/vision:v0.6.0', 'resnet50', pretrained=True)
        #print(resnet)
#resnet.fc = nn.Linear(2048, 555)
#state = torch.load('../input/resnet50/checkpoint-5.pkl')
#losses = train(resnet, data['train'], epochs=EPOCHS, schedule={0:.01, 5:.001, 10:.0001}, state=state, print_every=10, checkpoint_path='./')
#predict(resnet, data['test'], "preds.csv")


"""
dataloader = torch.utils.data.DataLoader(data['train'], batch_size=1, shuffle=False, num_workers=4)
dataiter = iter(dataloader)
images, labels = dataiter.next()
print(images.shape)
imshow(images[0])
print(labels[0].item(), data['classes'][labels[0].item()])


file = h5py.File(H5_PATH, 'r')
print(file["images"].shape)
print(file["labels"].shape)
im_torch = torch.tensor(file["images"][0])
lb = file['labels'][0]
imshow(im_torch)
classes = open("/kaggle/input/birds21wi/birds/names.txt").read().strip().split("\n")
print(classes[lb], lb)
"""



#kfold_train(trainset=data['train'], validset=data['validation'])

# Use pretrained resnet
#resnet = torch.hub.load('pytorch/vision:v0.6.0', 'resnet18', pretrained=True)
#resnet.fc = nn.Linear(512, 555)
#if torch.cuda.is_available():
 #   print('resnet set to cuda')
  #  resnet.cuda()
#print('load state')
#state = torch.load('../input/splittrainmodel/checkpoint-5.pkl', map_location=torch.device('cpu'))
#resnet.load_state_dict(state['net'])
#print('load state done')

#losses = train(resnet, data['train'], epochs=EPOCHS, lr=.01, print_every=10, checkpoint_path='./')

#dataiter = iter(data['test'])
#images, labels = dataiter.next()
#imshow(images[0])
#print(labels[0].item(), data['to_name'][labels[0].item()])
#outputs = resnet(images)
#_, predicted = torch.max(outputs.data, 1)
#print(data['to_name'][predicted[0].item()], predicted[0].item())

#plt.plot(smooth(losses,50))

#predict(resnet, data['validation'], "validpreds.csv")
#train_accu = accuracy(resnet, data['train'])
#valid_accu = accuracy(resnet, data['validation'])
#print("Training accuracy: %f" % train_accu)
#print("Validation accuracy: %f" % valid_accu)
# Display any open matplotlib figures.
# NOTE(review): the active code above does not call imshow() or plt.plot(), so
# this probably shows nothing unless one of the commented-out lines is re-enabled.
plt.show()

cuda:0


Downloading: "https://github.com/pytorch/vision/archive/v0.6.0.zip" to /root/.cache/torch/hub/v0.6.0.zip
Downloading: "https://download.pytorch.org/models/resnet50-19c8e357.pth" to /root/.cache/torch/hub/checkpoints/resnet50-19c8e357.pth


  0%|          | 0.00/97.8M [00:00<?, ?B/s]

Learning rate: 0.010000
Learning rate: 0.001000
Start accuracy tests
0
100
200
300
400
500
600
700
800
900
1000
1100
1200
1300
1400
1500
1600
1700
1800
1900
2000
2100
2200
2300
2400
2500
2600
2700
2800
2900
3000
3100
3200
3300
3400
3500
3600
3700
3800
3900
4000
4100
4200
4300
4400
4500
4600
4700
4800
4900
5000
5100
5200
5300
5400
5500
5600
5700
5800
5900
6000
6100
6200
6300
6400
6500
6600
6700
6800
6900
7000
7100
7200
7300
7400
7500
7600
7700
Validation accuracy: 0.748574


Using cache found in /root/.cache/torch/hub/pytorch_vision_v0.6.0


Learning rate: 0.010000
Learning rate: 0.001000
0
100
200
300
400
500
600
700
800
900
1000
1100
1200
1300
1400
1500
1600
1700
1800
1900
2000
2100
2200
2300
2400
2500
2600
2700
2800
2900
3000
3100
3200
3300
3400
3500
3600
3700
3800
3900
4000
4100
4200
4300
4400
4500
4600
4700
4800
4900
5000
5100
5200
5300
5400
5500
5600
5700
5800
5900
6000
6100
6200
6300
6400
6500
6600
6700
6800
6900
7000
7100
7200
7300
7400
7500
7600
7700
7800
7900
8000
8100
8200
8300
8400
8500
8600
8700
8800
8900
9000
9100
9200
9300
9400
9500
9600
9700
9800
9900
