## Mounting

In [0]:
from google.colab import drive
drive.mount('/content/drive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob&scope=email%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdocs.test%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive.photos.readonly%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fpeopleapi.readonly&response_type=code

Enter your authorization code:
··········
Mounted at /content/drive


## Imports Here

In [0]:
import PIL
print(PIL.PILLOW_VERSION)

4.0.0


In [0]:
# License: BSD
# Author: Chanchal Kumar Maji

from __future__ import print_function, division

import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
import time
import os
import copy

plt.ion()   # interactive mode

## Just Loading the Model

In [0]:
# Data augmentation and normalization for training
# Just normalization for validation
data_transforms = {
    'train': transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'val': transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
}

data_dir = '/content/drive/My Drive/Kaggle/dataset/'
image_datasets = {x: datasets.ImageFolder(os.path.join(data_dir, x),
                                          data_transforms[x]) 
                  for x in ['train', 'val']}
dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], batch_size=4,
                                             shuffle=True, num_workers=4)
              for x in ['train', 'val']}
dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'val']}
class_names = image_datasets['train'].classes

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [0]:
# TODO: Write a function that loads a checkpoint and rebuilds the model
def load_checkpoint(filepath):
    checkpoint = torch.load(filepath)
    model = checkpoint['model']
    #model.classifier = checkpoint['classifier']
    model.fc = nn.Linear(512, 2)
    model.load_state_dict(checkpoint['state_dict'], strict=False)
    model.class_to_idx = checkpoint['class_to_idx']
    #optimizer = checkpoint['optimizer']
    #epochs = checkpoint['epochs']
    
    for param in model.parameters():
        param.requires_grad = False
        
    return model, checkpoint['class_to_idx']

model, class_to_idx = load_checkpoint('/content/drive/My Drive/Kaggle/checkpoint.pth')

model.to(device)
model.eval()

accuracy = 0

for inputs, labels in dataloaders['val']:
    inputs, labels = inputs.to(device), labels.to(device)
    outputs = model(inputs)
    
    # Class with the highest probability is our predicted class
    equality = (labels.data == outputs.max(1)[1])

# Accuracy is number of correct predictions divided by all predictions
    accuracy += equality.type_as(torch.FloatTensor()).mean()
    
print("Test accuracy: {:.7f}".format(accuracy/len(dataloaders['val'])))


Test accuracy: 0.9931794


## Now prediction

In [0]:
import pandas as pd

df = pd.read_csv('drive/My Drive/Kaggle/SampleSubmission.csv')

In [0]:
df.head(11)

Unnamed: 0,image_id,has_oilpalm
0,img_000012018.jpg,1
1,img_000022018.jpg,1
2,img_000032017.jpg,1
3,img_000042017.jpg,1
4,img_000052017.jpg,1
5,img_000062017.jpg,1
6,img_000062018.jpg,1
7,img_000122017.jpg,1
8,img_000132018.jpg,1
9,img_000142018.jpg,1


In [0]:
import os

In [0]:
files_hold = os.listdir('drive/My Drive/Kaggle/leaderboard_holdout_data/')
files_test = os.listdir('drive/My Drive/Kaggle/leaderboard_test_data/')

In [0]:
len(files_hold), len(files_test)

(2178, 4356)

In [0]:
2178 + 4356

6534

In [0]:
from PIL import Image
from pathlib import Path

In [0]:
trans = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])

In [0]:
f = files_hold[0]
image = Image.open(Path('drive/My Drive/Kaggle/leaderboard_holdout_data/'+f))
image = trans(image)

In [0]:
image.shape

torch.Size([3, 224, 224])

In [0]:
reshaped = image.view(1, 3, 224, 224)

In [0]:
output = model(reshaped.to(device))
output

tensor([[  9.0171, -10.0433]], device='cuda:0')

In [0]:
a = torch.exp(output[0][0])
b = torch.exp(output[0][1])
p = b / (a+b)

In [0]:
p

tensor(5.2748e-09, device='cuda:0')

In [0]:
d = {}

In [0]:
c = 0
for f in files_hold:
    image = Image.open(Path('drive/My Drive/Kaggle/leaderboard_holdout_data/'+f))
    image = trans(image)
    reshaped = image.view(1, 3, 224, 224)
    output = model(reshaped.to(device))
    a = torch.exp(output[0][0])
    b = torch.exp(output[0][1])
    p = b / (a+b)
    d[f] = p
    
    c += 1
    if c % 100 == 0:
        print(c)

100
200
300
400
500
600
700
800
900
1000
1100
1200
1300
1400
1500
1600
1700
1800
1900
2000
2100


In [0]:
c ==len(d)

True

In [0]:
d_ = {}

In [0]:
c_ = 0
for f in files_test:
    image = Image.open(Path('drive/My Drive/Kaggle/leaderboard_test_data/'+f))
    image = trans(image)
    reshaped = image.view(1, 3, 224, 224)
    output = model(reshaped.to(device))
    a = torch.exp(output[0][0])
    b = torch.exp(output[0][1])
    p = b / (a+b)
    d_[f] = p
    
    c_ += 1
    if c_ % 100 == 0:
        print(c_) 

100
200
300
400
500
600
700
800
900
1000
1100
1200
1300
1400
1500
1600
1700
1800
1900
2000
2100
2200
2300
2400
2500
2600
2700
2800
2900
3000
3100
3200
3300
3400
3500
3600
3700
3800
3900
4000
4100
4200
4300


In [0]:
c_ == len(d_)

True

In [0]:
df.head()

Unnamed: 0,image_id,has_oilpalm
0,img_000012018.jpg,1
1,img_000022018.jpg,1
2,img_000032017.jpg,1
3,img_000042017.jpg,1
4,img_000052017.jpg,1


In [0]:
prob = []
for index, row in df.iterrows():
    #print(row['image_id'], row['has_oilpalm'])
    key = row['image_id']
    if key in d:
        p = d[key]
    else:
        p = d_[key]
        
    prob.append(p.cpu().detach().numpy())

In [0]:
prob[:10]

[array(0.97105145, dtype=float32),
 array(0.00329787, dtype=float32),
 array(0.99919766, dtype=float32),
 array(0.9548783, dtype=float32),
 array(0.00223055, dtype=float32),
 array(0.00126582, dtype=float32),
 array(0.00275381, dtype=float32),
 array(0.0004056, dtype=float32),
 array(0.9969722, dtype=float32),
 array(2.359375e-09, dtype=float32)]

In [0]:
ans = pd.Series(prob)

In [0]:
d = pd.read_csv('drive/My Drive/Kaggle/SampleSubmission.csv')
d['has_oilpalm'] = ans
d.head(11)

Unnamed: 0,image_id,has_oilpalm
0,img_000012018.jpg,0.97105145
1,img_000022018.jpg,0.003297872
2,img_000032017.jpg,0.99919766
3,img_000042017.jpg,0.9548783
4,img_000052017.jpg,0.0022305541
5,img_000062017.jpg,0.0012658185
6,img_000062018.jpg,0.0027538051
7,img_000122017.jpg,0.00040559805
8,img_000132018.jpg,0.9969722
9,img_000142018.jpg,2.359375e-09


In [0]:
d.to_csv('drive/My Drive/Kaggle/submission.csv', index=False)