In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load in 

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list the files in the input directory

import torch
from torchvision import datasets, transforms, models
from torch.autograd import Variable
import numpy as np
from matplotlib import pyplot as plt

import os
# List the entries under ../input so the dataset paths used in later cells can be checked.
print(os.listdir("../input"))

# Any results you write to the current directory are saved as output.

# Show a single Image

In [None]:
from PIL import Image
# Directory holding the training images
data_dir = '../input/training-d'
# Pick the second directory entry as an example file
name = os.listdir(data_dir)[1]
# Last expression of the cell: the notebook renders the opened image
Image.open(f"{data_dir}/{name}")

#### Check size

In [None]:
# Re-open the example image and display its `size` attribute
pic = Image.open(f"{data_dir}/{name}")
pic.size

# Load csv file

In [None]:
# Label table for the training images; note that this rebinds `name`
# (previously an example file name) to a DataFrame
name = pd.read_csv('../input/training-d.csv')
# Number of rows, then the available column names
print(name.shape[0])
print(name.columns)

Drop unnecessary columns

In [None]:
# Columns removed from the label table; the remaining columns are the ones
# read positionally by the cells below
unused_columns = [
    'original filename',
    'scanid',
    'num',
    'database name original',
    'database name',
]
name = name.drop(unused_columns, axis=1)

Print the first 10 rows

In [None]:
# First ten rows, all columns
name.head(10)

# Check again

In [None]:
# Row 10 of the label table as a Series (index = column names)
t = name.iloc[10]
# Use .iloc for positional access: plain integer keys on a label-indexed
# Series (t[0]) rely on a deprecated positional fallback in pandas.
# Position 0 is used as the label and position 1 as the image file name.
print("Label: ", t.iloc[0])
Image.open(data_dir+"/"+t.iloc[1])

# Data loader
prepare datasets first

In [None]:
import torch
# Import the base class under an alias so the subclass below does not shadow
# the very name it inherits from.
from torch.utils.data import Dataset as TorchDataset


class Dataset(TorchDataset):
    """Map-style dataset pairing image files on disk with labels from a DataFrame.

    Every row of ``df`` is read positionally: position 0 is the label and
    position 1 is the image file name, relative to ``root``.

    Args:
        df: pandas DataFrame with one row per sample.
        root: directory that contains the image files.
        transform: optional callable applied to the loaded PIL image.
    """

    def __init__(self, df, root, transform=None):
        self.data = df
        self.root = root
        self.transform = transform

    def __len__(self):
        """Return the number of rows (samples) in the DataFrame."""
        return len(self.data)

    def __getitem__(self, index):
        """Load sample ``index`` and return ``(image, label)``."""
        item = self.data.iloc[index]

        # .iloc gives explicit positional access; integer keys on a
        # label-indexed Series (item[0]) are a deprecated fallback in pandas.
        label = item.iloc[0]
        path = self.root + "/" + item.iloc[1]

        # Force three channels: the transforms normalise with 3-channel
        # statistics, and the inference helpers in this notebook also
        # convert to RGB, so training and inference see the same format.
        image = Image.open(path).convert('RGB')

        if self.transform is not None:
            image = self.transform(image)

        return image, label

prepare data

In [None]:
# Per-channel statistics passed to Normalize
# (presumably the ImageNet values expected by the pretrained backbone - confirm)
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]


def _to_normalized_tensor():
    """Return fresh ToTensor + Normalize steps, the tail of both pipelines."""
    return [transforms.ToTensor(), transforms.Normalize(mean, std)]


# Training pipeline: augmentation first, then tensor conversion + normalisation.
# NOTE(review): ColorJitter() is called without arguments - check whether its
# defaults apply any jitter at all.
augmentations = [
    transforms.RandomRotation(10),
    transforms.RandomHorizontalFlip(),
    transforms.ColorJitter(),
]
train_transform = transforms.Compose(augmentations + _to_normalized_tensor())

# Evaluation pipeline: no augmentation
test_transform = transforms.Compose(_to_normalized_tensor())

# Both datasets wrap the same DataFrame and directory; they differ only in
# the transform applied to each image.
train_data = Dataset(name, data_dir, transform=train_transform)
test_data = Dataset(name, data_dir, transform=test_transform)

print("Trainig Samples: ", len(train_data))

### Prepare loader

Batch Size: 64

Split percentage: 20%

In [None]:
from torch.utils.data.sampler import SubsetRandomSampler

# number of samples per mini-batch
batch_size=64

# fraction of the samples held out for testing (20%)
test_size = 0.2
# fixed seed so that the same train/test split is produced on every run
split_seed = 42

# train_data and test_data wrap the same DataFrame, so one set of indices
# addresses both of them
num_train = len(train_data)

# Shuffled sample indices, drawn from a dedicated seeded generator; the
# global NumPy random state is left untouched.
indices = np.random.RandomState(split_seed).permutation(num_train).tolist()
split = int(np.floor(test_size * num_train))
# first `split` shuffled indices -> test, the remainder -> train
train_idx, test_idx = indices[split:], indices[:split]
# sanity check: the two parts together cover every sample exactly once
assert len(train_idx) + len(test_idx) == num_train
assert not set(train_idx) & set(test_idx)

# samplers restrict each loader to its own subset of indices
train_sampler = SubsetRandomSampler(train_idx)
test_sampler = SubsetRandomSampler(test_idx)

# prepare loaders
train_loader = torch.utils.data.DataLoader(
    train_data, batch_size=batch_size,
    sampler=train_sampler)

test_loader = torch.utils.data.DataLoader(
    test_data, batch_size=batch_size,
    sampler=test_sampler)

# len(loader) is the number of batches, not the number of samples
print("Train dataloader:{}".format(len(train_loader)))
print("Test dataloader:{}".format(len(test_loader)))

In [None]:
# Class names are the ten digit labels "0".."9", stored as strings
classes = [str(digit) for digit in range(10)]

classes

In [None]:
# Download Helper.py into the working directory so that `import Helper` works in the cells below.
# NOTE(review): the file is fetched from the `master` branch with no pinned commit or checksum,
# so the code that gets executed can change between runs - review it before use.
!wget https://raw.githubusercontent.com/Iamsdt/60daysofudacity/master/day22/Helper.py

# Visualize Data

In [None]:
import Helper
# Helper.visualize comes from the downloaded Helper.py and is not visible here;
# presumably it plots `num_of_image` samples from the loader with their class names - verify.
Helper.visualize(test_loader, classes, num_of_image=5)

# Create model

In [None]:
# Build torchvision's DenseNet-161 with pretrained weights
model = models.densenet161(pretrained=True)
# Print the existing classifier head, which is replaced further below
print(model.classifier)

In [None]:
# Helper.freeze_parameters is defined in Helper.py (not visible here); presumably it
# disables gradients on the existing weights so only the new head is trained - verify.
model = Helper.freeze_parameters(model)

### Change classifier

In [None]:
import torch.nn as nn
from collections import OrderedDict

# Fully connected head: 2208 input features -> 2048 -> 1024 -> 10 outputs.
# in_features=2208 has to match the input width of the classifier that is
# being replaced (see the printed `model.classifier`).
head_layers = [
    nn.Linear(in_features=2208, out_features=2048),
    nn.ReLU(),
    nn.Dropout(p=0.4),
    nn.Linear(in_features=2048, out_features=1024),
    nn.ReLU(),
    nn.Dropout(p=0.3),
    nn.Linear(in_features=1024, out_features=10),
    # log-probabilities over dim 1, paired with NLLLoss in the next section
    nn.LogSoftmax(dim=1),
]
classifier = nn.Sequential(*head_layers)

# Swap the new head into the model and display it
model.classifier = classifier
model.classifier

# Define loss and optimizer

In [None]:
# Prefer the GPU when one is available, otherwise fall back to the CPU
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
# Move the model's parameters and buffers to the selected device
model.to(device)

# Negative log-likelihood loss, matching the LogSoftmax output of the head
criterion = nn.NLLLoss()
# Only the parameters of the replaced classifier head are handed to the optimizer
optimizer = torch.optim.Adam(params=model.classifier.parameters(), lr=1e-4)

# Training

In [None]:
# Total number of epochs passed to Helper.train, written as a sum of three parts (5, 10, 5)
epoch = sum((5, 10, 5))

In [None]:
# Helper.train is defined in Helper.py (not visible here). Its three return values are
# bound to the model and to what are presumably per-epoch train/test losses - verify.
model, train_loss, test_loss = Helper.train(model, train_loader, test_loader, epoch, optimizer, criterion)

### Load best model

In [None]:
# Helper.load_latest_model is defined in Helper.py (not visible here); presumably it restores
# weights saved during training. NOTE(review): confirm whether "latest" is also the best checkpoint.
model = Helper.load_latest_model(model)

#### Check for overfitting

In [None]:
# Helper.check_overfitted is defined in Helper.py (not visible here); presumably it compares
# or plots the two loss histories returned by Helper.train - verify.
Helper.check_overfitted(train_loss, test_loss)

# Testing

In [None]:
# Helper.test is defined in Helper.py (not visible here); presumably it reports the
# overall accuracy of the model on the held-out loader - verify.
Helper.test(model, test_loader)

In [None]:
def test_per_class(model, test_loader, criterion, classes):

    total_class = len(classes)

    test_loss = 0.0
    class_correct = list(0. for i in range(total_class))
    class_total = list(0. for i in range(total_class))

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    model.eval()  # prep model for evaluation

    for data, target in test_loader:
        # Move input and label tensors to the default device
        data, target = data.to(device), target.to(device)
        # forward pass: compute predicted outputs by passing inputs to the model
        output = model(data)
        # calculate the loss
        loss = criterion(output, target)
        # update test loss
        test_loss += loss.item() * data.size(0)
        # convert output probabilities to predicted class
        _, pred = torch.max(output, 1)
        # compare predictions to true label
        correct = np.squeeze(pred.eq(target.data.view_as(pred)))
        # calculate test accuracy for each object class
        for i in range((5)):
            label = target.data[i]
            class_correct[label] += correct[i].item()
            class_total[label] += 1

    # calculate and print avg test loss
    test_loss = test_loss / len(test_loader.dataset)
    print('Test Loss: {:.6f}\n'.format(test_loss))

    for i in range(total_class):
        if class_total[i] > 0:
            print('Test Accuracy of %5s: %2d%% (%2d/%2d)' % (
                str(i), 100 * class_correct[i] / class_total[i],
                np.sum(class_correct[i]), np.sum(class_total[i])))
        else:
            print('Test Accuracy of %5s: N/A (no training examples)' % (classes[i]))

    print('\nTest Accuracy (Overall): %2d%% (%2d/%2d)' % (
        100. * np.sum(class_correct) / np.sum(class_total),
        np.sum(class_correct), np.sum(class_total)))

In [None]:
# Print the loss and the accuracy of every digit class on the held-out loader
test_per_class(model, test_loader, criterion, classes)

# Test a single image

In [None]:
from PIL import Image

def test(file):
    """Predict the digit for one image file, print it and display the image.

    Uses the notebook globals ``model``, ``device`` and ``test_transform``.

    Args:
        file: path of the image file to classify.
    """
    image = Image.open(file).convert('RGB')
    # add a leading batch dimension of size 1
    batch = test_transform(image).unsqueeze(0)
    # Make the prediction independent of whatever mode the model was left in:
    # the classifier head contains Dropout, which must be disabled here.
    model.eval()
    with torch.no_grad():
        out = model(batch.to(device))
        # the head ends in LogSoftmax, so exp() yields probabilities
        proba = torch.exp(out)
        _, top_class = proba.topk(1, dim=1)
    print(f"Predicted Label: {top_class.item()}")
    plt.imshow(np.array(image))
    plt.show()

In [None]:
from PIL import Image
from matplotlib import pyplot as plt
# Dedicated names for the test directory and file: reusing `data_dir` and
# `name` here would overwrite the training directory and the training
# DataFrame, so re-running the dataset cells afterwards would break.
test_dir = '../input/testing-d'
sample_name = os.listdir(test_dir)[4]
file = test_dir+"/"+sample_name
print(file)

# Classify the chosen file and display it
test(file)

In [None]:
def test_all(file):
    """Return the predicted class index for one image file.

    Uses the notebook globals ``model``, ``device`` and ``test_transform``.

    Args:
        file: path of the image file to classify.

    Returns:
        The index of the most probable class as a Python int.
    """
    image = Image.open(file).convert('RGB')
    # add a leading batch dimension of size 1
    batch = test_transform(image).unsqueeze(0)
    # Make the prediction independent of whatever mode the model was left in:
    # the classifier head contains Dropout, which must be disabled here.
    model.eval()
    with torch.no_grad():
        out = model(batch.to(device))
        # the head ends in LogSoftmax, so exp() yields probabilities
        proba = torch.exp(out)
        _, top_class = proba.topk(1, dim=1)
    return top_class.item()

In [None]:
# Header row first, then one [file name, predicted label] row per test image
submission = [['ImageId', 'Label']]
# Dedicated name so the training `data_dir` is not overwritten
test_dir = '../input/testing-d'
# os.listdir returns entries in arbitrary order; sorting keeps the rows of
# the submission in the same order on every run
for image_name in sorted(os.listdir(test_dir)):
    pred = test_all(test_dir+"/"+image_name)
    submission.append([image_name, pred])

print("Complete")

In [None]:
import csv

# The csv module requires files to be opened with newline='' so that the
# writer controls line endings itself; without it, extra blank rows can
# appear on platforms that translate '\n'.
with open('predection.csv', 'w', newline='') as submissionFile:
    writer = csv.writer(submissionFile)
    # `submission` already starts with the header row
    writer.writerows(submission)

print('predection Complete!')

In [None]:
# Read the file written above back in and display it as a final check
out = pd.read_csv('predection.csv')
out