In [1]:
import pandas as pd
import numpy as np
from PIL import Image, ImageOps
import matplotlib.pyplot as plt
from random import randint
import glob
import os
import random
import datetime

from sklearn.datasets import fetch_mldata
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler


import torchvision
import torch
import torch.nn as nn
from torch.autograd import Variable
import torch.utils.data
import torch.nn.functional as F
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader, Dataset, TensorDataset
import torch.optim as optim
from torch.utils.data.sampler import SubsetRandomSampler

from scipy import misc
from scipy import ndimage
from skimage import feature

%matplotlib inline

In [2]:
class CamaraModelDataset(Dataset):
    """Image-folder dataset: one sub-directory per class, ``*.jpg`` files inside.

    Every directory found below ``path2data`` becomes a class whose name is the
    directory's base name. Directories are visited in sorted order so the
    ``index -> name`` mapping in ``self.labels`` is the same on every run.

    Args:
        path2data: Root directory that contains the class sub-directories.
        transforms: Optional callable applied to the RGB ``PIL.Image`` that
            ``__getitem__`` loads.
        is_train: Accepted for API compatibility; not used by this class.
        is_test: Accepted for API compatibility; not used by this class.

    Attributes:
        X: List of image file paths.
        y: List of integer class indices, aligned with ``X``.
        labels: Dict mapping class index to class (directory) name.
    """

    def __init__(self, path2data, transforms=None, is_train = False, is_test=False):

        self.transform = transforms

        self.X = []
        self.y = []
        labels = {}
        count = 0

        # Compare normalised paths so the root itself is skipped whether or not
        # the caller passed a trailing separator.
        root = os.path.normpath(path2data)

        # Walk the directory given by the caller (not a notebook-level global).
        for subdir, dirs, _ in os.walk(path2data):
            # In-place sort makes os.walk descend in a deterministic order.
            dirs.sort()
            if os.path.normpath(subdir) == root:
                continue

            labels[count] = os.path.basename(os.path.normpath(subdir))
            files = sorted(glob.glob(os.path.join(subdir, '*.jpg')))
            self.X.extend(files)
            # Plain Python ints collate to int64 tensors on every platform.
            self.y.extend([count] * len(files))

            count += 1

        self.labels = labels

    def __getitem__(self, index):
        """Return ``(image, label)`` for the sample at ``index``.

        The file is opened, converted to RGB and then passed through
        ``self.transform`` when one was supplied.
        """
        path = self.X[index]
        label = self.y[index]

        with open(path, 'rb') as f:
            with Image.open(f) as img:
                # convert() returns a new image, so it outlives the file handle.
                image = img.convert('RGB')

        if self.transform is not None:
            image = self.transform(image)

        return image, label

    def __len__(self):
        """Number of image files found."""
        return len(self.X)

In [3]:
class RGB2Gray(object):
    """Transform that collapses an RGB(A) image to a single gray channel.

    The input is converted to a NumPy array and the first three entries of the
    last axis are combined with the weights 0.299 / 0.587 / 0.114 (the usual
    luma weights). Any further channel (e.g. alpha) is ignored. The result is a
    floating-point array with the channel axis removed.
    """

    def __call__(self, image):
        pixels = np.array(image)
        luma_weights = [0.299, 0.587, 0.114]
        rgb = pixels[..., :3]
        return np.dot(rgb, luma_weights)

In [4]:
# Settings read by the LocalBinaryPatterns transform.
eps = 1e-7        # added to the histogram sum before dividing
numPoints = 24    # number of neighbour points passed to local_binary_pattern
radius = 3        # radius passed to local_binary_pattern

In [5]:
class LocalBinaryPatterns(object):
    """Transform that turns an image into a normalised LBP histogram.

    Reads the notebook-level ``numPoints``, ``radius`` and ``eps`` settings.
    The returned vector has ``numPoints + 2`` float entries.
    """

    def __call__(self, image):
        # "uniform" LBP codes for every pixel.
        codes = feature.local_binary_pattern(
            image, numPoints, radius, method="uniform")

        # One unit-wide bin per integer code in [0, numPoints + 2).
        bin_edges = np.arange(0, numPoints + 3)
        counts, _ = np.histogram(
            codes.ravel(), bins=bin_edges, range=(0, numPoints + 2))

        # Normalise; eps is added to the denominator before dividing.
        hist = counts.astype("float")
        hist = hist / (hist.sum() + eps)
        return hist

In [6]:
# Target size (in pixels) used when resizing images, and the channel count of
# an RGB image.
img_width, img_height = 128, 128
nb_channels = 3

In [7]:
# Pre-processing pipeline: resize -> gray scale -> LBP histogram.
# torchvision's Resize expects its size as (height, width); passing the values
# in that order keeps the pipeline right if the two ever differ.
data_transforms = {
    'train': transforms.Compose([
        transforms.Resize((img_height, img_width)),
        RGB2Gray(),
        LocalBinaryPatterns(),
    ])
}

In [8]:
# Root folder of the image data (one sub-folder per class).
path = "data/flowers/"

In [9]:
# Two independent dataset objects over the same folder with the same
# transform; the train/valid separation is done later by the index samplers.
dsets = {
    split_name: CamaraModelDataset(path, transforms=data_transforms['train'], is_train=True)
    for split_name in ('train', 'valid')
}

In [10]:
# Train/validation split and batching configuration.
random_seed = 3     # seed for the NumPy shuffle of the sample indices
shuffle = True      # shuffle indices before splitting
valid_size = 0.2    # fraction of samples held out for validation
batch_size = 128    # samples per DataLoader batch

In [11]:
# Split the sample indices: the first `split` (after the optional seeded
# shuffle) go to validation, the rest to training.
num_train = len(dsets['train'])
split = int(np.floor(valid_size * num_train))
indices = list(range(num_train))

if shuffle:
    np.random.seed(random_seed)
    np.random.shuffle(indices)

valid_idx = indices[:split]
train_idx = indices[split:]

train_sampler, valid_sampler = (
    SubsetRandomSampler(train_idx),
    SubsetRandomSampler(valid_idx),
)

In [12]:
# One DataLoader per split, each restricted to its own indices by its sampler.
dloader = {
    split_name: DataLoader(dsets[split_name], batch_size=batch_size, sampler=split_sampler)
    for split_name, split_sampler in (('train', train_sampler), ('valid', valid_sampler))
}

In [13]:
# Class-index -> class-name mapping collected by the dataset while scanning
# the data folder; printed for a quick sanity check.
labels = dsets['train'].labels
print('Labels of the dataset: {}'.format(labels))

Labels of the dataset: {0: 'daisy', 1: 'dandelion', 2: 'rose', 3: 'sunflower', 4: 'tulip'}


In [14]:
def CamaraPhotos(dsets, labels):
    """Draw one randomly chosen sample of ``dsets`` on the current axes.

    Args:
        dsets: Indexable dataset returning ``(image, label)`` pairs. The image
            must offer ``.numpy()`` and is treated as channel-first --
            presumably a torch tensor of shape (C, H, W); confirm against the
            transform in use.
        labels: Mapping from integer class index to class name, used for the
            plot title.
    """
    sample_index = random.randrange(0, len(dsets))
    sample, target = dsets[sample_index]

    # Move the channel axis last, as expected by imshow.
    pixels = sample.numpy().transpose((1, 2, 0))
    plt.imshow(pixels)

    plt.title('Label: {}'.format(labels[int(target)]))
    plt.axis('off')

In [15]:
# Disabled preview of three random training samples: the code is wrapped in a
# triple-quoted string, so it is only echoed as a string and never executed.
'''
plt.figure(figsize=(20,5))
for i in range(0, 3):
    plt.subplot(1,3,i+1)

    CamaraPhotos(dsets['train'], labels)
'''

"\nplt.figure(figsize=(20,5))\nfor i in range(0, 3):\n    plt.subplot(1,3,i+1)\n\n    CamaraPhotos(dsets['train'], labels)\n"

In [16]:
class LogisticRegression(nn.Module):
    """Multinomial logistic regression expressed as a single linear layer.

    ``forward`` returns raw class scores (logits); no softmax is applied here.

    Args:
        input_size: Number of input features.
        num_classes: Number of output classes.
    """

    def __init__(self, input_size, num_classes):
        super().__init__()
        self.linear = nn.Linear(input_size, num_classes)

    def forward(self, x):
        """Map a batch of feature vectors to per-class scores."""
        return self.linear(x)

In [17]:
# The model consumes LBP histograms, which have numPoints + 2 bins.
# (Raw-pixel alternative: nb_channels*img_width*img_height inputs.)
num_classes = len(labels)
input_size = numPoints + 2

model = LogisticRegression(input_size=input_size, num_classes=num_classes)

In [18]:
# Optimisation hyper-parameters.
n_epochs = 5          # passes over the training loader
learningRate = 0.5    # SGD step size

In [None]:
# Cross-entropy on raw logits. Averaging over the batch is already the
# default, so the deprecated `size_average=True` argument is dropped.
criterion = torch.nn.CrossEntropyLoss()
# Plain stochastic gradient descent over all model parameters.
optimizer = torch.optim.SGD(model.parameters(), lr=learningRate)

In [None]:
# Train the model for n_epochs passes over the training loader, printing a
# per-batch progress counter and the loss of the last batch of each epoch.
start_time = datetime.datetime.now()
n_batches = len(dloader['train'])

for epoch in range(n_epochs):
    loss = None  # stays None if the loader yields no batch
    # The batch targets get their own name so they do not overwrite the
    # notebook-level `labels` dict (class index -> class name).
    for i, (features, targets) in enumerate(dloader['train'], start=1):

        # Cast to float32 to match the dtype of the linear layer's weights.
        features = features.float()

        # Forward + Backward + Optimize
        optimizer.zero_grad()
        outputs = model(features)

        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

        print('\rProcessing Batches: {}/{}'.format(i, n_batches), end='')

    if loss is not None:
        # .item() extracts the Python float from the 0-dim loss tensor.
        print ('\nEpoch: [%d/%d], Loss: %.4f'
               % (epoch+1, n_epochs, loss.item()))

print('Execution time {0:.2f} s'.format((datetime.datetime.now() - start_time).total_seconds()))

Processing Batchs: 21/28

### Evaluate the model

In [None]:
def predict(dset_loaders, model, use_gpu=False):
    """Run ``model`` over every batch of a loader and collect the outputs.

    Inference runs in evaluation mode and without gradient tracking; the
    model's previous train/eval mode is restored before returning.

    Args:
        dset_loaders: Sized iterable yielding ``(inputs, labels)`` batches.
        model: Callable (normally an ``nn.Module``) applied to each batch.
        use_gpu: When true, each batch is moved to CUDA before the forward
            pass. The model itself is not moved by this function.

    Returns:
        ``{'pred': <all outputs concatenated>, 'true': <all labels concatenated>}``
        or ``None`` when the loader produced no batches.
    """
    predictions = []
    labels_lst = []
    ii_n = len(dset_loaders)
    start_time = datetime.datetime.now()

    # Switch layers such as dropout / batch-norm to inference behaviour, and
    # remember the previous mode so the caller's model is left unchanged.
    was_training = getattr(model, 'training', None)
    if was_training is not None:
        model.eval()

    try:
        # No autograd graph is needed for prediction.
        with torch.no_grad():
            for i, (inputs, labels) in enumerate(dset_loaders):

                if use_gpu:
                    inputs = inputs.cuda()
                    labels = labels.cuda()

                predictions.append(model(inputs.float()))
                labels_lst.append(labels)

                print('\rpredict: {}/{}'.format(i, ii_n - 1), end='')
    finally:
        if was_training is not None:
            model.train(was_training)

    print(' ok')
    print('Execution time {0:.2f} s'.format((datetime.datetime.now()- start_time).total_seconds()))
    if len(predictions) > 0:
        return {'pred': torch.cat(predictions, 0), 'true':torch.cat(labels_lst, 0) }
    return None

In [None]:
# Collect raw model outputs and true labels for both splits (train first).
result_train, result_valid = (
    predict(dloader[split_name], model) for split_name in ('train', 'valid')
)

In [None]:
def getPrediction(result):
    """Convert raw scores to class indices, in place.

    Args:
        result: Dict with ``'pred'`` (2-D tensor of per-class scores) and
            ``'true'`` (tensor of labels).

    Returns:
        The same dict object: ``'pred'`` is replaced by a NumPy array holding
        the index of the highest score in each row, and ``'true'`` by a NumPy
        copy of the labels.
    """
    best = torch.max(result['pred'], 1)
    class_index = best[1]
    result.update(
        pred=class_index.cpu().numpy(),
        true=result['true'].cpu().numpy(),
    )
    return result

In [None]:
# Turn the raw scores of both splits into predicted class indices.
result_train, result_valid = map(getPrediction, (result_train, result_valid))

In [None]:
# Number of samples whose predicted class equals the true class, per split.
train_hits = result_train['true'] == result_train['pred']
valid_hits = result_valid['true'] == result_valid['pred']
correct_train = train_hits.sum()
correct_valid = valid_hits.sum()

In [None]:
# Report correct / total per split. The total is the number of evaluated
# samples; len(loader) * batch_size would over-count whenever the last batch
# is smaller than batch_size.
print('Train: {}/{}'.format(correct_train, len(result_train['true'])))
print('Valid: {}/{}'.format(correct_valid, len(result_valid['true'])))

In [None]:
# Show the first five misclassified samples side by side, each titled with its
# predicted and actual label.
# NOTE(review): `misclassifiedIndexes`, `X_test`, `predictions` and `y_test`
# are not defined anywhere in the visible notebook, so this cell fails on a
# fresh kernel. The (20, 20) reshape also does not match the LBP histogram
# features used above -- presumably left over from another example; confirm
# and rewrite or remove.
plt.figure(figsize=(20,4))
for plotIndex, badIndex in enumerate(misclassifiedIndexes[0:5]):
    plt.subplot(1, 5, plotIndex + 1)
    plt.imshow(np.reshape(X_test[badIndex], (20,20)), cmap=plt.cm.gray)
    plt.axis('off')
    plt.title('Predicted: {}, Actual: {}'.format(predictions[badIndex], y_test[badIndex]), fontsize = 15)

### Image processing draft

In [None]:
# Load the sample image returned by SciPy's misc.face() for the filtering
# experiments below.
# NOTE(review): scipy.misc.face is deprecated in recent SciPy releases in
# favour of scipy.datasets.face -- confirm against the installed version.
image = misc.face()

In [None]:
def image_filtering(mode, image, sigma=3, size=2):
    """Smooth ``image`` with a Gaussian or a median filter.

    Args:
        mode: ``'gaussian'`` or ``'median'``.
        image: Array to filter.
        sigma: Standard deviation handed to ``ndimage.gaussian_filter``. A
            scalar applies to every axis of ``image``, including a trailing
            channel axis if there is one; pass a per-axis sequence to avoid
            that.
        size: Window size handed to ``ndimage.median_filter``.

    Returns:
        The filtered array.

    Raises:
        ValueError: If ``mode`` is not one of the supported names.
    """
    if mode == 'gaussian':
        return ndimage.gaussian_filter(image, sigma=sigma)

    if mode == 'median':
        # A median filter preserves edges better than a Gaussian and gives
        # better results for straight (low-curvature) boundaries.
        return ndimage.median_filter(image, size)

    raise ValueError(
        "Unknown filtering mode {!r}; expected 'gaussian' or 'median'".format(mode))

In [None]:
# Gaussian-smoothed version of the sample image.
filtered = image_filtering(mode='gaussian', image=image)

In [None]:
# Original, filtered, and the detail removed by the filter, side by side.
# The difference is computed in floating point: subtracting unsigned-integer
# images directly wraps around wherever filtered > image. Its magnitude is
# scaled to [0, 1] so imshow can render it for colour images too.
diff = image.astype(np.float64) - filtered.astype(np.float64)
residual = np.abs(diff)
peak = residual.max()
if peak > 0:
    residual = residual / peak

plt.figure(figsize=(20,5))
for position, panel in enumerate((image, filtered, residual), start=1):
    plt.subplot(1,3,position)
    plt.imshow(panel, cmap=plt.cm.gray)
    plt.axis('off')

In [None]:
# Gray-scale version of the sample image. No `rgb2gray` function is defined or
# imported in this notebook; the RGB2Gray transform defined above is used.
image_gray = RGB2Gray()(image)

In [None]:
# return the histogram of Local Binary Patterns


In [None]:
# Per-pixel "uniform" LBP codes of the gray image, computed here because no
# earlier cell defines `lbp` at notebook level.
lbp = feature.local_binary_pattern(image_gray, numPoints, radius, method="uniform")

# Gray image (left) next to its LBP code map (right).
plt.figure(figsize=(20,5))
plt.subplot(1,2,1)
plt.imshow(image_gray, cmap=plt.cm.gray)
plt.axis('off')
plt.subplot(1,2,2)
plt.imshow(lbp, cmap=plt.cm.gray)
plt.axis('off')