In [7]:
import imageio
from skimage import color
from PIL import Image
import matplotlib.pyplot as plt
import math
import numpy as np
from skimage import morphology
import scipy
import random

In [8]:
image_len = 28
image_wid = 28
n_augmentation = 50


In [75]:
from skimage.transform import resize

def rescale_down_sample(img, new_len, new_wid):
    img = resize(img, (new_len, new_wid))
    assert img.shape == (new_len,new_wid), "Image is " + str(img.shape)
    return img

def rotate_image(img, theta):
    pil_img = Image.fromarray(np.uint8((1-img)*255), "L")
    rotated_img = pil_img.rotate(theta)
    return (255 - np.array(rotated_img))/255.

def unzoom_image(img, unzoom):
    assert unzoom <= 1,"unzoom must be <= 1"
    length, width = img.shape
    pad_len_inf = math.floor((28/unzoom-length)/2)
    pad_len_sup = math.ceil((28/unzoom-length)/2)
    pad_wid_inf = math.floor((28/unzoom-width)/2)
    pad_wid_sup = math.ceil((28/unzoom-width)/2)
    padded_img = np.pad(img, ((pad_len_inf, pad_len_sup),(pad_wid_inf, pad_wid_sup)), constant_values = 1)
    return scipy.ndimage.zoom(padded_img, unzoom, cval=1)

def translate(img, dx, dy):
    length, width = img.shape
    while min(list(set(img[:, width - abs(dx):].flatten()))) < 0.98:
        dx -=1
        if dx == 1:
            break
    while min(list(set(img[length-abs(dy):, :].flatten()))) <0.98:
        dy -=1
        if dy == 1:
            break
    x_moved = np.roll(img, dx, 1)
    return np.roll(x_moved, dy, 0)

def apply_all(img, theta, unzoom, dx, dy):
    return resize(translate(unzoom_image(rotate_image(img, theta),unzoom),dx,dy), (28, 28))

In [77]:
def generate_data(path = "operators/", image_len = 28, image_wid = 28, n_augmentation = 50):
    """
    path : Root path to images folder
    image_len, image_wid:  parameters of the out images
    n_augmentation: number os generated samples for each images

    output: a dictionary with contains as key the signs, each key has a list of narrays.
    """
    plus_image = color.rgb2gray(imageio.imread(path + "+.png"))
    minus_image = color.rgb2gray(imageio.imread(path + "-.png"))
    multiply_image = color.rgb2gray(imageio.imread(path + "*.png"))
    divide_image = color.rgb2gray(imageio.imread(path + "%.png"))
    equal_image = color.rgb2gray(imageio.imread(path + "=.png"))

    plus_image_ds = rescale_down_sample(plus_image, image_len, image_wid)
    minus_image_ds = rescale_down_sample(minus_image, image_len, image_wid)
    multiply_image_ds = rescale_down_sample(multiply_image, image_len, image_wid)
    divide_image_ds = rescale_down_sample(divide_image, image_len, image_wid)
    equal_image_ds = rescale_down_sample(equal_image, image_len, image_wid)


    augmented_data_set = {'+':[],'-':[],'*':[],'/':[],'=':[]}
    for i in range(n_augmentation):
        theta = random.randint(0,360) if random.uniform(0,1) > 0.2 else 0
        unzoom = random.uniform(0.5, 1) if random.uniform(0,1) > 0.2 else 1
        dx = random.randint(1, 5) if random.uniform(0,1) > 0.2 else 1
        dy = random.randint(1, 5) if random.uniform(0,1) > 0.2 else 1
        augmented_plus = apply_all(plus_image_ds,theta, unzoom, dx, dy)
        augmented_minus = apply_all(minus_image_ds,theta, unzoom, dx, dy)
        augmented_multiply = apply_all(multiply_image_ds,theta, unzoom, dx, dy)
        augmented_divide = apply_all(divide_image_ds,theta, unzoom, dx, dy)
        augmented_equal = apply_all(equal_image_ds,theta, unzoom, dx, dy)
        augmented_data_set['+'].append(augmented_plus)
        augmented_data_set['-'].append(augmented_minus)
        augmented_data_set['*'].append(augmented_multiply)
        augmented_data_set['/'].append(augmented_divide)
        augmented_data_set['='].append(augmented_equal)

    return augmented_data_set

In [234]:
data_operators = generate_data(path = "operators/", image_len = 28, image_wid = 28, n_augmentation = 2000)

------------------------------------------------------------------------------------------------

In [235]:
def data_labeled(data):
    data_labeled = np.zeros((len(data['+'])*5, 28, 28))
    labels = np.zeros((len(data['+'])*5))
    classes = {'+':10, '-': 11, '*':12, '/':13, '=':14}
    
    shift = 0
    for i in data.keys():
        for k in range(len(data[i])):
            data_labeled[k+shift] = data[i][k]
            labels[k+shift] = classes[i]
        #We need a shift in order to avoid overwritting samples, each new key we start at zero    
        shift += 100
    return data_labeled, labels

In [236]:
data_oper, labels_oper = data_labeled(data_operators)

In [238]:
print("data_oper shape: ", data_oper.shape)
print("labels_oper shape: ", labels_oper.shape)

data_oper shape:  (10000, 28, 28)
labels_oper shape:  (10000,)


In [239]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(data_oper, labels_oper, test_size=0.2, random_state=42)

In [240]:
X_train.shape, X_test.shape, y_train.shape, y_test.shape
print("X_train shape: ", X_train.shape)
print("y_train shape: ", y_train.shape)

print("X_test shape: ", X_test.shape)
print("y_test shape: ", y_test.shape)

X_train shape:  (8000, 28, 28)
y_train shape:  (8000,)
X_test shape:  (2000, 28, 28)
y_test shape:  (2000,)


----------------------------------------------------------------------------

In [241]:
import gzip
import numpy as np
import matplotlib.pyplot as plt
import os

def extract_data(filename, image_shape, image_number):
    with gzip.open(filename) as bytestream:
        bytestream.read(16)
        buf = bytestream.read(np.prod(image_shape) * image_number)
        data = np.frombuffer(buf, dtype=np.uint8).astype(np.float32)
        data = data.reshape(image_number, image_shape[0], image_shape[1])
    return data


def extract_labels(filename, image_number):
    with gzip.open(filename) as bytestream:
        bytestream.read(8)
        buf = bytestream.read(1 * image_number)
        labels = np.frombuffer(buf, dtype=np.uint8).astype(np.int64)
    return labels

In [242]:
image_shape = (28, 28)
train_set_size = 60000
test_set_size = 10000

data_part2_folder = os.path.join("/Users/user/Desktop/UPMC/EPFL/PatternRecognition/iapr-2020-master/data/lab-03-data", 'part2')

train_images_path = os.path.join(data_part2_folder, 'train-images-idx3-ubyte.gz')
train_labels_path = os.path.join(data_part2_folder, 'train-labels-idx1-ubyte.gz')
test_images_path = os.path.join(data_part2_folder, 't10k-images-idx3-ubyte.gz')
test_labels_path = os.path.join(data_part2_folder, 't10k-labels-idx1-ubyte.gz')

train_images = extract_data(train_images_path, image_shape, train_set_size)
test_images = extract_data(test_images_path, image_shape, test_set_size)
train_labels = extract_labels(train_labels_path, train_set_size)
test_labels = extract_labels(test_labels_path, test_set_size)

In [243]:
train_images.shape, test_images.shape, train_labels.shape, test_labels.shape

((60000, 28, 28), (10000, 28, 28), (60000,), (10000,))

In [244]:
train_imgs = np.concatenate((train_images, X_train), axis=0)
train_labels = np.concatenate((train_labels, y_train), axis=0)

test_imgs = np.concatenate((test_images, X_test), axis=0)
test_labels =  np.concatenate((test_labels, y_test), axis=0)

In [245]:
print("Train images shape: ", train_imgs.shape)
print("Train labels shape: ", train_labels.shape)
print("Test images shape: ", test_imgs.shape)
print("Test labels shape: ", test_labels.shape)

Train images shape:  (68000, 28, 28)
Train labels shape:  (68000,)
Test images shape:  (12000, 28, 28)
Test labels shape:  (12000,)


In [246]:
labels = []
for i in train_labels:
    if i not in labels:
        labels.append(i)

In [247]:
print("Class labels are: ", labels)
print("Number of labels: ", len(labels))


Class labels are:  [5.0, 0.0, 4.0, 1.0, 9.0, 2.0, 3.0, 6.0, 7.0, 8.0, 14.0, 11.0, 12.0, 10.0, 13.0]
Number of labels:  15


In [248]:
import torch
from torch import optim, nn
from torch.nn import functional as F
import numpy as np

In [249]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
train_images_norm = scaler.fit_transform(train_imgs.reshape(-1, 28*28)).reshape(-1, 28, 28)
test_images_norm = scaler.transform(test_imgs.reshape(-1, 28*28)).reshape(-1, 28, 28)

In [250]:
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.conv1 = nn.Conv2d(1, 20, 5, 1)
        self.conv2 = nn.Conv2d(20, 50, 5, 1)
        self.fc1 = nn.Linear(4*4*50, 500)
        self.fc2 = nn.Linear(500, 15)
    
    def forward(self, x):
        x = F.leaky_relu(self.conv1(x))
        x = F.max_pool2d(x, 2, 2)
        x = F.leaky_relu(self.conv2(x))
        x = F.max_pool2d(x, 2, 2)
        x = x.view(-1, 4*4*50)
        x = F.leaky_relu(self.fc1(x))
        x = self.fc2(x)
        return F.log_softmax(x, dim=1)
    
    def train_(self, n_epochs, learning_rate, batch_size):
        self.train()
        opt = optim.SGD(self.parameters(), lr=learning_rate)
        losses = []
        for n in range(n_epochs):
            sum_loss = 0
            for b in range(0, len(train_images_norm), batch_size):
                predictions = self(torch.Tensor(train_images_norm).narrow(0, b, batch_size).view(-1, 1, 28, 28))
                loss = F.nll_loss(predictions, torch.LongTensor(train_labels).narrow(0, b, batch_size))
                sum_loss = sum_loss + loss.item()
                self.zero_grad()
                loss.backward()
                opt.step()
            losses.append(sum_loss)
        return losses
            
    def test_(self, batch_size):
        nb_errors = 0
        for b in range(0, len(test_images_norm), batch_size):
            predictions = self(torch.Tensor(test_images_norm).view(-1, 1, 28, 28).narrow(0, b, batch_size))
            predictions_classes = torch.argmax(predictions, dim = 1)
            for k in range(batch_size):
                if torch.Tensor(test_labels)[b+k].item() != predictions_classes[k].item():
                    nb_errors += 1
        return 1 - nb_errors*1.0/len(test_images)

In [251]:
accuracies = []
for iter_ in range(5):
    cnn = CNN()
    train_losses = cnn.train_(10, 0.2, 100)
    accuracy = cnn.test_(100)
    accuracies.append(accuracy)
mean_accuracy = sum(accuracies)/5.0
print("Accuracy for CNN on test set with 10 epochs averaged over 5 runs : " + str(mean_accuracy))

Accuracy for CNN on test set with 10 epochs averaged over 5 runs : 0.79402


In [254]:
torch.save(cnn.state_dict(), "cnn1_weight")

In [255]:
model = CNN()
model.load_state_dict(torch.load("cnn1_weight"))
model.eval()

CNN(
  (conv1): Conv2d(1, 20, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(20, 50, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=800, out_features=500, bias=True)
  (fc2): Linear(in_features=500, out_features=15, bias=True)
)