In [1]:
import pandas as pd
import numpy as np
import os 

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, datasets
import torchvision.utils as utils
import torch.optim as optim

import matplotlib.pyplot as plt

from PIL import ImageFile
ImageFile.LOAD_TRUNCATED_IMAGES = True

In [16]:
'''
8 Class classification problem 

Initially work on trying to classifiy based on google search scraped pictures of these lightbulbs.
The problem with this is that they nearly always have a white backgroud and are not realistic of a phone camera version
of a picture of the light bulb
'current search: B15 light bulbs'

Classes
E27 light bulbs
E14 light bulbs
B22 light bulbs
B15 light bulbs
GU10 light bulbs
G4
G9
MR16


Need to also buy some examples of the light bulbs as a way of building up my own dataset that might be more realistic.

'''

"\n8 Class classification problem \n\nInitially work on trying to classifiy based on google search scraped pictures of these lightbulbs.\nThe problem with this is that they nearly always have a white backgroud and are not realistic of a phone camera version\nof a picture of the light bulb\n'current search: B15 light bulbs'\n\nClasses\nE27 light bulbs\nE14 light bulbs\nB22 light bulbs\nB15 light bulbs\nGU10 light bulbs\nG4\nG9\nMR16\n\n\nNeed to also buy some examples of the light bulbs as a way of building up my own dataset that might be more realistic.\n\n"

In [18]:
# Used to rename all the file names to avoid really long file names and make it easier to know which bulb it is. 

def file_rename():
    # Function to rename multiple files 
    for bulb_type in ['B15','B22','E14', 'E27', 'G4', 'G9','GU10', 'MR16']:

        location = r"C:\Users\awalker8\Documents\COVID_Furlow\bulb_class\bulb_data"
        location = location + '\\' + bulb_type + '\\'
        print(location)
        for count, filename in enumerate(os.listdir(location)): 
            dst = bulb_type + str(count) + ".jpg"
            src = location + filename 
            dst = location + dst 
            # rename() function will 
            # rename all the files 
            os.rename(src, dst) 

# Datasets

In [14]:
# Create a pytorch dataset, and then dataloader to allow for mini-batch processing. 
data_transform = transforms.Compose([
        transforms.RandomSizedCrop(128),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
        ])
    
light_bulbs = datasets.ImageFolder(root=r'C:\Users\awalker8\Documents\COVID_Furlow\bulb_class\bulb_data',
                                           transform=data_transform)
    

# If i use more than 0 num_workers get an issue with file truncation??
dataloader = torch.utils.data.DataLoader(light_bulbs,
                                             batch_size=64, shuffle=True,
                                             num_workers=0)

In [22]:
dataloader.dataset

Dataset ImageFolder
    Number of datapoints: 4489
    Root location: C:\Users\awalker8\Documents\COVID_Furlow\bulb_class\bulb_data
    StandardTransform
Transform: Compose(
               RandomSizedCrop(size=(128, 128), scale=(0.08, 1.0), ratio=(0.75, 1.3333), interpolation=PIL.Image.BILINEAR)
               RandomHorizontalFlip(p=0.5)
               ToTensor()
               Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
           )

In [32]:
train_ds, dev_ds, test_ds = torch.utils.data.random_split(dataloader.dataset, (3500,500,489))

In [20]:
# for x, y in dataloader:
#     print(x.shape)
#     x.shape[0]
#     out = np.reshape(x.numpy()[0,:,:,:],newshape =[x.shape[0],x.shape[2],x.shape[3],x.shape[1]])
#     print(out.shape)
#     print(out[0,:,:,:].shape)
#     plt.imshow(out[0,:,:,:])
#     plt.show()
    
#     #plt.imshow(np.reshape(x.numpy()[0,:,:,:],newshape =[1,64,64,3]))
#     #plt.show()
#     print(y) # image label
#     break

# CNN

In [26]:
# Defining the weights_init function that takes as input a neural network m and that will initialize all its weights.
def weights_init(m):
    classname = m.__class__.__name__
    if classname.find('Conv') != -1:
        m.weight.data.normal_(0.0, 0.02)
    elif classname.find('BatchNorm') != -1:
        m.weight.data.normal_(1.0, 0.02)
        m.bias.data.fill_(0)

In [27]:
# Define the classifier

class bulb_model(nn.Module):
    
    def __init__(self):
        super(bulb_model, self).__init__()
    
        self.main = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=64, kernel_size=4, stride=2, padding=1, bias=False),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Conv2d(in_channels=64, out_channels=128, kernel_size=4, stride=2, padding=1, bias=False),
            # add a max pooling layer
            nn.MaxPool2d(kernel_size =3 , stride=1, padding=0),
            nn.BatchNorm2d(128),
            nn.LeakyReLU(0.2, inplace = True), 
            nn.Conv2d(128, 256, 4, 2, 1, bias = False), 
            nn.MaxPool2d(kernel_size =2 , stride=1, padding=0),
            nn.BatchNorm2d(256), 
            nn.LeakyReLU(0.2, inplace = True), 
            nn.Conv2d(256, 512, 4, 2, 1, bias = False),
            nn.BatchNorm2d(512), 
            nn.LeakyReLU(0.2, inplace = True), 
            nn.Conv2d(512, 10, 4, 1, 0, bias = False), 
            nn.Flatten(),
            nn.Linear(160, 512),
            nn.Dropout(p=0.2),
            nn.Linear(512, 128),
            nn.Dropout(p=0.2),
            nn.Linear(128, 10),
            nn.Softmax()
        )
        
    def forward(self, input):
        output = self.main(input)
        return output

In [28]:
classifier_1 = bulb_model()
classifier_1.apply(weights_init)

bulb_model(
  (main): Sequential(
    (0): Conv2d(3, 64, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
    (1): LeakyReLU(negative_slope=0.2, inplace=True)
    (2): Conv2d(64, 128, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
    (3): MaxPool2d(kernel_size=3, stride=1, padding=0, dilation=1, ceil_mode=False)
    (4): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (5): LeakyReLU(negative_slope=0.2, inplace=True)
    (6): Conv2d(128, 256, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
    (7): MaxPool2d(kernel_size=2, stride=1, padding=0, dilation=1, ceil_mode=False)
    (8): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (9): LeakyReLU(negative_slope=0.2, inplace=True)
    (10): Conv2d(256, 512, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
    (11): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (12): Leaky

# Train classifier

In [29]:
# create an optimiser for the classifier
optimiser_bulb = optim.Adam(classifier_1.parameters(), lr=0.002, betas = (0.9,0.999))
# Create a criterion. Lets use cross entropy loss. 
criterion = nn.CrossEntropyLoss(weight=None, size_average=None, ignore_index=-100, reduce=None, reduction='mean')

In [17]:
# for epoch in range(1):

#     for data, labels in dataloader:
#         i = 1
#         # push the image through the network
#         prediction = classifier_1(data)
#         print(prediction)
#         print(prediction.shape)
#         print(labels)
#         if i == 1:
#             break

In [34]:
for epoch in range(1):
    i = 1
    for data, labels in dataloader:
        print(data)
        print(labels)
        break

tensor([[[[2.2489, 2.2489, 2.2489,  ..., 1.2557, 1.2385, 1.2043],
          [2.2489, 2.2489, 2.2489,  ..., 1.4612, 1.3927, 1.3242],
          [2.2489, 2.2489, 2.2489,  ..., 1.3413, 1.2728, 1.2043],
          ...,
          [2.2318, 2.2318, 2.2318,  ..., 2.2489, 2.2489, 2.2489],
          [2.2318, 2.2318, 2.2318,  ..., 2.2489, 2.2489, 2.2489],
          [2.2489, 2.2489, 2.2489,  ..., 2.2489, 2.2489, 2.2489]],

         [[2.4286, 2.4286, 2.4286,  ..., 1.5707, 1.5532, 1.5182],
          [2.4286, 2.4286, 2.4286,  ..., 1.7808, 1.7108, 1.6408],
          [2.4286, 2.4286, 2.4286,  ..., 1.6583, 1.5882, 1.5182],
          ...,
          [2.4111, 2.4111, 2.4111,  ..., 2.4286, 2.4286, 2.4286],
          [2.4111, 2.4111, 2.4111,  ..., 2.4286, 2.4286, 2.4286],
          [2.4286, 2.4286, 2.4286,  ..., 2.4286, 2.4286, 2.4286]],

         [[2.6400, 2.6400, 2.6400,  ..., 1.9080, 1.8905, 1.8557],
          [2.6400, 2.6400, 2.6400,  ..., 2.1171, 2.0474, 1.9777],
          [2.6400, 2.6400, 2.6400,  ..., 1

In [30]:
for epoch in range(1):
    i = 1
    for data, labels in train_ds:
     
        # push the image through the network
        prediction = classifier_1(data)
        
        error = criterion(prediction, labels)
        print('[%d/%d][%d/%d] Loss: %.4f' % (epoch, 25, i, len(train_ds),error))
  
        # zero grad
        classifier_1.zero_grad()
        error.backward()
        optimiser_bulb.step()
        
        #if i == 10:
        #   break
            
        i = i + 1

RuntimeError: Expected 4-dimensional input for 4-dimensional weight 64 3 4 4, but got 3-dimensional input of size [3, 128, 128] instead

# Would be good to be able to show images from the loader whenever we fancy. Why can't i get this to work :()

In [7]:
# Helper function to show a batch
def show_bulb_batch(sample_batched):
    
    """Show image for a batch of samples."""
    images_batch = sample_batched[0][1]
    batch_size = len(images_batch)
    im_size = images_batch.size(2)
    grid_border_size = 2

    grid = utils.make_grid(images_batch)
    plt.imshow(grid.numpy().transpose((1, 2, 0)))

    plt.title('Batch from dataloader')


In [8]:
# for i_batch, sample_batched in enumerate(dataloader):
#     print(i_batch)
#     print(sample_batched[0][1].shape)
#     #ToPILImage(sample_batched[0][1])
#     #to_img(sample_batched[0][1])
    
#     if i_batch ==3:
#         break

In [9]:
import matplotlib.pyplot as plt

In [None]:
for i_batch, sample_batched in enumerate(dataloader):
    print(i_batch, sample_batched[0].size())

    # observe 4th batch and stop.
    if i_batch == 1:
        plt.figure()
        show_bulb_batch(sample_batched)
        plt.axis('off')
        plt.ioff()
        plt.show()
        break