In [1]:
!pip install efficientnet_pytorch

import matplotlib.pyplot as plt
import os 
import numpy as np
import torch
import datetime
from torch import nn
from torch import optim
import torch.nn.functional as F
from torchvision import datasets, transforms, models
from torch.utils.data.sampler import SubsetRandomSampler
from mlxtend.evaluate import confusion_matrix
import matplotlib.pyplot as plt
import pandas as pd
from mlxtend.plotting import plot_confusion_matrix
from collections import OrderedDict 
import shutil
from PIL import ImageFile
ImageFile.LOAD_TRUNCATED_IMAGES = True
from pathlib import Path
import sys


#debug mode
debug=True

#loaded_model_path=os.path.normpath(sys.argv[0])

loaded_model_path = os.getcwd()
load_model=os.path.join(loaded_model_path,'checkpoint_stage_I.pth')

csv_file_name="Classified_Images_stage_II.csv"
data_dir=r'dataset'

batch_size = 100
num_workers=0

In [2]:

class ImageFolderWithPaths(datasets.ImageFolder):
    """Custom dataset that includes image file paths. Extends
    torchvision.datasets.ImageFolder
    """

    # override the __getitem__ method. this is the method dataloader calls
    def __getitem__(self, index):
        # this is what ImageFolder normally returns 
        original_tuple = super(ImageFolderWithPaths, self).__getitem__(index)
        # the image file path
        path = self.imgs[index][0]
        # make a new tuple that includes original and the path
        tuple_with_path = (original_tuple + (path,))
        return tuple_with_path

In [3]:
train_on_gpu = torch.cuda.is_available()
if debug==True:
    if not train_on_gpu:
        print('CUDA is not available.  Training on CPU ...')
    else:
        print('CUDA is available!  Training on GPU ...')



CUDA is available!  Training on GPU ...


In [4]:
paths_of_files = []
test_folder_path=os.path.join(os.path.join(data_dir,'Sorted_Images_I'),'test')
#test_product_defect = r'dataset\Sorted_Images_I\Product Defect'

if not os.path.exists(test_folder_path):
    os.makedirs(test_folder_path)
    
    #make a directory in test/test with images to be sent into...
    #get root, directory and files
    for root, direc, files in os.walk(data_dir):
        for file in files:
            paths_of_files.append(os.path.join(root, file))
    
    for i, f in enumerate(paths_of_files):
        old_image_path=paths_of_files[i]
        new_image_path =os.path.join(root, paths_of_files[i].split(os.sep)[-1])
        shutil.copy(old_image_path, new_image_path) 
    
    print (" {:d} images moved  --> ../Sorted_Images_I".format(len(paths_of_files)))
else:
    print('Already Exsists..')
	

test_dir = os.path.join(data_dir, 'Sorted_Images_I')
# classes are folders in each directory with these names
classes = ['Lot Code', 'Other', 'Package', 'Product Defect', 'Receipt']


Already Exsists..


In [5]:
test_transforms = transforms.Compose([transforms.Resize(255),
                                          transforms.CenterCrop(224),
                                          transforms.ToTensor(),
                                          transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])


test_data = ImageFolderWithPaths(test_dir, transform=test_transforms)

In [6]:
test_loader = torch.utils.data.DataLoader(test_data, batch_size=batch_size, 
                                          num_workers=num_workers, shuffle=True)

In [7]:
def load_checkpoint(filepath):
    checkpoint = torch.load(filepath)
    model = checkpoint['model']
    model.load_state_dict(checkpoint['state_dict'])
    for parameter in model.parameters():
        parameter.requires_grad = False

    model.eval()
    return model

model = load_checkpoint(load_model)
print(model.parameters)

<bound method Module.parameters of DataParallel(
  (module): VGG(
    (features): Sequential(
      (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU(inplace=True)
      (3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (5): ReLU(inplace=True)
      (6): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
      (7): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (8): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (9): ReLU(inplace=True)
      (10): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (11): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (12): ReLU(inplace=True)
      (13): MaxPool2d

In [8]:
if train_on_gpu:
    model.cuda()

In [9]:
total_preds=np.array([])
total_labels=np.array([])

# track test loss 
# over 5  classes
test_loss = 0.0
df = pd.DataFrame(columns=['Predicted Class','Path'])


batch_number=0
Start_time=(datetime.datetime.now())
model.eval() # evaluation mode

# iterate over test data
for batch_number, (data, target, path) in enumerate(test_loader):
    print("Analyzing Batch {}".format(batch_number))
    #create a sub dataframe    
    sub_df = pd.DataFrame(columns=['Predicted Class','Path'])
    # move tensors to GPU if CUDA is available
    if train_on_gpu:
        data, target = data.cuda(), target.cuda()
    # forward pass: compute predicted outputs by passing inputs to the model
    output = model(data)
    #debug step --- print('The output:{}'.format(output.shape))
    # calculate the batch loss
    #loss = criterion(output, target)
    
    # update  test loss 
    # test_loss += loss.item()*data.size(0)
    # convert output probabilities to predicted class
    _, pred = torch.max(output, 1) 
   

   #%%     
    ########################################################
    ##############Write to a CSV File ######################
    ########################################################    
    
   
    for idx in np.arange(len(pred)): 

        #write to the CSV file
        sub_df.loc[idx,'Predicted Class'] = classes[pred[idx]]
        sub_df.loc[idx,'Path']= path[idx]
    
    #append sub_df to the main df
    df=df.append(sub_df, ignore_index=True)



End_time=(datetime.datetime.now())   

print('Time taken for Testing: {}'.format(End_time-Start_time))
print('-------------------------------')

Analyzing Batch 0
Analyzing Batch 1
Analyzing Batch 2
Analyzing Batch 3
Analyzing Batch 4
Analyzing Batch 5
Analyzing Batch 6
Analyzing Batch 7
Analyzing Batch 8
Analyzing Batch 9
Analyzing Batch 10
Analyzing Batch 11
Analyzing Batch 12
Analyzing Batch 13
Analyzing Batch 14
Analyzing Batch 15
Analyzing Batch 16
Analyzing Batch 17
Analyzing Batch 18
Analyzing Batch 19
Analyzing Batch 20
Analyzing Batch 21
Analyzing Batch 22
Analyzing Batch 23
Analyzing Batch 24
Analyzing Batch 25
Analyzing Batch 26
Analyzing Batch 27
Analyzing Batch 28
Analyzing Batch 29
Analyzing Batch 30
Analyzing Batch 31
Analyzing Batch 32
Analyzing Batch 33
Analyzing Batch 34
Analyzing Batch 35
Analyzing Batch 36
Analyzing Batch 37
Analyzing Batch 38
Analyzing Batch 39
Analyzing Batch 40
Analyzing Batch 41
Analyzing Batch 42
Analyzing Batch 43
Analyzing Batch 44
Analyzing Batch 45
Analyzing Batch 46
Analyzing Batch 47
Analyzing Batch 48
Analyzing Batch 49
Analyzing Batch 50
Analyzing Batch 51
Analyzing Batch 52
Ana

In [13]:
print(df[900:920])

    Predicted Class                                               Path
900  Product Defect  dataset\Sorted_Images_I\test\00P2E00001VEmm0UA...
901        Lot Code  dataset\Sorted_Images_I\test\00P2E00001R2tKhUA...
902  Product Defect  dataset\Sorted_Images_I\test\00P2E00001cWlFNUA...
903  Product Defect  dataset\Sorted_Images_I\test\00P2E00001VGQI2UA...
904  Product Defect  dataset\Sorted_Images_I\test\00P2E00001ZaIoJUA...
905  Product Defect  dataset\Sorted_Images_I\test\00P2E00001c1epRUA...
906  Product Defect  dataset\Sorted_Images_I\test\00P2E00001XmIafUA...
907  Product Defect  dataset\Sorted_Images_I\test\00P2E00001XmBz7UA...
908  Product Defect  dataset\Sorted_Images_I\test\00P2E00001dHe6JUA...
909  Product Defect  dataset\Sorted_Images_I\test\00P2E00001PqtkjUA...
910  Product Defect  dataset\Sorted_Images_I\test\00P2E00001baWSNUA...
911  Product Defect  dataset\Sorted_Images_I\test\00P2E00001echyzUA...
912  Product Defect  dataset\Sorted_Images_I\test\00P2E00001SpPMyUA...
913  P

In [14]:
for i, class_name in enumerate(classes):
   #make 5 folders 
    new_path = os.path.join(test_dir, class_name)
    if not os.path.exists(new_path):
        os.makedirs(new_path)
    
    print("{} of images were classified as {}".format(  len(df[df['Predicted Class']==class_name]) ,class_name))

print("Total classified images: {} ".format(len(df)))

676 of images were classified as Lot Code
18 of images were classified as Other
356 of images were classified as Package
4268 of images were classified as Product Defect
81 of images were classified as Receipt
Total classified images: 5399 


In [15]:
for i,row in df.iterrows():
    
    old_path=df.loc[i,'Path']    
    new_path=os.path.join(test_dir,df.loc[i,'Predicted Class'],df.loc[i,'Path'].split(os.sep)[-1])
    shutil.move(old_path, new_path)
    

#saves the processed data to data.csv file
df.to_csv(csv_file_name, sep='\t',index=False,  encoding='latin-1') 
print('***All images are sorted & "Classfied_Images.csv" was generated***')

#delete folder dataset\images\Sorted_Images_I\test
if not Path(test_folder_path).is_file():
    os.rmdir(test_folder_path)

***All images are sorted & "Classfied_Images.csv" was generated***
