In [9]:
import os
import pandas as pd
import numpy as np
from tqdm import tqdm # a smart progress bar as an output
import cv2

Preparing classes based on subfolders' names

In [3]:
# Class folders, in label order: the position in this list is the integer label.
class_names = ['Normal', 'Diseased']

# Lookup table from class (sub-folder) name to its integer label.
class_names_label = dict(zip(class_names, range(len(class_names))))

# Number of distinct classes.
nb_classes = len(class_names)

# Target size passed to cv2.resize for every loaded image.
IMAGE_SIZE = (28, 28)

**Loading images from folders**

In [17]:
#Written by Tosin-doc/cupsvsspoonsvsplateCNN_2.py, with updates in some lines
def load_data(url):
    """Load every readable image under ``url`` as a resized grayscale array.

    ``url`` must be a directory containing one sub-folder per class, named
    after the keys of ``class_names_label``. Entries that are not known class
    folders (hidden folders, archive metadata, stray files) are ignored, and
    files that OpenCV cannot decode are skipped instead of crashing the load.

    Class folders are visited in ``class_names_label`` order and files in
    sorted order, so the result is reproducible across runs and machines.

    Parameters
    ----------
    url : str
        Path of the dataset directory (e.g. the extracted training folder).

    Returns
    -------
    images : numpy.ndarray
        float32 array with one grayscale image per loaded file, each resized
        to ``IMAGE_SIZE``.
    labels : numpy.ndarray
        int32 array holding the class label of each image, in the same order.
    """
    images = []
    labels = []
    skipped = 0

    print("Loading {}".format(url))

    # Drive the loop from the known classes rather than from os.listdir(url),
    # so unexpected directory entries can never raise a KeyError.
    for folder, label in class_names_label.items():
        folder_path = os.path.join(url, folder)
        if not os.path.isdir(folder_path):
            print("Skipping missing class folder: {}".format(folder_path))
            continue

        # sorted() makes the sample order deterministic (os.listdir order is arbitrary)
        for file in tqdm(sorted(os.listdir(folder_path))):
            img_path = os.path.join(folder_path, file)

            # cv2.imread returns None (it does not raise) when the file is
            # missing, unreadable or not an image.
            image = cv2.imread(img_path)
            if image is None:
                skipped += 1
                continue

            # Convert to single-channel grayscale, then resize
            image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
            image = cv2.resize(image, IMAGE_SIZE)

            images.append(image)
            labels.append(label)

    if skipped:
        print("Skipped {} unreadable file(s)".format(skipped))

    images = np.array(images, dtype='float32')
    labels = np.array(labels, dtype='int32')

    return images, labels

In [5]:
# Flatten a stack of images into a DataFrame with one row per image
def convertToDataFrame(x):
  """Flatten an image stack into a DataFrame with one row per image.

  For the usual 3-D input of shape (n_images, n_rows, n_cols) the pixels of
  each image are laid out row by row, giving n_rows * n_cols columns. Column
  labels repeat 0..n_cols-1 once per image row, exactly as the previous
  per-column concat implementation produced, so existing consumers of the
  frame / CSV see the same header.

  The 3-D case is handled with a single reshape instead of converting the
  array to nested Python lists and growing a DataFrame with pd.concat in a
  loop; the values keep the dtype of ``x``.

  Parameters
  ----------
  x : numpy.ndarray (or array-like)
      Image data, normally of shape (n_images, n_rows, n_cols).

  Returns
  -------
  pandas.DataFrame
      One row per image; an empty DataFrame when ``x`` holds no images.
  """
  x = np.asarray(x)

  if x.ndim == 3 and x.size > 0:
    n_images, n_rows, n_cols = x.shape
    flat = x.reshape(n_images, n_rows * n_cols)
    # Same (repeating) labels the per-row concat used to generate
    labels = np.tile(np.arange(n_cols), n_rows)
    return pd.DataFrame(flat, columns=labels)

  # Other shapes / empty input: keep the original expansion semantics, but
  # concatenate once instead of re-copying the frame on every iteration.
  df_data = pd.DataFrame(x.tolist())
  pieces = [pd.DataFrame(df_data[i].tolist()) for i in df_data.columns]
  if not pieces:
    return pd.DataFrame()
  return pd.concat(pieces, axis=1)
  
# Attach the class labels to the pixel DataFrame
def addLable(y,df):
  """Store the labels ``y`` in a new ``'label'`` column of ``df``.

  ``y`` is wrapped in a single-column DataFrame before assignment, so the
  labels are matched to the rows of ``df`` by index. ``df`` is modified in
  place and the same object is returned.
  """
  df['label'] = pd.DataFrame(y)
  return df



**Before running the next cells, upload the image archives `Training.zip` and `Testing.zip` to the Colab file system (`/content`)**

In [None]:
# -o overwrites without prompting (so the cell can be re-run), -q hides the
# per-file listing, -d pins the extraction directory regardless of the cwd.
!unzip -o -q '/content/Training.zip' -d '/content'

In [None]:
# -o overwrites without prompting (so the cell can be re-run), -q hides the
# per-file listing, -d pins the extraction directory regardless of the cwd.
!unzip -o -q '/content/Testing.zip' -d '/content'

**Preparing training images**

In [10]:
# Read the training images and their integer class labels from the extracted folder
x_training, y_training= load_data('/content/Training')

Loading /content/Training


100%|██████████| 39334/39334 [00:02<00:00, 18796.71it/s]
100%|██████████| 46781/46781 [00:02<00:00, 17981.16it/s]


In [11]:
# Build the training table: pixel columns first, then the 'label' column
df_train = addLable(y_training, convertToDataFrame(x_training))

In [12]:
# Save the training table to the current working directory. to_csv's default
# index=True also writes the row index as the first, unnamed CSV column.
df_train.to_csv('Training.csv')

**Preparing testing images**

In [13]:
# Read the testing images and their integer class labels from the extracted folder
x_testing, y_testing= load_data('/content/Testing')

Loading /content/Testing


100%|██████████| 19557/19557 [00:01<00:00, 16718.91it/s]
100%|██████████| 17882/17882 [00:02<00:00, 7454.75it/s]


In [14]:
# Build the testing table: pixel columns first, then the 'label' column
df_test = addLable(y_testing, convertToDataFrame(x_testing))

In [15]:
# Save the testing table to the current working directory. to_csv's default
# index=True also writes the row index as the first, unnamed CSV column.
df_test.to_csv('Testing.csv')

In [16]:
# Show the training table as the cell output (pandas elides the middle rows and columns)
df_train

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,19,20,21,22,23,24,25,26,27,label
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0
2,255.0,255.0,255.0,255.0,255.0,255.0,255.0,255.0,255.0,255.0,...,255.0,255.0,255.0,255.0,255.0,255.0,255.0,255.0,255.0,0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
86110,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1
86111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1
86112,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1
86113,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1
