In [1]:
import os
import cv2
import json
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt

%matplotlib inline

In [2]:
DSET_PATH = '../input/cassava-leaf-disease-classification/'
TRAIN_IMGS_PATH = 'train_images/'
TEST_IMGS_PATH = 'test_images/'

In [3]:
train_data = pd.read_csv(DSET_PATH+'train.csv')

In [4]:
train_data.head()

Unnamed: 0,image_id,label
0,1000015157.jpg,0
1,1000201771.jpg,3
2,100042118.jpg,1
3,1000723321.jpg,1
4,1000812911.jpg,3


In [5]:
config = {
    "MODEL": {
        "MODEL_NAME": "leafy",
        "MODEL_DESCRIPTION": "Classify Leaf Diseases",
        "MODEL_TYPE": "convolutional",
        "PROBLEM_TYPE": "classification",
        "MODEL_PARAMS": {
            "INPUT_SHAPE": [
                600,
                800,
                3
            ],
            "METRICS" : ["accuracy"],
            "OPTIMIZER" : {
                "type" : "adam",
                "params":{
                    "beta1" : 0.9,
                    "beta2" : 0.99
                }
            }
        },
        "CALLBACKS":[{
            "type" : "early_stopping",
            "params": {
                "epochs" : 2,
                "change_threshold" : 0.01
            }
        },{
            "type" : "tensorboard",
            "params" : {}
        },{
            "type" : "checkpoint",
            "params" : {}
        }]
    },
   "DATASET":{
      "SPLIT_DATA":True,
      "DATA_TYPE":"image",
      "VALID_SPLIT_SIZE":0.1,
      "DATA":{
         "INTERFACE_TYPE":"csv",
         "INTERFACE_FILE": "train.csv",
         "DATA_PATH": DSET_PATH,
         "DIRECTORIES":[TRAIN_IMGS_PATH],
         "FEATURES":"image_id",
         "TARGETS":"label"
      },
      "PREPROCESSES":[
         {
            "type":"resize",
            "params":{
               "target_size":[
                  600,
                  800
               ]
            }
         },
         {
            "type":"color_normalization",
            "params":{
               "calc_means": True
            }
         }
      ],
      "AUGMENTATIONS":[
         {
            "type":"rotate",
            "params":{
               "min_angle":20,
               "max_angle":45,
               "prob":0.4
            }
         },
         {
            "type":"translate",
            "params":{
               "x_shift":100,
               "y_shift":20
            }
         }
      ]
   },
    "HYPERPARAMETERS": {
        "TRAINING_BATCH_SIZE": 8,
        "NUM_EPOCHS": 100,
        "STEPS_PER_EPOCH": 0,
        "LEARNING_RATE": 1e-3,
        "MONITOR_METRIC": "val_loss",
        "SAVE_WEIGHTS": True,
        "EARLY_STOP_EPOCHS": 2,
        "DROPOUT_RATIO": 0.3,
        "LR_DECAY_ALPHA": 0.4
    },
    "CONFIG_INFO": {
        "LOG_DIR": "logs",
        "MODEL_IMAGE_PATH": "model_images",
        "CHECKPOINTS_PATH": "checkpoints"
    }
}

In [6]:
def resize(target_size):
    return

def color_normalization(calc_means=True, means=[]):
    return

def rotate(min_angle, max_angle):
    return

def translate(x_shift, y_shift):
    return

In [7]:
def get_features_labels_from_dirs(root, dirs, features, targets):
    if targets == '__dirname__':
        targets = dirs
    
    if dirs == '*':
        dirs = next(os.walk(root_path))[1]
    elif dirs == '':
        dirs = []
    
    return features, targets

In [8]:
class PREPROCESS:
    def __init__(self, config):
        self.processes = []
        for preprocess in config:
            self.processes.append(self.get_preprocess(preprocess.type)(**preprocess.params))
            
    def apply(self, imgs):
        for preprocess in self.processes:
            imgs = preprocess(imgs)
        return imgs
        

In [9]:
class DICTIONARY:
	def __init__(self, **response):
		for k,v in response.items():
			if isinstance(v, dict):
				self.__dict__[k] = DICTIONARY(**v)
			else:
				self.__dict__[k] = v


class CONFIG(DICTIONARY):
	def __init__(self, config, **kwargs):

		self.config = config

		for key, val in kwargs.items():
			config[key] = val

		super(CONFIG, self).__init__(**config)

In [10]:
config = CONFIG(config)

In [11]:
# DATA LOADER
class DATA_LOADER:
    def __init__(self, config):
        self.config = config
    
    def get_batch(self):
        while True:
            
        return
    
    def preprocess(self):
        return
    
    def augment(self):
        return

In [12]:
class DATA_FEEDER:
    def __init__(self, config, loader=None):
        self.config = config
        self.loader = loader if loader else self.get_loader(config.DATASET.DATA_TYPE)
        self.get_dset_info()
    
    def get_loader(self, dtype):
        if dtype == 'image':
            return IMAGE_LOADER(self.config)
        elif dtype == 'audio':
            return AUDIO_LOADER(self.config)
        elif dtype == 'text':
            return TEXT_LOADER(self.config)
        elif dtype == 'structured':
            return STRUCTURED_LOADER(self.config)
        else:
            print('No Default Loader Found For Given Data Type! Please Implement A Custom Data Loader Or Look At The Existing Loaders.')
            return None
        
    def get_next_batch(self):
        return self.loader.get_batch()
    
    # Getting indices and features and targets for the dataset.
    def get_dset_info(self):
        interface_type = self.config.DATASET.DATA.INTERFACE_TYPE
        split_data = self.config.DATASET.SPLIT_DATA
        
        # If the dataset info is given as a csv file
        if interface_type == 'csv':          
            # If dataset is to be split in train and valid sets.
            if split_data:
                fl = self.config.DATASET.DATA.INTERFACE_FILE
                root_path = self.config.DATASET.DATA.DATA_PATH
                df = pd.read_csv(root_path+fl)
                indices = np.arange(0, len(df))
                self.split_dset(indices)
            # If train and valid datasets are probided separately.
            else:
                train_fl = self.config.DATASET.DATA.TRAIN_DATA.INTERFACE_FILE
                train_root_path = self.config.DATASET.DATA.TRAIN_DATA.DATA_PATH
                valid_fl = self.config.DATASET.DATA.VALID_DATA.INTERFACE_FILE
                valid_root_path = self.config.DATASET.DATA.VALID_DATA.DATA_PATH
                
                self.train_indices = np.arange(0, len(pd.read_csv(train_root_path+train_fl)))
                self.valid_indices = np.arange(0, len(pd.read_csv(valid_root_path+valid_fl)))
                
        elif interface_type == 'dir':
            if split_data:
                root_path = self.config.DATASET.DATA.DATA_PATH
                dirs = self.config.DATASET.DATA.DIRECTORIES
                    
                features, targets = get_features_labels_from_dirs(root_path, dirs, self.config.DATASET.DATA.FEATURES, self.config.DATASET.DATA.TARGETS)
                indices = np.arange(0, len(features))
                self.split_dset(indices)
                
            else:
                # Train Data
                train_root_path = self.config.DATASET.DATA.TRAIN_DATA.DATA_PATH
                train_dirs = self.config.DATASET.DATA.TRAIN_DATA.DIRECTORIES

                if train_dirs == '*':
                    train_dirs = next(os.walk(train_root_path))[1]
                elif train_dirs == '':
                    train_dirs = []
                    
                # Valid Data
                valid_root_path = self.config.DATASET.DATA.VALID_DATA.DATA_PATH
                valid_dirs = self.config.DATASET.DATA.VALID_DATA.DIRECTORIES

                if valid_dirs == '*':
                    valid_dirs = next(os.walk(valid_root_path))[1]
                elif valid_dirs == '':
                    valid_dirs = []
                    
                train_features, train_targets = get_features_labels_from_dirs(train_root_path, train_dirs, self.config.DATASET.DATA.FEATURES, self.config.DATASET.DATA.TARGETS)
                valid_features, valid_targets = get_features_labels_from_dirs(valid_root_path, valid_dirs, self.config.DATASET.DATA.FEATURES, self.config.DATASET.DATA.TARGETS)
                
                self.train_indices = np.arange(0, len(self.train_features))
                self.valid_indices = np.arange(0, len(self.valid_features))
            
        elif interface_type == 'text':
            print('text')
        elif interface_type == 'json':
            print('json')
        elif interface_type == 'xml':
            print('xml')
        
    
    def split_dset(self, indices):
        dtype = self.config.DATASET.DATA_TYPE
        
        if dtype != 'timeseries':
            np.random.shuffle(indices)

        split_index = len(indices)-int(len(indices)*self.config.DATASET.VALID_SPLIT_SIZE)
        self.train_indices = indices[:split_index]
        self.valid_indices = indices[split_index:]
        

In [13]:
fig=plt.figure(figsize=(20, 20))
columns = 2
rows = 2
for i in range(1, columns*rows +1):
    img = cv2.imread(DSET_PATH+TRAIN_IMGS_PATH+train_imgs[np.random.randint(0, len(train_imgs))])
    print(img.shape)
    fig.add_subplot(rows, columns, i)
    plt.imshow(img)
    
plt.show()

NameError: name 'train_imgs' is not defined

<Figure size 1440x1440 with 0 Axes>