In [40]:
from misc import *
from classify_models import vgg16_model
from misc import utils
import numpy as np

from datatypes import ImageTraining as it
from sklearn.utils.class_weight import compute_class_weight

##### Goal

The goal of this notebook is to show how we can quickly iteratate and test different partitions of the data using the generators I made this summer and the models Ben has been using in his own iterations. The reason I opt to use these rather than our current method is because 
* less repeated code
* more easily repeatable since we don't have to copy and paste new 'train/' and 'validate/' folders
* Most of all, I want to try unique groupings that maintain a little more order in the sense that related models (control, experimental) are side by side. 

#### File Sets

More Information on Generator Tutorial

First, we need to define our structure of files. I have my image files all stored in <br> ../Severity_final  
which is not in the repo. I then have one folder for each disease and healthy then another folder for each severity of the diseases. You should be able to infer the structure from the code below, but please ask me if you have questions. 

In [1]:
# prefix = '../Severities_Final'
# prefix = '../Severity_cropped'

def produce_files(prefix):
    cbb_files = [prefix + '/cbb-levels/cbb_2/',
                prefix + '/cbb-levels/cbb_3/',
                prefix + '/cbb-levels/cbb_4/',
                prefix + '/cbb-levels/cbb_5/']


    cgm_files = [prefix + '/cgm-levels/cgm_2/',
                prefix + '/cgm-levels/cgm_3/',
                prefix + '/cgm-levels/cgm_4/',
                prefix + '/cgm-levels/cgm_5/']


    cmd_files = [prefix + '/cmd-levels/cmd_2/',
                prefix + '/cmd-levels/cmd_3/',
                prefix + '/cmd-levels/cmd_4/',
                prefix + '/cmd-levels/cmd_5/']


    cbsd_files = [prefix + '/cbsd-levels/cbsd_2/',
                 prefix + '/cbsd-levels/cbsd_3/',
                 prefix + '/cbsd-levels/cbsd_4/',
                 prefix + '/cbsd-levels/cbsd_5/']


    healthy_images = [prefix + '/healthy_1/']

    return cbb_files, cgm_files, cmd_files, cbsd_files, healthy_images
    

## Example 1: Healthy vs CBSD (Uncropped)

### Defining Label_Dict

More information in Generator Tutorial IPYNB

The source of how I make the different "partitions" of files is in my label dict. The label_dict is a dictionary object where the key is an integer and value is a list of directories that include photos for that label.

In [45]:
uncropped_prefix = '../Severities_Final/'
cbb_files, cgm_files, cmd_files, cbsd_files, healthy_images = produce_files(uncropped_prefix)

label_dict = {1: healthy_images, 0: cbsd_files}
num_classes = 2
class_names = ['cbsd','healthy']

In [6]:
paths, labels = utils.get_paths_labels(label_dict)

Found 1474 examples with label 1
Found 1743 examples with label 0


In [8]:
batch_size = 16
target_size = (224, 224)

itrain = it.ImageTraining(label_dict)
traingen, testgen = itrain.get_train_test_generators(batch_size=batch_size, target_size=target_size)

Found 1474 examples with label 1
Found 1743 examples with label 0
partitioning data with train size 2573 and test size 643


In [28]:
steps_test = testgen.n // batch_size
steps_train = traingen.n // batch_size
print('steps in train generator:',steps_train)
print('steps in test generator:',steps_test)

class_weight = compute_class_weight(class_weight='balanced',
                                    classes=np.unique(traingen.classes),
                                    y=traingen.classes)

steps in train generator: 160
steps in test generator: 40


In [None]:
model = vgg16_model.get_transfer_model(num_classes=num_classes)

In [30]:
history = model.fit_generator(generator=traingen,
                                  epochs=5,
                                  steps_per_epoch=steps_train,
                                  class_weight=class_weight,
                                  validation_data=testgen,
                                  validation_steps=steps_test)

## Example 2: Healthy vs CBSD (Cropped)

In [46]:
uncropped_prefix = '../Severity_cropped/'
cbb_files, cgm_files, cmd_files, cbsd_files, healthy_images = produce_files(uncropped_prefix)

label_dict = {1: healthy_images, 0: cbsd_files}
num_classes = 2
class_names = ['cbsd','healthy']

#### Repeat same code as example one except this time the photo source is changed

## Example 3: Healthy vs CBSD vs CGM (Uncropped)

In [37]:
uncropped_prefix = '../Severities_Final/'
cbb_files, cgm_files, cmd_files, cbsd_files, healthy_images = produce_files(uncropped_prefix)

e3_label_dict = {1: healthy_images, 0: cbsd_files, 2: cgm_files}
num_classes = 3
class_names = ['cbsd','healthy', 'cgm']

In [38]:
paths, labels = utils.get_paths_labels(e3_label_dict)

Found 1474 examples with label 1
Found 1743 examples with label 0
Found 722 examples with label 2


In [41]:
batch_size = 16
target_size = (224, 224)

itrain = it.ImageTraining(e3_label_dict)
traingen, testgen = itrain.get_train_test_generators(batch_size=batch_size, target_size=target_size)

Found 1474 examples with label 1
Found 1743 examples with label 0
Found 722 examples with label 2
partitioning data with train size 3151 and test size 787


In [42]:
steps_test = testgen.n // batch_size
steps_train = traingen.n // batch_size
print('steps in train generator:',steps_train)
print('steps in test generator:',steps_test)

class_weight = compute_class_weight(class_weight='balanced',
                                    classes=np.unique(traingen.classes),
                                    y=traingen.classes)
class_weight

steps in train generator: 196
steps in test generator: 49


array([0.74810066, 0.89848874, 1.8171857 ])

In [None]:
model = vgg16_model.get_transfer_model(num_classes=num_classes)

In [None]:
history = model.fit_generator(generator=traingen,
                                  epochs=5,
                                  steps_per_epoch=steps_train,
                                  class_weight=class_weight,
                                  validation_data=testgen,
                                  validation_steps=steps_test)

### Continue with iterations and photo types as much as you want

In [47]:
#TODO add in all evaluation and graphing functions from misc/ben_utils.py (pasted code that
# needs to be integrated to fit current file structure format) 