In [22]:
import json
import wandb
from wandb.keras import WandbCallback
from pathlib import Path
from keras.preprocessing.image import ImageDataGenerator
from keras.optimizers import SGD
from keras.callbacks import ModelCheckpoint
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D
from keras.layers import Activation, Dropout, Flatten, Dense
from keras import backend as K
from keras.preprocessing.image import ImageDataGenerator

In [23]:
# Keys read from a bounding-box annotation, in the order the helpers index them.
bb_params = ['height', 'width', 'x', 'y']

# Start a Weights & Biases run and keep a handle on its config object.
run = wandb.init()
config = run.config

# Target image size used when rescaling bounding boxes and loading images.
config.height = 224 # or 299
config.width = 224 # or 299

W&B Run: https://app.wandb.ai/univai-ss2019/SoLong/runs/0ne3fc0l
Call `%%wandb` in the cell containing your training loop to display live results.


In [24]:
def convert_bb(bb, size):
    """Rescale a bounding box to the configured image size.

    Args:
        bb: Mapping indexed by the keys listed in ``bb_params``.
        size: Indexable whose element 0 is compared against ``config.width``
            and element 1 against ``config.height``
            (presumably ``(width, height)`` of the source image -- TODO confirm).

    Returns:
        A list ordered like ``bb_params``: the first two entries are scaled,
        the last two are scaled and clamped to be non-negative.
    """
    scaled = [bb[key] for key in bb_params]
    x_scale = config.width / size[0]
    y_scale = config.height / size[1]
    # (position in the list, scale factor, clamp at zero?)
    plan = (
        (0, y_scale, False),
        (1, x_scale, False),
        (2, x_scale, True),
        (3, y_scale, True),
    )
    for position, factor, clamp in plan:
        value = scaled[position] * factor
        scaled[position] = max(value, 0) if clamp else value
    return scaled


def create_rect(bb, color='red'):
    """Build an unfilled rectangle patch for a bounding box.

    ``bb`` is indexed positionally: entries 2 and 3 give the anchor point,
    entry 1 the width and entry 0 the height.
    """
    # NOTE(review): `plt` is not imported in the visible cells --
    # presumably matplotlib.pyplot; confirm it is imported before use.
    anchor = (bb[2], bb[3])
    box_width = bb[1]
    box_height = bb[0]
    return plt.Rectangle(anchor, box_width, box_height,
                         color=color, fill=False, lw=3)

def to_plot(img):
    """Return ``img`` as a ``uint8`` array with the channel axis last.

    When the Keras backend already uses channels-last data the axes are left
    alone; otherwise axis 0 is rolled to the end.

    Args:
        img: Array exposing ``astype`` (a NumPy array).

    Returns:
        A new ``uint8`` array.
    """
    # `K.image_dim_ordering()` was dropped from later Keras 2 releases;
    # `image_data_format()` is its replacement ('tf' == 'channels_last').
    if K.image_data_format() == 'channels_last':
        return img.astype(np.uint8)
    return np.rollaxis(img, 0, 3).astype(np.uint8)

def plotfish(img):
    """Show ``img`` with ``plt.imshow`` after converting it via ``to_plot``."""
    displayable = to_plot(img)
    plt.imshow(displayable)
    
def show_bb(i):
    """Plot image ``val[i]`` and overlay the rectangle for ``val_bbox[i]``."""
    # NOTE(review): `val` and `val_bbox` are not defined in the visible cells;
    # they must exist in the kernel before this is called.
    box = val_bbox[i]
    plotfish(val[i])
    axes = plt.gca()
    axes.add_patch(create_rect(box))
    

In [25]:
import glob
import numpy as np
# glob returns entries in arbitrary order; sort so that downstream record and
# CSV ordering is reproducible across machines and runs.
folders = sorted(glob.glob('train/*'))
from sklearn.model_selection import train_test_split
# Class names; a name's position in this list is its integer id.
anno_classes = [
    'ALB', 'BET', 'DOL', 'LAG',
    'OTHER', 'SHARK', 'YFT', 'NOF',
]
# id -> name
annodict = {index: name for index, name in enumerate(anno_classes)}
# name -> id
dictanno = {name: index for index, name in enumerate(anno_classes)}

In [26]:
# One record per image: integer class label, file path, and a train flag
# (1.0 = training, 0.0 = validation) from a per-folder 80/20 split.
records = []
for folder in folders:
    files = glob.glob(folder+"/*.jpg")
    if not files:
        # Nothing to split here (empty folder or a stray non-directory entry);
        # train_test_split would raise on an empty input.
        continue
    # Every image in a folder shares the folder's name as its class. Taking
    # the name from the folder avoids depending on how deep the path is.
    label = dictanno[Path(folder).name]
    # Fixed random_state keeps the split stable between runs.
    train_idx, _ = train_test_split(range(len(files)), test_size=0.2, random_state=1983)
    mask = np.zeros(len(files))
    mask[train_idx] = 1
    records.extend(
        dict(label=label, file=path, train=flag)
        for path, flag in zip(files, mask)
    )
import pandas as pd

from keras_preprocessing.image import ImageDataGenerator

In [27]:
import json
# Maps an image file name to its largest bounding-box annotation.
bb_json = {}

for class_name in anno_classes:
    # The context manager closes the label file even if parsing fails.
    with open('bbox/{}_labels.json'.format(class_name), 'r') as label_file:
        entries = json.load(label_file)
    for entry in entries:
        annotations = entry.get('annotations')
        if not annotations:
            # No boxes recorded for this image.
            continue
        image_name = entry['filename'].split('/')[-1]
        # Keep the box with the greatest area (stable sort: on a tie the
        # later annotation wins).
        bb_json[image_name] = sorted(
            annotations, key=lambda box: box['height']*box['width'])[-1]
# Summarise instead of dumping every key into the cell output.
print(len(bb_json), "images with bounding boxes; sample:", list(bb_json)[:5])

FileNotFoundError: [Errno 2] No such file or directory: 'bbox/ALB_labels.json'

In [21]:
# Join image records with their bounding boxes and export the train and
# validation tables as CSV.
count = 0  # images without a bounding-box annotation
tot = 0    # images examined
records2 = []
for r in records:
    tot += 1
    name = r['file'].split('/')[-1]
    if name not in bb_json:
        count += 1
        continue
    bbox = bb_json[name]
    # Build a new dict so `records` is left untouched and this cell can be
    # re-run safely.
    records2.append(dict(
        r,
        x=bbox['x'],
        y=bbox['y'],
        width=bbox['width'],
        height=bbox['height'],
    ))
print("nobbox", count, tot)
print("rec5", records2[:5])

# Explicit columns keep the frame well-formed even when no image matched.
df = pd.DataFrame.from_records(
    records2,
    columns=['label', 'file', 'train', 'x', 'y', 'width', 'height'])
csv_columns = ['file', 'label', 'x', 'y', 'width', 'height']
dftrain = df[df['train'] == 1][csv_columns]
dfvalid = df[df['train'] == 0][csv_columns]

dftrain.to_csv("tv_train.csv", index=False, header=True)
dfvalid.to_csv("tv_valid.csv", index=False, header=True)

FileNotFoundError: [Errno 2] No such file or directory: 'bbox/alb_labels.json'

In [None]:
# Take the network input size from the run config so it cannot drift from
# the size the image generators resize to.
img_width, img_height = config.width, config.height

# Channels-last input: two spatial dimensions followed by 3 colour channels.
input_shape = (img_width,img_height,3)

# NOTE(review): earlier cells read images from 'train/*' -- confirm that both
# paths refer to the same dataset.
train_data_dir = '../SoLong/kagfish/train'

# Training budget, recorded on the W&B run config.
config.n_train_samples = 200
config.n_validation_samples = 200
config.epochs = 10
batch_size = 64


# Fraction of each class held out for validation. Both generators below must
# use the same value so that their "training" and "validation" subsets are
# taken from the same partition of the files.
validation_fraction = 0.2

# Augmentation configuration used for training images only.
train_datagen = ImageDataGenerator(
        rescale=1./255,
        shear_range=0.1,
        zoom_range=0.1,
        rotation_range=10.,
        width_shift_range=0.1,
        height_shift_range=0.1,
        horizontal_flip=True,
        validation_split = validation_fraction)

# Configuration used for validation images: only rescaling, so validation
# metrics are measured on unaugmented data.
valid_datagen = ImageDataGenerator(
        rescale=1./255,
        validation_split = validation_fraction)

# NOTE(review): Keras documents target_size as (height, width); the order
# below only matters if config.width and config.height ever differ.
train_generator = train_datagen.flow_from_directory(
        train_data_dir,
        target_size = (config.width, config.height),
        batch_size = batch_size,
        shuffle = True,
        subset = "training",
        classes = anno_classes,
        class_mode = 'categorical')


validation_generator = valid_datagen.flow_from_directory(
        train_data_dir,
        target_size = (config.width, config.height),
        batch_size = batch_size,
        shuffle = True,
        subset = "validation",
        classes = anno_classes,
        class_mode = 'categorical')


# Small CNN classifier: three conv/ReLU/max-pool stages followed by a
# dense head with dropout and a softmax over the classes.
model = Sequential([
    Conv2D(32, (3, 3), input_shape=input_shape),
    Activation('relu'),
    MaxPooling2D(pool_size=(2, 2)),

    Conv2D(32, (3, 3)),
    Activation('relu'),
    MaxPooling2D(pool_size=(2, 2)),

    Conv2D(64, (3, 3)),
    Activation('relu'),
    MaxPooling2D(pool_size=(2, 2)),

    Flatten(),
    Dense(64),
    Activation('relu'),
    Dropout(0.5),
    # One output unit per class, so the head always matches the one-hot
    # labels produced from `anno_classes`.
    Dense(len(anno_classes)),
    Activation('softmax'),
])
model.summary()

sgd = SGD(lr = 1e-4)
model.compile(loss='categorical_crossentropy', optimizer=sgd,
              metrics=['accuracy'])

#best_model_file = "./weights.h5"
#best_model = ModelCheckpoint(best_model_file, monitor='val_acc', verbose = 1, save_best_only = True)

# Keras 2 measures an epoch in batches ("steps"), not samples, so convert the
# configured sample counts; max(1, ...) guarantees at least one batch.
steps_per_epoch = max(1, config.n_train_samples // batch_size)
validation_steps = max(1, config.n_validation_samples // batch_size)

model.fit_generator(
        train_generator,
        steps_per_epoch = steps_per_epoch,
        epochs = config.epochs,
        validation_data = validation_generator,
        validation_steps = validation_steps,
        callbacks = [WandbCallback()])