# Assignment 2 - Cascading Meta Learning
 
**Authors:**

1.   Liav Bachar 205888472
2.   Naor Kolet 205533060


# 0. Imports

In [136]:
import pandas as pd
import numpy as np



# TensorFlow
import tensorflow as tf

from tensorflow.keras.layers import Flatten #Dense, Conv2D, Dropout, MaxPool2D
from tensorflow.keras.layers import Input#BatchNormalization, Concatenate
from tensorflow.keras.models import Model, load_model
# from tensorflow.keras.utils import to_categorical
# from tensorflow.keras.applications import ResNet50V2
from tensorflow.keras.applications import VGG16, ResNet50V2
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Scikit-learn
from sklearn.model_selection import train_test_split
# from sklearn.metrics import confusion_matrix, accuracy_score
# from sklearn.model_selection import StratifiedKFold
# from sklearn.linear_model import LogisticRegression
# from sklearn.metrics import log_loss
 
# Plots
# import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.ticker as mtick

# Misc.
from scipy.io import loadmat
import os
import random
import joblib
import cv2
from glob import glob
from tqdm import tqdm_notebook as tqdm

%matplotlib inline

In [2]:
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)

# Download Data

In [3]:
# Another dataset? https://www.tensorflow.org/tutorials/load_data/images
if not os.path.exists(r'./datasets/'):
    !mkdir ./datasets
    !wget 'https://www.robots.ox.ac.uk/~vgg/data/flowers/102/102flowers.tgz' -P './datasets/'
    !tar -xf ./datasets/102flowers.tgz -C ./datasets/
    
#     !wget 'https://www.robots.ox.ac.uk/~vgg/data/flowers/102/102segmentations.tgz' -P './datasets/'
    
    !wget 'https://www.robots.ox.ac.uk/~vgg/data/flowers/102/imagelabels.mat' -P './datasets/'
    
labels = loadmat('./datasets/imagelabels.mat')['labels'].reshape(-1)

## Load Images

In [28]:
def load_image(image_path):
    img = cv2.imread(image_path)/255
    img_shape = img.shape
    del img
    
    return img_shape

In [13]:
images_path = glob('./datasets/jpg/*')

In [29]:
shapes = {load_image(img) for img in images_path}
print(shapes)

{(501, 530, 3), (501, 541, 3), (715, 500, 3), (568, 501, 3), (699, 500, 3), (500, 599, 3), (547, 500, 3), (535, 500, 3), (773, 501, 3), (611, 500, 3), (500, 724, 3), (768, 500, 3), (731, 500, 3), (599, 500, 3), (500, 735, 3), (583, 500, 3), (501, 743, 3), (501, 503, 3), (756, 500, 3), (667, 501, 3), (501, 648, 3), (832, 500, 3), (500, 550, 3), (680, 500, 3), (500, 561, 3), (804, 500, 3), (652, 500, 3), (500, 686, 3), (500, 697, 3), (551, 501, 3), (501, 705, 3), (530, 500, 3), (688, 500, 3), (518, 500, 3), (594, 500, 3), (767, 500, 3), (500, 754, 3), (751, 500, 3), (582, 500, 3), (500, 765, 3), (500, 659, 3), (501, 533, 3), (815, 500, 3), (501, 667, 3), (500, 648, 3), (500, 793, 3), (739, 500, 3), (500, 542, 3), (501, 522, 3), (663, 500, 3), (508, 501, 3), (651, 500, 3), (635, 500, 3), (503, 500, 3), (884, 500, 3), (500, 727, 3), (500, 716, 3), (500, 872, 3), (501, 746, 3), (500, 610, 3), (500, 755, 3), (750, 500, 3), (500, 784, 3), (565, 500, 3), (500, 667, 3), (500, 689, 3), (500, 678

In [6]:
split_seed = 42
# 50% 25% 25%
train_paths, val_tst_paths, train_labels, val_tst_labels = train_test_split(images_path, labels, train_size=0.5, random_state=split_seed, shuffle=True, stratify=labels)
val_paths, tst_paths, val_labels, tst_labels = train_test_split(val_tst_paths, val_tst_labels, train_size=0.5, random_state=split_seed, shuffle=True, stratify=val_tst_labels)
len(train_paths),len(val_paths),len(tst_paths),

(4094, 2047, 2048)

## Image Data Generator

In [165]:
train_df, val_df, test_df = pd.DataFrame({'filename': train_paths, 'class':train_labels}), pd.DataFrame({'filename': val_paths, 'class':val_labels}), pd.DataFrame({'filename': tst_paths, 'class':tst_labels})
train_df, val_df, test_df = train_df.astype(str), val_df.astype(str), test_df.astype(str)


data_gen_train = ImageDataGenerator(
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    horizontal_flip=True,
    rescale=1./255)

data_gen_val = ImageDataGenerator(rescale=1./255)

data_gen_test = ImageDataGenerator(rescale=1./255)

# flow_from_dataframe

In [166]:
def datagen_flow(data_gen, df, batch_size, resize_shape, classes):
    return data_gen.flow_from_dataframe(df, batch_size=batch_size, target_size=resize_shape, seed=SEED, validate_filenames=False, classes=classes)

In [170]:
classes = list(np.unique(labels).astype(str))
data_loader_train = datagen_flow(data_gen_train, train_df, 32, (224,224), classes)
data_loader_val = datagen_flow(data_gen_val, val_df, 32, (224,224), classes)
data_loader_test = datagen_flow(data_gen_test, test_df, 32, (224,224), classes)

Found 4094 non-validated image filenames belonging to 102 classes.
Found 2047 non-validated image filenames belonging to 102 classes.
Found 2048 non-validated image filenames belonging to 102 classes.


In [156]:
next(data_gen_train)

(array([[[[0.18239248, 0.2079222 , 0.08811671],
          [0.18211056, 0.20735836, 0.08896249],
          [0.18182863, 0.20679452, 0.08980826],
          ...,
          [0.23342219, 0.21233319, 0.09120187],
          [0.2190052 , 0.19849901, 0.07768755],
          [0.22000447, 0.20227319, 0.08698365]],
 
         [[0.17274772, 0.19245492, 0.0845621 ],
          [0.17162004, 0.19076337, 0.08315249],
          [0.17049234, 0.18907185, 0.08174287],
          ...,
          [0.27934837, 0.25434795, 0.10674928],
          [0.26832753, 0.24336746, 0.0966552 ],
          [0.26509625, 0.24112155, 0.09617415]],
 
         [[0.159003  , 0.17468928, 0.07641077],
          [0.15815724, 0.17384352, 0.0766927 ],
          [0.15731147, 0.17299774, 0.07697462],
          ...,
          [0.32225284, 0.3017586 , 0.11072445],
          [0.3217042 , 0.29747137, 0.10863381],
          [0.3100024 , 0.2824108 , 0.09413042]],
 
         ...,
 
         [[0.0627451 , 0.07058824, 0.0509804 ],
          [0.06274

# Pre-trained models

In [148]:
def get_feature_extractor(base_model, width=224, height=224, channel=3):
    
    model = base_model(input_shape=(width, height, channel), include_top=False)
    
    for layer in model.layers:
        layer.trainable = False
    
    inp = model.input
    X = model.output
    
    return Model(model.input, model.output, name=base_model.__name__)


In [149]:
vgg_fe = get_feature_extractor(VGG16)
resent_fe = get_feature_extractor(ResNet50V2)