# Importing the relevant libraries

In [0]:
import keras as keras
from keras.models import Sequential, Model
from keras.layers import Dense, Dropout, Flatten, InputLayer
from keras.layers import Conv2D, MaxPooling2D
from keras.utils import to_categorical
from keras.preprocessing import image
from keras.applications.vgg16 import VGG16

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import keras.backend as K
import os
from tqdm import tqdm
%matplotlib inline

# Loading and normalizing data

In [0]:
# Load the annotation table: one row per image, an 'Id' column holding the image
# file stem, a 'Genre' column, and the remaining columns used as labels.
train = pd.read_csv('Multi_Label_dataset/train.csv')

# Label matrix: every column except 'Id' and 'Genre'. Computed once here instead
# of re-dropping the columns on every loop iteration below.
y = train.drop(['Id', 'Genre'],axis=1)

# For every label column, measure how unbalanced it is: the absolute gap between
# the counts of its two most frequent values.
diff=[]

for label in y.columns:
  counts = y[label].value_counts().tolist()
  # A column with fewer than two distinct values has no second count; pad with
  # zeros so it is ranked as maximally unbalanced instead of raising IndexError.
  counts += [0] * (2 - len(counts))
  diff.append([label, np.abs(counts[0] - counts[1])])

# Column 0 holds the label name, column 1 its imbalance; ascending sort puts the
# most balanced labels first.
selected_classes = pd.DataFrame(diff).sort_values(1)

# Load every image, resize it to 224x224 and scale pixel values to [0, 1].
train_image = []
for image_id in tqdm(train['Id']):
    # target_size is (height, width); the channel count is not part of it.
    img = image.load_img('Multi_Label_dataset/Images/'+image_id+'.jpg',target_size=(224,224))
    img = image.img_to_array(img)
    img = img/255
    train_image.append(img)
X = np.array(train_image)

# Functions for compiling a pretrained model, as well as a function for classwise data selection

In [0]:
def get_tpu_model(model):
  """Convert a Keras model to a TPU model when a Colab TPU runtime is available.

  Args:
    model: The Keras model to convert.

  Returns:
    The TPU-converted model, or `model` unchanged (after printing an error)
    when the COLAB_TPU_ADDR environment variable is not set.
  """
  if 'COLAB_TPU_ADDR' not in os.environ:
    print('ERROR: Not connected to a TPU runtime')
    return model

  # Imported here because the notebook's import cell does not import tensorflow
  # and it is only needed on this path. NOTE(review): tf.contrib only exists in
  # TensorFlow 1.x - confirm the runtime version before relying on this branch.
  import tensorflow as tf

  tpu_address = 'grpc://' + os.environ['COLAB_TPU_ADDR']
  print ('TPU address is', tpu_address)

  tpu_model = tf.contrib.tpu.keras_to_tpu_model(
      model,
      strategy=tf.contrib.tpu.TPUDistributionStrategy(
        tf.contrib.cluster_resolver.TPUClusterResolver(tpu_address)))
  return tpu_model


def get_model(freeze=True, tpu=True):
  """Build and compile a binary classifier on top of a VGG16 convolutional base.

  The network is VGG16 without its top classifier (224x224x3 input), followed by
  Flatten, Dense(128, relu), Dropout(0.3), Dense(64, relu), Dropout(0.3) and a
  single sigmoid output unit.

  Args:
    freeze: If True, the VGG16 base is marked non-trainable so only the new
      dense head is trained.
    tpu: If True, the model is passed through `get_tpu_model` before compiling;
      that helper returns the model unchanged when no TPU runtime is found.

  Returns:
    The model compiled with the Adam optimizer, binary cross-entropy loss and
    the accuracy metric.
  """
  model = Sequential()
  model.add(VGG16(include_top=False, input_shape=(224,224,3)))
  model.add(Flatten())
  model.add(Dense(128, activation='relu'))
  model.add(Dropout(0.3))
  model.add(Dense(64, activation='relu'))
  model.add(Dropout(0.3))
  model.add(Dense(1, activation='sigmoid'))

  if freeze:
    # layers[0] is the VGG16 base added first above.
    model.layers[0].trainable = False

  if tpu:
    model = get_tpu_model(model)

  model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
  return model

def get_training_data(class_name):
  """Split the global image array `X` into train/validation/test sets for one label.

  The labels are taken from column `class_name` of the global frame `y`. A
  stratified 80/20 split first sets the test set aside; the remaining 80% is
  split again 80/20 (also stratified) into training and validation sets. Both
  splits use random_state=42.

  Args:
    class_name: Name of the column in `y` to use as the target.

  Returns:
    Tuple (X_train, X_val, X_test, y_train, y_val, y_test).
  """
  labels = np.array(y[class_name])

  # First split: hold out the test set.
  X_rest, X_test, y_rest, y_test = train_test_split(
      X, labels, test_size=0.2, random_state=42, stratify=labels)

  # Second split: carve the validation set out of what is left.
  X_train, X_val, y_train, y_val = train_test_split(
      X_rest, y_rest, test_size=0.2, random_state=42, stratify=y_rest)

  return X_train, X_val, X_test, y_train, y_val, y_test

# Training separate models for each of the classes, accounting for class imbalance, and ensuring that the testing and validation class distributions are similar to the training distribution

In [0]:
from sklearn.metrics import f1_score, label_ranking_average_precision_score, hamming_loss


# Train, evaluate and save one binary classifier per label, most balanced label first.
for c in tqdm(list(selected_classes[0])):
  # Drop the Keras state left over from the previous label's model so memory
  # does not grow with every iteration.
  K.clear_session()

  X_train, X_val, X_test, y_train, y_val, y_test = get_training_data(c)
  model = get_model()

  model.fit(X_train, y_train, epochs=20, validation_data=(X_val, y_val), batch_size=32)
  pred = model.predict(X_val)

  # Bring ground truth and predictions to the same (n_samples, 1) shape: the
  # model has a single sigmoid output while y_val is one-dimensional.
  val_scores = np.asarray(pred).reshape(-1, 1)
  val_true = np.asarray(y_val).reshape(-1, 1)
  val_pred = np.round(val_scores)  # threshold the sigmoid output at 0.5

  # scikit-learn metrics take (y_true, y_pred) in that order.
  print("Classwise f1_score")
  for i in range(val_true.shape[1]):
    print(f1_score(val_true[:, i], val_pred[:, i]))

  # Needs 2-D inputs of identical shape and the raw scores, not rounded labels.
  # NOTE(review): with a single label column this ranking metric is presumably
  # uninformative; kept so the printed report keeps the same sections.
  print("label_ranking_average_precision_score")
  print(label_ranking_average_precision_score(val_true, val_scores))

  print("hamming_loss")
  print(hamming_loss(val_true.ravel(), val_pred.ravel()))

  model.save(c+'_model.h5')