## Importing necessary Libraries

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from tqdm import tqdm
import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing import image
from keras.preprocessing.image import img_to_array, array_to_img
from tensorflow.keras.layers import Flatten, Dense, Dropout, BatchNormalization, Conv2D, MaxPool2D

## Reading Data from .csv file

In [None]:
# Load the label table from disk; the trailing expression echoes its
# (rows, columns) shape in the notebook.
csv_path = "E:/Semester 6/IE 406 - 4.0 - Machine Learning/Project/codes/train.csv"
data = pd.read_csv(csv_path)
data.shape

In [None]:
# Show the first rows of the loaded table.
data.head()

# Image pre-processing
## Since the images come in different sizes, we need to preprocess them and assign a fixed width and height to each of them
Here, we are resizing each image to dimensions of 100 X 100 X 3.

In [None]:
# Every poster is resized to the same height x width so that the images can
# be stacked into one array.
img_width = 100
img_height = 100

image_dir = 'E:/Semester 6/IE 406 - 4.0 - Machine Learning/Project/codes/Images/'

x = []

# Iterate over the Id values themselves instead of looking them up by the
# labels 0..n-1, so the loop does not depend on `data` having a default index.
for poster_id in tqdm(data['Id']):
    path = image_dir + poster_id + '.jpg'
    # target_size is a (height, width) pair; the channel count is not part of it.
    pic = image.load_img(path, target_size=(img_height, img_width))
    pic = image.img_to_array(pic)
    pic = pic / 255.0  # rescale pixels (to [0, 1], assuming 8-bit images)
    x.append(pic)

# Stack the per-image arrays into a single array with a leading sample axis.
X = np.array(x)

In [None]:
# Should be (n_images, img_height, img_width, 3) if every poster loaded as RGB.
X.shape

An example is shown below:

In [None]:
# Display one sample poster together with the value of its 'Genre' column.
sample_idx = 11
print("Genre:", data['Genre'][sample_idx])
plt.imshow(X[sample_idx])
plt.show()

In [None]:
# Label matrix: every column of the table except 'Id' and 'Genre',
# converted to a plain NumPy array.
y = data.drop(columns=['Id', 'Genre']).values
y.shape

## Splitting data into training and validation

In [None]:
# Use the first n samples for training/validation (15% held out for
# validation); the fixed seed keeps the split reproducible.
n = 5000
X_train, X_val, y_train, y_val = train_test_split(
    X[:n],
    y[:n],
    test_size=0.15,
    random_state=0,
)
print("Image size =", X_train[0].shape)

# Building our custom CNN architecture

In [None]:
# Custom CNN: four convolutional stages, two fully connected stages, and a
# sigmoid output layer with one unit per label column.
model = Sequential()

# (number of filters, dropout rate) for each convolutional stage.
conv_stages = [(16, 0.2), (32, 0.3), (64, 0.4), (128, 0.5)]

for stage, (filters, drop_rate) in enumerate(conv_stages):
    # Only the very first layer is given the input image shape.
    first_layer_kwargs = {'input_shape': X_train[0].shape} if stage == 0 else {}
    model.add(Conv2D(filters, (3, 3), activation='relu', **first_layer_kwargs))
    model.add(BatchNormalization())
    model.add(MaxPool2D(2, 2))
    model.add(Dropout(drop_rate))

model.add(Flatten())

# Two identical fully connected stages.
for _ in range(2):
    model.add(Dense(128, activation='relu'))
    model.add(BatchNormalization())
    model.add(Dropout(0.5))

# Output width follows the label matrix instead of a hard-coded 25, so the
# network keeps matching `y_train` if the set of label columns changes.
model.add(Dense(y_train.shape[1], activation='sigmoid'))

In [None]:
# Print the layer-by-layer summary of the network built above.
model.summary()

In [None]:
# Configure training: Adam optimizer with binary cross-entropy loss,
# tracking accuracy as the reported metric.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Train for 10 epochs, passing the held-out split as validation data; the
# returned history object is kept for plotting the learning curves.
history = model.fit(X_train, y_train, epochs=10, validation_data = (X_val, y_val))

# Learning Curve

In [None]:
def plot_learningCurve(history, epoch=None):
    """Plot training vs. validation curves for accuracy and for loss.

    Two figures are shown one after the other: 'Model accuracy' and
    'Model loss'.

    Args:
        history: Object whose ``history`` attribute is a dict containing the
            per-epoch lists 'accuracy', 'val_accuracy', 'loss' and 'val_loss'
            (e.g. the value returned by ``model.fit``).
        epoch: Number of epochs to put on the x-axis. When omitted, it is
            taken from the number of recorded loss values, so the caller does
            not have to keep it in sync with the training run by hand.
    """
    if epoch is None:
        epoch = len(history.history['loss'])

    epoch_range = range(1, epoch + 1)

    # (history key, figure title, y-axis label) for each figure.
    panels = (
        ('accuracy', 'Model accuracy', 'Accuracy'),
        ('loss', 'Model loss', 'Loss'),
    )

    for metric, title, y_label in panels:
        plt.plot(epoch_range, history.history[metric])
        plt.plot(epoch_range, history.history['val_' + metric])
        plt.title(title)
        plt.ylabel(y_label)
        plt.xlabel('Epoch')
        plt.legend(['Train', 'Val'], loc='upper left')
        plt.show()

In [None]:
# 10 matches the number of epochs passed to model.fit.
plot_learningCurve(history,10)

# Testing Data

In [None]:
# Evaluate on the n_test samples that directly follow the first n samples
# used for the train/validation split, so they were not seen during training.
n_test = 100
X_test = X[n:n + n_test]
y_test = y[n:n + n_test]

# X_test is already an ndarray slice, so it is passed to predict() directly
# rather than being copied through np.array() first.
pred = model.predict(X_test)
pred.shape

# Accuracy

In [None]:
def accuracy_score(y_test, pred, k=3):
    """Print and return the top-k hit rate of multi-label predictions.

    A sample counts as a hit when at least one of its ``k`` highest-scoring
    classes is marked with 1 in the corresponding row of ``y_test``.

    Args:
        y_test: 2-D array-like of 0/1 labels, one row per sample. Rows beyond
            ``len(pred)`` are ignored.
        pred: 2-D array-like of per-class scores, one row per sample.
        k: Number of highest-scoring classes considered per sample
            (default 3, the value that used to be hard-coded).

    Returns:
        The hit rate as a fraction in [0, 1]. The same value is also printed
        as a percentage.

    Raises:
        ValueError: If ``pred`` is empty, ``k`` is smaller than 1, or
            ``y_test`` has fewer rows than ``pred``.
    """
    pred = np.asarray(pred)
    y_test = np.asarray(y_test)
    n_samples = len(pred)

    if n_samples == 0:
        raise ValueError("accuracy_score() needs at least one prediction")
    if k < 1:
        # A slice of [-0:] would silently select every class.
        raise ValueError("k must be at least 1")
    if len(y_test) < n_samples:
        raise ValueError("y_test has fewer rows than pred")

    # Indices of the k largest scores in each row (argsort is ascending).
    top_k = np.argsort(pred, axis=1)[:, -k:]

    # For every sample, pick the true labels at those k positions.
    rows = np.arange(n_samples)[:, None]
    hit_per_sample = (y_test[:n_samples][rows, top_k] == 1).any(axis=1)

    acc = int(hit_per_sample.sum()) / n_samples

    print("Accuracy =", acc*100, "%")
    return acc

In [None]:
# Report how often a true genre is among the three highest-scoring predictions.
accuracy_score(y_test, pred)

# Testing images from outside the dataset

In [None]:
def predict_genre(path):
    """Show the image at *path* and print its three highest-scoring genres.

    Relies on the notebook-level ``model`` (the trained network) and ``data``
    (the label table) being defined.

    Args:
        path: Location of the image file to classify.
    """
    # Must match the size the training images were resized to.
    img_width = 100
    img_height = 100

    # target_size is a (height, width) pair; the channel count is not part of it.
    pic = image.load_img(path, target_size=(img_height, img_width))
    plt.imshow(pic)

    pic = image.img_to_array(pic)
    pic = pic / 255.0
    # Add a leading axis so the single image forms a batch of one.
    batch = np.expand_dims(pic, axis=0)

    # Select class names the same way the label matrix is built (drop 'Id'
    # and 'Genre' by name) instead of assuming they are the first two columns.
    classes = data.columns.drop(['Id', 'Genre'])
    y_prob = model.predict(batch)
    # argsort is ascending, so the last three indices are the top scores.
    top_3 = np.argsort(y_prob[0])[-3:]

    print("Predicted Genre are as follows:")
    print()

    for class_idx in top_3:
        print("     ", classes[class_idx])

In [None]:
# Classify a poster stored in the 'extra' folder rather than the dataset's 'Images' folder.
predict_genre('E:/Semester 6/IE 406 - 4.0 - Machine Learning/Project/codes/extra/phir_hera_pheri.jpeg')

In [None]:
# Another poster from the 'extra' folder.
predict_genre('E:/Semester 6/IE 406 - 4.0 - Machine Learning/Project/codes/extra/toystoryposter.jpg')

# Transfer learning
### We also tested the model on book covers; however, since the features of book covers differ from those of movie posters, the predictions are not always right.

In [None]:
# Book-cover image (not a movie poster), used to probe out-of-domain behavior.
predict_genre('E:/Semester 6/IE 406 - 4.0 - Machine Learning/Project/codes/extra/gone_girl.jpeg')

In [None]:
# Second book-cover image, again outside the movie-poster domain.
predict_genre('E:/Semester 6/IE 406 - 4.0 - Machine Learning/Project/codes/extra/the_fault_in_our_stars.jpeg')

# END of CODE