In [1]:
import tensorflow as tf

# Sanity check of the environment: report the installed TensorFlow version and
# how many physical GPU devices TensorFlow can see.
print("TensorFlow Version:", tf.__version__)
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))

TensorFlow Version: 2.13.0
Num GPUs Available:  0


In [2]:
# Basic Libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Machine Learning Libraries
from sklearn.model_selection import train_test_split
from tqdm import tqdm

# Deep Learning Libraries
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Conv2D,MaxPool2D,Dense,Flatten,BatchNormalization,Dropout
from tensorflow.keras.utils import load_img, img_to_array
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [3]:
# Enable on-demand GPU memory allocation instead of letting TensorFlow reserve
# all device memory up front.
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    try:
        # Memory growth needs to be the same across GPUs, so configure every
        # physical device first ...
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        # ... and only then list the logical devices. Listing them initializes
        # the runtime, after which set_memory_growth() raises RuntimeError, so
        # this must happen once after the loop rather than inside it.
        logical_gpus = tf.config.list_logical_devices('GPU')
        print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
    except RuntimeError as e:
        # Memory growth must be set before GPUs have been initialized
        print(e)

1 Physical GPUs, 1 Logical GPUs


2023-10-07 19:17:52.899275: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M1
2023-10-07 19:17:52.899344: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 16.00 GB
2023-10-07 19:17:52.899362: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 5.33 GB
2023-10-07 19:17:52.899495: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:306] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2023-10-07 19:17:52.899729: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:272] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


In [3]:
# Dataset source: https://www.kaggle.com/datasets/raman77768/movie-classifier
# The paths used in this notebook expect it under ./movie_dataset/.

# Load the training table. Later cells read its 'Id' column to locate poster
# images and drop 'Id'/'Genre' to obtain the label matrix.
movies = pd.read_csv('movie_dataset/train.csv') # Dataset containing genres with movie poster IDs

In [4]:
# Build the image tensor X: one 350x350 RGB poster per row of `movies`,
# scaled to [0, 1] and stored as float32.

# Preallocate the final array instead of collecting per-image arrays in a list
# and converting afterwards; the list approach keeps two full copies of the
# data alive at the moment of conversion.
X = np.empty((movies.shape[0], 350, 350, 3), dtype='float32')

# Iterate over the ids positionally so the loop does not rely on `movies`
# having a default 0..n-1 index.
for i, movie_id in enumerate(tqdm(movies['Id'])):
    path = 'movie_dataset/Images/' + movie_id + '.jpg'

    # Load the image resized to 350x350 pixels. target_size is (height, width);
    # the channel count comes from load_img's color mode (RGB by default).
    image = load_img(path, target_size=(350, 350))

    # Convert to an array and normalize pixel values from [0, 255] to [0, 1]
    image_array = img_to_array(image) / 255

    # Store the image in its row of the preallocated tensor
    X[i] = image_array

100%|██████████| 7254/7254 [00:10<00:00, 696.92it/s]


In [5]:
# Label matrix: every column of `movies` except the poster id and the raw
# genre text, converted to a NumPy array.
y = movies.drop(columns=['Id', 'Genre']).to_numpy()

In [6]:
# Hold out 10% of the posters; the fixed random_state keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.1,
    random_state=40,
)

In [7]:
# CNN for multi-label genre prediction: three Conv -> BatchNorm -> MaxPool ->
# Dropout stages with 16/32/64 filters, followed by a dense head.
model = Sequential()

# Stage 1. The input shape is taken from the training tensor (everything but
# the batch axis) so it follows whatever image size X was built with.
model.add(Conv2D(16,kernel_size=(3,3),activation='relu',input_shape=X_train.shape[1:]))
model.add(BatchNormalization())
model.add(MaxPool2D(2,2))
model.add(Dropout(0.3))

# Stage 2
model.add(Conv2D(32,kernel_size=(3,3),activation='relu'))
model.add(BatchNormalization())
model.add(MaxPool2D(2,2))
model.add(Dropout(0.3))

# Stage 3 (slightly stronger dropout)
model.add(Conv2D(64,kernel_size=(3,3),activation='relu'))
model.add(BatchNormalization())
model.add(MaxPool2D(2,2))
model.add(Dropout(0.4))

model.add(Flatten())

# Dense head
model.add(Dense(128,activation='relu'))
model.add(BatchNormalization())
model.add(Dropout(0.4))

# One sigmoid unit per label column, so each genre gets an independent
# probability. The width is derived from y_train instead of a hard-coded 25
# so the model stays in sync with the label matrix.
model.add(Dense(y_train.shape[1],activation='sigmoid'))

In [8]:
# Print the layer-by-layer architecture with output shapes and parameter counts.
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 348, 348, 16)      448       
                                                                 
 batch_normalization (Batch  (None, 348, 348, 16)      64        
 Normalization)                                                  
                                                                 
 max_pooling2d (MaxPooling2  (None, 174, 174, 16)      0         
 D)                                                              
                                                                 
 dropout (Dropout)           (None, 174, 174, 16)      0         
                                                                 
 conv2d_1 (Conv2D)           (None, 172, 172, 32)      4640      
                                                                 
 batch_normalization_1 (Bat  (None, 172, 172, 32)      1

---

## Optimization and Adjustment

In [28]:
# Show the label names: every column of `movies` after the first two.
movies.columns[2:]

Index(['Action', 'Adventure', 'Animation', 'Biography', 'Comedy', 'Crime',
       'Documentary', 'Drama', 'Family', 'Fantasy', 'History', 'Horror',
       'Music', 'Musical', 'Mystery', 'N/A', 'News', 'Reality-TV', 'Romance',
       'Sci-Fi', 'Short', 'Sport', 'Thriller', 'War', 'Western'],
      dtype='object')

In [19]:
from sklearn.utils.class_weight import compute_class_weight

# Per-label balancing weights for the multi-label targets.
# y_train is used as a 2D array of shape (n_samples, n_labels); for each label
# column the 0/1 values are weighted with scikit-learn's 'balanced' heuristic.
#
# Results:
#   class_weights  - {label_index: {0: weight_for_0, 1: weight_for_1}}
#   sample_weights - array shaped like y_train holding, per sample and label,
#                    the weight of that sample's value for that label

# Label names in the same column order that was used to build y
# (all columns of `movies` except 'Id' and 'Genre').
label_names = movies.columns.drop(['Id', 'Genre'])

n_labels = y_train.shape[1]
# compute_class_weight documents `classes` as an ndarray, not a list.
binary_classes = np.array([0, 1])

class_weights = {}
sample_weights = np.ones(y_train.shape)

for i in range(n_labels):
    # Extract the column for label i and compute its class weights
    y_train_label_i = y_train[:, i]
    weights = compute_class_weight('balanced', classes=binary_classes, y=y_train_label_i)

    # Store the weights in a dictionary
    class_weights[i] = {0: weights[0], 1: weights[1]}

    # Convert the class weights of this label into per-sample weights
    sample_weights[:, i] = np.where(y_train_label_i == 1, weights[1], weights[0])

# Manual adjustments, addressed by label name rather than by column position
# so they keep pointing at the right genre if the column order changes.
# Lower the Drama class weight by 50%
sample_weights[:, label_names.get_loc('Drama')] *= 0.5
# Lower the Romance class weight by 75%
sample_weights[:, label_names.get_loc('Romance')] *= 0.25


In [12]:
from tensorflow.keras.callbacks import ReduceLROnPlateau
from tensorflow.keras.optimizers.legacy import Adam

# Legacy Adam implementation with a small starting learning rate.
optimizer = Adam(learning_rate=1e-4)

# Multiply the learning rate by 0.2 whenever val_loss has not improved for
# 3 epochs, never going below 1e-6; verbose=1 logs each reduction.
reduced_lr = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.2,
    patience=3,
    min_lr=1e-6,
    verbose=1,
)

# Binary cross-entropy matches the per-label sigmoid outputs of the model.
model.compile(
    optimizer=optimizer,
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

---

In [13]:
from tensorflow.keras.callbacks import ModelCheckpoint

# Checkpoint callback: write the model to 'best_model.h5' only when val_loss
# improves on the best value seen so far.
checkpoint = ModelCheckpoint(
    filepath='best_model.h5',
    monitor='val_loss',
    save_best_only=True,
)

In [14]:
# General (unweighted) training run: 3 epochs, batches of 32, evaluated on the
# held-out split after each epoch. The callbacks save the best model and
# reduce the learning rate when val_loss plateaus.
history = model.fit(
    x=X_train,
    y=y_train,
    batch_size=32,
    epochs=3,
    validation_data=(X_test, y_test),
    callbacks=[checkpoint, reduced_lr],
)

Epoch 1/3
Epoch 2/3


  saving_api.save_model(


Epoch 3/3
