In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# Neural Networks

# Import

In [2]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.metrics import classification_report


# Load

In [3]:
# Load dataset
train = pd.read_csv('/content/drive/MyDrive/ML dataset /train.csv')
test = pd.read_csv('/content/drive/MyDrive/ML dataset /test.csv')

# Split

In [4]:
X_train = train.drop(columns=[col for col in train.columns if isinstance(col, str) and col.startswith('genre_')])
y_train = train[[col for col in train.columns if isinstance(col, str) and col.startswith('genre_')]]
X_test = test.drop(columns=[col for col in test.columns if isinstance(col, str) and col.startswith('genre_')])
y_test = test[[col for col in test.columns if isinstance(col, str) and col.startswith('genre_')]]

In [5]:
print(X_train.shape)
print(y_train.shape)
print(X_test.shape)
print(y_test.shape)

(130629, 1515)
(130629, 19)
(32658, 1515)
(32658, 19)


# Training

In [6]:
# Convert training and test datasets to int8
X_train = X_train.astype(np.int8)
y_train = y_train.astype(np.int8)
X_test = X_test.astype(np.int8)
y_test = y_test.astype(np.int8)

In [7]:
# Define the model architecture
model = Sequential([
    Dense(512, activation='relu', input_shape=(1515,)),
    Dropout(0.3),
    Dense(256, activation='relu'),
    Dropout(0.3),
    Dense(128, activation='relu'),
    Dropout(0.3),
    Dense(19, activation='sigmoid')  # 19 output units for multilabel classification
])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [8]:
# Compile the model with binary cross-entropy loss and Adam optimizer
model.compile(optimizer=Adam(learning_rate=0.001),
              loss='binary_crossentropy',
              metrics=['accuracy'])

early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

# Train the model with early stopping
history = model.fit(
    X_train, y_train,
    epochs=100,  # Set high enough to allow potential improvement
    batch_size=128,
    validation_data=(X_test, y_test),
    callbacks=[early_stopping]
)

# Evaluate the model on the test set
test_loss, test_accuracy = model.evaluate(X_test, y_test)
print(f"Test Loss: {test_loss}")
print(f"Test Accuracy: {test_accuracy}")


Epoch 1/100
[1m1021/1021[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 25ms/step - accuracy: 0.2221 - loss: 0.3110 - val_accuracy: 0.2615 - val_loss: 0.2603
Epoch 2/100
[1m1021/1021[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 25ms/step - accuracy: 0.2628 - loss: 0.2652 - val_accuracy: 0.2780 - val_loss: 0.2583
Epoch 3/100
[1m1021/1021[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 24ms/step - accuracy: 0.2676 - loss: 0.2622 - val_accuracy: 0.2816 - val_loss: 0.2569
Epoch 4/100
[1m1021/1021[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 24ms/step - accuracy: 0.2761 - loss: 0.2605 - val_accuracy: 0.2769 - val_loss: 0.2558
Epoch 5/100
[1m1021/1021[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 24ms/step - accuracy: 0.2781 - loss: 0.2599 - val_accuracy: 0.2859 - val_loss: 0.2551
Epoch 6/100
[1m1021/1021[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 23ms/step - accuracy: 0.2819 - loss: 0.2590 - val_accuracy: 0.2838 - val_loss: 0.255

In [9]:
# Predict probabilities for the test set
y_pred_proba = model.predict(X_test)

# Convert probabilities to binary predictions (threshold = 0.5)
y_pred = np.round(y_pred_proba)
# Extract the genre column names and remove the 'genre_' prefix
genre_columns = [col.replace('genre_', '') for col in train.columns if isinstance(col, str) and col.startswith('genre_')]
# Print classification report
print(classification_report(y_test, y_pred, target_names=genre_columns))

[1m1021/1021[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4ms/step
                 precision    recall  f1-score   support

         Action       0.68      0.04      0.08      2988
      Adventure       0.60      0.01      0.01      1681
      Animation       0.63      0.03      0.05      2603
         Comedy       0.71      0.01      0.02      8162
          Crime       0.00      0.00      0.00      2220
    Documentary       0.60      0.03      0.06      5998
          Drama       0.58      0.25      0.35     12017
         Family       0.00      0.00      0.00      1683
        Fantasy       0.00      0.00      0.00      1434
        History       0.00      0.00      0.00      1002
         Horror       0.00      0.00      0.00      2961
          Music       0.00      0.00      0.00      1993
        Mystery       0.00      0.00      0.00      1279
        Romance       0.00      0.00      0.00      3280
Science Fiction       1.00      0.00      0.00      1360
       TV 

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
