In [4]:
# Install the gdown library (used below to download files from Google Drive).
# Uncomment the next line only if gdown is missing from the kernel's environment;
# %pip (rather than !pip) installs into the environment the kernel is running in.
# %pip install gdown

In [5]:
import pickle
import gdown
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import load_model
from sklearn.metrics import accuracy_score

## Part 1
Download the preprocessed test set and labels saved in Google Drive:

In [None]:
# Fetch the preprocessed test set from Google Drive into the working directory.
destination = "./test_data.npy"
gdown.download(
    url="https://drive.google.com/uc?id=1AhOyj5JJKfp3rfPtj38dk4lQGSYe1b8r",
    output=destination,
    quiet=False,
)

In [None]:
# Fetch the test labels from Google Drive into the working directory.
destination = "./labels_test.npy"
gdown.download(
    url="https://drive.google.com/uc?id=1wUB_cbm-IsW-t1CFPYXJSWPXyNWzO4vF",
    output=destination,
    quiet=False,
)

In [8]:
# Read the two downloaded .npy files back from disk (labels first, then the test set).
labels_test, test_data = np.load("labels_test.npy"), np.load("test_data.npy")


### 1st best model : On the fly embeddings model on lyrics only

Download the model from Google Drive

In [None]:
# Fetch the saved on-the-fly embeddings model (lyrics only) from Google Drive.
destination = "./on_the_fly_embeddings_lyrics_only.h5"
gdown.download(
    url="https://drive.google.com/uc?id=1JM4fj9Z5AL56WM6MrFxDs1wp9Coml98i",
    output=destination,
    quiet=False,
)

In [10]:
# Restore the on-the-fly embeddings model from the downloaded .h5 file.
model_on_the_fly = load_model(filepath="on_the_fly_embeddings_lyrics_only.h5")



In [11]:
# Display the loaded model object as a quick check that loading succeeded.
model_on_the_fly

<Sequential name=sequential, built=True>

The model is evaluated on the test set

In [12]:
# Score the on-the-fly embeddings model on the test set; the result is unpacked
# as the loss followed by the accuracy.
on_the_fly_metrics = model_on_the_fly.evaluate(x=test_data, y=labels_test)
on_the_fly_loss, on_the_fly_accuracy = on_the_fly_metrics
print("On the fly Embeddings Accuracy:", on_the_fly_accuracy)
print("On the fly Embeddings Loss:", on_the_fly_loss)

[1m363/363[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.4253 - loss: 1.8255
On the fly Embeddings Accuracy: 0.4222815930843353
On the fly Embeddings Loss: 1.860418677330017


Training accuracy: 72%

Validation accuracy: 61%

As we can see, the test accuracy of nearly 42% is well below both the training accuracy (72%) and the validation accuracy (61%), which indicates that the model is overfitting.

### 2nd best model : Pre trained Embeddings

Download the model from Google Drive:

In [None]:
# Fetch the saved pretrained-embeddings model (lyrics only) from Google Drive.
destination = "./pretrained_embeddings_lyrics_only.h5"
gdown.download(
    url="https://drive.google.com/uc?id=1CX4n5D3QXihbEUtzx8EmEDdK8EKQ9pJ9",
    output=destination,
    quiet=False,
)

In [14]:
# Restore the pretrained-embeddings model from the downloaded .h5 file.
model_pretrained_embeddings = load_model(filepath="pretrained_embeddings_lyrics_only.h5")



In [15]:
# Display the loaded model object as a quick check that loading succeeded.
model_pretrained_embeddings

<Sequential name=sequential, built=True>

Download the pretrained test Embeddings

In [None]:
# Fetch the pretrained test embeddings from Google Drive into the working directory.
destination = "./embeddings_test.npy"
gdown.download(
    url="https://drive.google.com/uc?id=1RpKGsXHCXyGd4ArekwXfHkPkTkE0kK_k",
    output=destination,
    quiet=False,
)

In [17]:
# Read the downloaded pretrained test embeddings back from disk.
embeddings_test = np.load(file="embeddings_test.npy")

The model is evaluated on the test set:

In [18]:
# Score the pretrained-embeddings model on the test embeddings; the result is
# unpacked as the loss followed by the accuracy.
pretrained_embeddings_loss, pretrained_embeddings_accuracy = model_pretrained_embeddings.evaluate(embeddings_test, labels_test)
# The labels previously read "On the fly Embeddings" (copied from the first model's
# cell), which misattributed these numbers to the wrong model.
print("Pretrained Embeddings Accuracy:", pretrained_embeddings_accuracy)
print("Pretrained Embeddings Loss:", pretrained_embeddings_loss)

[1m363/363[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.5745 - loss: 1.1803
On the fly Embeddings Accuracy: 0.5709115862846375
On the fly Embeddings Loss: 1.1998653411865234


Training accuracy: 57%

Validation accuracy: 56%

As we can see, the test accuracy of nearly 57% is in line with the training accuracy (57%) and the validation accuracy (56%), so the model is not overfitting.

## Part 2
Load the dataset:

In [19]:
from keras.datasets import cifar100
from sklearn.model_selection import train_test_split

In [20]:
# Load the CIFAR-100 training split with fine-grained labels
# (the second tuple returned by load_data is discarded here).
(x_all, y_all), (_, _) = cifar100.load_data(label_mode='fine')

# Getting the unique class labels in the dataset
classes = np.unique(y_all)

# Use a seeded generator so the class partition, and therefore the reported
# test metrics, are identical on every Restart & Run All.
# NOTE(review): the saved CNN was presumably trained on one specific class
# selection; confirm that this seed reproduces it, otherwise the evaluation
# below is run on a different set of classes than the model was trained on.
rng = np.random.default_rng(42)

# Randomly selecting 50 classes for the first block
selected_classes = rng.choice(classes, size=50, replace=False)

# Reshaping the labels to a 1D array so np.isin yields a 1D boolean mask
y_train = y_all.reshape(-1)

# Block 1: every sample whose label is one of the selected classes
selected_indices = np.isin(y_train, selected_classes)
x_block1 = x_all[selected_indices]
y_block1 = y_all[selected_indices]

# Block 2: every class not used by block 1. With 100 classes these are exactly
# the other 50, so a second random draw of 50 from them would only permute
# the same set and is not needed.
remaining_classes = np.setdiff1d(classes, selected_classes)
selected_classes = remaining_classes

selected_indices = np.isin(y_train, selected_classes)
x_block2 = x_all[selected_indices]
y_block2 = y_all[selected_indices]

In [21]:
# Hold out 20% of each block as its test portion; the fixed random_state keeps
# the split repeatable for a given input.
split_options = {"test_size": 0.2, "random_state": 42}
(x_block1_train, x_block1_test,
 y_block1_train, y_block1_test) = train_test_split(x_block1, y_block1, **split_options)
(x_block2_train, x_block2_test,
 y_block2_train, y_block2_test) = train_test_split(x_block2, y_block2, **split_options)

### Best CNN Model
Download the model from Google Drive

In [None]:
# Fetch the saved CNN skip-connection model from Google Drive.
destination = "./cnn_skip_connection.h5"
gdown.download(
    url="https://drive.google.com/uc?id=1OXyroo2b8mbwWOu7DeqhXrMQZkGluGAk",
    output=destination,
    quiet=False,
)

In [23]:
# Restore the CNN skip-connection model from the downloaded .h5 file.
model_cnn_skip_connection = load_model(filepath="cnn_skip_connection.h5")



In [24]:
# Display the loaded model object as a quick check that loading succeeded.
model_cnn_skip_connection

<Sequential name=sequential, built=True>

The model is evaluated

In [25]:
# Score the CNN on the held-out portion of block 1; the result is unpacked as
# the loss followed by the accuracy.
cnn_skip_connection_metrics = model_cnn_skip_connection.evaluate(x=x_block1_test, y=y_block1_test)
cnn_skip_connection_loss, cnn_skip_connection_accuracy = cnn_skip_connection_metrics
print("CNN Skip Connection Accuracy:", cnn_skip_connection_accuracy)
print("CNN Skip Connection Loss:", cnn_skip_connection_loss)

[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 31ms/step - accuracy: 0.3940 - loss: 14.3027
CNN Skip Connection Accuracy: 0.39820000529289246
CNN Skip Connection Loss: 14.383540153503418


Training accuracy: 72%

Validation accuracy: 51%

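As we can see, the test accuracy of nearly 40% is well below both the training accuracy (72%) and the validation accuracy (51%), which indicates that the model is overfitting. Note that the test loss (about 14.4) is also unusually high, so it is worth confirming that the test images and the selected classes match those used during training.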

### Best AutoEncoder Model

I uploaded the best autoencoder model to Google Drive. Here is the link to it (https://drive.google.com/file/d/1zBkwg0vOy_w7MiyVPpOd30RzmRu--h9d/view?usp=drive_link).
However, running the load-model command threw an index-out-of-range error. I tried debugging it and also loading the model in both Google Colab and Jupyter Notebook, but it was not successful. This might be due to saving the model with the .keras extension; I had also previously tried saving it with the .h5 extension, but that didn't work either.

The model gave the results listed below when it was trained on the dataset.

Training accuracy: 46.7%

Validation accuracy: 46.85%