##Installing dependencies for our project

In [None]:
!pip3 install transformers



##Imports
Importing all of the necessary packages for our code.

In [None]:
import pandas as pd
import numpy as np

from transformers import BertTokenizer
from transformers import TFAutoModel

import tensorflow as tf

In [None]:
# Mount Google Drive at /content/drive (Colab only); the dataset CSV is read from
# there and the trained model is saved to and loaded from there later in the notebook.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


###GPU Details

In [None]:
!nvidia-smi

Tue Dec  5 18:55:35 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 525.105.17   Driver Version: 525.105.17   CUDA Version: 12.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   49C    P8    10W /  70W |      0MiB / 15360MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

##Loading the data
Loading the dataset to be able to properly access and manipulate the data.

In [None]:
# Read the raw music CSV from the mounted Drive folder into a DataFrame.
dataset = pd.read_csv(
    filepath_or_buffer='/content/drive/MyDrive/Colab Notebooks/tcc_ceds_music.csv',
)

##Cleaning the data
In this section, we are examining the data, dropping unnecessary columns, dropping any NA values, and dropping any duplicates. Overall, the dataset was pretty clean already.

In [None]:
dataset.shape

(28362, 31)

In [None]:
dataset.head()

Unnamed: 0.1,Unnamed: 0,artist_name,track_name,release_date,genre,lyrics,len,dating,violence,world/life,...,sadness,feelings,danceability,loudness,acousticness,instrumentalness,valence,energy,topic,age
0,0,mukesh,mohabbat bhi jhoothi,1950,pop,hold time feel break feel untrue convince spea...,95,0.000598,0.063746,0.000598,...,0.380299,0.117175,0.357739,0.454119,0.997992,0.901822,0.339448,0.13711,sadness,1.0
1,4,frankie laine,i believe,1950,pop,believe drop rain fall grow believe darkest ni...,51,0.035537,0.096777,0.443435,...,0.001284,0.001284,0.331745,0.64754,0.954819,2e-06,0.325021,0.26324,world/life,1.0
2,6,johnnie ray,cry,1950,pop,sweetheart send letter goodbye secret feel bet...,24,0.00277,0.00277,0.00277,...,0.00277,0.225422,0.456298,0.585288,0.840361,0.0,0.351814,0.139112,music,1.0
3,10,pérez prado,patricia,1950,pop,kiss lips want stroll charm mambo chacha merin...,54,0.048249,0.001548,0.001548,...,0.225889,0.001548,0.686992,0.744404,0.083935,0.199393,0.77535,0.743736,romantic,1.0
4,12,giorgos papadopoulos,apopse eida oneiro,1950,pop,till darling till matter know till dream live ...,48,0.00135,0.00135,0.417772,...,0.0688,0.00135,0.291671,0.646489,0.975904,0.000246,0.597073,0.394375,romantic,1.0


In [None]:
# Remove every row that contains at least one missing value.
dataset = dataset.dropna(axis=0, how='any')

In [None]:
# Keep only the first row for each distinct lyric text.
# Rebinding the name (instead of inplace=True) follows pandas guidance and avoids
# mutating a frame that was derived from another one.
dataset = dataset.drop_duplicates(subset=['lyrics'])

In [None]:
dataset.columns

Index(['Unnamed: 0', 'artist_name', 'track_name', 'release_date', 'genre',
       'lyrics', 'len', 'dating', 'violence', 'world/life', 'night/time',
       'shake the audience', 'family/gospel', 'romantic', 'communication',
       'obscene', 'music', 'movement/places', 'light/visual perceptions',
       'family/spiritual', 'like/girls', 'sadness', 'feelings', 'danceability',
       'loudness', 'acousticness', 'instrumentalness', 'valence', 'energy',
       'topic', 'age'],
      dtype='object')

In [None]:
# Drop the leftover index column and the artist/track/date/genre/length/age columns.
# `columns=` with rebinding replaces the `labels=..., axis=1, inplace=True` form.
dataset = dataset.drop(
    columns=['Unnamed: 0', 'artist_name', 'track_name', 'release_date', 'genre', 'len', 'age'],
)

In [None]:
dataset.head()

Unnamed: 0,lyrics,dating,violence,world/life,night/time,shake the audience,family/gospel,romantic,communication,obscene,...,like/girls,sadness,feelings,danceability,loudness,acousticness,instrumentalness,valence,energy,topic
0,hold time feel break feel untrue convince spea...,0.000598,0.063746,0.000598,0.000598,0.000598,0.048857,0.017104,0.263751,0.000598,...,0.000598,0.380299,0.117175,0.357739,0.454119,0.997992,0.901822,0.339448,0.13711,sadness
1,believe drop rain fall grow believe darkest ni...,0.035537,0.096777,0.443435,0.001284,0.001284,0.027007,0.001284,0.001284,0.001284,...,0.001284,0.001284,0.001284,0.331745,0.64754,0.954819,2e-06,0.325021,0.26324,world/life
2,sweetheart send letter goodbye secret feel bet...,0.00277,0.00277,0.00277,0.00277,0.00277,0.00277,0.158564,0.250668,0.00277,...,0.00277,0.00277,0.225422,0.456298,0.585288,0.840361,0.0,0.351814,0.139112,music
3,kiss lips want stroll charm mambo chacha merin...,0.048249,0.001548,0.001548,0.001548,0.0215,0.001548,0.411536,0.001548,0.001548,...,0.081132,0.225889,0.001548,0.686992,0.744404,0.083935,0.199393,0.77535,0.743736,romantic
4,till darling till matter know till dream live ...,0.00135,0.00135,0.417772,0.00135,0.00135,0.00135,0.46343,0.00135,0.00135,...,0.00135,0.0688,0.00135,0.291671,0.646489,0.975904,0.000246,0.597073,0.394375,romantic


In [None]:
dataset.shape

(28362, 24)

In [None]:
# Names of the per-song score columns, listed in the same order as the dataframe columns.
target_list = [
    'dating',
    'violence',
    'world/life',
    'night/time',
    'shake the audience',
    'family/gospel',
    'romantic',
    'communication',
    'obscene',
    'music',
    'movement/places',
    'light/visual perceptions',
    'family/spiritual',
    'like/girls',
    'sadness',
    'feelings',
    'danceability',
    'loudness',
    'acousticness',
    'instrumentalness',
    'valence',
    'energy',
]

##Parameters
Initialize the parameters for machine learning processes.

In [None]:
seq_len = 128  # number of tokens each lyric is padded/truncated to

num_samples = len(dataset)

# Pre-allocate two zero-filled (num_samples, seq_len) arrays that the tokenizer
# output is written into: Xids for the token ids, Xmask for the attention mask.
Xids = np.zeros(shape=(num_samples, seq_len))
Xmask = np.zeros_like(Xids)

Xids.shape

(28362, 128)

##Tokenizing the lyrics using BERT
To allow the lyrics to be input into the neural network, we utilized BERT's tokenizer to convert all of the words to numerical values.


In [None]:
# Tokenizer that matches the 'bert-base-cased' checkpoint.
tokenizer = BertTokenizer.from_pretrained('bert-base-cased')

# Tokenize every lyric in a single batched call instead of one encode_plus call
# (and one TensorFlow tensor) per row. Each row is truncated/padded to seq_len and
# includes the special tokens ([CLS] at the start, [SEP] separator, [PAD] padding).
encoded = tokenizer(dataset['lyrics'].tolist(), max_length=seq_len, truncation=True,
                    padding='max_length', add_special_tokens=True, return_tensors='np')

# Copy into the pre-allocated buffers: token ids, and the attention mask
# (1 for positions to attend to, 0 otherwise).
Xids[:, :] = encoded['input_ids']
Xmask[:, :] = encoded['attention_mask']

tokenizer_config.json:   0%|          | 0.00/29.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/213k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/436k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

In [None]:
Xids

array([[  101.,  2080.,  1159., ...,     0.,     0.,     0.],
       [  101.,  2059.,  3968., ...,     0.,     0.,     0.],
       [  101., 14266.,  3952., ...,     0.,     0.,     0.],
       ...,
       [  101.,  1243.,  8214., ...,     0.,     0.,     0.],
       [  101.,  4031.,  4031., ...,     0.,     0.,     0.],
       [  101.,  9879.,  7010., ...,     0.,     0.,     0.]])

In [None]:
Xmask

array([[1., 1., 1., ..., 0., 0., 0.],
       [1., 1., 1., ..., 0., 0., 0.],
       [1., 1., 1., ..., 0., 0., 0.],
       ...,
       [1., 1., 1., ..., 0., 0., 0.],
       [1., 1., 1., ..., 0., 0., 0.],
       [1., 1., 1., ..., 0., 0., 0.]])

##Making the label
In order to make the label, we will one-hot encode the topics, which is what our model is trying to predict.

In [None]:
dataset['topic'].unique()

array(['sadness', 'world/life', 'music', 'romantic', 'violence',
       'obscene', 'night/time', 'feelings'], dtype=object)

In [None]:
# One row per sample and one column per distinct topic, all zeros for now.
labels = np.zeros(shape=(num_samples, dataset['topic'].nunique(dropna=False)))

labels.shape

(28362, 8)

In [None]:
# Integer code per row for its topic; codes are assigned in order of first appearance.
factorized_topic = dataset['topic'].factorize()[0]

In [None]:
factorized_topic

array([0, 1, 2, ..., 5, 5, 5])

In [None]:
# One-hot encode for classification: in row i, set the column given by factorized_topic[i] to 1.
sample_rows = np.arange(num_samples)
labels[sample_rows, factorized_topic] = 1
labels

array([[1., 0., 0., ..., 0., 0., 0.],
       [0., 1., 0., ..., 0., 0., 0.],
       [0., 0., 1., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 1., 0., 0.],
       [0., 0., 0., ..., 1., 0., 0.],
       [0., 0., 0., ..., 1., 0., 0.]])

##Creating the Tensorflow Dataset with slices

In order to compile and fit the model, we need to combine our data slices into a TensorFlow dataset.

In [None]:
# Wrap the three arrays in a tf.data.Dataset that yields one (ids, mask, label) triple per sample.
tf_ds = tf.data.Dataset.from_tensor_slices(tensors=(Xids, Xmask, labels))

tf_ds.take(1)

<_TakeDataset element_spec=(TensorSpec(shape=(128,), dtype=tf.float64, name=None), TensorSpec(shape=(128,), dtype=tf.float64, name=None), TensorSpec(shape=(8,), dtype=tf.float64, name=None))>

In [None]:
Xids[0,:].shape

(128,)

In [None]:
labels[0,:].shape

(8,)

In [None]:
def map_func(input_ids, masks, labels):
  """Regroup one dataset element into a (features, target) pair.

  Args:
    input_ids: token ids of one sample.
    masks: attention mask of the same sample.
    labels: one-hot label of the same sample.

  Returns:
    A tuple `(features, labels)` where `features` is a dict with the keys
    'input_ids' and 'attention_mask' (matching the model's input layer names).
  """
  features = dict(input_ids=input_ids, attention_mask=masks)
  return features, labels

In [None]:
# Convert every (ids, mask, label) element into the ({inputs}, label) structure via map_func.
tf_ds = tf_ds.map(map_func=map_func)

tf_ds.take(1)

<_TakeDataset element_spec=({'input_ids': TensorSpec(shape=(128,), dtype=tf.float64, name=None), 'attention_mask': TensorSpec(shape=(128,), dtype=tf.float64, name=None)}, TensorSpec(shape=(8,), dtype=tf.float64, name=None))>

In [None]:
batch_size = 64  # number of samples per training batch

# Shuffle once, then batch, dropping the final partial batch.
# reshuffle_each_iteration=False keeps the order fixed for every pass over the data.
# The train/validation split is made with take()/skip() on this dataset, so a new
# shuffle on each epoch would move samples between the two sets and leak validation
# data into training.
tf_ds = (
    tf_ds
    .shuffle(10000, reshuffle_each_iteration=False)
    .batch(batch_size, drop_remainder=True)
)

tf_ds.take(1)

<_TakeDataset element_spec=({'input_ids': TensorSpec(shape=(64, 128), dtype=tf.float64, name=None), 'attention_mask': TensorSpec(shape=(64, 128), dtype=tf.float64, name=None)}, TensorSpec(shape=(64, 8), dtype=tf.float64, name=None))>

##Splitting the tf ds (tensorflow dataset) into training set & validation set

Using a 90/10 split, we split our dataset into training and validation sets.

In [None]:
split = 0.9

# Number of batches assigned to training; the batches after that form the validation set.
train_batches = int((num_samples / batch_size) * split)
train_ds = tf_ds.take(train_batches)
val_ds = tf_ds.skip(train_batches)

# The combined dataset is no longer needed once it has been split.
del tf_ds

##Function which returns our model with the input, model and output layers



1.   making the input ids & mask
2.   using bert we're embedding our input as layers
3.   with the embedding we're making our output layers as x & y
4.   then we'll create a neural network model with input & output
5.   we're making our optimizer, loss & accuracy functions and returning it after compiling



In [None]:
def giveOurModel():
  """Build and compile the BERT-based topic classifier.

  The model takes two int32 inputs of length `seq_len` ('input_ids' and
  'attention_mask'), passes them through the pretrained 'bert-base-cased'
  encoder, and feeds element [1] of the encoder output (presumably the pooled
  representation -- confirm against the transformers docs) into a 512-unit ReLU
  layer followed by a softmax with one unit per distinct `dataset['topic']` value.

  Relies on the notebook globals `seq_len` and `dataset`.

  Returns:
    The compiled `tf.keras.Model` (Adam with an exponentially decaying learning
    rate, categorical cross-entropy loss, categorical accuracy metric).
  """
  # Input layers; the names match the keys of the feature dict in the dataset.
  input_ids = tf.keras.layers.Input(shape=(seq_len,), name='input_ids', dtype='int32')
  mask = tf.keras.layers.Input(shape=(seq_len,), name='attention_mask', dtype='int32')

  # Pretrained encoder.
  bert = TFAutoModel.from_pretrained('bert-base-cased')
  embeddings = bert.bert(input_ids, attention_mask=mask)[1]

  # Classification head.
  x = tf.keras.layers.Dense(512, activation='relu')(embeddings)
  y = tf.keras.layers.Dense(len(dataset['topic'].unique()), activation='softmax', name='outputs')(x)

  model = tf.keras.Model(inputs=[input_ids, mask], outputs=y)

  # summary() prints the table itself and returns None, so it is not wrapped in print().
  model.summary()

  # NOTE(review): ExponentialDecay multiplies the rate by decay_rate every
  # decay_steps steps, so decay_rate=1e-6 shrinks the learning rate very
  # aggressively -- confirm this value is intended.
  lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
      initial_learning_rate=1e-5,
      decay_steps=10000,
      decay_rate=1e-6)
  optimizer = tf.keras.optimizers.Adam(learning_rate=lr_schedule)
  loss = tf.keras.losses.CategoricalCrossentropy()
  acc = tf.keras.metrics.CategoricalAccuracy('accuracy')
  model.compile(optimizer=optimizer, loss=loss, metrics=[acc])

  return model

##Parallelising our model

We use TensorFlow's `MultiWorkerMirroredStrategy` to distribute the training of a deep learning model across multiple workers in a distributed computing environment. This strategy is used for training large models on multiple GPUs or across multiple machines.

In [None]:
# The experimental alias is deprecated; TensorFlow's own warning says to use
# tf.distribute.MultiWorkerMirroredStrategy instead.
strategy = tf.distribute.MultiWorkerMirroredStrategy()

# Build and compile the model inside the strategy scope so that its variables are
# created under the distribution strategy.
with strategy.scope():
  model = giveOurModel()

Instructions for updating:
use distribute.MultiWorkerMirroredStrategy instead


model.safetensors:   0%|          | 0.00/436M [00:00<?, ?B/s]

Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFBertModel: ['cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias']
- This IS expected if you are initializing TFBertModel from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertModel from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
All the weights of TFBertModel were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertModel for predictions w

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_ids (InputLayer)      [(None, 128)]                0         []                            
                                                                                                  
 attention_mask (InputLayer  [(None, 128)]                0         []                            
 )                                                                                                
                                                                                                  
 bert (TFBertMainLayer)      TFBaseModelOutputWithPooli   1083102   ['input_ids[0][0]',           
                             ngAndCrossAttentions(last_   72         'attention_mask[0][0]']      
                             hidden_state=(None, 128, 7                                       

Let's break down what's happening:

1. **Creating a `MultiWorkerMirroredStrategy` instance:**
   ```python
   strategy = tf.distribute.experimental.MultiWorkerMirroredStrategy()
   ```
   Here, we're creating a strategy object that will be used to distribute the training across multiple workers. This strategy is specifically designed for synchronous training, where each worker computes gradients based on a portion of the data and then communicates with others to update the model.

2. **Defining the model within the strategy's scope:**
   ```python
   with strategy.scope():
       model = giveOurModel()
   ```
   Within the `with strategy.scope():` block, we define our deep learning model (`giveOurModel()`). The `strategy.scope()` is used to ensure that the variables and computations related to the model are distributed and synchronized across all the workers.

In simple terms, this code sets up a strategy for distributed training, allowing the deep learning model to be trained simultaneously on multiple workers, which can be different GPUs or even separate machines. The `with strategy.scope():` block ensures that the model is created and trained in a way that takes advantage of the distributed computing resources. This can lead to faster training times, especially for large models and datasets.

##Training our model

In [None]:
# Train for three epochs on the training batches, evaluating on the validation batches after each epoch.
history = model.fit(
    x=train_ds,
    validation_data=val_ds,
    epochs=3,
)

Epoch 1/3
Epoch 2/3
Epoch 3/3


##Saving our model

In [None]:
# Persist the trained model to the mounted Drive folder.
model.save(filepath='/content/drive/MyDrive/Colab Notebooks/music_classification_model')

##Loading our model from pre-saved

In [None]:
# Reload the model that was saved to Drive and show its layer summary.
loaded_model = tf.keras.models.load_model(
    filepath='/content/drive/MyDrive/Colab Notebooks/music_classification_model',
)

loaded_model.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_ids (InputLayer)      [(None, 128)]                0         []                            
                                                                                                  
 attention_mask (InputLayer  [(None, 128)]                0         []                            
 )                                                                                                
                                                                                                  
 bert (TFBertMainLayer)      TFBaseModelOutputWithPooli   1083102   ['input_ids[0][0]',           
                             ngAndCrossAttentions(last_   72         'attention_mask[0][0]']      
                             hidden_state=(None, 128, 7                                       

##Making a sample function `prep_data` for tokenizing for our test data

In [None]:
tokenizer = BertTokenizer.from_pretrained('bert-base-cased')

def prep_data(text):
  """Tokenize one lyric string into the input dict passed to the model's predict().

  Args:
    text: the lyric text to tokenize.

  Returns:
    A dict with 'input_ids' and 'attention_mask', each produced by the tokenizer
    with truncation/padding to 128 tokens and cast to tf.float64.
  """
  # The keyword is `return_token_type_ids` (plural); the misspelled form was not
  # recognized by the tokenizer and only produced a warning on every call.
  tokens = tokenizer.encode_plus(text, max_length=128, truncation=True,
                                 padding='max_length', add_special_tokens=True,
                                 return_token_type_ids=False, return_tensors='tf')
  return {'input_ids': tf.cast(tokens['input_ids'], tf.float64), 'attention_mask': tf.cast(tokens['attention_mask'], tf.float64)}

##Loading the topics

In [None]:
# Class names in label-index order. The hard-coded list matches the recorded output of
# dataset['topic'].unique() (order of first appearance), which is also the order in
# which pd.factorize numbered the classes when the labels were built.
topic = [
    'sadness',
    'world/life',
    'music',
    'romantic',
    'violence',
    'obscene',
    'night/time',
    'feelings',
]

##Sample test data `lyrics`

In [None]:
# Sample lyrics, each tokenized with prep_data into a model-ready input dict.
# Per the original author's note, these songs come from the same dataset but were
# not learnt or validated on -- NOTE(review): the train/validation split is made
# from the full shuffled dataset, so confirm these rows were really held out.
# The variable names are presumably the genre of each song.
pop = prep_data("know baby hang touch weak strong know cope down stay need roses right arm sweet thorns know touch weak strong know cope down stay need touch weak strong know cope down stay need touch weak strong know cope down stay need")

country = prep_data("fall feel touch real mind reelin round cause feel today fall fall know gonna fall try thinkin ease mind mean fall fall know gonna fall think stay maybe longer")

blues = prep_data("splay canker brain bone decay little remain heart beat feel restrain live go gonna shoot cold hand misfortune pass long time render senseless gold hear stick limbo perch atop throne live go better fast cold hand splay canker brain boones decay little remain heart beat feel restrain live go gonna shoot cold hand live go better fast cold hand cold hand cold hand")

jazz = prep_data("leave lonely believe lonely somebody night time right time kiss night time time reminisce regret instead forget somebody intend independently blue want want borrow today tomorrow")

reggae = prep_data("rough rider cool stroker strong whiner hard night hard night hard night night night night feel break today feel break today lord feel mash today strong whiner rough rider whiney whiney night know yesterday know today strong whiner rough rider whiney whiney night chop night wear brush tonight say wear brush tonight")

rock = prep_data("race cross heavens straight look like comet slice morn scorch horizon blaze land hand stand exciter salvation task stand exciter salvation bid touch fry crisp close smoke smolder blister singe ignition hit soul cringe stand exciter salvation task stand exciter come fall knees repent exciter come taste tongue come snap state look light selfindulgence result shatter eye predominant complacency lead beguile lie stand exciter salvation task stand exciter salvation bid leap amidst combustive dance shall brand thermal cauterize mass melt order stand exciter salvation task stand exciter come fall knees repent stand exciter stand exciter stand exciter stand exciter")

hip_hop = prep_data("spit minute watch team money hurt competition start clinic repertoire live long haul stop ballin like chamique holdsclaw single black female outlaw want hardcore brother want pedestal garbage residue rybody talk righteous responsible busy judge judge know scriptures play doubt gettin bout wait right crazy catch send hollars people lafayette woods booker thousand strong start name baby test go baby phase baby test go baby baby truth recognize fight fact track better guy hella tight bedstuy come comin head high fresh zone night rip mics lonely alright stop livin life fullest thinkin positive killers sound stupid ready wasn henny cause prove fakers thinkin twice cause moment truth right raisin rything stake eyebrows want gotta come crowd pity read story take shoot dumb sharpie spot like catch future dismal sound miserable clown world cause pitiful")

pop_1 = prep_data("lovin true things plan need help hand understand days fair hour year year lovin true things plan need help hand understand days fair hour year")

pop_2 = prep_data("baby lord feel touch summertime love fall home ball feel right feel right feel right feel right feel right feel right feel baby feel feel baby feel wanna clap hand wanna clap hand feel feel feel right feel right feel right feel right")

pop_3 = prep_data("oldfashioned song playin radio wrap music sound promise swear hear slowly ramble need bringin cause go oldfashioned song sure write oldfashioned song comin threepart weave dream listen even light underscore affair tenderness feel come know swear hear slowly ramble need bringin cause go oldfashioned song comin threepart oldfashioned song sure write oldfashioned song comin threepart oldfashioned song sure write weave dream listen song song comin song sure write song comin song oldfashioned song sure write oldfashioned song song comin comin threepart oldfashioned song song sure write oldfashioned song song comin comin threepart oldfashioned song song sure write")

Keyword arguments {'return_token_type_id': False} not recognized.
Keyword arguments {'return_token_type_id': False} not recognized.
Keyword arguments {'return_token_type_id': False} not recognized.
Keyword arguments {'return_token_type_id': False} not recognized.
Keyword arguments {'return_token_type_id': False} not recognized.
Keyword arguments {'return_token_type_id': False} not recognized.
Keyword arguments {'return_token_type_id': False} not recognized.
Keyword arguments {'return_token_type_id': False} not recognized.
Keyword arguments {'return_token_type_id': False} not recognized.
Keyword arguments {'return_token_type_id': False} not recognized.


##Print the predicted outcome

In [None]:
# Predict the class probabilities for the sample and print the most likely topic.
probs_pop = loaded_model.predict(pop)
print("probs_pop", topic[probs_pop[0].argmax()])  # recorded output: romantic

probs_pop romantic


In [None]:
# Predict the class probabilities for the sample and print the most likely topic.
probs_country = loaded_model.predict(country)
print("probs_country", topic[probs_country[0].argmax()])  # recorded output: sadness

probs_country sadness


In [None]:
# Predict the class probabilities for the sample and print the most likely topic.
probs_blues = loaded_model.predict(blues)
print("probs_blues", topic[probs_blues[0].argmax()])  # recorded output: violence

probs_blues violence


In [None]:
# Predict the class probabilities for the sample and print the most likely topic.
probs_jazz = loaded_model.predict(jazz)
print("probs_jazz", topic[probs_jazz[0].argmax()])  # recorded output: night/time

probs_jazz night/time


In [None]:
# Predict the class probabilities for the sample and print the most likely topic.
probs_reggae = loaded_model.predict(reggae)
print("probs_reggae", topic[probs_reggae[0].argmax()])  # recorded output: night/time

probs_reggae night/time


In [None]:
# Predict the class probabilities for the sample and print the most likely topic.
probs_rock = loaded_model.predict(rock)
print("probs_rock", topic[probs_rock[0].argmax()])  # recorded output: violence

probs_rock violence


In [None]:
# Predict the class probabilities for the sample and print the most likely topic.
probs_hip_hop = loaded_model.predict(hip_hop)
print("probs_hip_hop", topic[probs_hip_hop[0].argmax()])  # recorded output: obscene

probs_hip_hop obscene


In [None]:
# Predict the class probabilities for the sample and print the most likely topic.
probs_pop_1 = loaded_model.predict(pop_1)
print("probs_pop_1", topic[probs_pop_1[0].argmax()])  # recorded output: world/life

probs_pop_1 world/life


In [None]:
# Predict the class probabilities for the sample and print the most likely topic.
probs_pop_2 = loaded_model.predict(pop_2)
print("probs_pop_2", topic[probs_pop_2[0].argmax()])  # recorded output: feelings

probs_pop_2 feelings


In [None]:
# Predict the class probabilities for the sample and print the most likely topic.
probs_pop_3 = loaded_model.predict(pop_3)
print("probs_pop_3", topic[probs_pop_3[0].argmax()])  # recorded output: music

probs_pop_3 music
