## MSDI Combined Analysis

In [1]:
import pandas as pd
import numpy as np
import tables
import h5py
import pickle

import math
import random

import os, sys, glob
from pathlib import Path

import keras
from keras import layers
from keras import Sequential
from keras.layers import Dense, Activation, Flatten, Conv2D, Dropout, MaxPooling2D, BatchNormalization

import tensorflow as tf

from sklearn.model_selection import train_test_split

import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
import requests

def download_file_from_google_drive(id, destination):
    """Download a shared Google Drive file and write it to `destination`.

    Parameters
    ----------
    id : str
        File id taken from the Drive share link. The name shadows the
        builtin `id`; it is kept so existing keyword callers keep working.
    destination : str or path-like
        Local path to write; an existing file is overwritten.

    Raises
    ------
    requests.HTTPError
        If Drive answers either request with an error status. Nothing is
        written to `destination` in that case.
    """
    URL = "https://drive.google.com/u/0/uc?export=download"
    CHUNK_SIZE = 3276800

    def get_confirm_token(response):
        # Drive signals "confirmation required" through a cookie whose name
        # starts with 'download_warning'; its value is the confirm token.
        for key, value in response.cookies.items():
            if key.startswith('download_warning'):
                return value

        return None

    def save_response_content(response, destination):
        with open(destination, "wb") as f:
            for chunk in response.iter_content(CHUNK_SIZE):
                if chunk: # filter out keep-alive new chunks
                    f.write(chunk)

    # The context manager releases pooled connections once the download ends.
    with requests.Session() as session:
        response = session.get(URL, params = { 'id' : id }, stream = True)
        # Fail loudly instead of saving an error page as if it were the file.
        response.raise_for_status()
        token = get_confirm_token(response)

        if token:
            response.close()
            params = { 'id' : id, 'confirm' : token }
            response = session.get(URL, params = params, stream = True)
            response.raise_for_status()

        try:
            save_response_content(response, destination)
        finally:
            response.close()




In [3]:
# Fetch the pickled table of shortlisted tracks.
# TAKE ID FROM SHAREABLE LINK (the path segment after /d/)
#https://drive.google.com/file/d/1wduOo4DLWGEAF44odjv6BQlsxYsO_0c6/view?usp=sharing
file_id = r'1wduOo4DLWGEAF44odjv6BQlsxYsO_0c6'
# DESTINATION FILE ON YOUR DISK
destination = 'shortlisted_tracks_df.pkl' #pickle4 (presumably pickle protocol 4 -- TODO confirm)
download_file_from_google_drive(file_id, destination)

In [4]:
# Fetch the zip archive that is unpacked into msd_data/ by the next cell.
# TAKE ID FROM SHAREABLE LINK (the path segment after /d/)
#https://drive.google.com/file/d/1dhgh0ZPptiqBFHZ1vPhBU6-GH39k2NPY/view?usp=sharing
file_id = r'1dhgh0ZPptiqBFHZ1vPhBU6-GH39k2NPY'
# DESTINATION FILE ON YOUR DISK
destination = 'msd_data.zip'
download_file_from_google_drive(file_id, destination)

In [5]:
# Remove any earlier extraction first so a re-run starts from a clean msd_data/ tree.
!rm -rf msd_data
!unzip -q msd_data.zip

In [6]:
# Fetch the pickled lyric table.
# TAKE ID FROM SHAREABLE LINK (the path segment after /d/)
#https://drive.google.com/file/d/1T3uoNvxdC-u7ImJf8xOkqRssQio_WmKf/view?usp=sharing
file_id = r'1T3uoNvxdC-u7ImJf8xOkqRssQio_WmKf'
# DESTINATION FILE ON YOUR DISK
destination = 'lyric_df.pkl' #pickle4 (presumably pickle protocol 4 -- TODO confirm)
download_file_from_google_drive(file_id, destination)

In [7]:
# Fetch the zip archive that is unpacked into aart_data/ by the next cell.
# TAKE ID FROM SHAREABLE LINK (the path segment after /d/)
#https://drive.google.com/file/d/16VKAXPeAbqbOSrfZp2SSBajh3KezftuM/view?usp=sharing
file_id = r'16VKAXPeAbqbOSrfZp2SSBajh3KezftuM'
# DESTINATION FILE ON YOUR DISK
destination = 'aart_data.zip'
download_file_from_google_drive(file_id, destination)

In [8]:
# Remove any earlier extraction first so a re-run starts from a clean aart_data/ tree.
!rm -rf aart_data
!unzip -q aart_data.zip

In [9]:
# Load the shortlisted-track table that was downloaded above.
# NOTE(review): pickle.load can execute arbitrary code; only use it on files you trust.
with open('shortlisted_tracks_df.pkl', 'rb') as pickle_file:
    shortlisted_tracks_df = pickle.load(pickle_file)

In [10]:
# Preview the table: indexed by TrackId with a single Genre column.
shortlisted_tracks_df

Unnamed: 0_level_0,Genre
TrackId,Unnamed: 1_level_1
TREQVVI128F427C38E,Country
TRYCYSD128F148CF20,Country
TRLCZET12903D03F70,Country
TRJORPT128F42BAA8D,Country
TROBJYA128F42A2984,Country
...,...
TRQFZEO128E07930BC,Rock
TRNSWUN128F4247F87,Rock
TRLENOU128F4281E94,Rock
TRDTSQP128F421C665,Rock


In [11]:
# Shuffle every row (frac=1) with a fixed seed so the track order is reproducible.
track_genre_df = shortlisted_tracks_df.sample(frac=1, random_state=1)
track_genre_df

Unnamed: 0_level_0,Genre
TrackId,Unnamed: 1_level_1
TRQNZED128F42968DB,Electronic
TRNXIPI128F93038E3,Rock
TRMXKFV128F428EFF3,Pop
TRZBVCE128F92C2951,Metal
TRNOFGZ128E07860D0,Rap
...,...
TRIARZC12903CB4BC4,Rap
TRBNQWY128F4257C00,Pop
TRTSSYQ128F42BCDD4,Electronic
TRDXJEQ128F9336A21,RnB


In [12]:
# Number of tracks, i.e. rows of the shuffled table.
track_count = len(track_genre_df.index)
track_count

4900

In [13]:
# Distinct genre names in order of first appearance; a genre's position in
# this list is used as its class index further down.
genre_labels = track_genre_df['Genre'].unique().tolist()
genre_count = len(genre_labels)
print(genre_labels, "count: ", genre_count)

['Electronic', 'Rock', 'Pop', 'Metal', 'Rap', 'RnB', 'Country'] count:  7


In [14]:
# Load the lyric table that was downloaded above.
# NOTE(review): pickle.load can execute arbitrary code; only use it on files you trust.
with open('lyric_df.pkl', 'rb') as pickle_file:
    lyric_df = pickle.load(pickle_file)

In [15]:
# Preview the table: indexed by TrackId, with Genre and a per-track `wordvec` list of term counts.
lyric_df

Unnamed: 0_level_0,Genre,wordvec
TrackId,Unnamed: 1_level_1,Unnamed: 2_level_1
TREQVVI128F427C38E,Country,"[8, 3, 7, 4, 9, 2, 3, 3, 2, 2, 7, 1, 0, 0, 7, ..."
TRYCYSD128F148CF20,Country,"[14, 4, 37, 0, 3, 0, 1, 0, 0, 3, 4, 0, 4, 3, 2..."
TRLCZET12903D03F70,Country,"[5, 17, 0, 6, 8, 14, 1, 0, 0, 8, 4, 1, 7, 0, 0..."
TRJORPT128F42BAA8D,Country,"[16, 3, 14, 7, 1, 6, 3, 2, 9, 0, 0, 4, 0, 3, 7..."
TROBJYA128F42A2984,Country,"[13, 5, 10, 8, 4, 2, 0, 1, 7, 1, 0, 1, 2, 2, 5..."
...,...,...
TRQFZEO128E07930BC,Rock,"[35, 7, 23, 10, 5, 1, 0, 9, 6, 0, 0, 4, 0, 5, ..."
TRNSWUN128F4247F87,Rock,"[8, 9, 0, 4, 3, 6, 0, 0, 0, 8, 1, 3, 0, 0, 2, ..."
TRLENOU128F4281E94,Rock,"[0, 0, 0, 0, 0, 24, 62, 0, 0, 0, 0, 0, 0, 0, 0..."
TRDTSQP128F421C665,Rock,"[19, 12, 0, 5, 7, 0, 9, 7, 0, 1, 0, 0, 2, 1, 1..."


In [16]:
# One-hot genre targets: row r has a single 1 in the column of track r's genre.
msdi_genres = np.zeros((track_count, genre_count), dtype=np.float32)
genre_columns = [genre_labels.index(genre) for genre in track_genre_df['Genre']]
msdi_genres[np.arange(len(genre_columns)), genre_columns] = 1

In [17]:
# Hold out 30% of the tracks for testing.
# The same test_size and random_state are passed to every later feature split;
# the notebook relies on that to keep feature rows aligned with these labels.
modelling_test_size = 0.3
y_train, y_test = train_test_split(msdi_genres, test_size=modelling_test_size, random_state=1)

In [18]:
mfcc_count = 12
# seg_count and track_seg_counts are not used by the mean pooling below; they
# are kept because they are notebook-level names.
seg_count = 512
track_seg_counts = []

# One row per track: the mean over segments of the 'segments_timbre' vectors.
msdi_mfcc_values = np.zeros((track_count, mfcc_count), dtype=np.float32)

for i, track_id in enumerate(track_genre_df.index.values):
    # Files are sharded by the 3rd, 4th and 5th characters of the track id.
    filepath = 'msd_data/' + track_id[2] + '/' + track_id[3] + '/' + track_id[4] + '/' + track_id + '.h5'
    try:
        # The context manager closes the HDF5 file even when reading raises.
        with h5py.File(filepath, 'r') as hfile:
            track_mfcc_tmp = np.array(hfile['analysis']['segments_timbre'])
        if track_mfcc_tmp.shape[0] == 0:
            # The mean of zero segments is NaN; leave the row at zero instead.
            print('no segments in file: ', filepath)
            continue
        msdi_mfcc_values[i] = np.mean(track_mfcc_tmp, axis=0)
    except (RuntimeError, TypeError, NameError, OSError):
        # Best effort: an unreadable file leaves this track's row all-zero.
        print('failed for file: ', filepath)

In [19]:
# Split with the same size and seed as the labels, then drop the reference to the full array.
x_train_mfcc, x_test_mfcc = train_test_split(msdi_mfcc_values, test_size=modelling_test_size, random_state=1)
msdi_mfcc_values = None

In [20]:
# seg_count is not used by the mean pooling below; it is kept because it is a
# notebook-level name.
seg_count = 1024
chroma_count = 12

# One row per track: the mean over segments of the 'segments_pitches' vectors.
msdi_chroma_values = np.zeros((track_count, chroma_count), dtype=np.float32)

for i, track_id in enumerate(track_genre_df.index.values):
    # Files are sharded by the 3rd, 4th and 5th characters of the track id.
    filepath = 'msd_data/' + track_id[2] + '/' + track_id[3] + '/' + track_id[4] + '/' + track_id + '.h5'
    try:
        # The context manager closes the HDF5 file even when reading raises.
        with h5py.File(filepath, 'r') as hfile:
            track_chroma_tmp = np.array(hfile['analysis']['segments_pitches'])
        if track_chroma_tmp.shape[0] == 0:
            # The mean of zero segments is NaN; leave the row at zero instead.
            print('no segments in file: ', filepath)
            continue
        msdi_chroma_values[i] = np.mean(track_chroma_tmp, axis=0)
    except (RuntimeError, TypeError, NameError, OSError):
        # Best effort: an unreadable file leaves this track's row all-zero.
        print('failed for file: ', filepath)

In [21]:
# Split with the same size and seed as the labels, then drop the reference to the full array.
x_train_chroma, x_test_chroma = train_test_split(msdi_chroma_values, test_size=modelling_test_size, random_state=1)
msdi_chroma_values = None

In [22]:
# Raw term counts, one row per track, in the shuffled track order.
wordvec_len = 5000

lyric_values = np.zeros((track_count, wordvec_len), dtype=np.float32)

for i, track_id in enumerate(track_genre_df.index.values):
    # First lyric_df row whose index equals this track id.
    track_wordvec = lyric_df.loc[lyric_df.index == track_id, 'wordvec'].iloc[0]
    lyric_values[i] = np.asarray(track_wordvec)

In [23]:
#TF-IDF
# Vectorised: whole-array operations replace the per-element Python loops.
term_present = lyric_values > 0

# Document frequency: the number of tracks in which each term occurs.
lyric_values_docfreqs = np.sum(term_present, axis=0).astype(np.float32)

# Total counted words per track (the term-frequency denominator).
lyric_values_docwordcounts = np.sum(lyric_values, axis=1)

# idf = log(N / df). Terms that occur in no track keep idf 0; they never
# contribute because their term frequency is 0 everywhere.
lyric_values_idocfreqs = np.zeros((wordvec_len), dtype=np.float32)
term_seen = lyric_values_docfreqs > 0
lyric_values_idocfreqs[term_seen] = np.log(track_count / lyric_values_docfreqs[term_seen])

# tf = count / words-in-track, written only where the count is positive so a
# track with no counted words is never divided by zero.
lyric_values_tf_idf = np.zeros((track_count, wordvec_len), dtype=np.float32)
np.divide(lyric_values, lyric_values_docwordcounts[:, np.newaxis],
          out=lyric_values_tf_idf, where=term_present)
lyric_values_tf_idf *= lyric_values_idocfreqs

In [24]:
# Inspect the per-term document frequencies; a 0 entry is a term that occurs in no track.
lyric_values_docfreqs

array([3985., 4229., 3804., ...,   12.,   22.,    0.], dtype=float32)

In [25]:
#TF-IGM (Inverse Gravity Moment)
# Vectorised: whole-array operations replace the per-element Python loops.
#
# NOTE(review): each track's weights are looked up with that track's own genre
# label, and the class statistics are accumulated over every track before the
# train/test split. The features of held-out tracks therefore depend on their
# true labels, so test accuracy measured on them is optimistic. Fitting the
# statistics on training rows only and using a label-free per-term weight
# would remove this leak.
tf_igm_lambda = 7

# Class index of every track, read back from the one-hot label matrix.
genre_indices = np.argmax(msdi_genres, axis=1)

# term_class_specific_frequency[t, g]: total count of term t over the tracks of genre g.
term_class_specific_frequency = np.zeros((wordvec_len, genre_count), dtype=np.float32)
for genre_index in range(genre_count):
    in_genre = genre_indices == genre_index
    term_class_specific_frequency[:, genre_index] = np.sum(
        lyric_values[in_genre], axis=0, dtype=np.float64)

# Per term: sum over r of r * (r-th largest class frequency), r = 1..genre_count.
rank_weights = np.arange(1, genre_count + 1, dtype=np.float32)
ranked_frequencies = np.sort(term_class_specific_frequency, axis=1)[:, ::-1]
sum_ranked_term_class_specific_frequency = np.sum(
    ranked_frequencies * rank_weights, axis=1, dtype=np.float32)

# Weight 1 + lambda * f[t, g] / sum_ranked[t]. Terms that never occur keep a
# ratio of 0, which avoids dividing by zero.
ranked_sum_column = sum_ranked_term_class_specific_frequency[:, np.newaxis]
class_share = np.zeros((wordvec_len, genre_count), dtype=np.float32)
np.divide(term_class_specific_frequency, ranked_sum_column,
          out=class_share, where=ranked_sum_column > 0)
igm_weights = 1 + tf_igm_lambda * class_share

# sqrt(tf), written only where the count is positive; tf uses the per-track
# word totals (lyric_values_docwordcounts) from the TF-IDF cell.
lyric_values_tf_rigm = np.zeros((track_count, wordvec_len), dtype=np.float32)
np.divide(lyric_values, lyric_values_docwordcounts[:, np.newaxis],
          out=lyric_values_tf_rigm, where=lyric_values > 0)
np.sqrt(lyric_values_tf_rigm, out=lyric_values_tf_rigm)

# Scale each track's row by the weight column of that track's genre.
for genre_index in range(genre_count):
    in_genre = genre_indices == genre_index
    lyric_values_tf_rigm[in_genre] *= igm_weights[:, genre_index]

In [26]:
# Inspect the rank-weighted class-frequency sums; a 0 entry is a term that occurs in no track.
sum_ranked_term_class_specific_frequency

array([1.89586e+05, 1.62000e+05, 1.52979e+05, ..., 1.40000e+01,
       4.70000e+01, 0.00000e+00], dtype=float32)

In [27]:
# Only the TF-IGM features are split and used for modelling; the TF-IDF matrix is discarded unused.
x_train_lyric, x_test_lyric = train_test_split(lyric_values_tf_rigm, test_size=modelling_test_size, random_state=1)
# Drop the references to the full-size lyric matrices.
lyric_values = None
lyric_values_tf_idf = None
lyric_values_tf_rigm = None

In [28]:
image_row = 128
image_col = 128
image_ch = 3

# One resized cover image per track, rescaled per image to the range [-1, 1].
msdi_aart_images = np.zeros((track_count, image_row, image_col, image_ch), dtype=np.float32)

for i, track_id in enumerate(track_genre_df.index.values):
    # Files are sharded by the 3rd, 4th and 5th characters of the track id.
    filepath = 'aart_data/' + track_id[2] + '/' + track_id[3] + '/' + track_id[4] + '/' + track_id + '.jpg'
    raw_img = tf.io.read_file(filepath)
    # NOTE(review): decode_jpeg is called without `channels`; presumably a
    # non-RGB cover relies on broadcasting into the 3-channel slot -- confirm.
    img_decoded = tf.image.decode_jpeg(raw_img)
    img_tmp = tf.image.resize(img_decoded, [image_row, image_col])

    pixels = img_tmp.numpy()
    immax = pixels.max()
    immin = pixels.min()
    # A constant image has zero range; divide by 1 so it maps to -1 everywhere.
    imrange = (immax-immin) if immax > immin else 1

    img_norm = (img_tmp - immin)/imrange
    msdi_aart_images[i] = ((img_norm * 2) - 1)

In [29]:
# Split with the same size and seed as the labels, then drop the reference to the full image array.
x_train_aart, x_test_aart = train_test_split(msdi_aart_images, test_size=modelling_test_size, random_state=1)
msdi_aart_images = None

In [38]:
def get_mfcc_model(input_shape, num_classes):
  """Build the uncompiled dense classifier for the MFCC features.

  The stack is Dense(256) -> Dense(512) -> Dropout(0.25) -> Dense(512) ->
  Dropout(0.5) -> Dense(num_classes, softmax). Hidden layers use ReLU and
  every Dense layer uses the 'he_uniform' initializer.

  input_shape: shape of one sample, without the batch dimension.
  num_classes: number of units in the softmax output layer.
  """
  hidden = dict(activation='relu', kernel_initializer='he_uniform')
  return Sequential([
    Dense(256, input_shape=input_shape, **hidden),
    Dense(512, **hidden),
    Dropout(0.25),
    Dense(512, **hidden),
    Dropout(0.5),
    Dense(num_classes, activation='softmax', kernel_initializer='he_uniform'),
  ])

In [39]:
def get_chroma_model(input_shape, num_classes):
  """Build the uncompiled dense classifier for the chroma features.

  Layers, in order: Dense(256), Dense(512), Dropout(0.25), Dense(512),
  Dropout(0.5), Dense(num_classes, softmax). Hidden layers are ReLU and all
  Dense layers are initialised with 'he_uniform'.

  input_shape: shape of one sample, without the batch dimension.
  num_classes: number of units in the softmax output layer.
  """
  init = 'he_uniform'
  layer_stack = [
    Dense(256, input_shape=input_shape, activation='relu', kernel_initializer=init),
    Dense(512, activation='relu', kernel_initializer=init),
    Dropout(0.25),
    Dense(512, activation='relu', kernel_initializer=init),
    Dropout(0.5),
    Dense(num_classes, activation='softmax', kernel_initializer=init),
  ]
  return Sequential(layer_stack)

In [40]:
def get_lyric_model(input_shape, num_classes):
  """Build the uncompiled dense classifier for the lyric features.

  Three ReLU Dense layers of 1024 units, with Dropout(0.25) after the second
  and Dropout(0.5) after the third, followed by a softmax Dense layer of
  `num_classes` units. All Dense layers use the 'he_uniform' initializer.

  input_shape: shape of one sample, without the batch dimension.
  num_classes: number of units in the softmax output layer.
  """
  def hidden_layer(**extra):
    # A 1024-unit ReLU layer; `extra` carries input_shape for the first layer.
    return Dense(1024, activation='relu', kernel_initializer='he_uniform', **extra)

  return Sequential([
    hidden_layer(input_shape=input_shape),
    hidden_layer(),
    Dropout(0.25),
    hidden_layer(),
    Dropout(0.5),
    Dense(num_classes, activation='softmax', kernel_initializer='he_uniform'),
  ])

In [41]:
def get_aart_model(input_shape, num_classes):
  """Build the uncompiled CNN classifier for the album-art images.

  Two 3x3 Conv2D layers with 64 filters, 2x2 max pooling, Dropout(0.25),
  Flatten, Dense(128), Dropout(0.5) and a softmax Dense layer of
  `num_classes` units. Conv and hidden Dense layers are ReLU; every weighted
  layer uses the 'he_uniform' initializer.

  input_shape: shape of one image, without the batch dimension.
  num_classes: number of units in the softmax output layer.
  """
  relu_he = dict(activation='relu', kernel_initializer='he_uniform')
  return Sequential([
    Conv2D(64, kernel_size=(3, 3), input_shape=input_shape, **relu_he),
    Conv2D(64, kernel_size=(3, 3), **relu_he),
    MaxPooling2D(pool_size=(2,2)),
    Dropout(0.25),
    Flatten(),
    Dense(128, **relu_he),
    Dropout(0.5),
    Dense(num_classes, activation='softmax', kernel_initializer='he_uniform'),
  ])

In [42]:
# Sanity check of the training matrix shape: (training tracks, MFCC coefficients).
x_train_mfcc.shape

(3430, 12)

In [43]:
# One classifier per modality; each input shape is the shape of a single
# training sample.
mfcc_model = get_mfcc_model(x_train_mfcc[0].shape, genre_count)
chroma_model = get_chroma_model(x_train_chroma[0].shape, genre_count)
lyric_model = get_lyric_model(x_train_lyric[0].shape, genre_count)
aart_model = get_aart_model(x_train_aart[0].shape, genre_count)

# Every model gets its own, identically configured Adam optimizer.
mfcc_optimizer, chroma_optimizer, lyric_optimizer, aart_optimizer = (
    tf.keras.optimizers.Adam(2e-4, beta_1=0.5) for _ in range(4)
)

# Loss shared by all four models.
loss_obj = keras.losses.categorical_crossentropy

def total_loss(mfcc_y_pred, mfcc_y, chroma_y_pred, chroma_y, lyric_y_pred, lyric_y, aart_y_pred, aart_y):
  """Return the four per-model losses as a (mfcc, chroma, lyric, aart) tuple.

  Each loss is `loss_obj(target, prediction)` for the matching pair of
  arguments. Note that the signature lists the prediction before the target.
  """
  target_prediction_pairs = (
    (mfcc_y, mfcc_y_pred),
    (chroma_y, chroma_y_pred),
    (lyric_y, lyric_y_pred),
    (aart_y, aart_y_pred),
  )
  return tuple(loss_obj(target, prediction) for target, prediction in target_prediction_pairs)

In [44]:
def train_step(mfcc_train_x, chroma_train_x, lyric_train_x, aart_train_x, train_y, reset):
  """Run one optimisation step on all four models for a single batch.

  Each model is evaluated on its own feature batch against the shared
  targets `train_y`. The four losses are added, and every model's variables
  are updated by its own optimizer with the gradient of that sum. The models
  share no variables, so each one only receives the gradient of its own loss
  term.

  Parameters:
    mfcc_train_x, chroma_train_x, lyric_train_x, aart_train_x: feature
      batches for the respective model, with rows in the same order.
    train_y: targets for the batch, passed to `loss_obj` as the true labels.
    reset: ignored. Kept for backward compatibility; a fresh tape is created
      on every call, so there is nothing to reset.

  Returns:
    The sum of the four losses as returned by `loss_obj`. The caller averages
    it with `np.mean`.
  """
  models = (mfcc_model, chroma_model, lyric_model, aart_model)
  optimizers = (mfcc_optimizer, chroma_optimizer, lyric_optimizer, aart_optimizer)
  batches = (mfcc_train_x, chroma_train_x, lyric_train_x, aart_train_x)

  # One non-persistent tape and one backward pass serve all four models.
  with tf.GradientTape() as tape:
    predictions = [model(batch, training=True) for model, batch in zip(models, batches)]
    losses = [loss_obj(train_y, prediction) for prediction in predictions]

    effective_loss = losses[0]
    for model_loss in losses[1:]:
      effective_loss = effective_loss + model_loss

  # Nested sources give nested gradients: one list of gradients per model.
  variables = [model.trainable_variables for model in models]
  gradients = tape.gradient(effective_loss, variables)

  for optimizer, model_gradients, model_variables in zip(optimizers, gradients, variables):
    optimizer.apply_gradients(zip(model_gradients, model_variables))

  return effective_loss

In [45]:
from sklearn.metrics import accuracy_score
EPOCH_COUNT = 50
BATCH_SIZE = 128
TRAIN_SIZE = y_train.shape[0]

# Loop-invariant evaluation inputs, built once instead of on every batch.
model_test_inputs = [x_test_mfcc, x_test_chroma, x_test_lyric, x_test_aart]
models = [mfcc_model, chroma_model, lyric_model, aart_model]
y_test_labels = np.argmax(y_test, axis=1)

for epoch in range(1, EPOCH_COUNT + 1):
  print('\n')
  # Batches are consecutive slices in a fixed order; the last one may be short.
  for cur_offset in range(0, TRAIN_SIZE, BATCH_SIZE):
    cur_end = min(cur_offset + BATCH_SIZE, TRAIN_SIZE)
    # train_step still takes a `reset` flag: 1 for the first batch of an epoch.
    reset = 1 if cur_offset == 0 else 0
    loss = train_step(x_train_mfcc[cur_offset:cur_end], x_train_chroma[cur_offset:cur_end], x_train_lyric[cur_offset:cur_end], x_train_aart[cur_offset:cur_end], y_train[cur_offset:cur_end], reset)

    # NOTE(review): the whole test set is scored after every batch, which is
    # expensive; consider evaluating once per epoch.
    # Ensemble prediction: sum the class probabilities of the four models and
    # take the arg-max.
    y_test_preds = [model.predict(test_input) for model, test_input in zip(models, model_test_inputs)]
    y_test_pred_ensemble_sum = np.sum(np.array(y_test_preds), axis=0)

    accuracy = accuracy_score(y_test_labels, np.argmax(y_test_pred_ensemble_sum, axis=1))
    print('\r', 'epoch: ', epoch, ', batch: ', (cur_end/TRAIN_SIZE), ', loss: ', np.mean(loss), ', accuracy: ', accuracy, ".")




 epoch:  1 , batch:  0.037317784256559766 , loss:  87.80377 , accuracy:  0.15034013605442176 .
 epoch:  1 , batch:  0.07463556851311953 , loss:  116.633255 , accuracy:  0.1945578231292517 .
 epoch:  1 , batch:  0.1119533527696793 , loss:  80.83679 , accuracy:  0.18639455782312925 .
 epoch:  1 , batch:  0.14927113702623906 , loss:  72.16894 , accuracy:  0.1870748299319728 .
 epoch:  1 , batch:  0.18658892128279883 , loss:  63.002983 , accuracy:  0.19183673469387755 .
 epoch:  1 , batch:  0.2239067055393586 , loss:  56.085472 , accuracy:  0.19319727891156463 .
 epoch:  1 , batch:  0.2612244897959184 , loss:  58.86895 , accuracy:  0.2054421768707483 .
 epoch:  1 , batch:  0.29854227405247813 , loss:  58.612392 , accuracy:  0.23129251700680273 .
 epoch:  1 , batch:  0.3358600583090379 , loss:  57.380936 , accuracy:  0.25510204081632654 .
 epoch:  1 , batch:  0.37317784256559766 , loss:  54.584602 , accuracy:  0.2707482993197279 .
 epoch:  1 , batch:  0.41049562682215746 , loss:  47.96706

KeyboardInterrupt: ignored

In [None]:
# Chance-corrected accuracy (kappa) with a chance level of 1/7:
#   kappa = (observed - chance) / (1 - chance)
random_accuracy = 1/7
observed_accuracy = 0.74

kappa = (observed_accuracy - random_accuracy) / (1 - random_accuracy)
kappa

In [None]:
# Same kappa formula with a chance level of 1/15 (presumably a 15-class
# experiment that is not part of this notebook -- confirm):
#   kappa = (observed - chance) / (1 - chance)
random_accuracy = 1/15
observed_accuracy = 0.477

kappa = (observed_accuracy - random_accuracy) / (1 - random_accuracy)
kappa

In [None]:
# Show the chance-level accuracy set by whichever kappa cell ran last.
random_accuracy