In [1]:
import os
import re
import string
import numpy as np

import tensorflow as tf
from tensorflow.keras import layers
from sklearn.metrics import f1_score

In [2]:
import pandas as pd
import tqdm

df = pd.read_csv('./test.csv')

df.head(5)

Unnamed: 0,Song,Song year,Artist,Genre,Lyrics,Track_id
0,craftsmanship,2005,buck-65,Hip-Hop,Most folks spend their days daydreaming of fin...,8294
1,come-on-out,2012,the-elwins,Indie,Take your cold hands and put them on my face\n...,21621
2,riot,2013,bullet-for-my-valentine,Metal,Are you ready it's time for war\nWe'll break d...,3301
3,that-s-what-girls-do,2007,dream-street,Pop,You ask me why I change the color of my hair\n...,2773
4,believe-in-a-dollar,2012,cassidy,Hip-Hop,Do you believe in magic in a young girl's hear...,16797


In [3]:
from bs4 import BeautifulSoup
from nltk.corpus import stopwords
import re

# Preprocessing
def prepocessing(lyrics, remove_stopwords=False, stops=set(stopwords.words('english'))):
    lyric_text = BeautifulSoup(lyric, "html5lib").get_text()
    lyric_text = re.sub("[^a-zA-Z]", " ", lyric_text)
    lyric_text = lyric_text.lower()
    
    lyric_words = lyric_text.split()
    if remove_stopwords:
        lyric_words = lyric_text.split()
        lyric_words = [w for w in lyric_words if not w in stops]
        
    lyric_text = ' '.join(lyric_words)
    return lyric_text

In [4]:
processed_lyrics = []
for lyric in tqdm.tqdm(df['Lyrics'].values):
    processed = prepocessing(lyric)
    processed_lyrics.append(processed)

100%|████████████████████████████████████████████████████████████████████████████| 7935/7935 [00:03<00:00, 2428.43it/s]


In [63]:
num_data = []
total = len(x)

train_df = pd.read_csv('./train_filtered.csv')

for genre in train_df['Genre'].unique():
    print(genre)
    num_data.append(len(train_df[train_df['Genre'] == genre]))

print(num_data)

class_weight = [
    1 - num_data[0] / total,
    1 - num_data[1] / total,
    1 - num_data[2] / total,
    1 - num_data[3] / total,
    1 - num_data[4] / total,
    1 - num_data[5] / total,
    1 - num_data[6] / total,
    1 - num_data[7] / total,
    1 - num_data[8] / total,
    1 - num_data[9] / total
]

class_code = {
    'Rock': 0,
    'Metal': 1,
    'Pop': 2,
    'Indie': 3,
    'Folk': 4,
    'Electronic': 5,
    'R&B': 6,
    'Jazz': 7,
    'Hip-Hop': 8,
    'Country': 9,
}

Rock
Metal
Pop
Indie
Folk
Electronic
R&B
Jazz
Hip-Hop
Country
[107019, 19098, 86219, 7240, 8165, 2002, 2763, 13314, 2238, 1890]


In [64]:
import numpy as np
from sklearn.model_selection import train_test_split

y = []
for genre in df['Genre']:
    y.append(class_code[genre])

x = processed_lyrics
y = np.array(y)

In [45]:
word2vec = tf.keras.models.load_model('word2vec')

In [46]:
BUFFER_SIZE = 10000
BATCH_SIZE = 1024

train_ds = tf.data.Dataset.from_tensor_slices((x, y))
train_ds = train_ds.shuffle(BUFFER_SIZE).batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)

VOCAB_SIZE = 1000
encoder = tf.keras.layers.TextVectorization(
    max_tokens=VOCAB_SIZE)
encoder.adapt(train_ds.map(lambda text, label: text))   

model = tf.keras.Sequential([
    encoder,
    word2vec.get_layer('w2v_embedding'),
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(64)),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(10, activation='softmax')
])

In [47]:
model.load_weights('./epoch50/variables/variables')

<tensorflow.python.training.tracking.util.CheckpointLoadStatus at 0x27af421c4c8>

In [48]:
predict = model.predict(x)

In [50]:
weighted_predict = predict * class_weight

In [51]:
print(weighted_predict)

[[-6.91243990e+00 -1.29745270e-01 -2.51545708e+00 ... -3.82025614e-03
   1.72656344e-04  9.67033893e-04]
 [-5.25343731e+00 -1.56656532e-02 -3.23494140e+00 ... -6.89333322e-02
   9.93340555e-04  1.01759196e-02]
 [-1.73668316e+00 -3.46313128e-03 -6.06866257e+00 ... -7.55604179e-02
   5.68676271e-03  4.51359712e-03]
 ...
 [-1.91420971e+00 -6.58544493e-02 -5.37265835e+00 ... -2.33339775e-02
   1.34567604e-01  4.09620775e-05]
 [-6.10621691e+00 -8.41555303e-03 -1.40158774e+00 ... -1.54267746e-01
   7.80467441e-04  2.13163214e-02]
 [-6.60179579e+00 -7.69438777e-02 -2.84646162e+00 ... -5.06599987e-03
   1.24413194e-03  4.60612098e-03]]


In [60]:
w_predict = (np.argmax(weighted_predict, axis=1))

print(w_predict)

[5 6 6 ... 8 9 5]


In [76]:
f1 = f1_score(np.array(y), w_predict, average='micro')

In [78]:
f1

0.13673597983616886

In [68]:
df['Predict'] = w_predict

In [69]:
df['IntGenre'] = y

In [70]:
df

Unnamed: 0,Song,Song year,Artist,Genre,Lyrics,Track_id,IntGenre,Predict
0,craftsmanship,2005,buck-65,Hip-Hop,Most folks spend their days daydreaming of fin...,8294,8,5
1,come-on-out,2012,the-elwins,Indie,Take your cold hands and put them on my face\n...,21621,3,6
2,riot,2013,bullet-for-my-valentine,Metal,Are you ready it's time for war\nWe'll break d...,3301,1,6
3,that-s-what-girls-do,2007,dream-street,Pop,You ask me why I change the color of my hair\n...,2773,2,5
4,believe-in-a-dollar,2012,cassidy,Hip-Hop,Do you believe in magic in a young girl's hear...,16797,8,6
...,...,...,...,...,...,...,...,...
7930,too-little-too-late,2006,amanda-marshall,Rock,Tuesday night - 7:30\nI hear a voice on the te...,23453,0,3
7931,berserker,2007,aurora-borealis,Metal,Elite forces cloaked in fur un sensitive to pa...,2724,1,5
7932,natural-born-killaz,2010,dr-dre,Hip-Hop,[Dr. Dre]\nJourney with me\nInto the mind of a...,24147,8,8
7933,wide-awake,2011,chris-cornell,Rock,You can a look a hurricane right in the eye.\n...,4150,0,9


In [77]:
df[df['IntGenre'] != df['Predict']]['Lyrics'][1]

"Take your cold hands and put them on my face\nSharpen your axe and your criminal ways\nLet's go to town\nand do what we did before\nIt's gonna hurt\nbut we don't feel pain no more\nIf you're alive can you say that you've tried\nto be someone who's true?\nStuck in your shell\nLooking for help\nCome on out and things will soon change\nYou gotta keep your head up\nStart your day by doing something good\nTreat a friend the way you know you should\nLike a cat in its cage\nI feel we're pacing around and never getting our way\nBut if you know you're a mitt\ninside a sleeve, that's a fit\nand that's as good as it gets\nIf you're alive can you say that you've tried\nto be someone who's true?\nStuck in your shell\nLooking for help\nCome on out and things will soon change\nYou gotta keep your head up\nYou gotta keep your head up\nYou gotta keep your head up\nMoving quickly\nSure feels so empty\nI've thought about it and all the while\nI'm still doing the same old thing\nI can't say that I'll cha