## 6. Classification
### 1. Merge one-hot encoded genres with lyrics

In [1]:
import pandas as pd

# Load the train and test CSV files, using the 'Band' column as the row index.
df = pd.read_csv('../dataset/Lyrics_en_artists_clean_train.csv', index_col=['Band'])
df_val = pd.read_csv('../dataset/Lyrics_en_artists_clean_test.csv', index_col=['Band'])

# Preview only the first rows instead of rendering the whole frame as cell output.
df.head()

KeyboardInterrupt: 

### 2. Label exploration  

In [None]:
%matplotlib inline

# Columns that are not part of the label matrix: the trailing ten columns of the
# training frame plus the raw 'Lyrics' text.
# Index.append() only accepts Index objects (a bare string raises TypeError),
# so the names are collected in a plain list instead.
# NOTE(review): what the trailing ten columns contain is not visible here — confirm.
non_label_columns = list(df.columns[-10:]) + ['Lyrics']

# Everything that remains is used as the target matrix (presumably the one-hot
# genre columns). The same column names, taken from the training frame, are
# dropped from both splits.
y_train = df.drop(non_label_columns, axis=1)
y_test = df_val.drop(non_label_columns, axis=1)

# Raw test lyrics are kept so their document vectors can be inferred later.
test_lyrics = df_val['Lyrics']

# The full frames are no longer needed once labels and test lyrics are extracted.
del df
del df_val

### 3. Doc2Vec deep learning LSTM classifier

In [None]:
from gensim.models.doc2vec import Doc2Vec

# Restore the previously saved Doc2Vec model and expose its word vectors.
doc2vec = Doc2Vec.load("../dataset/doc2vec")
doc_vectors = doc2vec.wv

# Report the vocabulary size as a quick sanity check on the loaded model.
print(
    "Number of word vectors: {}".format(
        len(doc_vectors.vocab)
    )
)

In [None]:
from gensim.utils import simple_preprocess
import nltk
from nltk.corpus import stopwords
import spacy
import en_core_web_sm

# Make the NLTK stop-word corpus available, then build the stop-word list:
# the English defaults plus two lyric-specific extras.
nltk.download('stopwords')
stop_words = stopwords.words('english') + ['s', 'chorus']

# Needs the small English spaCy model (python -m spacy download en).
# The parser and NER components are disabled when loading the pipeline.
nlp = en_core_web_sm.load(disable=['parser', 'ner'])

# Part-of-speech tags whose lemmas are kept by transform_lyric.
allowed_postags = ['NOUN', 'ADJ', 'VERB', 'ADV']
    
def transform_lyric(lyric):
    """Convert raw lyric text into a list of lemmas.

    The text is tokenized with gensim's ``simple_preprocess`` (accents
    stripped via ``deacc=True``), tokens found in the module-level
    ``stop_words`` are removed, and the remaining tokens are re-joined and
    run through the spaCy pipeline ``nlp``. Only lemmas of tokens whose
    part-of-speech tag is in ``allowed_postags`` are returned.

    Args:
        lyric: The lyric to process; it is coerced with ``str()`` first, so
            non-string inputs are accepted.

    Returns:
        list: Lemma strings in their original order (possibly empty).
    """
    tokens = simple_preprocess(str(lyric), deacc=True)

    # A set gives constant-time membership tests; the module-level list would
    # be scanned once per token otherwise.
    stop_set = set(stop_words)
    content_words = [word for word in tokens if word not in stop_set]

    # spaCy needs running text to tag parts of speech, so re-join the tokens.
    doc = nlp(" ".join(content_words))
    return [token.lemma_ for token in doc if token.pos_ in allowed_postags]

In [None]:
import numpy as np
# Width of each document vector.
# NOTE(review): must equal the vector size of the loaded Doc2Vec model — confirm.
Text_INPUT_DIM = 100
train_size = y_train.shape[0]
test_size = y_test.shape[0]
text_train_arrays = np.zeros((train_size, Text_INPUT_DIM))
text_test_arrays = np.zeros((test_size, Text_INPUT_DIM))

# Training rows: copy the vectors already stored in the model, looked up by
# integer position.
# NOTE(review): assumes the model's document order matches the rows of
# y_train — confirm against the notebook that trained the model.
for i in range(train_size):
    text_train_arrays[i] = doc2vec.docvecs[i]

# Test rows: preprocess each lyric and infer its vector. Iterating over the
# Series values avoids integer lookups on the 'Band'-labelled index (which
# depend on pandas' deprecated positional fallback) and element-wise writes
# of lists into the Series.
test_tokens = [transform_lyric(text) for text in test_lyrics]
for i, tokens in enumerate(test_tokens):
    text_test_arrays[i] = doc2vec.infer_vector(tokens)

# As before, test_lyrics ends up holding the token lists; it is rebound once
# here instead of being overwritten element by element.
test_lyrics = pd.Series(test_tokens, index=test_lyrics.index)

In [None]:
from keras import backend as K
# Reset the Keras backend state before the model is (re)built in the next cell,
# so re-running the notebook does not pile up state from earlier models.
K.clear_session()

In [None]:
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential
from keras.layers import Dense, Flatten, LSTM, Conv1D, MaxPooling1D, Dropout, Activation
from keras.layers.embeddings import Embedding

from keras.layers import Reshape

# Number of features per document vector, taken from the training matrix.
n_features = text_train_arrays.shape[1]

model_conv = Sequential()
# The inputs are dense, real-valued Doc2Vec vectors rather than integer token
# ids, so an Embedding lookup would truncate them to indices and discard the
# features. Present each vector as a length-n_features sequence with one
# channel instead; the model still accepts (n_samples, n_features) arrays.
model_conv.add(Reshape((n_features, 1), input_shape=(n_features,)))
model_conv.add(Dropout(0.2))
model_conv.add(Conv1D(64, 5, activation='relu'))
model_conv.add(MaxPooling1D(pool_size=4))
model_conv.add(LSTM(100))
# One sigmoid output per label column, trained with binary cross-entropy.
model_conv.add(Dense(y_train.shape[1], activation='sigmoid'))
model_conv.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model_conv.summary()

# Train on the training vectors and evaluate on the test split after each epoch.
estimator = model_conv.fit(
    text_train_arrays,
    y_train,
    validation_data=(text_test_arrays, y_test),
    epochs=2,
    batch_size=1024,
)

In [None]:
import pandas as pd

# Only the lyrics text is needed here, so parse just the index column and
# 'Lyrics' instead of loading the whole frame and discarding it afterwards.
train_lyrics = pd.read_csv(
    '../dataset/Lyrics_en_artists_clean_train.csv',
    index_col=['Band'],
    usecols=['Band', 'Lyrics'],
)['Lyrics']

In [None]:
# Sample lyric 1, used to try out the similarity lookup and the classifier.
# Author's notes: "Bryce Fox" / "Lucy" (presumably artist / song title — TODO confirm)
# Genres noted by the author: "indie poptimism","modern alternative rock","modern rock"

lyric = "So bring your vibe over here here here \
Throw it up in the air air air, oh \
I run my hands through your hair hair hair \
And give you that love cuz you're oh so rare \
You look better over here then take you do over there \
And I don't wanna share \
 \
You don't let a good thing go to waste \
So I took the lipstick off that face \
I don't wanna share, yeah \
 \
If your name was Lucy, I'd put Lucy in her Lucifer \
My god, what in the devil, bring the animal right out of her \
If your name was Lucy, I'd put Lucy in her Lucifer \
My god, what in the devil, bring the animal right out of her \
 \
She said she ain't Lucy \
I said not yet \
She said she ain't Lucy \
I'll fix you up something real real tall \
It's looking up, we should take it all off \
Yeah \
You remind me why the stars don't fall \
I swear I'll love you like your last name's Ball \
I'll take you to the lair if we make it past the stairs \
I don't wanna share, yeah \
 \
You don't let a good thing go to waste \
So I took the lipstick off that face \
I don't wanna share, yeah \
 \
If your name was Lucy, I'd put Lucy in her Lucifer \
My god, what in the devil, bring the animal right out of her \
If your name was Lucy, I'd put Lucy in her Lucifer \
My god, what in the devil, bring the animal right out of her \
 \
She said she ain't Lucy \
I said not yet \
She said she ain't Lucy \
 \
Said I'm just tryna get to know ya \
So bite your tongue before I bite it for ya \
Yeah \
Said I'm just tryna get to know ya \
So bite your tongue before I bite it for ya \
 \
If your name was Lucy, I'd put Lucy in her Lucifer \
My god, what in the devil, bring the animal right out of her \
If your name was Lucy, I'd put Lucy in her Lucifer \
My god, what in the devil, bring the animal right out of her \
 \
She said she ain't Lucy \
I said not yet \
She said she ain't Lucy \
I said not yet \
(Said I'm just tryna get to know ya) \
(So bite your tongue before I bite it for ya) \
She said she ain't Lucy \
I said not yet \
(Said I'm just tryna get to know ya) \
She said she ain't Lucy \
I said not yet" 

# Replace the raw text with the output of transform_lyric and show it.
# Note that `lyric` is rebound here: from this point on it holds the
# preprocessed result, not the original string.
lyric = transform_lyric(lyric)
print(lyric)

In [None]:
# Infer a document vector for the sample and hold it as a one-row batch.
inferred = doc2vec.infer_vector(lyric)
vector = np.zeros((1, Text_INPUT_DIM))
vector[0, :] = inferred

# Print the two closest training documents: lyric text, similarity, tag.
# NOTE(review): train_lyrics is indexed by 'Band'; if the tags are integers
# this lookup depends on pandas' positional fallback — confirm.
sims = doc2vec.docvecs.most_similar([vector[0]], topn=2)
for tag, similarity in sims:
    print(train_lyrics[tag], similarity, tag)

In [None]:
# Score the sample with the trained model and show the ten highest-scoring
# label columns together with their predicted values.
y_pred = model_conv.predict(vector)
idx = np.argsort(-y_pred[0])[:10]
top_labels = y_train.columns.values[idx]
top_scores = y_pred[0][idx]
dict(zip(top_labels, top_scores))

In [None]:
# Sample lyric 2, run through the same steps as the first sample.
# No artist or genre notes are recorded for it in this notebook.
lyric = "I hear you talkin' shit, bro you think you're the heat \
Please bow down to defeat you're barely mince meat \
Stop with the street talk, and start to do the street, walk \
Lock yourself in and tell me this, how you gonna battle with this sick shit that I spit bitch \
See you still flappin' your jaws, prepare to be thrown in the ocean \
In the middle with jaws \
Or prefer to be served and severed with claws, knock on the doors Of absolute death you may \
But please be ready for the automatic failures you may make \
So take this and wait for another dismembering, remembering \
The tethering of your dream and how it was minced to pieces \
Believe in yourself please, to save you now you'll be needin' \
Help from Jesus \
You think I'm copy, pastin' please, tell me how you're actin' up \
Better be quite or Imma bout to bitch smack your mouth shut \
For good, yeah man talk the hood talk better go back and walk the hood walk you ain't had the taste of a real G \
Get back down to your level and prepare to be beat \
Writin' on your sheet, like you're deep please bitch your weak \
You ain't on my level or a level at all, take a vacation \
Up the damn wall and fall \
Humpty Dumpty, I must be rusty but you better knock the shit Down if you goin' against me \
I'm hard to beat, you're hardly cheese compared to these beats And this emcee \
You ready, alright, so come on down, I bet you to do it \
Let's see if you got the balls or if you're just truant \
To me, yeah let's keep an eye, on your ass as the heat intensifies \
Talk to me in a smart tone, prepare to meet your demise \
Throw you, a hundred million miles is what im finna do \
Call me Son Goku \
I already told you, what I'm gonna do, finna plow through \
Yeah man try to tackle a topic more intense then mine \
I always rap from the heart, make the fake rappers sigh \
But I really really don't give at all a flying fuck \
My chance of making this shit is the same as seein' a flyin' truck \
Or it could all depend on some serious luck \
Maybe I'll release a track that is heard my millions \
Make it big and to the top and make billions \
But that's a dream for another day, I'm here to take you down \
Don't care what you say \
Freestylin' this rap shit to the grave \
Rap battlin' bitches in my spare time for loose change \
Please, all of you fake ass rappers are plan \
Make like like a plane and take the next trip away \
Far away, from here I don't need to here your shit anymore \
I'm tired of it now, next place to be, will be a fuckin' morgue \
But let me just let you all know this, I'm sick of it, all of it \
So listen next time I write another track \
Listen to the facts I spit, and find it through my old wack rap crap \
I know I've said some shit, that know I demise \
But just look at my own and, your own life \
But this time, through my eyes \
I got a couple little bitches that need a little call out \
But I'll reside, got more and more, don't get me wrong \
Now get the fuck back home and call your mom \
Continue to rap please, just leave me alone \
I'm done hearing wack disses like this \
I'm sick of this, listening to this petty as shit \
Bitch"
# Replace the raw text with the output of transform_lyric and show it;
# `lyric` holds the preprocessed result from here on.
lyric = transform_lyric(lyric)
print(lyric)

In [None]:
# Build a one-row batch holding the inferred vector of the second sample.
vector = np.zeros((1, Text_INPUT_DIM))
vector[0, :] = doc2vec.infer_vector(lyric)

# Show the two nearest training documents as (lyric text, similarity, tag).
# NOTE(review): train_lyrics is indexed by 'Band'; if the tags are integers
# this lookup depends on pandas' positional fallback — confirm.
query = vector[0]
sims = doc2vec.docvecs.most_similar([query], topn=2)
for tag, similarity in sims:
    print(train_lyrics[tag], similarity, tag)

In [None]:
# Predict label scores for the second sample and list the ten strongest
# label columns with their predicted values.
y_pred = model_conv.predict(vector)
idx = np.argsort(-y_pred[0])[:10]
label_names = y_train.columns.values
dict(zip(label_names[idx], y_pred[0][idx]))