In [1]:
import os
import sys
import json
import random
import numpy as np
import pandas as pd
from pandas.io.json import json_normalize
import keras
from keras.layers import LSTM, Dense
from keras.models import Sequential

Using TensorFlow backend.


In [2]:
def load_json(json_path, artists=None):
    """Load song records from one JSON file or from a directory of JSON files.

    Every JSON document is expected to be an object with a top-level
    ``'songs'`` list; the entries of those lists are what gets returned.

    Args:
        json_path: Path to a single JSON file, or to a directory containing
            ``*.json`` files.
        artists: Optional substring (or iterable of substrings). When
            ``json_path`` is a directory and ``artists`` is non-empty, only
            files whose name contains at least one substring are loaded.
            Ignored when ``json_path`` is a single file.

    Returns:
        A list with the ``'songs'`` entries of every loaded file. Files are
        visited grouped by artist (in the order given) and alphabetically
        within each group, so the result is reproducible across machines.

    Raises:
        FileNotFoundError: If ``json_path`` is neither a file nor a directory.
    """
    if os.path.isfile(json_path):
        # JSON is UTF-8 by specification; do not rely on the platform default.
        with open(json_path, encoding='utf-8') as f:
            return json.load(f)['songs']

    if not os.path.isdir(json_path):
        raise FileNotFoundError(
            'No JSON file or directory found at {!r}'.format(json_path))

    # os.listdir returns entries in arbitrary order; sort for determinism.
    candidates = sorted(
        name for name in os.listdir(json_path) if name.endswith('.json'))

    # A bare string would otherwise be iterated character by character.
    if isinstance(artists, str):
        artists = [artists]

    if artists:
        json_files = []
        for artist in artists:
            for name in candidates:
                # A file matching several artists must only be loaded once.
                if artist in name and name not in json_files:
                    json_files.append(name)
    else:
        json_files = candidates

    data = []
    for json_file in json_files:
        path_to_json = os.path.join(json_path, json_file)
        with open(path_to_json, encoding='utf-8') as f:
            data.extend(json.load(f)['songs'])

    return data
    
    
def reweight_distribution(original_distribution, temperature=0.5):
    """Rescale a probability distribution by a temperature and renormalize.

    The log-probabilities are divided by ``temperature``, exponentiated again
    and divided by their sum so the result adds up to 1.

    Args:
        original_distribution: Array-like of probabilities.
        temperature: Divisor applied to the log-probabilities.

    Returns:
        The reweighted distribution as a NumPy array.
    """
    scaled_logs = np.log(original_distribution) / temperature
    unnormalized = np.exp(scaled_logs)
    total = np.sum(unnormalized)

    return unnormalized / total


def sample(preds, temperature=1.0):
    """Draw one class index from ``preds`` after temperature rescaling.

    The probabilities are converted to log space, divided by ``temperature``
    and pushed through a softmax; a single index is then drawn from the
    resulting distribution with ``np.random.multinomial``.

    Args:
        preds: Array-like of class probabilities.
        temperature: Divisor applied to the log-probabilities. Values below 1
            sharpen the distribution, values above 1 flatten it.

    Returns:
        The sampled class index (NumPy integer).
    """
    preds = np.asarray(preds).astype('float64')

    # log(0) == -inf is the intended logit for an impossible class, so the
    # divide-by-zero warning it triggers is only noise.
    with np.errstate(divide='ignore'):
        logits = np.log(preds) / temperature

    # Subtract the largest logit before exponentiating. Without the shift a
    # small temperature makes every exp() underflow to 0 and the
    # normalization below turns into 0 / 0 = NaN.
    exp_preds = np.exp(logits - np.max(logits))
    preds = exp_preds / np.sum(exp_preds)
    probas = np.random.multinomial(1, preds, 1)

    return np.argmax(probas)


In [3]:
# Parameters
maxlen = 60  # extract sequences of n characters
step = 3     # sample a new sequence every n characters
json_path = '../data/'
artists = ['2Pac']
seed = 42    # seed for the Python and NumPy random number generators

# Fix the random streams so the seed text picked with `random` and the
# characters drawn with `np.random` are reproducible across runs.
# NOTE(review): this does not seed the Keras/TensorFlow backend, so the
# trained weights can still differ between runs.
random.seed(seed)
np.random.seed(seed)

## Data preprocessing

In [4]:
# Load the song records of the selected artists, flatten them into a
# DataFrame and merge all lyrics into one lower-cased corpus string.
data = load_json(json_path, artists)
df = json_normalize(data)
lyrics = " ".join(df['lyrics'].tolist()).lower()

print('Corpus length: {}'.format(len(lyrics)))

Corpus length: 388104


In [5]:
# Slide a window of `maxlen` characters over the corpus in strides of `step`.
# Each window is one training input; the character right after it is the
# target the model has to predict.
window_starts = range(0, len(lyrics) - maxlen, step)
sentences = [lyrics[start: start + maxlen] for start in window_starts]
next_chars = [lyrics[start + maxlen] for start in window_starts]

print('Number of sequences:', len(sentences))

chars = sorted(set(lyrics))  # unique characters of the corpus, sorted
print('Unique characters:', len(chars))

# Map every character to its position in `chars`.
char_indices = {char: index for index, char in enumerate(chars)}

Number of sequences: 129348
Unique characters: 70


In [20]:
# Display the sorted list of unique characters found in the corpus.
chars

['\n',
 ' ',
 '!',
 '"',
 '&',
 "'",
 '(',
 ')',
 '*',
 '+',
 ',',
 '-',
 '.',
 '/',
 '0',
 '1',
 '2',
 '3',
 '4',
 '5',
 '6',
 '7',
 '8',
 '9',
 ':',
 ';',
 '?',
 '[',
 ']',
 'a',
 'b',
 'c',
 'd',
 'e',
 'f',
 'g',
 'h',
 'i',
 'j',
 'k',
 'l',
 'm',
 'n',
 'o',
 'p',
 'q',
 'r',
 's',
 't',
 'u',
 'v',
 'w',
 'x',
 'y',
 'z',
 '{',
 '}',
 '~',
 '\xa0',
 'é',
 'ë',
 '\u200b',
 '–',
 '—',
 '‘',
 '’',
 '“',
 '”',
 '•',
 '…']

In [6]:
print('Vectorization...')

# One-hot encode the inputs (x) and the targets (y).
# Both tensors only ever hold 0/1, so they are allocated as booleans: one
# byte per entry instead of the eight bytes of NumPy's float64 default.
# The builtin `bool` is used because the `np.bool` alias was deprecated in
# NumPy 1.20 and removed in NumPy 1.24.
x = np.zeros((len(sentences), maxlen, len(chars)), dtype=bool)
y = np.zeros((len(sentences), len(chars)), dtype=bool)

for i, sentence in enumerate(sentences):
    for t, char in enumerate(sentence):
        x[i, t, char_indices[char]] = 1    # mark the character at position t
    y[i, char_indices[next_chars[i]]] = 1  # mark the character to predict

Vectorization...


## Build Model

In [7]:
# A single LSTM layer reads the one-hot encoded window; the dense softmax
# layer outputs one probability per character in `chars`.
model = Sequential([
    LSTM(128, input_shape=(maxlen, len(chars))),
    Dense(len(chars), activation='softmax'),
])

optimizer = keras.optimizers.RMSprop(lr=0.01)
model.compile(optimizer=optimizer, loss='categorical_crossentropy')

In [22]:
# Print the layer-by-layer overview of the model with its parameter counts.
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_1 (LSTM)                (None, 128)               101888    
_________________________________________________________________
dense_1 (Dense)              (None, 70)                9030      
Total params: 110,918
Trainable params: 110,918
Non-trainable params: 0
_________________________________________________________________


## Train Model

In [16]:
# Train on the one-hot encoded sequences: 40 epochs, mini-batches of 128.
model.fit(x, y, batch_size=128, epochs=40)

Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40
Epoch 20/40
Epoch 21/40
Epoch 22/40
Epoch 23/40
Epoch 24/40
Epoch 25/40
Epoch 26/40
Epoch 27/40
Epoch 28/40
Epoch 29/40
Epoch 30/40
Epoch 31/40
Epoch 32/40
Epoch 33/40
Epoch 34/40
Epoch 35/40
Epoch 36/40
Epoch 37/40
Epoch 38/40
Epoch 39/40
Epoch 40/40


<keras.callbacks.History at 0x236b2be80>

In [18]:
# Persist the trained model to an HDF5 file in the working directory.
# NOTE(review): the file name says "100epochs" but the fit cell above trains
# for 40 epochs — confirm how many epochs this checkpoint really has.
model.save('./model_100epochs.h5')

In [23]:
temperature = 0.2

# Use a random window of `maxlen` characters from the corpus as seed text.
start_index = random.randint(0, len(lyrics) - maxlen - 1)
generated_text = lyrics[start_index: start_index + maxlen]
print(generated_text)

for _ in range(1500):
    # One-hot encode the current window as a batch containing one sequence.
    sampled = np.zeros((1, maxlen, len(chars)))
    positions = np.arange(len(generated_text))
    char_ids = [char_indices[char] for char in generated_text]
    sampled[0, positions, char_ids] = 1.

    # Predict the next-character distribution and draw one character from it.
    preds = model.predict(sampled, verbose=0)[0]
    next_char = chars[sample(preds, temperature)]

    # Slide the window: drop the oldest character, append the new one.
    generated_text = generated_text[1:] + next_char
    sys.stdout.write(next_char)

ly baby, you need a thug in your life
these busters ain't lo
ving you bitch
it's all about you
and that when the streets, the streets, but i liet?
it's just me and my girlfriend't been gettin' my to the same of time
my nigga and be some of the pen
laugh and be the streets, the police
to be a motherfuckin' words of my pops to the streets
and what the world through my say i could see me
the pain, heard in my heart make a nigga got to be a motherfuckin' or the trath that i leave to be
i had my homies to be a motherfuckin' out, all your motherfucker
so i wome through the streets, that's why the world
i'm all your motherfuckers and that the stranger
so i'm worrivediction, but i leave to be a ride
hears in the bloody brown
that's why the world through my friends
inside the streets, we all the stranger
thinks it easy from the worst to be a manive life
that i wonder why that i was a panip
a ponce that i can easy pain
i don't care to be a manive breathin' for the streets
and what the world thro

