In [1]:
import os
import sys
import json
import random
import numpy as np
import pandas as pd
from pandas.io.json import json_normalize
import keras
from keras.layers import LSTM, Dense, MaxPool1D, Conv1D, AveragePooling1D, Flatten
from keras.models import Sequential

Using TensorFlow backend.


In [2]:
def load_json(json_path, artists=None):
    """Load song records from a JSON file or from a directory of JSON files.

    Every file read must contain a JSON object with a top-level ``'songs'``
    key holding a list; those lists are concatenated and returned.

    Parameters
    ----------
    json_path : str
        Path to a single JSON file, or to a directory whose ``*.json`` files
        should be read.
    artists : str or sequence of str, optional
        Only used when ``json_path`` is a directory. When non-empty, a file is
        read only if its name contains at least one of these substrings
        (case-sensitive). A bare string is treated as a single artist name.
        ``None`` or an empty sequence reads every ``*.json`` file.

    Returns
    -------
    list
        The concatenated ``'songs'`` entries. Files are grouped in the order
        of ``artists`` and sorted by name within each group; every file is
        read at most once even if it matches several artists.

    Raises
    ------
    FileNotFoundError
        If ``json_path`` is neither an existing file nor a directory.
    """
    if os.path.isfile(json_path):
        # JSON text is UTF-8 by specification; do not rely on the locale default.
        with open(json_path, encoding='utf-8') as f:
            return json.load(f)['songs']

    if not os.path.isdir(json_path):
        raise FileNotFoundError(
            'No JSON file or directory found at: {}'.format(json_path))

    if isinstance(artists, str):
        artists = [artists]

    # os.listdir() returns names in arbitrary order; sort for reproducible runs.
    available = sorted(name for name in os.listdir(json_path)
                       if name.endswith('.json'))

    if artists:
        json_files = []
        already_selected = set()
        for artist in artists:
            for name in available:
                # Skip files already picked up by an earlier artist so their
                # songs are not loaded twice.
                if artist in name and name not in already_selected:
                    already_selected.add(name)
                    json_files.append(name)
    else:
        json_files = available

    data = []
    for json_file in json_files:
        path_to_json = os.path.join(json_path, json_file)
        with open(path_to_json, encoding='utf-8') as f:
            data.extend(json.load(f)['songs'])

    return data
    
    
def reweight_distribution(original_distribution, temperature=0.5):
    """Rescale a probability distribution by a temperature and renormalize.

    The log of each entry is divided by ``temperature``, exponentiated again,
    and the result is divided by its sum so the output sums to 1.

    Parameters
    ----------
    original_distribution : array-like
        Input probabilities.
    temperature : float, optional
        Divisor applied to the log-probabilities (default 0.5).

    Returns
    -------
    numpy.ndarray
        The reweighted distribution.
    """
    scaled_log_probs = np.log(original_distribution) / temperature
    unnormalized = np.exp(scaled_log_probs)

    return unnormalized / np.sum(unnormalized)


def sample(preds, temperature=1.0):
    """Draw one index from a temperature-adjusted probability vector.

    ``preds`` is converted to float64, its log is divided by ``temperature``,
    and the exponentiated values are renormalized to sum to 1. A single
    multinomial trial is then drawn from that distribution.

    Parameters
    ----------
    preds : array-like
        Probabilities, one per class.
    temperature : float, optional
        Divisor applied to the log-probabilities (default 1.0).

    Returns
    -------
    numpy integer
        Index of the class selected by the draw.
    """
    log_probs = np.log(np.asarray(preds).astype('float64'))
    weights = np.exp(log_probs / temperature)
    probabilities = weights / np.sum(weights)
    # One trial, one experiment: a one-hot row marking the drawn class.
    one_hot_draw = np.random.multinomial(1, probabilities, 1)

    return np.argmax(one_hot_draw)


In [3]:
# Parameters
maxlen = 60  # extract sequences of n characters
step = 3     # sample a new sequence every n characters
n_grams = 0  # included in the output file name
json_path = '../data/deutsch'  # JSON file, or directory of JSON files, passed to load_json
artists = ['Bushido']  # file-name substrings used to select JSON files; the first entry also names the outputs

## Data preprocessing

In [4]:
# Read the raw song records and flatten them into a DataFrame (one row per song).
data = load_json(json_path, artists)
df = json_normalize(data)

# Work on lower-cased lyrics only.
lyrics = df['lyrics'].map(lambda text: text.lower())

song_count = len(df)
corpus_length = sum(len(text) for text in lyrics)
print('Number of Songs: {}'.format(song_count))
print('Corpus length: {}'.format(corpus_length))

Number of Songs: 100
Corpus length: 303654


In [5]:
# Slide a window of `maxlen` characters over every song, advancing by `step`.
# Each window becomes a training input; the character right after it is the target.
sentences, next_chars = [], []
for lyric in lyrics:
    text = lyric.lower()
    for start in range(0, len(text) - maxlen, step):
        stop = start + maxlen
        sentences.append(text[start:stop])
        next_chars.append(text[stop])

print('Number of sequences:', len(sentences))

# Sorted vocabulary of every distinct character in the corpus.
chars = sorted(set("".join(lyrics).lower()))
print('Unique characters:', len(chars))

# Character -> position in `chars`.
char_indices = {char: index for index, char in enumerate(chars)}

Number of sequences: 99246
Unique characters: 78


In [6]:
print('Vectorization...')

# One-hot tensors:
#   x[i, t, c] is set when character t of sequence i is chars[c]
#   y[i, c]    is set when the character following sequence i is chars[c]
# The builtin `bool` is used because the `np.bool` alias was removed in
# NumPy 1.24. Storing x as bool as well (instead of the float64 default of
# np.zeros) takes one byte per entry instead of eight for a purely 0/1 tensor.
x = np.zeros((len(sentences), maxlen, len(chars)), dtype=bool)
y = np.zeros((len(sentences), len(chars)), dtype=bool)

for i, sentence in enumerate(sentences):
    for t, char in enumerate(sentence):
        x[i, t, char_indices[char]] = 1    # one hot encoding
    y[i, char_indices[next_chars[i]]] = 1  # one hot encoding

Vectorization...


## Build Model

In [23]:
# Character-level CNN: two Conv1D stages with pooling, flattened into a
# softmax over the vocabulary to predict the next character.
model = Sequential([
    # Input is one one-hot encoded window: (maxlen, len(chars)).
    Conv1D(input_shape=(maxlen, len(chars)),
           filters=10,
           kernel_size=3,
           padding='same',
           activation='relu',
           strides=1),
    MaxPool1D(pool_size=2),
    Conv1D(filters=20,
           kernel_size=3,
           padding='same',
           activation='relu',
           strides=1),
    AveragePooling1D(pool_size=2),
    Flatten(),
    # One output unit per character in the vocabulary.
    Dense(len(chars), activation='softmax'),
])

optimizer = keras.optimizers.RMSprop(lr=0.01)
model.compile(loss='categorical_crossentropy', optimizer=optimizer)

In [24]:
# Print each layer's output shape and parameter count.
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1d_4 (Conv1D)            (None, 60, 10)            2350      
_________________________________________________________________
max_pooling1d_2 (MaxPooling1 (None, 30, 10)            0         
_________________________________________________________________
conv1d_5 (Conv1D)            (None, 30, 20)            620       
_________________________________________________________________
average_pooling1d_3 (Average (None, 15, 20)            0         
_________________________________________________________________
flatten_3 (Flatten)          (None, 300)               0         
_________________________________________________________________
dense_3 (Dense)              (None, 78)                23478     
Total params: 26,448
Trainable params: 26,448
Non-trainable params: 0
_________________________________________________________________


## Train Model

In [25]:
# Train on the one-hot tensors built above. `epochs` is kept in a variable
# because it is also embedded in the output file name further down.
epochs = 60
model.fit(x, y, batch_size=128, epochs=epochs)

Epoch 1/60
Epoch 2/60
Epoch 3/60
Epoch 4/60
Epoch 5/60
Epoch 6/60
Epoch 7/60
Epoch 8/60
Epoch 9/60
Epoch 10/60
Epoch 11/60
Epoch 12/60
Epoch 13/60
Epoch 14/60
Epoch 15/60
Epoch 16/60
Epoch 17/60
Epoch 18/60
Epoch 19/60
Epoch 20/60
Epoch 21/60
Epoch 22/60
Epoch 23/60
Epoch 24/60
Epoch 25/60
Epoch 26/60
Epoch 27/60
Epoch 28/60
Epoch 29/60
Epoch 30/60
Epoch 31/60
Epoch 32/60
Epoch 33/60
Epoch 34/60
Epoch 35/60
Epoch 36/60
Epoch 37/60
Epoch 38/60
Epoch 39/60
Epoch 40/60
Epoch 41/60
Epoch 42/60
Epoch 43/60
Epoch 44/60
Epoch 45/60
Epoch 46/60
Epoch 47/60
Epoch 48/60
Epoch 49/60
Epoch 50/60
Epoch 51/60
Epoch 52/60
Epoch 53/60
Epoch 54/60
Epoch 55/60
Epoch 56/60
Epoch 57/60
Epoch 58/60
Epoch 59/60
Epoch 60/60


<keras.callbacks.History at 0x20be700f0>

In [26]:
# Base name shared by the saved model and the generated-text file.
# It records the first configured artist and the main hyperparameters.
artist = artists[0]
file_name = '{}_{}epochs_{}maxlen_{}ngrams_CNN2'.format(artist, epochs, maxlen, n_grams)

In [27]:
# Display the base name for a quick visual check.
file_name

'Bushido_60epochs_60maxlen_0ngrams_CNN2'

In [28]:
# Save the trained model to an .h5 file in the current working directory.
model.save('./model_{}.h5'.format(file_name))

In [29]:
# Passed to sample(), which divides the log-probabilities by this value;
# values below 1 sharpen the predicted distribution.
temperature = 0.5

# Pick the seed from a random song. Only songs longer than `maxlen` qualify,
# otherwise no full-length seed window can be cut from them.
# random.choice is used because random.randint(0, len(lyrics)) includes its
# upper bound and could therefore produce an out-of-range index.
eligible_indices = [idx for idx, lyric_text in enumerate(lyrics)
                    if len(lyric_text) > maxlen]
if not eligible_indices:
    raise ValueError(
        'No lyric is longer than maxlen={}; cannot pick a seed text.'.format(maxlen))
lyrics_index = random.choice(eligible_indices)
chosen_lyric = lyrics.iloc[lyrics_index]  # positional lookup, independent of the Series index labels

# Random window of exactly `maxlen` characters inside the chosen song.
start_index = random.randrange(len(chosen_lyric) - maxlen)
generated_text_temp = chosen_lyric[start_index: start_index + maxlen]
generated_text = generated_text_temp
print(generated_text)

for i in range(1500):
    # One-hot encode the current window as a batch of one.
    sampled = np.zeros((1, maxlen, len(chars)))
    for t, char in enumerate(generated_text_temp):
        sampled[0, t, char_indices[char]] = 1.

    preds = model.predict(sampled, verbose=0)[0]
    next_index = sample(preds, temperature)
    next_char = chars[next_index]

    # `generated_text` keeps the full output; `generated_text_temp` is the
    # sliding window fed back into the model, so drop its oldest character.
    generated_text += next_char
    generated_text_temp = (generated_text_temp + next_char)[1:]
    sys.stdout.write(next_char)

e fickst
ich hab diese welt geschaffen für mich
wie das geht
t zund
stase manse ich all ich danden
 deuten mie dichen sten, weiter schund
michen delr nach sgonci wierens der hand du del all deine mein anden traes du sakuendtd bind som-s wie waine deine dein ger dich dim dein hier fick jederau 



wang sesche dann sein, hals der der dicht schite seidt der nanemen dein dand hastoel deine mit hanf dicht dein nie dinden wahr du dits wie an dehren meine den dit dahr deun dein wai in deine wein seh ich dichen dein delrel banne deine dehr dein be hilr deinenms ich dahl ein dain gantenn deine nanmener dein schrebenla dein kannten dicht manden in du daine dit dest deinem inden in deuner wein
es dindt deine all all meinen stnapcs dand die ist dichenncht deinener be
rore deb ne aue nach del die dein ich be mie dimr wie dicher
latd scheist destseiben mir dein diten
 eunase delerz dinden kein ich ihren dinden wein, die al
die mein missennstdits seit ihn mehren ball wie mehr ich dus dichent aumenert alt ich mand wai eine wein ge eins hanc jetzt nicher dichen
ranrerlich was ich gin mein nach dit du dahr sigs ab weid ich binnensenn hiesensteich dein verm dich del bindt neckende ditänst du dand seine art dandas del der del scho ich wie dein reih(du ich wons waisi beh ist dich dand gean mans dei

In [30]:
# Write the seed plus generated text next to the notebook. The encoding is set
# explicitly because the lyrics contain non-ASCII characters (e.g. umlauts) and
# the platform default encoding may not be able to represent them.
with open(file_name + '.txt', 'w', encoding='utf-8') as text_file:
    text_file.write(generated_text)