In [1]:
!pip install tensorflow-gpu

Collecting tensorflow-gpu
[?25l  Downloading https://files.pythonhosted.org/packages/0a/93/c7bca39b23aae45cd2e85ad3871c81eccc63b9c5276e926511e2e5b0879d/tensorflow_gpu-2.1.0-cp36-cp36m-manylinux2010_x86_64.whl (421.8MB)
[K     |████████████████████████████████| 421.8MB 38kB/s 
Collecting tensorflow-estimator<2.2.0,>=2.1.0rc0
[?25l  Downloading https://files.pythonhosted.org/packages/18/90/b77c328a1304437ab1310b463e533fa7689f4bfc41549593056d812fab8e/tensorflow_estimator-2.1.0-py2.py3-none-any.whl (448kB)
[K     |████████████████████████████████| 450kB 61.1MB/s 
Collecting gast==0.2.2
  Downloading https://files.pythonhosted.org/packages/4e/35/11749bf99b2d4e3cceb4d55ca22590b0d7c2c62b9de38ac4a4a7f4687421/gast-0.2.2.tar.gz
Building wheels for collected packages: gast
  Building wheel for gast (setup.py) ... [?25l[?25hdone
  Created wheel for gast: filename=gast-0.2.2-cp36-none-any.whl size=7540 sha256=71b9678fd74350ca29a51e30973235e9765285a23ab9d945782dd0f053a27fc2
  Stored in directo

In [0]:
# --- Standard library ---
import glob
import os
import string
import time
import warnings

# --- Third-party (kept in their original relative order) ---
import tensorflow as tf
from IPython import display
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.models import Sequential
import tensorflow.keras.utils as ku
from tensorflow.keras import layers
from sklearn.preprocessing import OneHotEncoder
import pandas as pd
import numpy as np

# Shared tokenizer: fitted by get_sequence_of_tokens and read again when decoding
# generator output in generate_and_save_images.
tokenizer = Tokenizer()
enc = OneHotEncoder()

# Silence all warnings from this point on (the second call is the FutureWarning-specific filter).
warnings.filterwarnings("ignore")
warnings.simplefilter(action='ignore', category=FutureWarning)

# Binary cross-entropy on raw logits, shared by the generator and discriminator losses.
cross_entropy = tf.keras.losses.BinaryCrossentropy(from_logits=True)

In [0]:
# One CSV per artist; the first column of each file is used as the index.
booba = pd.read_csv("booba.csv", index_col=0)
damso = pd.read_csv("damso.csv", index_col=0)
guizmo = pd.read_csv("guizmo.csv", index_col=0)
kaaris = pd.read_csv("kaaris.csv", index_col=0)
lomepal = pd.read_csv("lomepal.csv", index_col=0)
nekfeu = pd.read_csv("nekfeu.csv", index_col=0)
nepal = pd.read_csv("nepal.csv", index_col=0)
orelsan = pd.read_csv("orelsan.csv", index_col=0)
pnl = pd.read_csv("pnl.csv", index_col=0)
sch = pd.read_csv("sch.csv", index_col=0)
vald = pd.read_csv("vald.csv", index_col=0)

# Stack all artists in one pass. DataFrame.append was removed in pandas 2.0 and copied
# the accumulated frame on every call; ignore_index=True yields the same fresh 0..n-1
# index that the former reset_index(drop=True) produced.
df = pd.concat(
    [booba, damso, guizmo, kaaris, lomepal, nekfeu, nepal, orelsan, pnl, sch, vald],
    ignore_index=True,
)

In [0]:
def get_sequence_of_tokens(df):
    """Split every song into lines, fit the shared tokenizer, and encode each line.

    Args:
        df: DataFrame with a ``lyrics_clean`` column of newline-separated lyrics.
            Any index is accepted; rows whose lyrics are missing (NaN/None) are skipped.

    Returns:
        A tuple ``(input_sequences, total_words)`` where ``input_sequences`` is a list
        with one list of token ids per lyric line (empty lines give empty lists) and
        ``total_words`` is ``len(tokenizer.word_index) + 1``.

    Side effects:
        Fits the module-level ``tokenizer`` on the extracted lines.
    """
    lines = []
    # Iterate over the values rather than looking rows up by label 0..n-1, so the
    # function also works on frames whose index is not a default RangeIndex.
    for lyrics in df["lyrics_clean"].dropna():
        # Typographic quotes are replaced by spaces so they do not glue words together.
        cleaned = lyrics.replace("’", " ").replace("“", " ")
        lines.extend(cleaned.split("\n"))

    ## tokenization
    tokenizer.fit_on_texts(lines)
    total_words = len(tokenizer.word_index) + 1

    ## convert data to sequence of tokens (one batch call instead of one call per line)
    input_sequences = tokenizer.texts_to_sequences(lines)
    return input_sequences, total_words

def generate_padded_sequences(input_sequences,total_words):
    """Left-pad all sequences to a common length and scale the ids by ``total_words``.

    Args:
        input_sequences: list of token-id lists (must be non-empty, since ``max`` is
            taken over it).
        total_words: divisor applied to every id after padding.

    Returns:
        A tuple ``(scaled, max_sequence_len)``: the padded array divided by
        ``total_words``, and the length of the longest input sequence.
    """
    max_sequence_len = max(len(tokens) for tokens in input_sequences)
    # 'pre' padding puts the fill values in front of the tokens.
    padded = pad_sequences(input_sequences, maxlen=max_sequence_len, padding='pre')
    scaled = np.array(padded) / total_words
    return scaled, max_sequence_len

def discriminator_loss(real_output, fake_output):
    """Return the discriminator loss: real samples scored against 1, fakes against 0.

    Args:
        real_output: discriminator outputs for real samples.
        fake_output: discriminator outputs for generated samples.

    Returns:
        Sum of the two ``cross_entropy`` terms.
    """
    real_targets = tf.ones_like(real_output)
    loss_on_real = cross_entropy(real_targets, real_output)

    fake_targets = tf.zeros_like(fake_output)
    loss_on_fake = cross_entropy(fake_targets, fake_output)

    return loss_on_real + loss_on_fake

def generator_loss(fake_output):
    """Return the generator loss: generated samples scored against the target 1.

    Args:
        fake_output: discriminator outputs for generated samples.
    """
    wanted_targets = tf.ones_like(fake_output)
    return cross_entropy(wanted_targets, fake_output)

def train_step(text):
    """Run one adversarial update of the generator (`model`) and discriminator (`discr`).

    Args:
        text: one batch of real sequences, as yielded by ``train_dataset``.

    Returns:
        A tuple ``(gen_loss, disc_loss)`` with the losses computed for this batch, so
        callers can monitor training. Callers that ignore the return value are unaffected.

    Side effects:
        Applies one optimizer step to ``model`` and one to ``discr``.
    """
    # Draw as many noise vectors as there are real samples in this batch, instead of a
    # fixed 256, so real and generated batches stay the same size (including the
    # smaller final batch).
    batch_size = tf.shape(text)[0]
    noise = tf.random.normal([batch_size, noise_dim])

    # Two tapes: each network is differentiated with respect to its own loss.
    with tf.GradientTape() as gen_tape, tf.GradientTape() as disc_tape:
        generated_text = model(noise, training=True)

        real_output = discr(text, training=True)
        fake_output = discr(generated_text, training=True)

        gen_loss = generator_loss(fake_output)
        disc_loss = discriminator_loss(real_output, fake_output)

    gradients_of_generator = gen_tape.gradient(gen_loss, model.trainable_variables)
    gradients_of_discriminator = disc_tape.gradient(disc_loss, discr.trainable_variables)

    generator_optimizer.apply_gradients(zip(gradients_of_generator, model.trainable_variables))
    discriminator_optimizer.apply_gradients(zip(gradients_of_discriminator, discr.trainable_variables))

    return gen_loss, disc_loss

def generate_and_save_images(model, epoch, test_input):
    """Decode generator output into words and print the resulting sentences.

    Despite its name, this function saves nothing and produces no images: it prints one
    list containing a space-joined sentence per row of ``test_input``.

    Args:
        model: generator; called as ``model(test_input, training=False)``.
        epoch: unused; kept so existing call sites keep working.
        test_input: batch of noise vectors fed to the generator.
    """
    # Notice `training` is set to False.
    # This is so all layers run in inference mode (batchnorm).
    predictions = np.asarray(model(test_input, training=False))

    # Undo the `/ total_words` scaling applied in generate_padded_sequences and snap
    # to the nearest integer token id, for the whole batch at once.
    token_ids = np.rint(predictions * total_words).astype(int)

    # Invert the vocabulary once instead of scanning all of it for every token.
    index_to_word = {index: word for word, index in tokenizer.word_index.items()}

    # Ids with no vocabulary entry (e.g. 0, the padding value) are skipped.
    sentences = [
        [index_to_word[token_id] for token_id in row if token_id in index_to_word]
        for row in token_ids.tolist()
    ]
    print([" ".join(sentence) for sentence in sentences])

In [0]:
# Tokenise every lyric line; this call also fits the module-level `tokenizer`.
inp_sequences, total_words = get_sequence_of_tokens(df)
# Left-pad to the longest line and divide the ids by total_words; `k` is the resulting
# training matrix, later wrapped in `train_dataset`.
k, max_sequence_len = generate_padded_sequences(inp_sequences,total_words)

In [0]:
# Generator: maps a 100-dimensional noise vector to one value per sequence position.
# The input width must match `noise_dim`, which train_step uses to draw the noise.
model = tf.keras.Sequential([
    layers.Dense(100, use_bias=False, input_shape=(100,)),
    layers.BatchNormalization(),
    layers.LeakyReLU(),

    # Present the 100 features to the LSTM stack as a sequence of length 1.
    layers.Reshape((1, 100)),

    layers.LSTM(256, return_sequences=True),
    layers.BatchNormalization(),

    layers.LSTM(128, return_sequences=True),
    layers.BatchNormalization(),

    layers.LSTM(64),

    # Real samples are token ids divided by total_words, so every position lies
    # independently in [0, 1). Sigmoid gives that per-position range; the previous
    # softmax forced each generated sample to sum to 1, which real sequences do not.
    layers.Dense(max_sequence_len, activation="sigmoid"),
])

# Discriminator: scores one (real or generated) sequence of length max_sequence_len.
discr = tf.keras.Sequential([
    layers.Dense(max_sequence_len, input_shape=(max_sequence_len,)),

    # Present the features to the LSTM stack as a sequence of length 1.
    layers.Reshape((1,max_sequence_len)),

    layers.LSTM(128, return_sequences=True),
    layers.Dropout(0.3),

    layers.LSTM(64),
    layers.Dropout(0.3),

    layers.Flatten(),
    # No activation: the single output is a raw logit, matching the
    # from_logits=True setting of `cross_entropy`.
    layers.Dense(1),
])

In [0]:
# Separate Adam optimizers: generator and discriminator are updated independently.
generator_optimizer = tf.keras.optimizers.Adam(1e-4)
discriminator_optimizer = tf.keras.optimizers.Adam(1e-4)

EPOCHS = 50
noise_dim = 100
num_examples_to_generate = 5
BATCH_SIZE = 64

# Fixed noise reused after every epoch so the printed samples are comparable over time.
seed = tf.random.normal([num_examples_to_generate, noise_dim])

train_dataset = tf.data.Dataset.from_tensor_slices(k)
# Rows of `k` follow the order of `df`, i.e. artist after artist. A 100-element shuffle
# buffer only mixes neighbouring lines, so use a buffer covering the whole dataset
# (already held in memory as `k`) to get a uniform shuffle each epoch.
train_dataset = train_dataset.shuffle(len(k)).batch(BATCH_SIZE)

In [0]:
for epoch in range(EPOCHS):
    start = time.time()

    # One pass over every batch of real sequences.
    for seq in train_dataset:
        train_step(seq)

    # Replace the previous epoch's output with fresh samples from the fixed seed.
    display.clear_output(wait=True)
    generate_and_save_images(model, epoch + 1, seed)

    elapsed = time.time() - start
    print ('Time for epoch {} is {} sec'.format(epoch + 1, elapsed))

# Show the samples produced by the fully trained generator.
display.clear_output(wait=True)
generate_and_save_images(model, EPOCHS, seed)