In [13]:
import numpy as np
import pandas as pd
import tensorflow as tf
import re
import string

from tensorflow.keras import layers
from tensorflow.keras import losses
from tensorflow import keras


# requires update to tensorflow 2.4
# >>> conda activate PIC16B
# >>> pip install tensorflow==2.4
from tensorflow.keras.layers.experimental.preprocessing import TextVectorization
from tensorflow.keras.layers.experimental.preprocessing import StringLookup

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# for embedding viz
import plotly.express as px 
import plotly.io as pio
pio.templates.default = "plotly_white"

In [2]:
# Load the music dataset from a path relative to the notebook's working directory.
# The recorded column listing in the next cell includes an 'Unnamed: 0' column
# (presumably a saved index — confirm before relying on it).
df = pd.read_csv(filepath_or_buffer="../datasets/tcc_ceds_music.csv")

In [4]:
# Show the column index of the loaded frame so the available fields can be inspected.
df.columns

Index(['Unnamed: 0', 'artist_name', 'track_name', 'release_date', 'genre',
       'lyrics', 'len', 'dating', 'violence', 'world/life', 'night/time',
       'shake the audience', 'family/gospel', 'romantic', 'communication',
       'obscene', 'music', 'movement/places', 'light/visual perceptions',
       'family/spiritual', 'like/girls', 'sadness', 'feelings', 'danceability',
       'loudness', 'acousticness', 'instrumentalness', 'valence', 'energy',
       'topic', 'age'],
      dtype='object')

In [5]:
# Names of the per-track numeric columns fed to the model's "scalars" input.
# These are the 22 columns that sit between 'len' and 'topic' in df.columns;
# the order here fixes the feature order of that input.
scalars = [
    "dating", "violence", "world/life", "night/time",
    "shake the audience", "family/gospel", "romantic", "communication",
    "obscene", "music", "movement/places", "light/visual perceptions",
    "family/spiritual", "like/girls", "sadness", "feelings",
    "danceability", "loudness", "acousticness", "instrumentalness",
    "valence", "energy",
]

In [7]:
# Bundle the model-relevant pieces of the frame under string keys:
# the lyrics text column, the numeric columns listed in `scalars`
# (kept as a DataFrame), and the genre column.
# NOTE(review): the "lyrics"/"scalars" keys match the `name`s given to the
# keras.Input layers in this notebook — presumably so inputs can be fed by
# name; confirm against the training code.
data = dict(
    lyrics=df["lyrics"],
    scalars=df[scalars],
    genre=df["genre"],
)

In [14]:
# Pipeline parameters.
# size_vocabulary is also the input dimension of the Embedding layer built below.
size_vocabulary = 2000

# Number of distinct values in the genre column; sizes the model's output layer.
# Note: unique() keeps NaN as its own value if any genre is missing.
genre_labels = df["genre"].unique()
num_genres = len(genre_labels)

In [20]:
# Symbolic inputs for the two branches of the model.

# Lyrics branch: one variable-length sequence per example (shape = (None,)).
# NOTE(review): presumably integer token ids produced by a text vectorizer,
# since this tensor is passed straight to an Embedding layer — confirm.
lyrics_input = keras.Input(
    shape = (None,),
    name = "lyrics"
)

# Scalar branch: one value per column listed in `scalars`.
# `shape` is documented as a shape *tuple* (excluding the batch dimension).
# A bare int happened to be accepted by TF 2.4 but is not part of the
# documented API and is rejected by some newer Keras versions, so pass a
# one-element tuple explicitly.
scalars_input = keras.Input(
    shape = (len(scalars),),
    name = "scalars"
)

In [21]:
# Lyrics branch: map each token to a 30-dimensional embedding, then reduce the
# whole sequence to a single 128-dimensional vector with an LSTM (only the
# final output is returned, as the (None, 128) shape in the summary shows).
lyrics_features = layers.Embedding(size_vocabulary, 30)(lyrics_input)
lyrics_features = layers.LSTM(128)(lyrics_features)

# Join the sequence summary with the raw scalar features along the last axis.
merged = layers.concatenate([lyrics_features, scalars_input])

# Hidden layer. A non-linearity is required here: without an activation this
# Dense layer and the output Dense layer would compose into a single linear
# map, so the hidden layer would add parameters but no modelling capacity.
hidden_1 = layers.Dense(128, activation = "relu", name = "hidden_1")(merged)

# One raw score per genre (no activation is applied here).
# NOTE(review): the outputs are unnormalised scores, so the loss used at
# compile time presumably needs from_logits=True — confirm in the training cell.
output = layers.Dense(num_genres)(hidden_1)

# Two-input, single-output functional model.
model = keras.Model(
    inputs = [lyrics_input, scalars_input],
    outputs = output
)

In [22]:
# Print the layer-by-layer summary: output shapes, parameter counts and connections.
model.summary()

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
lyrics (InputLayer)             [(None, None)]       0                                            
__________________________________________________________________________________________________
embedding_2 (Embedding)         (None, None, 30)     60000       lyrics[0][0]                     
__________________________________________________________________________________________________
lstm_2 (LSTM)                   (None, 128)          81408       embedding_2[0][0]                
__________________________________________________________________________________________________
scalars (InputLayer)            [(None, 22)]         0                                            
______________________________________________________________________________________________

In [23]:
# Render the model graph (with tensor shapes) to a PNG next to the notebook.
# This needs the optional pydot package and a graphviz install; the recorded
# output below shows they were missing when this cell was last run.
keras.utils.plot_model(
    model,
    to_file="multi_input_and_output_model.png",
    show_shapes=True,
)

('Failed to import pydot. You must `pip install pydot` and install graphviz (https://graphviz.gitlab.io/download/), ', 'for `pydotprint` to work.')
