In [None]:
%load_ext tensorboard 

In [1]:
import numpy as np
import sklearn
import pandas as pd
from sklearn.model_selection import train_test_split
from tqdm import tqdm
import nltk # just for tokenization
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
import string
import tensorflow as tf




In [3]:
import datetime

In [4]:
log = ""

In [5]:
def precision(y_true, y_pred, num_classes):
    # Initialize arrays to store true positives, false positives, and precision
    TP = np.zeros(num_classes)
    FP = np.zeros(num_classes)
    precision_scores = np.zeros(num_classes)

    # Calculate true positives and false positives for each class
    for i in range(num_classes):
        TP[i] = np.sum((y_true == i) & (y_pred == i))
        FP[i] = np.sum((y_true != i) & (y_pred == i))

    # Compute precision for each class
    for i in range(num_classes):
        if TP[i] + FP[i] > 0:
            precision_scores[i] = TP[i] / (TP[i] + FP[i])

    return np.mean(precision_scores)

In [6]:
def hamming_loss(y_true, y_pred):
    # Calculate number of mismatches
    num_mismatches = np.sum(y_true != y_pred)

    # Compute Hamming Loss
    hamming_loss = num_mismatches / (y_true.shape[0] * y_true.shape[1])

    return hamming_loss

In [7]:
def top3_accuracy(predicted_probs, true_labels):

    sorted_indices = np.argsort(predicted_probs, axis=1)[:, ::-1]

    # Check if true labels are in top-3 predicted labels
    top3_correct = np.any(true_labels[np.arange(len(true_labels))[:, None], sorted_indices[:, :3]], axis=1)
    # Calculate top-3 accuracy
    top3_accuracy = np.mean(top3_correct)
    
    return top3_accuracy

In [71]:
class NeuralNetwork:
    
    def __init__(self, raw_data, embeddings, hidden_neurons, test_df, test_embeddings):
        
        self.raw_data = raw_data
        self.random_state = 42
        
        self.X_train, self.X_test, self.y_train, self.y_test = None, None, None, None
        self.labels = ['Joy', 'Trust', 'Fear', 'Surprise','Sadness', 'Disgust', 'Anger', 'Anticipation']
        self.emotions_onehot = np.array(raw_data.loc[:, self.labels])
        self.X_train, self.y_train = embeddings, self.emotions_onehot
        self.X_test, self.y_test = test_embeddings, np.array(test_df.loc[:, self.labels])
        # self.__pre_process(embeddings)
        
        self.n_classes = self.y_train.shape[1]
        self.n_input_features = self.X_train.shape[1]
        self.n_hidden_neurons = hidden_neurons
        
        
        #weights from input layer to hidden layer1
        np.random.seed(self.random_state)
        # limit1 = np.sqrt(2 / float(self.n_input_features + self.n_hidden_neurons))
        # limit1 = np.sqrt(2 / float(self.n_input_features + self.n_hidden_neurons))
        
        # self.w1 = np.random.normal(
        #         0.0, limit1, size=(self.v_count, self.n)
        #     )     # embedding matrix
        # self.w2 = np.random.normal(
        #         0.0, limit2, size=(self.n, self.v_count)
        #     ) 
        # self.W01 = np.random.randn(self.n_input_features, self.n_hidden_neurons)
        # self.W12 = np.random.randn(self.n_hidden_neurons, self.n_classes)

        limit1 = np.sqrt(2 / float(self.n_input_features + self.n_hidden_neurons))
        limit2 = np.sqrt(2 / float(self.n_hidden_neurons + self.n_classes))

        # self.w1 = np.random.normal(
        #         0.0, limit1, size=(self.v_count, self.n)
        #     )     # embedding matrix
        # self.w2 = np.random.normal(
        #         0.0, limit2, size=(self.n, self.v_count)
        #     ) 
        self.W01 = np.random.normal(0.0, limit1, size=(self.n_input_features, self.n_hidden_neurons))
        self.W12 = np.random.normal(0.0, limit2, size=(self.n_hidden_neurons, self.n_classes))
         
        self.b01 = np.zeros((1, self.n_hidden_neurons))
        self.b12 = np.zeros((1, self.n_classes))
        
    def __activation(self, activation_function, X):
        if activation_function == "sigmoid":
            return 1/(1+np.exp(-X))
        elif activation_function == "relu":
            return (X > 0) * X
        elif activation_function == "tanh":
            return (np.exp(X) + np.exp(-X))/(np.exp(X) - np.exp(-X))
        
    def __activation_derivative(self, activation_function, X):
        if activation_function == "sigmoid":
            return self.__activation(activation_function, X) * (1 - self.__activation(activation_function, X))
        elif activation_function == "relu":
            return X > 0
        elif activation_function == "tanh":
            return 1 - self.__activation(activation_function, X)**2
    
    def __error(self, preds, ground, error="mean"):
        return -np.mean(ground * np.log(preds) + (1 - ground) * np.log(1 - preds))
        # return 0.5 * preds.shape[1] * ((ground - preds)**2).sum()
        
    
    #Note: output activation will always be sigmoid
    def train(self, epochs=100, lr = 1e-1, hidden_layer_activation = "relu", batch_size = 16, thresold = 0.6):
        log_folder = "logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
        summary_writer = tf.summary.create_file_writer(log_folder)
       
        with summary_writer.as_default():
            tf.summary.text("Hidden neuron", str(self.n_hidden_neurons), step=0)
            tf.summary.text("Batch size", str(batch_size), step=0)
            tf.summary.text("Epochs", str(epochs), step=0)
            tf.summary.text("Learning rate", str(lr), step=0)
            tf.summary.text("Hidden Layer Activation", hidden_layer_activation, step=0)

        # forward the data, and then calculate the training accuracy
        self.hidden_layer_activation = hidden_layer_activation
        print(f"number of batches {self.X_train.shape[0]//batch_size}")
        train_error = 0
        for epoch in range(epochs):
            #batch gd
            batches = (self.X_train.shape[0] % batch_size)
            exact_batches = True if batches == 0 else False
            n_batches = (self.X_train.shape[0]//batch_size) if exact_batches else (self.X_train.shape[0]//batch_size + 1)
            for batch in range(n_batches):
                b = batch*batch_size
                b_1 = self.X_train.shape[0] if (not exact_batches) and (batch == n_batches-1) else (batch+1)*batch_size
                self.X_batch = self.X_train[b:b_1]
                self.Y_batch = self.y_train[b:b_1]
                self.Z01 = self.X_batch.dot(self.W01) + self.b01
                self.A01 = self.__activation(hidden_layer_activation, self.Z01)
                self.Z02 = self.A01.dot(self.W12) + self.b12
                self.A02 = self.__activation("sigmoid", self.Z02)
                
                train_error = self.__error(self.A02, self.Y_batch)

                self.backward()

                self.W12 -= lr * self.A01.T.dot(self.d_error_W12)
                self.b12 -= lr * np.sum(self.d_error_W12, axis=0, keepdims = True)
                self.W01 -= lr * self.X_batch.T.dot(self.d_error_W01)

            # print(f"Error {error}")
            if epoch % 10 == 0:
                test_error, precision_metric, hamming_loss_metric, top3_metric = self.test(epoch)

                with summary_writer.as_default():
                    tf.summary.scalar("Top 3 accuracy", top3_metric, step=epoch)
                    tf.summary.scalar("Hamming Loss", hamming_loss_metric, step=epoch)
                    tf.summary.scalar("Precision", precision_metric, step=epoch)
                    tf.summary.scalar("Loss/Test", test_error, step=epoch)
                    tf.summary.scalar("Loss/Train", train_error, step=epoch)
                print(f"Epoch {epoch}, Train error {train_error}, Test error {test_error}, Precision {precision_metric}, top3metric {top3_metric}, Hamming loss {hamming_loss_metric}")
                # train_accuracy = self.accuracy(self.A02, self.Y_batch)
                # print(self.Y_batch, b, b_1)

                
        # log += f"{train_accuracy},"
    def backward(self):
        self.d_error_A02 = (self.A02 - self.Y_batch)/len(self.Y_batch)
        self.d_error_W12 = (self.d_error_A02) * self.__activation_derivative("sigmoid", self.Z02)
        
        self.d_error_W01 = (
            (self.d_error_W12).dot(self.W12.T) * self.__activation_derivative(self.hidden_layer_activation, self.Z01))
    
    
    # def __pre_process(self, embeddings, train_test_ratio = 0.3):
    #     # self.raw_data = self.raw_data.drop("Id",axis=1)
    #     # species_np = np.array(self.raw_data["Species"])
    #     # onehotencoder  = OneHotEncoder(sparse_output = False)
    #     # target_onehot = onehotencoder.fit_transform(species_np.reshape(-1,1))
    #     # self.raw_data=self.raw_data.drop("Species", axis=1)
        
    #     X = embeddings 
    #     y = self.emotions_onehot
        
    #     self.X_train, self.X_test, self.y_train, self.y_test = X, X, y, y
  

    def accuracy(self, y_pred, y_ground):
        y_train_predicted_classes = np.argmax(y_pred, axis = 1)
        y_train_ground_classes = np.argmax(y_ground, axis=1)
        accuracy = ((y_train_predicted_classes == y_train_ground_classes).sum())/len(y_train_predicted_classes)
        return accuracy
               
    def test(self, epoch):
        X, y = self.X_test, self.y_test
        Z01 = X.dot(self.W01) + self.b01
        A01 = self.__activation(self.hidden_layer_activation, Z01)
        Z02 = A01.dot(self.W12) + self.b12
        A02 = self.__activation("sigmoid", Z02)
        predictions = A02.round()
        error = self.__error(A02, y)
        precision_metric = precision(y, predictions, y.shape[1])
        hamming_loss_metric = hamming_loss(y, predictions)
        top3_metric = top3_accuracy(A02, y)
        return error, precision_metric, hamming_loss_metric, top3_metric


    def predict(self, X):
        Z01 = X.dot(self.W01) + self.b01
        A01 = self.__activation(self.hidden_layer_activation, Z01)
        Z02 = A01.dot(self.W12) + self.b12
        A02 = self.__activation("sigmoid", Z02)
        return A02

        
    def print_shapes(self):
        print(f"Xtrain shape {self.X_train.shape}")
        print(f"ytrain shape {self.y_train.shape}")
        print(f"Xtest shape {self.X_test.shape}")
        print(f"ytest shape {self.y_test.shape}")
    
        

# Preprocessing

In [9]:
test_data = pd.read_csv("../../data/EdmondsDance.csv")
train_data = pd.read_csv("../../data/train.csv")
train_data = train_data.rename(columns={"lyrics":"Lyrics"})

In [11]:
test_data.head()

Unnamed: 0,Song,Artists,Lyrics,Joy,Trust,Fear,Surprise,Sadness,Disgust,Anger,Anticipation
0,Apollo,"Hardwell, Amba Shepherd",Just one day in the life<br>So I can understan...,1,1,0,1,0,0,0,0
1,Lullaby,"R3HAB, Mike Williams","Hypnotized, this love out of me<br>Without you...",0,0,1,0,1,0,0,0
2,Melody (Tip Of My Tongue),Mike Williams,I stand a little too close<br>You stare a litt...,1,1,0,0,0,0,0,1
3,Take Me Home,"Cash Cash, Bebe Rexha",I'm falling to pieces<br>But I need this<br>Ye...,0,0,0,1,1,1,0,0
4,City of Dreams,"Dirty South, Alesso","Everything seems like a city of dreams,<br>I n...",0,0,0,1,1,0,0,0


In [13]:
train_data.describe()

Unnamed: 0,Joy,Trust,Fear,Surprise,Sadness,Disgust,Anger,Anticipation
count,1753.0,1753.0,1753.0,1753.0,1753.0,1753.0,1753.0,1753.0
mean,0.214489,0.149458,0.345693,0.147176,0.439247,0.284084,0.297205,0.304621
std,0.410585,0.356641,0.475729,0.354383,0.496437,0.451106,0.457158,0.460378
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,0.0,0.0,1.0,0.0,1.0,1.0,1.0,1.0
max,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [14]:
def load_embedding_model():
    """ Load GloVe Vectors
        Return:
            wv_from_bin: All 3000000 embeddings, each lengh 300
    """
    import gensim.downloader as api
    wv_from_bin = api.load("word2vec-google-news-300")
    print("Loaded vocab size %i" % len(list(wv_from_bin.index_to_key)))
    return wv_from_bin
wv_from_bin = load_embedding_model()

Loaded vocab size 3000000


In [15]:
def tokenize(lyric: str) -> list[str]:
    # lowercase the text, remove stop words, punctuation and keep only the words
    lyric.replace("<br>", "\n")
    tokens = nltk.tokenize.word_tokenize(lyric.lower())
    stop_words = stopwords.words("english") + list(string.punctuation)
    lemmatizer = WordNetLemmatizer()
    alpha_tokens = [lemmatizer.lemmatize(token) for token in tokens if token.isalpha() and token not in stop_words]

    return alpha_tokens

In [16]:
def vectorise(lyrics: str) -> np.ndarray:
    tokens = tokenize(lyrics)
    lyric_vector = np.zeros(300)
    for token in tokens:
        try:
            lyric_vector += wv_from_bin.get_vector(token.lower())
        except:
            continue
    return lyric_vector / np.linalg.norm(lyric_vector)

In [17]:
# go through each lyrics, tokenize it, vectorize each word, then combine all of them into single average vector and store it in the list

def get_embeddings(raw_data):
    lyrics = raw_data
    lyrics_embeddings = []
    unsupported_tokens = set()
    label_embedding_map = {} # dict{str: np.array([])}
    for lyric in tqdm(lyrics):
        lyric_vector = np.zeros(300)
        for token in tokenize(lyric):
            try:
                lyric_vector += wv_from_bin.get_vector(token.lower())
            except KeyError as e:
                # if the word is not present in the glove then key error is raised, so handle the exception and move on
                unsupported_tokens.add(token)
                continue
        lyrics_embeddings.append(lyric_vector)


    lyrics_embeddings = np.stack(lyrics_embeddings)
    scaled_lyrics_embeddings = lyrics_embeddings / np.linalg.norm(lyrics_embeddings, axis=1, keepdims=True)
    return scaled_lyrics_embeddings

In [18]:
test_data

Unnamed: 0,Song,Artists,Lyrics,Joy,Trust,Fear,Surprise,Sadness,Disgust,Anger,Anticipation
0,Apollo,"Hardwell, Amba Shepherd",Just one day in the life<br>So I can understan...,1,1,0,1,0,0,0,0
1,Lullaby,"R3HAB, Mike Williams","Hypnotized, this love out of me<br>Without you...",0,0,1,0,1,0,0,0
2,Melody (Tip Of My Tongue),Mike Williams,I stand a little too close<br>You stare a litt...,1,1,0,0,0,0,0,1
3,Take Me Home,"Cash Cash, Bebe Rexha",I'm falling to pieces<br>But I need this<br>Ye...,0,0,0,1,1,1,0,0
4,City of Dreams,"Dirty South, Alesso","Everything seems like a city of dreams,<br>I n...",0,0,0,1,1,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...
519,Ashes To Ashes (Remix),"Tigerlily, Noah Neiman",ashes to ashes<br>we're falling down<br>so we ...,0,0,0,0,0,1,1,0
520,Midnight,Third Party,I want to hold you<br>I want to hold you<br>I ...,0,0,0,0,0,0,0,1
521,Chicago (Remix),"Win and Woo, Bryce Fox, SHADES",There's not enough room in here<br>For room fo...,0,0,0,1,1,0,0,0
522,Haunted,PATAY,I see you everywhere<br>I never moved on<br>Wi...,0,0,0,0,1,0,0,0


In [20]:
train_embeddings = get_embeddings(train_data["Lyrics"])
test_embeddings = get_embeddings(test_data["Lyrics"])

100%|██████████| 1753/1753 [00:05<00:00, 336.20it/s] 
100%|██████████| 524/524 [00:02<00:00, 260.52it/s]


# Train

In [75]:
#  raw_data, embeddings, hidden_neurons, test_df, test_embeddings
nn = NeuralNetwork(train_data, train_embeddings, hidden_neurons = 128, test_df = test_data, test_embeddings = test_embeddings)

In [76]:
nn.train(epochs=1000, lr=1e-2, hidden_layer_activation="relu", batch_size=32)

number of batches 54
Epoch 0, Train error 0.6883572400135539, Test error 0.6926479661510743, Precision 0.1120817832310925, top3metric 0.5515267175572519, Hamming loss 0.4749522900763359
Epoch 10, Train error 0.5730987282177529, Test error 0.6494314373158827, Precision 0.14830305927342258, top3metric 0.6622137404580153, Hamming loss 0.3139312977099237
Epoch 20, Train error 0.5151478570350969, Test error 0.6396027703338423, Precision 0.08575858778625954, top3metric 0.7137404580152672, Hamming loss 0.3139312977099237
Epoch 30, Train error 0.4841878743868999, Test error 0.6402867055990336, Precision 0.08575858778625954, top3metric 0.7156488549618321, Hamming loss 0.3139312977099237
Epoch 40, Train error 0.4653313595516975, Test error 0.6430544434996643, Precision 0.08575858778625954, top3metric 0.7194656488549618, Hamming loss 0.3139312977099237
Epoch 50, Train error 0.4517714644189524, Test error 0.6455172120262306, Precision 0.08575858778625954, top3metric 0.7156488549618321, Hamming los

In [77]:
test_error, precision_score, hamming_score, top3_accuracy = nn.test(1) #error, precision, hamming loss, top3 accuracy
print(f"Test error {test_error}, Precision {precision_score}, top3metric {top3_accuracy}, Hamming loss {hamming_score}")

Test error 0.6426446039868796, Precision 0.14937506618805785, top3metric 0.8816793893129771, Hamming loss 0.33134541984732824


In [392]:
song = """
Look at her face, it's a wonderful face  
And it means something special to me  
Look at the way that she smiles when she sees me  
How lucky can one fellow be?  
  
She's just my kind of girl, she makes me feel fine  
Who could ever believe that she could be mine?  
She's just my kind of girl, without her I'm blue  
And if she ever leaves me what could I do, what could I do?  
  
And when we go for a walk in the park  
And she holds me and squeezes my hand  
We'll go on walking for hours and talking  
About all the things that we plan  
  
She's just my kind of girl, she makes me feel fine  
Who could ever believe that she could be mine?  
She's just my kind of girl, without her I'm blue  
And if she ever leaves me what could I do, what could I do?
"""

In [393]:
def predict(lyrics: str) -> str:
    song_vector = vectorise(lyrics)[None,:]
    return nn.predict(song_vector)

In [394]:
probs = predict(song)

In [395]:
probs

array([[0.13788182, 0.17893933, 0.29581816, 0.06733809, 0.56789511,
        0.04453371, 0.06220842, 0.72906543]])

In [402]:
nn.labels

['Joy',
 'Trust',
 'Fear',
 'Surprise',
 'Sadness',
 'Disgust',
 'Anger',
 'Anticipation']

In [396]:
np.array(nn.labels)[np.argsort(probs[0])[::-1]]

array(['Anticipation', 'Sadness', 'Fear', 'Trust', 'Joy', 'Surprise',
       'Anger', 'Disgust'], dtype='<U12')

In [323]:
import pickle

with open("../embeddings/nn.pickle", "wb") as f:
    pickle.dump(nn, f)

In [138]:
import pickle


with open("../embeddings/nn.pickle", "rb") as f:
    a = pickle.load(f)

# Generating labels for spotify dataset

In [326]:
spotify_dataset = pd.read_csv("../../data/million_songs.csv")

In [333]:
spotify_embeddings = get_embeddings(spotify_dataset["text"])

  0%|          | 0/57650 [00:00<?, ?it/s]

100%|██████████| 57650/57650 [02:20<00:00, 411.08it/s]


In [398]:
probs = predict(song)

In [399]:
probs

array([[0.13788182, 0.17893933, 0.29581816, 0.06733809, 0.56789511,
        0.04453371, 0.06220842, 0.72906543]])

In [400]:
nn.predict(np.random.randn(2,300))

array([[8.65818075e-20, 2.86602776e-30, 9.99931780e-01, 1.00000000e+00,
        3.02954141e-46, 9.99999999e-01, 1.84947211e-16, 6.51872386e-01],
       [2.94239799e-21, 2.86853735e-17, 5.56229063e-02, 1.00000000e+00,
        1.29993595e-18, 7.74292586e-15, 4.46743071e-05, 4.32534962e-17]])

In [401]:
np.array(nn.labels)[np.argsort(probs[0])[::-1]]

array(['Anticipation', 'Sadness', 'Fear', 'Trust', 'Joy', 'Surprise',
       'Anger', 'Disgust'], dtype='<U12')

In [378]:
(nn.predict(spotify_embeddings), axis=1)

array([7, 7, 7, ..., 4, 4, 7], dtype=int64)

In [427]:
nn.labels

['Joy',
 'Trust',
 'Fear',
 'Surprise',
 'Sadness',
 'Disgust',
 'Anger',
 'Anticipation']

In [437]:
top3_labels = np.array(nn.labels)[np.argsort(nn.predict(spotify_embeddings), axis=1)[:, ::-1][:, :3]]

In [444]:
labels = []
for prediction in top3_labels:
    labels.append(", ".join(prediction))

In [445]:
labels

['Anticipation, Sadness, Fear',
 'Anticipation, Surprise, Joy',
 'Anticipation, Sadness, Joy',
 'Anticipation, Surprise, Joy',
 'Anticipation, Surprise, Joy',
 'Anticipation, Sadness, Fear',
 'Sadness, Anticipation, Trust',
 'Anticipation, Sadness, Trust',
 'Sadness, Anticipation, Fear',
 'Sadness, Fear, Joy',
 'Anticipation, Joy, Sadness',
 'Joy, Anticipation, Surprise',
 'Sadness, Anticipation, Fear',
 'Anticipation, Sadness, Joy',
 'Sadness, Anticipation, Fear',
 'Sadness, Anticipation, Fear',
 'Sadness, Trust, Anger',
 'Sadness, Anger, Anticipation',
 'Sadness, Anticipation, Surprise',
 'Joy, Disgust, Anticipation',
 'Sadness, Anticipation, Fear',
 'Sadness, Anticipation, Fear',
 'Sadness, Anticipation, Fear',
 'Anticipation, Surprise, Fear',
 'Anticipation, Sadness, Trust',
 'Sadness, Anticipation, Trust',
 'Anticipation, Joy, Surprise',
 'Sadness, Disgust, Surprise',
 'Sadness, Fear, Anticipation',
 'Anger, Sadness, Fear',
 'Anticipation, Sadness, Trust',
 'Anger, Fear, Joy',
 'S

In [446]:
spotify_dataset["top3_labels"] = labels

In [447]:
spotify_dataset

Unnamed: 0,artist,song,link,text,top3_labels
0,ABBA,Ahe's My Kind Of Girl,/a/abba/ahes+my+kind+of+girl_20598417.html,"Look at her face, it's a wonderful face \r\nA...","Anticipation, Sadness, Fear"
1,ABBA,"Andante, Andante",/a/abba/andante+andante_20002708.html,"Take it easy with me, please \r\nTouch me gen...","Anticipation, Surprise, Joy"
2,ABBA,As Good As New,/a/abba/as+good+as+new_20003033.html,I'll never know why I had to go \r\nWhy I had...,"Anticipation, Sadness, Joy"
3,ABBA,Bang,/a/abba/bang_20598415.html,Making somebody happy is a question of give an...,"Anticipation, Surprise, Joy"
4,ABBA,Bang-A-Boomerang,/a/abba/bang+a+boomerang_20002668.html,Making somebody happy is a question of give an...,"Anticipation, Surprise, Joy"
...,...,...,...,...,...
57645,Ziggy Marley,Good Old Days,/z/ziggy+marley/good+old+days_10198588.html,Irie days come on play \r\nLet the angels fly...,"Sadness, Anticipation, Fear"
57646,Ziggy Marley,Hand To Mouth,/z/ziggy+marley/hand+to+mouth_20531167.html,Power to the workers \r\nMore power \r\nPowe...,"Sadness, Fear, Anger"
57647,Zwan,Come With Me,/z/zwan/come+with+me_20148981.html,all you need \r\nis something i'll believe \...,"Sadness, Fear, Anticipation"
57648,Zwan,Desire,/z/zwan/desire_20148986.html,northern star \r\nam i frightened \r\nwhere ...,"Sadness, Anticipation, Fear"


In [448]:
spotify_dataset.to_csv("spotify_predictions_nn.csv")

In [391]:
print(spotify_dataset.loc[0]["text"])

Look at her face, it's a wonderful face  
And it means something special to me  
Look at the way that she smiles when she sees me  
How lucky can one fellow be?  
  
She's just my kind of girl, she makes me feel fine  
Who could ever believe that she could be mine?  
She's just my kind of girl, without her I'm blue  
And if she ever leaves me what could I do, what could I do?  
  
And when we go for a walk in the park  
And she holds me and squeezes my hand  
We'll go on walking for hours and talking  
About all the things that we plan  
  
She's just my kind of girl, she makes me feel fine  
Who could ever believe that she could be mine?  
She's just my kind of girl, without her I'm blue  
And if she ever leaves me what could I do, what could I do?




In [389]:
np.array(nn.labels)[np.argsort(predict(spotify_dataset.loc[0]["text"]))[::-1]]

array([['Disgust', 'Anger', 'Surprise', 'Joy', 'Trust', 'Fear',
        'Sadness', 'Anticipation']], dtype='<U12')

In [383]:
np.array(nn.labels)[np.argsort(nn.predict(spotify_embeddings[0]))[::-1]]

array([['Disgust', 'Anger', 'Surprise', 'Joy', 'Trust', 'Fear',
        'Sadness', 'Anticipation']], dtype='<U12')

In [379]:

np.array(nn.labels)[np.argsort(nn.predict(spotify_embeddings), axis=1)[::-1]]

array([['Disgust', 'Anger', 'Trust', ..., 'Joy', 'Sadness',
        'Anticipation'],
       ['Disgust', 'Anger', 'Joy', ..., 'Fear', 'Anticipation',
        'Sadness'],
       ['Disgust', 'Surprise', 'Trust', ..., 'Anticipation', 'Fear',
        'Sadness'],
       ...,
       ['Disgust', 'Fear', 'Anger', ..., 'Joy', 'Sadness',
        'Anticipation'],
       ['Disgust', 'Anger', 'Fear', ..., 'Joy', 'Surprise',
        'Anticipation'],
       ['Disgust', 'Anger', 'Surprise', ..., 'Fear', 'Sadness',
        'Anticipation']], dtype='<U12')

In [325]:
nn.predict()

array([[0.49550327, 0.31631921, 0.41202535, 0.30108115, 0.30323528,
        0.65006798, 0.51942872, 0.20887872],
       [0.49550327, 0.31631921, 0.41202535, 0.30108115, 0.30323528,
        0.65006798, 0.51942872, 0.20887872]])