# SORTIFY

#### Topic Classification
sadness       --> 0  
violence      --> 1  
world/life    --> 2  
obscene       --> 3  
music         --> 4  
night/time    --> 5  
romantic      --> 6

#### Sentiment Analysis
positive  
neutral  
negative  

In [11]:
topic_dict = {
    0:"sadness",
    1:"violence",
    2:"world/life",
    3:"obscence",
    4:"music",
    5:"night/time",
    6:"romantic"
}

### libraries

In [12]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from imblearn.under_sampling import RandomUnderSampler
from torch.utils.data import TensorDataset, DataLoader
from torchtext.data import get_tokenizer
import torch.nn.functional as F
tokenizer = get_tokenizer("basic_english")
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
from nltk.sentiment.vader import SentimentIntensityAnalyzer
sid = SentimentIntensityAnalyzer()

In [13]:
device = torch.device("cpu")

### word vectors for embedding

In [14]:
# Load GloVe 100D embeddings
glove = dict()
embedding_dim = 100

with open('glove.6B.100d.txt',encoding="utf8") as fp:
    for line in fp.readlines():
        records = line.split()
        word = records[0]
        vector_dimensions = np.asarray(records[1:], dtype='float32')
        glove [word] = vector_dimensions

In [15]:
# embedding words

def embedd_list(lst):
    n = len(lst)
    lst = lst[0]
    embedded_tens = np.zeros((n, 100))
    for i in range(n-len(lst),n):
        try:
            embedded_tens[i] = glove[lst[i-n+len(lst)]]
        except:
            pass       
    return embedded_tens

### topic classification model

In [16]:
class CNN(nn.Module):
    def __init__(self, embedding_dim, n_filters, filter_sizes, output_dim, dropout):
        super(CNN,self).__init__()
        self.convs = nn.ModuleList([
                                    nn.Conv2d(in_channels = 1, 
                                              out_channels = n_filters, 
                                              kernel_size = (fs, embedding_dim)) 
                                    for fs in filter_sizes
                                    ])
        self.linear = nn.Linear(len(filter_sizes) * n_filters, output_dim)
        self.dropout = nn.Dropout(dropout)
    def forward(self, input):
        conved = [F.relu(conv(input)).squeeze(3) for conv in self.convs]
        pooled = [F.max_pool1d(conv, conv.shape[2]).squeeze(2) for conv in conved]
        cat = self.dropout(torch.cat(pooled, dim = 1))
        return self.linear(cat)

In [17]:
model = torch.load('genre_classifier_model.h5')
model

CNN(
  (convs): ModuleList(
    (0): Conv2d(1, 100, kernel_size=(2, 100), stride=(1, 1))
    (1): Conv2d(1, 100, kernel_size=(3, 100), stride=(1, 1))
    (2): Conv2d(1, 100, kernel_size=(4, 100), stride=(1, 1))
  )
  (linear): Linear(in_features=300, out_features=8, bias=True)
  (dropout): Dropout(p=0.5, inplace=False)
)

### lyrics and preprocessing (Embedded vectors)

In [18]:
from lyrics_extractor import SongLyrics
import re
 
# pass the GCS_API_KEY, GCS_ENGINE_ID
extract_lyrics = SongLyrics("AIzaSyAQgGgdjd16moeIgdv7a5baI7tNUc6sPGg","b7agaez7ax89f4caa")

### spotipy API

In [19]:
# SPOTIFY WEB API
cid = "da864090557945fadg14cb475474067e"
secret = "4c354sfh65a8sdg1abb70a1bd80d4074"
client_credentials_manager = SpotifyClientCredentials(client_id=cid, client_secret=secret)
sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)

### Topic probabilities

In [20]:
def topic_probabilities(lyrics):
    topic_classifier_lyrics = lyrics.replace("\n"," ")
    # splitting into words
    words = []
    words.append(topic_classifier_lyrics.split())
    # word embedding
    map_object_words = map(embedd_list, words)
    words_embedded = list(map_object_words)
    words_embedded = np.stack(words_embedded)
    words_embedded = torch.from_numpy(words_embedded).to(device)
    words_embedded = words_embedded.view(len(words_embedded), 1, len(words[0]), 100).to(device)
    # prediction
    loader = DataLoader(TensorDataset(words_embedded), batch_size= 32)
    y_pred = model(words_embedded.float())
    print("\n"+track_name+"\n")
    for i,j in zip(F.softmax(y_pred, dim=1).detach().numpy()[0], topic_dict.values()):
        print(f"{j:12s} {i*100:.2f}%")

## APPLICATION

In [22]:
playlist_link = "https://open.spotify.com/playlist/3bdSB9sZ6xcG1EN9YYXg6z?si=2d34d65fcd554681"
method = "Neutral"
playlist_uri = playlist_link.split("/")[-1].split("?")[0]
items = sp.playlist_tracks(playlist_uri)["items"]
track_ids = []
for track in items:
    # Track name
    track_name = track["track"]["name"]

    # extract lyrics
    lyric = extract_lyrics.get_lyrics(track_name)
    lyric['lyrics'] = lyric['lyrics'].replace("\n\n", "\n")
    lyric['lyrics'] = lyric['lyrics'].replace(","," ")
    lyric['lyrics'] = re.sub("\[.*?\]", "", lyric['lyrics'])
    lyric['lyrics'] = re.sub("\(.*?\)","",lyric['lyrics'])
    lyric['lyrics'] = re.sub("  +", " ", lyric['lyrics']).strip()
    sentiment_analyzer_lyrics = lyric['lyrics']

    # Sentiment analyzer
    num_positive = 0
    num_negative = 0
    num_neutral = 0
    for sentence in sentiment_analyzer_lyrics.split("\n"):
        comp = sid.polarity_scores(sentence)
        comp = comp['compound']
        if comp >= 0.5:
            num_positive += 1
        elif -0.5 < comp < 0.5:
            num_neutral += 1
        else:
            num_negative += 1
    num_total = num_negative + num_neutral + num_positive
    percent_negative = (num_negative / float(num_total)) * 100
    percent_neutral = (num_neutral / float(num_total)) * 100
    percent_positive = (num_positive / float(num_total)) * 100
    if percent_positive > percent_negative and percent_positive > percent_neutral:
        if method == "Positive":
            topic_probabilities(sentiment_analyzer_lyrics)
    elif percent_negative>percent_neutral:
        if method == "Negative":
            topic_probabilities(sentiment_analyzer_lyrics)
    else:
        if method == "Neutral":
            topic_probabilities(sentiment_analyzer_lyrics)


I Can't Carry This Anymore

sadness      0.14%
violence     0.49%
world/life   89.67%
obscence     0.83%
music        0.16%
night/time   6.62%
romantic     2.09%

Goodbye

sadness      9.34%
violence     13.35%
world/life   22.24%
obscence     17.99%
music        10.63%
night/time   10.08%
romantic     12.82%

Break My Heart Again

sadness      0.64%
violence     4.70%
world/life   67.26%
obscence     6.89%
music        1.19%
night/time   12.51%
romantic     6.81%

Sexy And I Know It

sadness      1.33%
violence     3.59%
world/life   59.97%
obscence     8.69%
music        2.68%
night/time   14.69%
romantic     9.01%

Good Time

sadness      0.09%
violence     7.29%
world/life   11.35%
obscence     16.03%
music        1.81%
night/time   17.30%
romantic     46.13%

Summer of Love (Shawn Mendes & Tainy)

sadness      1.13%
violence     6.84%
world/life   36.11%
obscence     33.77%
music        1.90%
night/time   10.04%
romantic     10.17%

I Like Me Better

sadness      9.34%
violence  