In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# Importing Data

In [2]:
Dataset = pd.read_csv('./Data/Training/TrainingDataset.csv')
Dataset = Dataset.drop(['track_name','artist_name','album_name','album_cover','release_date',
                        'preview_link','spotify_link','name', 'length','timeSignature'], axis=1)

In [3]:
Dataset.isnull().values.any()

True

In [4]:
Dataset.dropna(inplace=True)

In [5]:
Dataset = Dataset.sample(frac=1)
Dataset = Dataset.drop_duplicates(subset=["id"], keep=False)
Dataset

Unnamed: 0,id,playlist_mood,acousticness,danceability,energy,instrumentalness,liveness,loudness,speechiness,tempo,valence
3786,2vR23BjIzpLMtzsyKrM6Mb,Sad,0.650000,0.473,0.5620,0.000117,0.1200,-8.624,0.0451,74.653,0.316
4331,5anCkDvJ17aznvK5TED5uo,Angry,0.000297,0.580,0.9160,0.025900,0.1260,-4.358,0.0387,118.004,0.683
2747,5wMvw6fFXoeINA8v4Ru13n,Sleepy,0.831000,0.617,0.0373,0.941000,0.1050,-20.707,0.0415,111.705,0.432
4132,2UnVVvPuDs4I812CXFL1pr,Angry,0.000251,0.474,0.9890,0.000096,0.3280,-4.110,0.1980,109.982,0.106
3113,63M0QKW2WEhUPJO13b2Nga,Depressed,0.590000,0.595,0.5950,0.000020,0.1640,-7.102,0.0289,88.976,0.526
...,...,...,...,...,...,...,...,...,...,...,...
368,7gEbuXht8gYcA5OdN9zUpy,Excited,0.001980,0.595,0.8870,0.000039,0.0518,-3.607,0.0387,124.073,0.571
3188,0oYotQ2JWzGJ4VkGz3TfCk,Depressed,0.297000,0.402,0.4230,0.063000,0.3440,-11.150,0.0594,94.710,0.410
3909,0W9Xvd4Qx1aZPxEi94vgRY,Angry,0.000049,0.328,0.9390,0.006500,0.3370,-1.654,0.0423,185.971,0.329
2535,2C8lzS60apZalxtP83IM7w,Sleepy,0.428000,0.669,0.1440,0.883000,0.2940,-22.429,0.0761,185.921,0.280


In [6]:
X = Dataset.iloc[:, 2:]
y = Dataset.iloc[:, 1]

# Data Preprocessing

In [7]:
#Encoding
yLabels = y.copy()

from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
y = le.fit_transform(y)

In [8]:
DecodingMap = pd.DataFrame({'encoding': y,'mood': yLabels}).drop_duplicates().set_index('mood')

In [9]:
#train test split
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1)

In [10]:
print(X_train)

      acousticness  danceability  energy  instrumentalness  liveness  \
2735      0.994000         0.242  0.0159          0.844000    0.1070   
529       0.503000         0.510  0.4800          0.000000    0.1060   
3387      0.903000         0.578  0.3190          0.000122    0.1090   
1794      0.023100         0.387  0.8430          0.000000    0.5030   
4084      0.000011         0.429  0.9730          0.003810    0.0776   
...            ...           ...     ...               ...       ...   
2254      0.162000         0.550  0.5960          0.004810    0.0805   
718       0.943000         0.439  0.0695          0.000006    0.1330   
678       0.977000         0.496  0.1330          0.012300    0.1190   
2854      0.980000         0.187  0.0213          0.970000    0.0990   
3702      0.719000         0.319  0.5520          0.000000    0.1440   

      loudness  speechiness    tempo  valence  
2735   -33.716       0.0561  138.096   0.1100  
529     -6.567       0.0452  182.862   

In [11]:
#Feature Scaling
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

# Logistic Regression

In [12]:
from sklearn.linear_model import LogisticRegression
classifier = LogisticRegression(random_state = 0, max_iter=2000)
classifier.fit(X_train, y_train)

LogisticRegression(max_iter=2000, random_state=0)

In [13]:
y_pred = classifier.predict(X_test)

In [14]:
from sklearn.metrics import accuracy_score
accuracy_score(y_test, y_pred)

0.5616083009079118

# KNN

In [15]:
from sklearn.neighbors import KNeighborsClassifier
classifier = KNeighborsClassifier(n_neighbors = 5, metric = 'minkowski', p = 2)
classifier.fit(X_train, y_train)

KNeighborsClassifier()

In [16]:
y_pred = classifier.predict(X_test)

In [17]:
from sklearn.metrics import accuracy_score
accuracy_score(y_test, y_pred)

0.5603112840466926

# SVM

In [18]:
from sklearn.svm import SVC
classifier = SVC(kernel = 'linear', random_state = 0)
classifier.fit(X_train, y_train)

SVC(kernel='linear', random_state=0)

In [19]:
y_pred = classifier.predict(X_test)

In [20]:
from sklearn.metrics import accuracy_score
accuracy_score(y_test, y_pred)

0.5836575875486382

# Kernel SVM

In [21]:
from sklearn.svm import SVC
classifier = SVC(kernel = 'rbf', random_state = 0)
classifier.fit(X_train, y_train)

SVC(random_state=0)

In [22]:
y_pred = classifier.predict(X_test)

In [23]:
from sklearn.metrics import accuracy_score
accuracy_score(y_test, y_pred)

0.6212710765239948

# Naive Bayes 

In [24]:
from sklearn.naive_bayes import GaussianNB
classifier = GaussianNB()
classifier.fit(X_train, y_train)

GaussianNB()

In [25]:
y_pred = classifier.predict(X_test)

In [26]:
from sklearn.metrics import accuracy_score
accuracy_score(y_test, y_pred)

0.490272373540856

# Descision Tree

In [27]:
from sklearn.tree import DecisionTreeClassifier
classifier = DecisionTreeClassifier(criterion = 'entropy', random_state = 0)
classifier.fit(X_train, y_train)

DecisionTreeClassifier(criterion='entropy', random_state=0)

In [28]:
y_pred = classifier.predict(X_test)

In [29]:
from sklearn.metrics import accuracy_score
accuracy_score(y_test, y_pred)

0.51621271076524

# Random Forest

In [30]:
from sklearn.ensemble import RandomForestClassifier
RFclassifier = RandomForestClassifier(n_estimators = 10, criterion = 'entropy', random_state = 0)
RFclassifier.fit(X_train, y_train)

RandomForestClassifier(criterion='entropy', n_estimators=10, random_state=0)

In [31]:
y_pred = RFclassifier.predict(X_test)

In [32]:
from sklearn.metrics import accuracy_score
accuracy_score(y_test, y_pred)

0.6057068741893644

# Deploying the model

In [33]:
Test_Data = pd.read_csv('../Dataset.csv')
Test_Data = Test_Data.drop_duplicates(subset=["id"], keep='first')
X = Test_Data[['acousticness','danceability','energy','instrumentalness','liveness','loudness','speechiness','tempo','valence']]

In [34]:
X = sc.transform(X)
X

array([[-0.73156701,  0.50698796,  0.19847943, ..., -0.73145496,
        -0.53370286, -0.25088996],
       [ 0.56809905,  0.02932305, -0.49080638, ..., -0.38269871,
         0.06236629, -1.0641939 ],
       [ 0.57068288,  0.58497407, -0.28951053, ..., -0.67529929,
        -0.75500962,  0.21038691],
       ...,
       [-1.07831689, -0.31674031,  1.34525583, ...,  0.79804808,
        -0.70970248,  0.22657206],
       [ 1.24764612,  0.01957479, -1.16179257, ..., -0.33540973,
         0.43540658, -0.20638078],
       [-0.57653726,  1.61829001,  0.93656424, ...,  1.16749326,
         0.1849492 ,  1.85722625]])

In [35]:
RFclassifier.predict(X)

array([2, 7, 3, ..., 0, 4, 6])

In [36]:
Test_Data['playlist_mood'] = RFclassifier.predict(X)
Test_Data

Unnamed: 0,id,genre,track_name,artist_name,album_name,album_cover,release_date,preview_link,spotify_link,acousticness,danceability,energy,instrumentalness,liveness,loudness,speechiness,tempo,valence,playlist_mood
0,4gs07VlJST4bdxGbBsXVue,acoustic,Heartbreak Warfare,John Mayer,Battle Studies,https://i.scdn.co/image/ab67616d0000b2731e3dbe...,2009-11-13,https://p.scdn.co/mp3-preview/cce088114300e08d...,https://open.spotify.com/track/4gs07VlJST4bdxG...,0.1910,0.624,0.554,0.001310,0.2990,-8.113,0.0225,97.031,0.3110,2
1,0SuG9kyzGRpDqrCWtgD6Lq,acoustic,Give Me Love,Ed Sheeran,+,https://i.scdn.co/image/ab67616d0000b273ed139c...,2011-09-09,https://p.scdn.co/mp3-preview/1750e0969080492c...,https://open.spotify.com/track/0SuG9kyzGRpDqrC...,0.6940,0.526,0.328,0.000000,0.1120,-9.864,0.0461,116.068,0.1100,7
2,0vWjNkwgkTnyzfeLdTKWJ7,acoustic,Roar,"Alex Goot,Sam Tsui",Roar,https://i.scdn.co/image/ab67616d0000b273742fa6...,2013-09-17,https://p.scdn.co/mp3-preview/bf9138f2eb6dbed3...,https://open.spotify.com/track/0vWjNkwgkTnyzfe...,0.6950,0.640,0.394,0.000000,0.0741,-7.258,0.0263,89.963,0.4250,3
3,1NdgDGYG4J827IvfeDgF4o,acoustic,Dear Mr. President,P!nk,The People Speak,https://i.scdn.co/image/ab67616d0000b27380f5f9...,2009-01-01,,https://open.spotify.com/track/1NdgDGYG4J827Iv...,0.8490,0.476,0.299,0.000000,0.1130,-9.154,0.0385,128.039,0.2970,7
4,54KFQB6N4pn926IUUYZGzK,acoustic,To Build A Home,"The Cinematic Orchestra,Patrick Watson",Ma Fleur,https://i.scdn.co/image/ab67616d0000b273e91011...,2007-05-07,https://p.scdn.co/mp3-preview/9f30342a6568f55c...,https://open.spotify.com/track/54KFQB6N4pn926I...,0.8850,0.264,0.122,0.349000,0.0940,-15.399,0.0349,148.658,0.0735,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11152,5EcaGTbrp1SkFrnrpAoGbm,world-music,De Nua feat. Ana Moura,"Sara Tavares feat. Ana Moura,Sara Tavares,Ana ...",Balancé,https://i.scdn.co/image/ab67616d0000b273cfcdea...,2005-10-18,,https://open.spotify.com/track/5EcaGTbrp1SkFrn...,0.8900,0.599,0.299,0.092300,0.0788,-14.672,0.0873,113.604,0.3160,4
11153,131sSSMin9GSNXBMCPsJpv,world-music,Vision,Mo'kalamity,Warriors of light,https://i.scdn.co/image/ab67616d0000b273cc5fe2...,2007-02-27,https://p.scdn.co/mp3-preview/e23062ec725a33c0...,https://open.spotify.com/track/131sSSMin9GSNXB...,0.0266,0.884,0.471,0.000046,0.1400,-6.919,0.0425,107.027,0.6460,2
11154,3CCGsOj2efotChiScMmCWg,world-music,A Minha Menina,Os Mutantes,"""Os Mutantes""",https://i.scdn.co/image/ab67616d0000b27369a068...,1968-01-01,,https://open.spotify.com/track/3CCGsOj2efotChi...,0.0568,0.455,0.930,0.000000,0.4640,-3.210,0.1260,91.410,0.4290,0
11155,5i2sgqnzaxYpvXmGAB592h,world-music,Corcovado - Quiet Nights Of Quiet Stars,"Stan Getz,João Gilberto,Astrud Gilberto,Antôni...",Getz/Gilberto (Classics International Version),https://i.scdn.co/image/ab67616d0000b2738d9a27...,1964-04,,https://open.spotify.com/track/5i2sgqnzaxYpvXm...,0.9570,0.524,0.108,0.001230,0.1390,-18.361,0.0493,127.982,0.3220,4


In [37]:
import json
d = DecodingMap.to_dict()['encoding']
d = dict([(value,key) for key, value in d.items()])
d

{7: 'Sad',
 0: 'Angry',
 8: 'Sleepy',
 4: 'Depressed',
 6: 'Happy',
 1: 'Calm',
 2: 'Content',
 5: 'Excited',
 3: 'Delighted'}

In [38]:
Test_Data['playlist_mood'] = Test_Data['playlist_mood'].map(d)

In [39]:
Test_Data['playlist_mood'].unique()

array(['Content', 'Sad', 'Delighted', 'Calm', 'Angry', 'Depressed',
       'Happy', 'Excited', 'Sleepy'], dtype=object)

In [40]:
ExcitedPlaylist = Test_Data.loc[Test_Data['playlist_mood'] == "Excited"]
HappyPlaylist = Test_Data.loc[Test_Data['playlist_mood'] == "Happy"]
DelightedPlaylist = Test_Data.loc[Test_Data['playlist_mood'] == "Delighted"]
ContentPlaylist = Test_Data.loc[Test_Data['playlist_mood'] == "Content"]
CalmPlaylist = Test_Data.loc[Test_Data['playlist_mood'] == "Calm"]
SadPlaylist = Test_Data.loc[Test_Data['playlist_mood'] == "Sad"]
DepressedPlaylist = Test_Data.loc[Test_Data['playlist_mood'] == "Depressed"]
SleepyPlaylist = Test_Data.loc[Test_Data['playlist_mood'] == "Sleepy"]
AngryPlaylist = Test_Data.loc[Test_Data['playlist_mood'] == "Angry"]

In [41]:
import os
path = './Data/Models/M1Playlists'
if not os.path.exists(path):
    os.makedirs(path)

csvPath = os.path.join(path, 'Excited.csv')
ExcitedPlaylist.to_csv(csvPath, index=False)

csvPath = os.path.join(path, 'Delighted.csv')
DelightedPlaylist.to_csv(csvPath, index=False)

csvPath = os.path.join(path, 'Happy.csv')
HappyPlaylist.to_csv(csvPath, index=False)

csvPath = os.path.join(path, 'Content.csv')
ContentPlaylist.to_csv(csvPath, index=False)

csvPath = os.path.join(path, 'Calm.csv')
CalmPlaylist.to_csv(csvPath, index=False)

csvPath = os.path.join(path, 'Sleepy.csv')
SleepyPlaylist.to_csv(csvPath, index=False)

csvPath = os.path.join(path, 'Depressed.csv')
DepressedPlaylist.to_csv(csvPath, index=False)

csvPath = os.path.join(path, 'Sad.csv')
SadPlaylist.to_csv(csvPath, index=False)

csvPath = os.path.join(path, 'Angry.csv')
AngryPlaylist.to_csv(csvPath, index=False)