In [1]:
#Script to obtain data 
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt
import seaborn as sns
import pickle

#Libraries to create the multiclass model
from keras.models import Sequential
from keras.layers import Dense

from keras.utils import np_utils
#Import tensorflow and disable the v2 behavior and eager mode
import tensorflow as tf
# tf.compat.v1.disable_eager_execution()
# tf.compat.v1.disable_v2_behavior()
from sklearn.model_selection import GridSearchCV


#Library to validate the model
from sklearn.model_selection import cross_val_score, KFold, train_test_split
from sklearn.preprocessing import LabelEncoder,MinMaxScaler,StandardScaler
from sklearn.pipeline import Pipeline
from sklearn import metrics 
from sklearn.metrics import confusion_matrix, accuracy_score, f1_score, classification_report

In [2]:
df = pd.read_csv("songs_default.csv")

In [3]:
# encoding y variable 'mood' to be 1 for postive else 0 for negative
# drop duplicate name with the same mood

print(f'before drop: {df.shape}')
df['mood'] = df['mood'].replace(['calm', 'happy','angry','depress'], [1,1,0,0])
df.drop_duplicates(subset=['name', 'artist', 'mood'], keep='last',inplace=True)
print(f'after drop: {df.shape}')

before drop: (796, 20)
after drop: (788, 20)


# Preparing Data

In [4]:
col_features = df.columns[7:-3]
scaler = MinMaxScaler()
scaler_fit = scaler.fit(df[col_features])
df1 = scaler.transform(df[col_features])

X = pd.DataFrame(df1, columns = col_features)
y = df[['mood']]
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.2,random_state=15)

# Running different ML Model

In [5]:
score_list = []
def test_scores(y_test,y_pred):
    # test recall and f1-score
    test_acc = metrics.accuracy_score(y_test, y_pred) 
    test_prec = metrics.precision_score(y_test, y_pred)
    test_recall = metrics.recall_score(y_test, y_pred) 
    test_f1 = metrics.f1_score(y_test, y_pred)
    scores = {'test' : [round(test_acc, 5), round(test_prec, 5), 
                        round(test_recall, 5), round(test_f1, 5)]}
    return scores

def train_scores(y_train,y_pred):
    # test recall and f1-score
    test_acc = metrics.accuracy_score(y_train, y_pred) 
    test_prec = metrics.precision_score(y_train, y_pred)
    test_recall = metrics.recall_score(y_train, y_pred) 
    test_f1 = metrics.f1_score(y_train, y_pred)
    scores = {'train' : [round(test_acc, 5), round(test_prec, 5), 
                        round(test_recall, 5), round(test_f1, 5)]}
    return scores

def runway_model(model, X_train, X_test, y_train, y_test):
    model.fit(X_train, y_train.values.ravel())
    y_pred = model.predict(X_test)
    scores = test_scores(y_test,y_pred)
    y_pred_train = model.predict(X_train)
    scores['train_score'] = train_scores(y_train,y_pred_train)
    return scores

In [6]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import GradientBoostingClassifier
from lightgbm import LGBMClassifier
from catboost import CatBoostClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier 
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import StackingClassifier
from xgboost import XGBClassifier
from sklearn.svm import LinearSVC

models = {
    'LinearSVC': LinearSVC(random_state=15),
    'Logistic Regression': LogisticRegression(max_iter= 10000, random_state= 15),
    'Decision Tree': DecisionTreeClassifier(random_state=15),
    'Random Forest': RandomForestClassifier(random_state= 15),
    'K-Nearest Neighbors': KNeighborsClassifier(),
    'Neural Network': MLPClassifier(max_iter= 10000, random_state= 15),
    'Gradient Boosting': GradientBoostingClassifier(random_state= 15),
    'LightGBM': LGBMClassifier(random_state= 15),
    'CatBoost': CatBoostClassifier(verbose=0),
    'GNB': GaussianNB(),
    'Random Forest': RandomForestClassifier(random_state=15),
    'Adaboost': AdaBoostClassifier(random_state=15),
    'Adaboost_LSVC': AdaBoostClassifier(base_estimator=LinearSVC(random_state=15),random_state=15,algorithm='SAMME'),
}    

In [7]:
score_list = []

for name, model in models.items():
    temp_dict = {}
    temp_dict[name] = runway_model(model, X_train, X_test, y_train, y_test)
    score_list.append(temp_dict)
score_list

[{'LinearSVC': {'test': [0.82278, 0.8, 0.8, 0.8],
   'train_score': {'train': [0.79683, 0.81373, 0.77812, 0.79553]}}},
 {'Logistic Regression': {'test': [0.79114, 0.78462, 0.72857, 0.75556],
   'train_score': {'train': [0.78889, 0.82578, 0.74062, 0.78089]}}},
 {'Decision Tree': {'test': [0.6962, 0.64865, 0.68571, 0.66667],
   'train_score': {'train': [0.99683, 1.0, 0.99375, 0.99687]}}},
 {'Random Forest': {'test': [0.76582, 0.73239, 0.74286, 0.73759],
   'train_score': {'train': [0.99683, 0.99688, 0.99688, 0.99688]}}},
 {'K-Nearest Neighbors': {'test': [0.76582, 0.71429, 0.78571, 0.7483],
   'train_score': {'train': [0.83968, 0.83692, 0.85, 0.84341]}}},
 {'Neural Network': {'test': [0.76582, 0.69412, 0.84286, 0.76129],
   'train_score': {'train': [0.80952, 0.79762, 0.8375, 0.81707]}}},
 {'Gradient Boosting': {'test': [0.78481, 0.75714, 0.75714, 0.75714],
   'train_score': {'train': [0.95873, 0.96519, 0.95312, 0.95912]}}},
 {'LightGBM': {'test': [0.78481, 0.74324, 0.78571, 0.76389],
   

# Hyperparameter tuning best ensemble model

In [8]:
from sklearn.model_selection import GridSearchCV

param_grid = [{'n_estimators' : [10,100,1000,10000],
}]
best_clf = GridSearchCV(CatBoostClassifier(), param_grid = param_grid, scoring= 'f1', 
                   cv = 5, verbose = True, n_jobs = -1)

In [9]:
# Fit on data
best_clf.fit(X_train, y_train.values.ravel())

Fitting 5 folds for each of 4 candidates, totalling 20 fits


1 fits failed out of a total of 20.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
1 fits failed with the following error:
Traceback (most recent call last):
  File "D:\Anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 686, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "D:\Anaconda3\lib\site-packages\catboost\core.py", line 5128, in fit
    self._fit(X, y, cat_features, text_features, embedding_features, None, sample_weight, None, None, None, None, baseline, use_best_model,
  File "D:\Anaconda3\lib\site-packages\catboost\core.py", line 2355, in _fit
    self._train(
  File "D:\Anaconda3\lib\site-packages\catboost\core.py", line 1759, in _train
    self._object._train(train_pool, test_

Learning rate set to 0.069864
0:	learn: 0.6745992	total: 2.06ms	remaining: 204ms
1:	learn: 0.6567838	total: 4.09ms	remaining: 200ms
2:	learn: 0.6416843	total: 6.39ms	remaining: 207ms
3:	learn: 0.6248493	total: 8.49ms	remaining: 204ms
4:	learn: 0.6103756	total: 10.6ms	remaining: 201ms
5:	learn: 0.5942556	total: 12.7ms	remaining: 198ms
6:	learn: 0.5805402	total: 14.8ms	remaining: 197ms
7:	learn: 0.5690003	total: 16.9ms	remaining: 194ms
8:	learn: 0.5597138	total: 18.9ms	remaining: 192ms
9:	learn: 0.5492111	total: 21.1ms	remaining: 190ms
10:	learn: 0.5405728	total: 23.1ms	remaining: 187ms
11:	learn: 0.5327103	total: 25.1ms	remaining: 184ms
12:	learn: 0.5238201	total: 27.2ms	remaining: 182ms
13:	learn: 0.5158782	total: 29.2ms	remaining: 179ms
14:	learn: 0.5086837	total: 31.1ms	remaining: 176ms
15:	learn: 0.5031294	total: 32.9ms	remaining: 173ms
16:	learn: 0.4963063	total: 34.7ms	remaining: 169ms
17:	learn: 0.4890296	total: 36.6ms	remaining: 167ms
18:	learn: 0.4838765	total: 38.4ms	remaining

In [10]:
print(best_clf.best_params_)

{'n_estimators': 100}


In [11]:
model = CatBoostClassifier()
score = runway_model(model,X_train, X_test, y_train, y_test)
score

Learning rate set to 0.008458
0:	learn: 0.6871946	total: 2.56ms	remaining: 2.56s
1:	learn: 0.6810464	total: 4.73ms	remaining: 2.36s
2:	learn: 0.6762756	total: 7.4ms	remaining: 2.46s
3:	learn: 0.6701238	total: 9.84ms	remaining: 2.45s
4:	learn: 0.6650019	total: 12.1ms	remaining: 2.4s
5:	learn: 0.6608685	total: 14.4ms	remaining: 2.39s
6:	learn: 0.6552826	total: 17.1ms	remaining: 2.42s
7:	learn: 0.6501994	total: 19.5ms	remaining: 2.42s
8:	learn: 0.6455755	total: 22.1ms	remaining: 2.43s
9:	learn: 0.6414217	total: 25ms	remaining: 2.48s
10:	learn: 0.6370172	total: 28.3ms	remaining: 2.55s
11:	learn: 0.6323704	total: 31.3ms	remaining: 2.58s
12:	learn: 0.6271378	total: 34.3ms	remaining: 2.6s
13:	learn: 0.6237204	total: 37.1ms	remaining: 2.62s
14:	learn: 0.6189174	total: 39.9ms	remaining: 2.62s
15:	learn: 0.6153860	total: 42.7ms	remaining: 2.62s
16:	learn: 0.6116996	total: 45.5ms	remaining: 2.63s
17:	learn: 0.6074665	total: 48.2ms	remaining: 2.63s
18:	learn: 0.6033842	total: 50.8ms	remaining: 2.6

181:	learn: 0.3602997	total: 455ms	remaining: 2.04s
182:	learn: 0.3596338	total: 458ms	remaining: 2.04s
183:	learn: 0.3588165	total: 460ms	remaining: 2.04s
184:	learn: 0.3577712	total: 462ms	remaining: 2.04s
185:	learn: 0.3569885	total: 464ms	remaining: 2.03s
186:	learn: 0.3565171	total: 467ms	remaining: 2.03s
187:	learn: 0.3557217	total: 469ms	remaining: 2.02s
188:	learn: 0.3551886	total: 471ms	remaining: 2.02s
189:	learn: 0.3546788	total: 473ms	remaining: 2.02s
190:	learn: 0.3539721	total: 476ms	remaining: 2.01s
191:	learn: 0.3533864	total: 478ms	remaining: 2.01s
192:	learn: 0.3525403	total: 480ms	remaining: 2.01s
193:	learn: 0.3517912	total: 483ms	remaining: 2.01s
194:	learn: 0.3515028	total: 486ms	remaining: 2s
195:	learn: 0.3507685	total: 488ms	remaining: 2s
196:	learn: 0.3502596	total: 490ms	remaining: 2s
197:	learn: 0.3497928	total: 493ms	remaining: 2s
198:	learn: 0.3491726	total: 495ms	remaining: 1.99s
199:	learn: 0.3485595	total: 497ms	remaining: 1.99s
200:	learn: 0.3477804	to

357:	learn: 0.2797629	total: 894ms	remaining: 1.6s
358:	learn: 0.2793849	total: 897ms	remaining: 1.6s
359:	learn: 0.2792120	total: 900ms	remaining: 1.6s
360:	learn: 0.2788799	total: 903ms	remaining: 1.6s
361:	learn: 0.2783771	total: 906ms	remaining: 1.6s
362:	learn: 0.2780070	total: 909ms	remaining: 1.59s
363:	learn: 0.2777193	total: 913ms	remaining: 1.59s
364:	learn: 0.2773292	total: 916ms	remaining: 1.59s
365:	learn: 0.2770003	total: 919ms	remaining: 1.59s
366:	learn: 0.2766749	total: 922ms	remaining: 1.59s
367:	learn: 0.2764250	total: 924ms	remaining: 1.59s
368:	learn: 0.2759862	total: 926ms	remaining: 1.58s
369:	learn: 0.2755899	total: 929ms	remaining: 1.58s
370:	learn: 0.2753464	total: 931ms	remaining: 1.58s
371:	learn: 0.2750068	total: 933ms	remaining: 1.57s
372:	learn: 0.2747013	total: 936ms	remaining: 1.57s
373:	learn: 0.2742484	total: 938ms	remaining: 1.57s
374:	learn: 0.2738283	total: 940ms	remaining: 1.57s
375:	learn: 0.2735639	total: 942ms	remaining: 1.56s
376:	learn: 0.273

520:	learn: 0.2331335	total: 1.31s	remaining: 1.21s
521:	learn: 0.2328371	total: 1.31s	remaining: 1.2s
522:	learn: 0.2326540	total: 1.31s	remaining: 1.2s
523:	learn: 0.2324438	total: 1.32s	remaining: 1.2s
524:	learn: 0.2320701	total: 1.32s	remaining: 1.19s
525:	learn: 0.2319041	total: 1.32s	remaining: 1.19s
526:	learn: 0.2316915	total: 1.32s	remaining: 1.19s
527:	learn: 0.2313981	total: 1.33s	remaining: 1.19s
528:	learn: 0.2311669	total: 1.33s	remaining: 1.18s
529:	learn: 0.2310113	total: 1.33s	remaining: 1.18s
530:	learn: 0.2307748	total: 1.33s	remaining: 1.18s
531:	learn: 0.2306380	total: 1.34s	remaining: 1.18s
532:	learn: 0.2304940	total: 1.34s	remaining: 1.17s
533:	learn: 0.2302122	total: 1.34s	remaining: 1.17s
534:	learn: 0.2299385	total: 1.34s	remaining: 1.17s
535:	learn: 0.2296659	total: 1.35s	remaining: 1.17s
536:	learn: 0.2294768	total: 1.35s	remaining: 1.16s
537:	learn: 0.2292260	total: 1.35s	remaining: 1.16s
538:	learn: 0.2290762	total: 1.35s	remaining: 1.16s
539:	learn: 0.2

690:	learn: 0.1948254	total: 1.73s	remaining: 772ms
691:	learn: 0.1945871	total: 1.73s	remaining: 770ms
692:	learn: 0.1944408	total: 1.73s	remaining: 767ms
693:	learn: 0.1942633	total: 1.73s	remaining: 764ms
694:	learn: 0.1941001	total: 1.74s	remaining: 762ms
695:	learn: 0.1939601	total: 1.74s	remaining: 759ms
696:	learn: 0.1937286	total: 1.74s	remaining: 756ms
697:	learn: 0.1935719	total: 1.74s	remaining: 754ms
698:	learn: 0.1933065	total: 1.74s	remaining: 751ms
699:	learn: 0.1931482	total: 1.75s	remaining: 748ms
700:	learn: 0.1929576	total: 1.75s	remaining: 746ms
701:	learn: 0.1927697	total: 1.75s	remaining: 743ms
702:	learn: 0.1925657	total: 1.75s	remaining: 741ms
703:	learn: 0.1923003	total: 1.75s	remaining: 738ms
704:	learn: 0.1920186	total: 1.76s	remaining: 736ms
705:	learn: 0.1918721	total: 1.76s	remaining: 733ms
706:	learn: 0.1916403	total: 1.76s	remaining: 731ms
707:	learn: 0.1914467	total: 1.76s	remaining: 728ms
708:	learn: 0.1912251	total: 1.77s	remaining: 726ms
709:	learn: 

850:	learn: 0.1663749	total: 2.11s	remaining: 370ms
851:	learn: 0.1663234	total: 2.12s	remaining: 368ms
852:	learn: 0.1661293	total: 2.12s	remaining: 365ms
853:	learn: 0.1660113	total: 2.12s	remaining: 363ms
854:	learn: 0.1658939	total: 2.12s	remaining: 360ms
855:	learn: 0.1657800	total: 2.13s	remaining: 358ms
856:	learn: 0.1655855	total: 2.13s	remaining: 355ms
857:	learn: 0.1654477	total: 2.13s	remaining: 353ms
858:	learn: 0.1653217	total: 2.13s	remaining: 350ms
859:	learn: 0.1651195	total: 2.13s	remaining: 347ms
860:	learn: 0.1649203	total: 2.14s	remaining: 345ms
861:	learn: 0.1647300	total: 2.14s	remaining: 342ms
862:	learn: 0.1646042	total: 2.14s	remaining: 340ms
863:	learn: 0.1644569	total: 2.14s	remaining: 338ms
864:	learn: 0.1643350	total: 2.15s	remaining: 335ms
865:	learn: 0.1641749	total: 2.15s	remaining: 333ms
866:	learn: 0.1640907	total: 2.15s	remaining: 330ms
867:	learn: 0.1640104	total: 2.15s	remaining: 328ms
868:	learn: 0.1639138	total: 2.16s	remaining: 325ms
869:	learn: 

{'test': [0.81013, 0.77027, 0.81429, 0.79167],
 'train_score': {'train': [0.99048, 0.99684, 0.98438, 0.99057]}}

Tuned model is not better as CatBoost default setting is able to identify the best solution for it. as show above, train_score is 99% based on default setting

# Keras Model

In [12]:
# Keras
def base_model():
    #Create the model
    model = Sequential()
    #Add 1 layer with 32 nodes  with relu function
#     model.add(Dense(32,input_dim=X_train.shape[1],activation='relu'))
    #Add 1 layer with 16 nodes with relu function
    model.add(Dense(16,input_dim=X_train.shape[1],activation='relu'))
    #Add 1 layer with 8 nodes with relu function
    model.add(Dense(8,input_dim=X_train.shape[1],activation='relu'))
    #Add 1 layer with output 2 and sigmoid function
    model.add(Dense(1, activation='sigmoid'))
    #Compile the model using sigmoid loss function and adam optim
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

# # fix random seed for reproducibility
# seed = 20
# tf.random.set_seed(seed)

# model = base_model()
# history = model.fit(X_train, y_train, epochs=100, batch_size=100, validation_data = (X_test,y_test))
# y_pred = model.predict(X_test)
# y_pred =(y_pred>0.5)
# scores = test_scores(y_test,y_pred)
# score_list.append({"Keras":scores})
# print(scores)

# Hyperparamenter Tuning for CNN Model (Keras)

In [13]:
import numpy as np
import tensorflow as tf
from sklearn.model_selection import GridSearchCV
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from scikeras.wrappers import KerasClassifier

# fix random seed for reproducibility
seed = 20
tf.random.set_seed(seed)

# create model
model = KerasClassifier(model=base_model, verbose=0)
# define the grid search parameters
batch_size = [100, 200, 300, 400]
epochs = [100, 200, 300]
param_grid = dict(batch_size=batch_size, epochs=epochs)
grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1, cv=3)
grid_result = grid.fit(X_train.to_numpy(), y_train.to_numpy())
# summarize results
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
print(f'Best estimator: {grid_result.best_estimator_}')

Best: 0.790476 using {'batch_size': 100, 'epochs': 200}
Best estimator: KerasClassifier(
	model=<function base_model at 0x00000198678C6D30>
	build_fn=None
	warm_start=False
	random_state=None
	optimizer=rmsprop
	loss=None
	metrics=None
	batch_size=100
	validation_batch_size=None
	verbose=0
	callbacks=None
	validation_split=0.0
	shuffle=True
	run_eagerly=False
	epochs=200
	class_weight=None
)


In [14]:
# Update model accordingly to best param Best: 0.790476 using {'batch_size': 100, 'epochs': 200}
# fix random seed for reproducibility
seed = 20
tf.random.set_seed(seed)

model = base_model()
model.fit(X_train, y_train, epochs=100, batch_size=200, validation_data = (X_test,y_test))
y_pred = model.predict(X_test)
y_pred =(y_pred>0.5)
scores = test_scores(y_test,y_pred)
score_list.append({"Keras_Tuned":scores})
print(scores)

y_pred_train = model.predict(X_train)
y_pred_train =(y_pred_train>0.5)
scores = train_scores(y_train,y_pred_train)
score_list.append({"Keras_Tuned":scores})
print(scores)

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200


Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78/200
Epoch 79/200
Epoch 80/200
Epoch 81/200
Epoch 82/200
Epoch 83/200
Epoch 84/200
Epoch 85/200
Epoch 86/200
Epoch 87/200
Epoch 88/200
Epoch 89/200
Epoch 90/200
Epoch 91/200
Epoch 92/200
Epoch 93/200
Epoch 94/200
Epoch 95/200
Epoch 96/200
Epoch 97/200
Epoch 98/200
Epoch 99/200
Epoch 100/200
Epoch 101/200
Epoch 102/200
Epoch 103/200
Epoch 104/200
Epoch 105/200
Epoch 106/200
Epoch 107/200
Epoch 108/200
Epoch 109/200
Epoch 110/200
Epoch 111/200
Epoch 112/200
Epoch 113/200
Epoch 114/200
Epoch 115/200
Epoch 116/200
Epoch 117/200


Epoch 118/200
Epoch 119/200
Epoch 120/200
Epoch 121/200
Epoch 122/200
Epoch 123/200
Epoch 124/200
Epoch 125/200
Epoch 126/200
Epoch 127/200
Epoch 128/200
Epoch 129/200
Epoch 130/200
Epoch 131/200
Epoch 132/200
Epoch 133/200
Epoch 134/200
Epoch 135/200
Epoch 136/200
Epoch 137/200
Epoch 138/200
Epoch 139/200
Epoch 140/200
Epoch 141/200
Epoch 142/200
Epoch 143/200
Epoch 144/200
Epoch 145/200
Epoch 146/200
Epoch 147/200
Epoch 148/200
Epoch 149/200
Epoch 150/200
Epoch 151/200
Epoch 152/200
Epoch 153/200
Epoch 154/200
Epoch 155/200
Epoch 156/200
Epoch 157/200
Epoch 158/200
Epoch 159/200
Epoch 160/200
Epoch 161/200
Epoch 162/200
Epoch 163/200
Epoch 164/200
Epoch 165/200
Epoch 166/200
Epoch 167/200
Epoch 168/200
Epoch 169/200
Epoch 170/200
Epoch 171/200
Epoch 172/200
Epoch 173/200
Epoch 174/200


Epoch 175/200
Epoch 176/200
Epoch 177/200
Epoch 178/200
Epoch 179/200
Epoch 180/200
Epoch 181/200
Epoch 182/200
Epoch 183/200
Epoch 184/200
Epoch 185/200
Epoch 186/200
Epoch 187/200
Epoch 188/200
Epoch 189/200
Epoch 190/200
Epoch 191/200
Epoch 192/200
Epoch 193/200
Epoch 194/200
Epoch 195/200
Epoch 196/200
Epoch 197/200
Epoch 198/200
Epoch 199/200
Epoch 200/200
{'test': [0.79747, 0.75, 0.81429, 0.78082]}
{'train': [0.80476, 0.81672, 0.79375, 0.80507]}


# Overall models score

In [15]:
score_list

[{'LinearSVC': {'test': [0.82278, 0.8, 0.8, 0.8],
   'train_score': {'train': [0.79683, 0.81373, 0.77812, 0.79553]}}},
 {'Logistic Regression': {'test': [0.79114, 0.78462, 0.72857, 0.75556],
   'train_score': {'train': [0.78889, 0.82578, 0.74062, 0.78089]}}},
 {'Decision Tree': {'test': [0.6962, 0.64865, 0.68571, 0.66667],
   'train_score': {'train': [0.99683, 1.0, 0.99375, 0.99687]}}},
 {'Random Forest': {'test': [0.76582, 0.73239, 0.74286, 0.73759],
   'train_score': {'train': [0.99683, 0.99688, 0.99688, 0.99688]}}},
 {'K-Nearest Neighbors': {'test': [0.76582, 0.71429, 0.78571, 0.7483],
   'train_score': {'train': [0.83968, 0.83692, 0.85, 0.84341]}}},
 {'Neural Network': {'test': [0.76582, 0.69412, 0.84286, 0.76129],
   'train_score': {'train': [0.80952, 0.79762, 0.8375, 0.81707]}}},
 {'Gradient Boosting': {'test': [0.78481, 0.75714, 0.75714, 0.75714],
   'train_score': {'train': [0.95873, 0.96519, 0.95312, 0.95912]}}},
 {'LightGBM': {'test': [0.78481, 0.74324, 0.78571, 0.76389],
   

# Spotify API to get songs features and predict

In [16]:
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials

sp = spotipy.Spotify(auth_manager=SpotifyClientCredentials(client_id="3f27ae63f74843ba9f372448388ae599",
                                                           client_secret="916b7434b6924e0ab3d7c859e40d9d22"))

In [17]:
def get_songs_features(ids):
    meta = sp.track(ids)
    features = sp.audio_features(ids)

    # meta
    name = meta['name']
    album = meta['album']['name']
    artist = meta['album']['artists'][0]['name']
    release_date = meta['album']['release_date']
    length = meta['duration_ms']
    popularity = meta['popularity']
    ids =  meta['id']

    # features
    acousticness = features[0]['acousticness']
    danceability = features[0]['danceability']
    energy = features[0]['energy']
    instrumentalness = features[0]['instrumentalness']
    liveness = features[0]['liveness']
    valence = features[0]['valence']
    loudness = features[0]['loudness']
    speechiness = features[0]['speechiness']
    tempo = features[0]['tempo']
    key = features[0]['key']
    time_signature = features[0]['time_signature']

    track = [name, album, artist, ids, release_date, popularity, length, danceability, acousticness,
            energy, instrumentalness, liveness, valence, loudness, speechiness, tempo, key, time_signature]
    columns = ['name','album','artist','id','release_date','popularity','length','danceability','acousticness','energy','instrumentalness',
                'liveness','valence','loudness','speechiness','tempo','key','time_signature']
    return track,columns

# Fitting best model and exporting as pkl model file for application

In [18]:
model = LinearSVC(random_state=15)
model.fit(X_train, y_train.values.ravel())
y_pred = model.predict(X_test) # make prediction

# test recall and f1-score
scores = test_scores(y_test,y_pred)
print(scores)

import pickle
# save the model to disk
filename = 'LSVC_best.pkl'
pickle.dump(model, open(filename, 'wb'))

{'test': [0.82278, 0.8, 0.8, 0.8]}


# Use clean unscaled dataset to predict using pipeline

In [19]:
col_features = df.columns[7:-3]
# scaler = MinMaxScaler()
# df1 = scaler.fit_transform(df[col_features])

X = pd.DataFrame(df, columns = col_features)
y = df[['mood']]
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.2,random_state=15)

In [20]:
# Return Postive or Negative of the track by artist and track
def get_song(artist,track,model):
    # pipline
    seed = 20
    tf.random.set_seed(seed)
    
#     pip = Pipeline([('minmaxscaler',MinMaxScaler()),('keras',model)])
    pip = Pipeline([('minmaxscaler',MinMaxScaler()),('bestmodel', model)])       
#     pip = Pipeline([('StandardScaler',StandardScaler()),('keras',KerasClassifier(model=base_model,epochs=300,
#                                                                              batch_size=100,verbose=0))])
    
    #Fit into the Pipeline
    pip.fit(X_train,y_train.values.ravel())
    results = sp.search(q='artist:' + artist + ' track:' + track, type='track')
    song_id = results['tracks']['items'][0]['id']
#     print('song_id: '+song_id)
    
    song_feature = get_songs_features(song_id)
    song_feature = np.array(song_feature[0][6:-2]).reshape(-1,1).T
    song_feature = pd.DataFrame(song_feature, columns = col_features)
    mood = pip.predict(song_feature)
    mood = 'Positive' if mood == 1 else 'Negative'
    return print(f"{track} by {artist} is a {mood} song")

In [21]:
check_list = [('Vampire Weekend','A-Punk'),('Linkin Park','Battle Symphony'),
              ('The Fratellis','Chelsea Dagger'),('Justin Bieber','Peaches'),
              ('MKTO','Classic'),('Pharrell Williams','Happy'),('Ed Sheeran','Photograph'),
              ('Sam Smith','I am not the only one')]

# Best model SVC, thrown into pipeline to fit and transform
model = LinearSVC(random_state=15)
for artist,track in check_list:
    get_song(artist,track,model)

A-Punk by Vampire Weekend is a Positive song
Battle Symphony by Linkin Park is a Positive song
Chelsea Dagger by The Fratellis is a Negative song
Peaches by Justin Bieber is a Negative song
Classic by MKTO is a Positive song
Happy by Pharrell Williams is a Positive song
Photograph by Ed Sheeran is a Negative song
I am not the only one by Sam Smith is a Negative song


In [22]:
# Importing models
# model = pickle.load(open('LSVC_best.pkl', 'rb'))