In [1]:
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

from sklearn.linear_model import LogisticRegression, Perceptron, SGDClassifier
from sklearn.svm import SVC, LinearSVC
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis, QuadraticDiscriminantAnalysis
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB, ComplementNB, MultinomialNB, BernoulliNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, GradientBoostingClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.gaussian_process.kernels import RBF
from sklearn.gaussian_process import GaussianProcessClassifier

from sklearn.metrics import confusion_matrix, classification_report, accuracy_score

In [2]:
# Load the song dataset; the path is relative to the notebook's working directory.
data = pd.read_csv("final_data.csv")

In [3]:
# Preview the first rows of the freshly loaded frame.
data.head()

Unnamed: 0,Title,Artist,Featuring,duration_ms,acousticness,danceability,energy,instrumentalness,key,liveness,loudness,speechiness,tempo,valence,Hit
0,Boulevard of Broken Dreams,Green Day,0,262333,0.00552,0.496,0.682,2.9e-05,8,0.0589,-4.095,0.0294,167.06,0.474,0
1,In The End,Linkin Park,0,216933,0.0103,0.542,0.853,0.0,3,0.108,-6.407,0.0498,105.256,0.37,0
2,Seven Nation Army,The White Stripes,0,231733,0.00817,0.737,0.463,0.447,0,0.255,-7.828,0.0792,123.881,0.324,0
3,By The Way,Red Hot Chili Peppers,0,216933,0.0264,0.451,0.97,0.00355,0,0.102,-4.938,0.107,122.444,0.198,0
4,How You Remind Me,Nickelback,0,223826,0.000954,0.447,0.766,0.0,10,0.113,-5.065,0.0313,172.011,0.574,0


In [5]:
# Min-max scale the selected numeric columns of `data` in place.
# NOTE(review): the scaler is fitted on the full dataset, i.e. before the
# train/test split performed further down, so the test rows influence the
# min/max used for scaling.
# NOTE(review): 'key' is scaled like a continuous quantity here — confirm
# that treating it as numeric (rather than categorical) is intended.
columns_to_scale = [
    'duration_ms',
    'acousticness',
    'instrumentalness',
    'key',
    'loudness',
    'tempo',
]

min_max = MinMaxScaler()
scaled_values = min_max.fit_transform(data[columns_to_scale])
data[columns_to_scale] = scaled_values

data.head()

Unnamed: 0,Title,Artist,Featuring,duration_ms,acousticness,danceability,energy,instrumentalness,key,liveness,loudness,speechiness,tempo,valence,Hit
0,Boulevard of Broken Dreams,Green Day,0,0.140059,0.005541,0.496,0.682,2.9e-05,0.727273,0.0589,0.859242,0.0294,0.689425,0.474,0
1,In The End,Linkin Park,0,0.114658,0.01034,0.542,0.853,0.0,0.272727,0.108,0.801948,0.0498,0.434371,0.37,0
2,Seven Nation Army,The White Stripes,0,0.122938,0.008202,0.737,0.463,0.448345,0.0,0.255,0.766734,0.0792,0.511233,0.324,0
3,By The Way,Red Hot Chili Peppers,0,0.114658,0.026505,0.451,0.97,0.003561,0.0,0.102,0.838352,0.107,0.505303,0.198,0
4,How You Remind Me,Nickelback,0,0.118514,0.000957,0.447,0.766,0.0,0.909091,0.113,0.835204,0.0313,0.709856,0.574,0


In [6]:
# `data` already holds the min-max scaled values at this point; take a copy
# so the column drops that follow do not modify `data` itself.
data_scaled = data.copy()

In [7]:
# Remove the text columns 'Title' and 'Artist' before modelling.
# The frame is derived from `data` (of which `data_scaled` was a plain copy)
# rather than from `data_scaled` itself, so re-running this cell does not
# raise KeyError on columns that were already dropped.
data_scaled = data.drop(columns=['Title', 'Artist'])
data_scaled.head()

Unnamed: 0,Featuring,duration_ms,acousticness,danceability,energy,instrumentalness,key,liveness,loudness,speechiness,tempo,valence,Hit
0,0,0.140059,0.005541,0.496,0.682,2.9e-05,0.727273,0.0589,0.859242,0.0294,0.689425,0.474,0
1,0,0.114658,0.01034,0.542,0.853,0.0,0.272727,0.108,0.801948,0.0498,0.434371,0.37,0
2,0,0.122938,0.008202,0.737,0.463,0.448345,0.0,0.255,0.766734,0.0792,0.511233,0.324,0
3,0,0.114658,0.026505,0.451,0.97,0.003561,0.0,0.102,0.838352,0.107,0.505303,0.198,0
4,0,0.118514,0.000957,0.447,0.766,0.0,0.909091,0.113,0.835204,0.0313,0.709856,0.574,0


In [8]:
# Separate the predictors (every column except "Hit") from the target column.
X = data_scaled.drop(columns="Hit")
y = data_scaled["Hit"]

In [9]:
# Sanity check: the feature frame should no longer contain the "Hit" column.
X.head()

Unnamed: 0,Featuring,duration_ms,acousticness,danceability,energy,instrumentalness,key,liveness,loudness,speechiness,tempo,valence
0,0,0.140059,0.005541,0.496,0.682,2.9e-05,0.727273,0.0589,0.859242,0.0294,0.689425,0.474
1,0,0.114658,0.01034,0.542,0.853,0.0,0.272727,0.108,0.801948,0.0498,0.434371,0.37
2,0,0.122938,0.008202,0.737,0.463,0.448345,0.0,0.255,0.766734,0.0792,0.511233,0.324
3,0,0.114658,0.026505,0.451,0.97,0.003561,0.0,0.102,0.838352,0.107,0.505303,0.198
4,0,0.118514,0.000957,0.447,0.766,0.0,0.909091,0.113,0.835204,0.0313,0.709856,0.574


In [10]:
# Preview the target series.
y.head()

0    0
1    0
2    0
3    0
4    0
Name: Hit, dtype: int64

In [12]:
# Hold out 30% of the rows for evaluation; the fixed random_state keeps the
# partition identical across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=0,
)

In [75]:
# Candidate classifiers to compare, stored as (label, unfitted estimator) pairs.
# Every estimator that accepts a seed and draws random numbers during fitting
# is given the same fixed seed, so the reported accuracies are reproducible
# after a kernel restart.
RANDOM_STATE = 0

models = [
    # --- Support vector machines ---
    ('Support Vector Classifier:', SVC(gamma='auto')),
    ('Linear Support Vector Classifier:', LinearSVC(random_state=RANDOM_STATE, tol=1e-5)),

    # --- Linear models ---
    ('Logistic Regression:', LogisticRegression()),
    ('Stochastic Gradient Descent Classifier:',
     SGDClassifier(max_iter=1000, tol=1e-3, random_state=RANDOM_STATE)),
    # NOTE(review): alpha multiplies the regularisation term, but no penalty
    # is configured here, so it presumably has no effect — confirm.
    ('Perceptron:', Perceptron(alpha=0.00001, random_state=RANDOM_STATE)),

    # --- Discriminant analysis ---
    ('Linear Discriminant Analysis:', LinearDiscriminantAnalysis()),
    ('Quadratic Discriminant Analysis:', QuadraticDiscriminantAnalysis()),

    # --- Nearest neighbours ---
    ('K Nearest Neighbours:', KNeighborsClassifier(n_neighbors=28)),

    # --- Naive Bayes variants ---
    ('Gaussian Naive Bayes:', GaussianNB()),
    ('Complement Naive Bayes:', ComplementNB()),
    ('Multinomial Naive Bayes:', MultinomialNB()),
    ('Bernoulli Naive Bayes:', BernoulliNB()),

    # --- Trees and ensembles ---
    ('Decision Tree Classifier:',
     DecisionTreeClassifier(max_depth=9, random_state=RANDOM_STATE)),
    ('Random Forest Classifier:',
     RandomForestClassifier(n_estimators=100, random_state=RANDOM_STATE)),
    # Label fixed: it previously read 'AdaBoost Classifier Classifier:'.
    ('AdaBoost Classifier:', AdaBoostClassifier(random_state=RANDOM_STATE)),
    ('Gradient Boosting Classifier:',
     GradientBoostingClassifier(n_estimators=60, learning_rate=1, max_depth=1,
                                random_state=RANDOM_STATE)),

    # --- Neural network ---
    # NOTE(review): the recorded run printed an lbfgs "iterations reached
    # limit" warning; it most likely comes from this model (or from
    # LogisticRegression, which also defaults to lbfgs). Consider a larger
    # max_iter if the warning persists.
    ('Multi Layer Perceptron:',
     MLPClassifier(hidden_layer_sizes=(10, 10), activation='relu', solver='lbfgs',
                   alpha=1, max_iter=1000, random_state=RANDOM_STATE)),
]

# Disabled candidate, presumably because fitting it is too slow for this dataset.
# NOTE(review): the original note said to train it on a GPU, but scikit-learn's
# GaussianProcessClassifier is not known to offer GPU execution — confirm
# before re-enabling (subsampling the training set may be the practical option).
# models.append(('Gaussian Process Classifier:', GaussianProcessClassifier(1.0 * RBF(1.0))))

In [76]:
# Fit each candidate on the training split and print its accuracy (in percent)
# on the held-out test split.
for model in models:
    label, estimator = model
    clf = estimator.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    # scikit-learn metrics take (y_true, y_pred) in that order. Accuracy is
    # symmetric, so the printed value is unchanged, but the correct order
    # matters for the confusion matrix / classification report below.
    print(label, accuracy_score(y_test, y_pred) * 100)

    # Optional per-model diagnostics:
    # conf_matrix = confusion_matrix(y_test, y_pred)
    # print(conf_matrix)

    # print(classification_report(y_test, y_pred))

Multi Layer Perceptron: 78.37619496431938


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
