In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
import sklearn.neural_network
from sklearn.metrics import confusion_matrix, classification_report
import itertools
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler 


In [4]:
data=pd.read_csv("../project_train.csv")
features=data.drop("Label", axis=1)
print(data.head())
targets=data["Label"]
X_train, X_test, Y_train, Y_test=train_test_split(features, targets, test_size=0.3, random_state=20)

exclude=['mode', 'key']
columns_to_standardize=X_train.drop(exclude, axis=1)
excluded_cols=X_train[exclude]
scaler = StandardScaler()
scaled_columns = pd.DataFrame(scaler.fit_transform(columns_to_standardize), columns=columns_to_standardize.columns)

X_train_scaled = pd.concat([excluded_cols.reset_index(drop=True), scaled_columns], axis=1)

exclude=['mode', 'key']
columns_to_standardize=X_test.drop(exclude, axis=1)
excluded_cols=X_test[exclude]
X_test_scaled=scaler.transform(columns_to_standardize)
X_test_scaled = pd.DataFrame(scaler.transform(columns_to_standardize), columns=columns_to_standardize.columns)
X_test_scaled = pd.concat([excluded_cols.reset_index(drop=True), X_test_scaled], axis=1)


X_train_scaled.head()



   danceability  energy  key  loudness  mode  speechiness  acousticness  \
0         0.545   0.884    5    -4.807     0        0.367      0.290000   
1         0.795   0.545    7    -8.153     1        0.343      0.003960   
2         0.489   0.871    5    -5.825     1        0.386      0.002850   
3         0.539   0.931    4    -1.803     0        0.262      0.000713   
4         0.918   0.734   11    -2.832     0        0.269      0.029400   

   instrumentalness  liveness  valence    tempo  Label  
0          0.000000     0.370    0.641   86.049      1  
1          0.000000     0.273    0.809   91.967      1  
2          0.000004     0.130    0.341  117.431      1  
3          0.000000     0.204    0.685   85.571      0  
4          0.000008     0.191    0.608   97.044      1  


Unnamed: 0,mode,key,danceability,energy,loudness,speechiness,acousticness,instrumentalness,liveness,valence,tempo
0,1,6,-0.941674,1.113544,0.447007,0.590914,-0.446703,1.207821,1.799241,-1.147204,1.090975
1,1,8,2.153748,0.028723,0.751596,-0.528547,-0.720671,-0.544607,0.343606,1.847869,0.286133
2,1,6,-0.460163,1.061732,0.509547,-0.481518,-0.635472,-0.301691,-0.516541,0.566433,0.441445
3,1,0,-0.396668,0.553323,0.742662,-0.438688,-0.866948,-0.544149,-0.701804,0.145606,0.366534
4,1,6,-1.206239,-1.752325,-1.631097,-0.642761,1.725877,1.431852,0.089125,-1.617696,-1.172393


In [5]:
layer_sizes=[10, 25, 50,75]
max_layers=4
hidden_layer_sizes = [tuple(layer_size) for n in range(1, max_layers + 1) for layer_size in itertools.product(layer_sizes, repeat=n)]

params={'hidden_layer_sizes' : hidden_layer_sizes, 'alpha' : [0, 0.0001, 0.001],
        'learning_rate_init': [0.00001, 0.0001, 0.001],  'max_iter': [10000]  }


#neural_network=GridSearchCV(sklearn.neural_network.MLPClassifier(), params, n_jobs=-1)
neural_network=sklearn.neural_network.MLPClassifier(alpha=0.0001, hidden_layer_sizes=(25, 10, 75, 10), learning_rate_init=0.0001, max_iter=15000)
neural_network.fit(X_train_scaled, Y_train)

y_pred=neural_network.predict(X_test_scaled)
Y_test.reset_index(drop=True)


con_matrix=confusion_matrix(Y_test, y_pred)
report = classification_report(Y_test, y_pred)


#{'alpha': 0.0001, 'hidden_layer_sizes': (25,), 'learning_rate_init': 0.001, 'max_iter': 10000} NOT STANDARDIZED
#{'alpha': 0.0001, 'hidden_layer_sizes': (25, 10, 75, 10), 'learning_rate_init': 0.0001, 'max_iter': 10000} STANDARDIZED


In [30]:
print(con_matrix)
print(report)


[[77 12]
 [15 48]]
              precision    recall  f1-score   support

           0       0.84      0.87      0.85        89
           1       0.80      0.76      0.78        63

    accuracy                           0.82       152
   macro avg       0.82      0.81      0.82       152
weighted avg       0.82      0.82      0.82       152



In [7]:
from sklearn.svm import SVC 
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import StackingClassifier
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis, LinearDiscriminantAnalysis

#svm_params={'C':[1,10,100,1000],'gamma':[1,0.1,0.001,0.0001], 'kernel':['linear','rbf'], 'probability': [True]}
#svm_model=GridSearchCV(SVC(),svm_params, n_jobs=-1) #{'C': 10, 'gamma': 1, 'kernel': 'linear', 'probability': True}
svm_model=SVC(C=1000,kernel='linear', gamma=1, probability=True)
svm_model.fit(X_train_scaled,Y_train)
#print(svm_model.best_params_)

#rfc_params={'n_estimators':[100, 200, 400, 800], 'criterion':["gini", "entropy"]}
#rfc_model=GridSearchCV(RandomForestClassifier(), rfc_params, n_jobs=-1)
rfc_model=RandomForestClassifier(criterion='log_loss',max_depth=30, max_features=None, n_estimators=100)
rfc_model.fit(X_train_scaled,Y_train)


meta_model=LinearDiscriminantAnalysis()

stacked_model = StackingClassifier(
    estimators=[('svm', svm_model), ('ann', neural_network), ('rf', rfc_model)],
    final_estimator=meta_model,
    n_jobs=-1
)

stacked_model.fit(X_train_scaled,Y_train)
accuracy = stacked_model.score(X_test_scaled, Y_test)
print(f"Accuracy: {accuracy}")


Accuracy: 0.8486842105263158


In [9]:
from sklearn.ensemble import RandomForestClassifier
rfc_params={'n_estimators':[100, 200, 400, 800], 'criterion':["gini", "entropy"]}
rfc_model=GridSearchCV(RandomForestClassifier(), rfc_params, n_jobs=-1)
rfc_model.fit(X_train_scaled,Y_train)
print(rfc_model.best_params_)

{'criterion': 'gini', 'n_estimators': 800}


In [10]:
from sklearn.svm import SVC 
from sklearn.ensemble import RandomForestClassifier
svm_params={'C':[1,10,100,1000],'gamma':[1,0.1,0.001,0.0001], 'kernel':['linear','rbf'], 'probability': [True]}
svm_model=GridSearchCV(SVC(),svm_params, n_jobs=-1)
svm_model.fit(X_train_scaled,Y_train)


In [23]:
from sklearn.neighbors import KNeighborsClassifier

knn_params={'n_neighbors': [3,5,7], 'weights':['uniform','distance'], 'p':[1,2,3]}
knn_model=GridSearchCV(KNeighborsClassifier(), knn_params, n_jobs=-1)
knn_model.fit(X_train_scaled, Y_train)
print(knn_model.score(X_test_scaled,Y_test))


{'n_neighbors': 3, 'p': 1, 'weights': 'distance'}
