# Classical Machine Learning 

In [None]:
import warnings
# Silence every warning category for the rest of the session (this also hides
# deprecation and convergence warnings raised by the libraries used below).
warnings.filterwarnings('ignore')

In [None]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt 
import sklearn
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from xgboost import XGBClassifier
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA

from sklearn.feature_selection import VarianceThreshold
from sklearn.feature_selection import SelectFromModel

import lightgbm as lgbm
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import SGDClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC

In [None]:
# Directory that holds train.csv / test.csv. The original notebook hard-coded
# one user's Desktop path; that path is kept as the default so existing runs
# behave the same, but it can now be overridden with the DATA_DIR environment
# variable instead of editing the notebook.
DATA_DIR = os.environ.get("DATA_DIR", "C:\\Users\\manuj\\OneDrive\\Desktop\\Data")

# All later relative paths ('train.csv', 'test.csv', 'models/...') resolve
# against this working directory.
os.chdir(DATA_DIR)


In [None]:
# Load the training and test tables. The paths are relative, so they are
# resolved against the current working directory.
df=pd.read_csv('train.csv')
df_t=pd.read_csv('test.csv')
# Preview the first rows of both frames (the test frame via print, the
# training frame as the cell's displayed value).
print(df_t.head())
df.head()

In [None]:
# Training set: every column except 'label' is a feature, 'label' is the target.
X_train = df.drop(columns=['label']).values
y_train = df['label'].values
print(X_train.shape)
print(y_train.shape)

# Test set: same split applied to the held-out frame.
X_test = df_t.drop(columns=['label']).values
y_test = df_t['label'].values
print(X_test.shape)
print(y_test.shape)

In [None]:
#X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y,random_state=42)

In [None]:
def plot_confusion_matrix(cm, classes, model_name,
                          normalize=False,
                          title='Confusion matrix',
                          cmap=plt.cm.Blues):
    """
    Print a short status line and draw a confusion matrix on the current figure.

    Parameters
    ----------
    cm : array-like of shape (n_classes, n_classes)
        Confusion matrix counts. Rows are treated as the true labels and
        columns as the predicted labels (see the axis labels set below).
    classes : iterable of str
        Tick labels, one per class, in matrix order.
    model_name : str
        Appended to the plot title as "<title> of <model_name>".
    normalize : bool, default False
        When True each row is divided by its sum, so every cell shows the
        fraction of that true class.
    title : str, default 'Confusion matrix'
        Leading part of the plot title.
    cmap : matplotlib colormap, default plt.cm.Blues
        Colormap passed to ``plt.imshow``.

    Returns
    -------
    None. The plot is drawn on the current matplotlib figure.
    """
    # Imported here so the helper does not depend on the notebook cell that
    # imports itertools having been run first.
    import itertools

    cm = np.asarray(cm)
    classes = list(classes)

    if normalize:
        row_totals = cm.sum(axis=1)[:, np.newaxis]
        # A class with no true samples has an all-zero row; dividing by 1
        # keeps that row at 0.0 instead of producing NaN cells.
        row_totals[row_totals == 0] = 1
        cm = cm.astype('float') / row_totals
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(f"{title} of {model_name}")
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    # Cell annotations: white text on dark cells, black text on light ones.
    fmt = '.2f' if normalize else 'd'
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, format(cm[i, j], fmt),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    # Run the layout pass last so the axis labels are taken into account.
    plt.tight_layout()


# Backward-compatible alias: cells in this notebook call the helper under this
# name. NOTE: this rebinds the `confusion_matrix` imported from sklearn.metrics,
# so the scikit-learn function must be reached as
# `sklearn.metrics.confusion_matrix` after this cell has run.
confusion_matrix = plot_confusion_matrix

In [None]:
from collections import OrderedDict
import itertools

# Genre name -> integer class index used in the 'label' column.
genres = {
    'blues': 0,
    'classical': 1,
    'country': 2,
    'disco': 3,
    'hiphop': 4,
    'jazz': 5,
    'metal': 6,
    'pop': 7,
    'reggae': 8,
    'rock': 9,
}

# Genre names ordered by class index, used as tick labels for the plots.
keys = OrderedDict(
    sorted(genres.items(), key=lambda item: item[1])
).keys()

## Support Vector Machines

In [None]:
# Hyper-parameter grid for the SVC step; the "cls__" prefix routes each entry
# to the pipeline step named 'cls'.
params = {
    "cls__C": [0.5, 1, 5, 10],
    "cls__kernel": ['rbf', 'sigmoid', 'poly'],
}

# Standardise -> drop low-variance features -> keep the features a LightGBM
# model considers important -> classify with an SVM.
svm_pipe = Pipeline([
    ('scale', StandardScaler()),
    ('var_tresh', VarianceThreshold(threshold=0.1)),
    ('feature_selection', SelectFromModel(lgbm.LGBMClassifier())),
    ('cls', SVC())
])

# Fix: the pipeline is named `svm_pipe`; the original passed the undefined
# name `pipe_svm`, which raised NameError before any fitting happened.
grids_svm = GridSearchCV(svm_pipe, params, scoring='accuracy', n_jobs=-1, cv=9, verbose=2)
grids_svm.fit(X_train, y_train)

In [None]:
# Predict the held-out set with the refit best estimator.
preds = grids_svm.predict(X_test)

# best_score_ is the cross-validated score of the best parameter combination.
cv_accuracy = grids_svm.best_score_
print("Best score on validation set (accuracy) = {:.4f}".format(cv_accuracy))

test_accuracy = accuracy_score(y_test, preds)
print("Best score on test set (accuracy) = {:.4f}".format(test_accuracy))

In [None]:
y_pred = grids_svm.predict(X_test)

# In this notebook the bare name `confusion_matrix` refers to the plotting
# helper (its definition rebinds the name imported from sklearn.metrics), so
# the scikit-learn function is reached through the module. It expects
# (y_true, y_pred): rows must be the true labels because the plot normalises
# per row and labels the y-axis 'True label'.
cm = sklearn.metrics.confusion_matrix(y_test, y_pred)

plt.figure(figsize=(10,10))
confusion_matrix(cm, keys,"SVM Model", normalize=True)

In [None]:
# Display the fitted grid-search object as the cell output.
grids_svm


In [None]:
# NOTE(review): presumably the IPython automagic form of %pwd (shows the
# working directory); it is not valid in a plain Python script.
pwd

In [None]:
import joblib

# The target directory must exist before the model file can be written, so
# create it (relative to the working directory) if it is missing.
os.makedirs("models", exist_ok=True)

# Persist the whole fitted grid search (pipeline + best parameters).
joblib.dump(grids_svm, "models/svm_pipe.joblib")

## Extreme Gradient Boosting (XGBoost)

In [None]:
# Hyper-parameter grid for the XGBoost step; the "cls__" prefix routes each
# entry to the pipeline step named 'cls'.
params = {
    "cls__max_depth": [4, 6, 10, 20],
    'cls__booster': ['gbtree', 'dart']
}

# No scaling step here: features go through the variance filter and straight
# into the gradient-boosted trees.
xbg_pipe = Pipeline([
    ('var_tresh', VarianceThreshold(threshold=0.1)),
    ('cls', XGBClassifier(objective='multi:softmax',num_class=10,verbosity=1))
])

# Fix: the pipeline is named `xbg_pipe`; the original passed the undefined
# name `pipe_xbg`, which raised NameError before any fitting happened.
grids_xgb = GridSearchCV(xbg_pipe, params, scoring='accuracy', n_jobs=-1, cv=3)
grids_xgb.fit(X_train, y_train)

In [None]:
# Predict the held-out set with the refit best estimator.
preds1 = grids_xgb.predict(X_test)

# best_score_ is the cross-validated score of the best parameter combination.
cv_accuracy = grids_xgb.best_score_
print("Best score on validation set (accuracy) = {:.4f}".format(cv_accuracy))

test_accuracy = accuracy_score(y_test, preds1)
print("Best score on test set (accuracy) = {:.4f}".format(test_accuracy))



In [None]:
y_pred = grids_xgb.predict(X_test)

# In this notebook the bare name `confusion_matrix` refers to the plotting
# helper (its definition rebinds the name imported from sklearn.metrics), so
# the scikit-learn function is reached through the module. It expects
# (y_true, y_pred): rows must be the true labels because the plot normalises
# per row and labels the y-axis 'True label'.
cm = sklearn.metrics.confusion_matrix(y_test, y_pred)

plt.figure(figsize=(10,10))
confusion_matrix(cm, keys,"XGBOOST Model", normalize=True)

In [None]:
# The target directory must exist before the model file can be written, so
# create it (relative to the working directory) if it is missing.
os.makedirs("models", exist_ok=True)

# Persist the whole fitted grid search (pipeline + best parameters).
joblib.dump(grids_xgb, "models/xgb_pipe.joblib")



## Deep Neural Network

In [None]:
import tensorflow as tf

In [None]:
import tensorflow
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.layers import Dense,Dropout,Input
from tensorflow.keras.callbacks import ReduceLROnPlateau,EarlyStopping
from tensorflow.keras.models import Model
from tensorflow.keras.activations import relu
from tensorflow.keras.callbacks import History 
from keras.utils.vis_utils import plot_model
from tensorflow.keras.optimizers import Adam

In [None]:
import tensorflow as tf

In [None]:
# Show the installed TensorFlow version as the cell output.
tf.__version__

In [None]:
# Standardise the features for the neural network. The scaler's mean and
# variance are learned from the training set only and then re-used on the
# test set.
# Fix: the original called fit_transform on X_test too, which re-fitted the
# scaler on test data so train and test were scaled with different statistics.
# NOTE: this overwrites X_train / X_test in place with their scaled versions.
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

# Number of input features, used as the network's input width.
input_shape = X_train.shape[1]

# One-hot encode the integer class labels into 10 columns.
y_train_nn = to_categorical(y_train, 10, 'int')
y_train_nn[:5]

In [None]:
# Keras callback object; it is passed to model.fit below and its `.history`
# dict is read afterwards to plot the accuracy curves.
hist = History()

In [None]:
# Hidden stack as (units, dropout rate after the layer or None), in order.
hidden_spec = [
    (500, 0.3),
    (8000, 0.2),
    (4000, 0.2),
    (2000, 0.2),
    (1000, None),
    (500, None),
    (100, None),
    (100, None),
    (50, None),
]

# Functional-API graph: input -> ReLU dense stack -> 10-way softmax.
inp = Input(shape=(input_shape,))
net = inp
for units, drop_rate in hidden_spec:
    net = Dense(units, activation='relu')(net)
    if drop_rate is not None:
        net = Dropout(drop_rate)(net)
net = Dense(10, activation='softmax')(net)

model = Model(inputs=inp, outputs=net)
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Callbacks: halve the learning rate after 3 epochs without val_loss
# improvement; stop training after 20 such epochs.
lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3, verbose=1)
es = EarlyStopping(monitor='val_loss', patience=20, verbose=1)

In [None]:
# Print the layer-by-layer summary of the compiled network.
model.summary()

In [None]:
# Train on the one-hot labels, holding out 5.5% of the training rows for
# validation; `hist` records the per-epoch metrics.
model.fit(
    X_train,
    y_train_nn,
    epochs=30,
    validation_split=0.055,
    callbacks=[hist, lr, es],
)

In [None]:
# The network ends in a 10-unit softmax, so each prediction row holds one
# probability per class; the predicted label is the index of the largest one.
# Fix: the original rounded only column 1 (the class-1 probability), which
# yields 0/1 values and cannot be compared against 10-class labels.
y_pred = model.predict(X_test)
y_pred = np.argmax(y_pred, axis=1)
print(accuracy_score(y_test, y_pred))

# Training vs. validation accuracy per epoch, as recorded by the callback.
plt.plot(hist.history['accuracy'], color='red', label='train')
plt.plot(hist.history['val_accuracy'], color='blue', label='validation')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend()
plt.show()

In [None]:
# Class prediction = index of the highest softmax probability in each row.
y_pred = model.predict(X_test)
y_pred = np.argmax(y_pred, axis=1)

# In this notebook the bare name `confusion_matrix` refers to the plotting
# helper (its definition rebinds the name imported from sklearn.metrics), so
# the scikit-learn function is reached through the module. It expects
# (y_true, y_pred), giving rows = true labels and columns = predictions.
cm = sklearn.metrics.confusion_matrix(y_test, y_pred)
cm

In [None]:
plt.figure(figsize=(10,10))
# Fix: the original called `plot_confusion_matrix`, which raised NameError; the
# plotting helper in this notebook is available under the name
# `confusion_matrix`.
confusion_matrix(cm, keys,"Deep Learning - ANN Model", normalize=True)

In [None]:
# X_test was overwritten with standardised values for the neural network,
# but the XGBoost pipeline was fitted on the raw features, so rebuild the raw
# test matrix from the test frame before predicting.
X_test_raw = df_t.drop('label', axis=1).values

# Fix: the fitted search object is `grids_xgb`; `grid_xgb` is undefined.
y_pred = grids_xgb.predict(X_test_raw)

# The bare name `confusion_matrix` refers to this notebook's plotting helper,
# so scikit-learn's function is reached through the module; it expects
# (y_true, y_pred), giving rows = true labels and columns = predictions.
cm = sklearn.metrics.confusion_matrix(y_test, y_pred)
cm

In [None]:
plt.figure(figsize=(10,10))
# Fix: the original called `plot_confusion_matrix`, which raised NameError; the
# plotting helper in this notebook is available under the name
# `confusion_matrix`.
confusion_matrix(cm, keys,"XGBOOST Model", normalize=True)

In [None]:
# X_test was overwritten with standardised values for the neural network,
# but the SVM pipeline was fitted on the raw features (it has its own scaling
# step), so rebuild the raw test matrix from the test frame before predicting.
X_test_raw = df_t.drop('label', axis=1).values

# Fix: the fitted search object is `grids_svm`; `grid_svm` is undefined.
y_pred = grids_svm.predict(X_test_raw)

# The bare name `confusion_matrix` refers to this notebook's plotting helper,
# so scikit-learn's function is reached through the module; it expects
# (y_true, y_pred), giving rows = true labels and columns = predictions.
cm = sklearn.metrics.confusion_matrix(y_test, y_pred)
cm