In [1]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt 
import sklearn
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from xgboost import XGBClassifier
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA

from sklearn.feature_selection import VarianceThreshold
from sklearn.feature_selection import SelectFromModel

import lightgbm as lgbm
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import SGDClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier

# Look up the current working directory; the value is not stored.
os.getcwd()

# Read the training and test tables from CSV files in the working directory.
df = pd.read_csv('train_data.csv')
df_t = pd.read_csv('test_data.csv')
print(df_t.head())
df.head()

# Training split: the 'label' column is the target, every other column is a
# feature. Both are converted to plain NumPy arrays.
y_train = df['label'].values
X_train = df.drop(columns='label').values
print(X_train.shape)
print(y_train.shape)

# Test split, built the same way from the test table.
y_test = df_t['label'].values
X_test = df_t.drop(columns='label').values
print(X_test.shape)
print(y_test.shape)



from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import make_classification
import joblib

# Random forest limited to depth-2 trees, seeded with random_state=0.
# fit() returns the estimator, so construction and training are chained.
clfRF = RandomForestClassifier(random_state=0, max_depth=2).fit(X_train, y_train)

# Score the fitted forest on the test split.
preds = clfRF.predict(X_test)
print(f"RF Best score on test set (accuracy) = {accuracy_score(y_test, preds):.4f}")

# Persist the fitted model under models/.
joblib.dump(clfRF, "models/pipe_RF.joblib")




from sklearn import tree

# Decision tree with default hyper-parameters, trained on the training split.
# fit() returns the estimator, so construction and training are chained.
clfDT = tree.DecisionTreeClassifier().fit(X_train, y_train)

# Score the fitted tree on the test split.
preds = clfDT.predict(X_test)
print(f"DT Best score on test set (accuracy) = {accuracy_score(y_test, preds):.4f}")

# Persist the fitted model under models/.
joblib.dump(clfDT, "models/pipe_DT.joblib")



from sklearn.neighbors import KNeighborsClassifier
import joblib

# k-nearest-neighbours classifier using the 3 closest training samples.
knn = KNeighborsClassifier(n_neighbors=3)
knn.fit(X_train, y_train)

# Score the fitted classifier on the test split.
preds = knn.predict(X_test)
print("KNN Best score on test set (accuracy) = {:.4f}".format(accuracy_score(y_test, preds)))

# Persist the KNN model itself. The previous version wrote the random forest
# (clfRF) into this file, so the saved artifact did not match its name.
joblib.dump(knn, "models/pipe_KNN.joblib")



from sklearn.naive_bayes import GaussianNB
import joblib

# Gaussian naive Bayes with default settings, trained on the training split.
# fit() returns the estimator, not predictions, so its result is not bound to
# a prediction-like name.
NB = GaussianNB()
NB.fit(X_train, y_train)

# Score the fitted classifier on the test split.
preds = NB.predict(X_test)
print("NB Best score on test set (accuracy) = {:.4f}".format(accuracy_score(y_test, preds)))

# Persist the naive Bayes model itself. The previous version wrote the random
# forest (clfRF) into this file, so the saved artifact did not match its name.
joblib.dump(NB, "models/pipe_NB.joblib")




import joblib

# Search grid for the pipeline's final step ('cls'): 4 values of C times
# 3 kernels.
params = dict(
    cls__C=[0.5, 1, 5, 10],
    cls__kernel=['rbf', 'sigmoid', 'poly'],
)

# Preprocessing and classifier chained into one estimator:
# standardise -> variance filter -> LightGBM-driven feature selection -> SVC.
svm_steps = [
    ('scale', StandardScaler()),
    ('var_tresh', VarianceThreshold(threshold=0.1)),
    ('feature_selection', SelectFromModel(lgbm.LGBMClassifier())),
    ('cls', SVC()),
]
pipe_svm = Pipeline(svm_steps)

# 9-fold cross-validated grid search scored on accuracy, using all available
# cores.
grid_svm = GridSearchCV(
    pipe_svm,
    params,
    scoring='accuracy',
    n_jobs=-1,
    cv=9,
    verbose=2,
)
grid_svm.fit(X_train, y_train)

# Score the fitted search object on the test split.
preds = grid_svm.predict(X_test)
print(f"SVM Best score on test set (accuracy) = {accuracy_score(y_test, preds):.4f}")

# Persist the whole fitted search object under models/.
joblib.dump(grid_svm, "models/pipe_svm.joblib")



from tensorflow.keras.models import Sequential
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.layers import Dense,Dropout,Input
from tensorflow.keras.callbacks import ReduceLROnPlateau,EarlyStopping
from tensorflow.keras.models import Model
from tensorflow.keras.activations import relu
from tensorflow.keras.callbacks import History 
from tensorflow.keras.utils import plot_model
from tensorflow.keras.optimizers import Adam


# --- Dense neural network: scale features, train, evaluate, plot ---

# Fit the scaler on the training data only and reuse those statistics for the
# test data. Re-fitting on the test set (as was done before) scales the two
# splits differently and leaks test-set statistics into the evaluation.
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)
input_shape = X_train.shape[1]

# One-hot encode the training labels into 10 classes. The dtype argument is
# left at its default because not every Keras version accepts it.
y_train_nn = to_categorical(y_train, num_classes=10)
y_train_nn[:5]

hist = History()

# Fully connected stack with dropout after the first four hidden layers and a
# 10-way softmax output.
inp = Input(shape=(input_shape,))
x = Dense(500, activation='relu')(inp)
x = Dropout(0.3)(x)
x = Dense(8000, activation='relu')(x)
x = Dropout(0.2)(x)
x = Dense(4000, activation='relu')(x)
x = Dropout(0.2)(x)
x = Dense(2000, activation='relu')(x)
x = Dropout(0.2)(x)
x = Dense(1000, activation='relu')(x)
x = Dense(500, activation='relu')(x)
x = Dense(10, activation='softmax')(x)

model = Model(inputs=inp, outputs=x)
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# Halve the learning rate after 3 epochs without val_loss improvement.
lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3, verbose=1)
# NOTE(review): patience (20) equals the epoch count below, so early stopping
# looks unable to trigger in this run - confirm the intended values.
es = EarlyStopping(monitor='val_loss', patience=20, verbose=1)
#plot_model(model)

# Train with 5.5% of the training data held out for validation.
model.fit(X_train, y_train_nn, epochs=20, validation_split=.055, callbacks=[hist, lr, es])

# The network outputs one probability per class; the predicted label is the
# index of the largest one. The previous code rounded the probability of
# class 1 only, which cannot yield labels for a 10-class problem.
y_prob = model.predict(X_test)
y_pred = np.argmax(y_prob, axis=1)
print(accuracy_score(y_test, y_pred))

# Training (red) versus validation (blue) accuracy per epoch.
plt.plot(hist.history['accuracy'], color='red')
plt.plot(hist.history['val_accuracy'], color='blue')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.show()






   centroid_max  centroid_min  centroid_mean  centroid_std  centroid_kurtosis  \
0   4053.985673    925.934083    1727.645355    376.901924           6.096711   
1   5430.122461    372.728961    1450.533158    615.185827           4.766277   
2   4709.894987    356.735645    1493.679993    419.421447          11.343314   
3   3692.033659    261.951363     996.753301    428.061426           2.700672   
4   4484.855104    783.165978    1749.555484    599.338500           1.670321   

   centroid_skew   flux_max  flux_min  flux_mean  flux_std  ...  \
0       1.660034  10.981944       0.0   1.391008  1.481524  ...   
1       1.475144  17.791128       0.0   1.444008  1.996799  ...   
2       1.900807  14.719387       0.0   1.638258  1.950675  ...   
3       1.111419  14.619913       0.0   1.247379  1.750351  ...   
4       1.118021  12.954987       0.0   1.644384  1.958142  ...   

   mfcc_18_kurtosis  mfcc_18_skew  mfcc_19_max  mfcc_19_min  mfcc_19_mean  \
0          0.954234      0.303920

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done  37 tasks      | elapsed:   44.7s
[Parallel(n_jobs=-1)]: Done 108 out of 108 | elapsed:  1.9min finished


SVM Best score on test set (accuracy) = 0.9565


ModuleNotFoundError: No module named 'tensorflow'