In [1]:
import numpy
import pandas
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasClassifier
from keras.utils import np_utils
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn.preprocessing import LabelEncoder
from sklearn.pipeline import Pipeline

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [9]:
# fix random seed for reproducibility
seed = 7
numpy.random.seed(seed)

In [12]:
# load dataset
dataframe = pandas.read_pickle("D:/AI/preprocessed_train_data_B.pkl")
dataset = dataframe.values
X = dataset[:,0:32].astype(float)
Y = dataset[:,24:32]

In [13]:
Y

array([[1., 0., 0., ..., 0., 0., 0.],
       [1., 0., 0., ..., 0., 0., 0.],
       [1., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 1., ..., 0., 0., 0.],
       [0., 0., 1., ..., 0., 0., 0.],
       [0., 0., 1., ..., 0., 0., 0.]])

In [14]:
# define baseline model
def baseline_model():
    # create model
    model = Sequential()
    model.add(Dense(8, input_dim=32, activation='relu'))
    model.add(Dense(8, activation='softmax'))
    # Compile model
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

In [15]:
estimator = KerasClassifier(build_fn=baseline_model, epochs=20, batch_size=100, verbose=0)

In [16]:
kfold = KFold(n_splits=10, shuffle=True, random_state=seed)

In [17]:
results = cross_val_score(estimator, X, Y, cv=kfold)
print("Baseline: %.2f%% (%.2f%%)" % (results.mean()*100, results.std()*100))

Baseline: 9.86% (7.81%)


In [19]:
from sklearn.model_selection import train_test_split
import pandas as pd

In [21]:
dataframe = pandas.read_pickle("D:/AI/preprocessed_train_data_B.pkl")
dataset = dataframe.values
X1 = dataset[:,0:32].astype(float)
Y1 = dataset[:,24:32]

In [22]:
X_train, X_test, y_train, y_test = train_test_split(X1, Y1, test_size=0.2, train_size=0.8)
print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)

(11606, 32) (11606, 8)
(2902, 32) (2902, 8)


In [23]:
from skmultilearn.problem_transform import BinaryRelevance
from sklearn.naive_bayes import GaussianNB

# initialize binary relevance multi-label classifier
# with a gaussian naive bayes base classifier
classifier = BinaryRelevance(GaussianNB())

# train
classifier.fit(X_train, y_train)

# predict
predictions = classifier.predict(X_test)

In [24]:
from sklearn.metrics import accuracy_score
accuracy_score(y_test,predictions)

0.02653342522398346

In [26]:
from sklearn.metrics import accuracy_score,recall_score,confusion_matrix, classification_report
print(classification_report(y_test, predictions))

             precision    recall  f1-score   support

          0       0.27      0.30      0.28       218
          1       0.48      0.28      0.35       506
          2       0.16      0.35      0.22        93
          3       0.37      0.96      0.54       336
          4       0.07      0.91      0.14        97
          5       0.29      0.98      0.45       443
          6       0.22      0.54      0.32       319
          7       0.42      0.99      0.59       890

avg / total       0.35      0.74      0.44      2902



In [27]:
from sklearn.neighbors import KNeighborsClassifier

In [28]:
knn=KNeighborsClassifier()

In [29]:
knn.fit(X_train,y_train)

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=1, n_neighbors=5, p=2,
           weights='uniform')

In [30]:
knn.score(X_test,y_test)

0.7580978635423845

In [31]:
predictions = knn.predict(X_test)

In [32]:
from sklearn.metrics import accuracy_score
accuracy_score(y_test,predictions)

0.7580978635423845

In [33]:
from sklearn.metrics import accuracy_score,recall_score,confusion_matrix, classification_report
print(classification_report(y_test, predictions))

             precision    recall  f1-score   support

          0       0.80      0.66      0.72       218
          1       0.66      0.59      0.63       506
          2       0.61      0.22      0.32        93
          3       0.81      0.76      0.79       336
          4       0.41      0.16      0.24        97
          5       0.93      0.95      0.94       443
          6       0.76      0.56      0.65       319
          7       0.99      0.97      0.98       890

avg / total       0.83      0.76      0.79      2902

