In [85]:
import scipy as sp
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

#Importing classifiers
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

In [2]:
df = pd.read_csv("dataset/leaf.csv")

In [3]:
df.head()

Unnamed: 0,1,1.1,0.72694,1.4742,0.32396,0.98535,1.2,0.83592,0.0046566,0.0039465,0.04779,0.12795,0.016108,0.0052323,0.00027477,1.1756
0,1,2,0.74173,1.5257,0.36116,0.98152,0.99825,0.79867,0.005242,0.005002,0.02416,0.090476,0.008119,0.002708,7.5e-05,0.69659
1,1,3,0.76722,1.5725,0.38998,0.97755,1.0,0.80812,0.007457,0.010121,0.011897,0.057445,0.003289,0.000921,3.8e-05,0.44348
2,1,4,0.73797,1.4597,0.35376,0.97566,1.0,0.81697,0.006877,0.008607,0.01595,0.065491,0.004271,0.001154,6.6e-05,0.58785
3,1,5,0.82301,1.7707,0.44462,0.97698,1.0,0.75493,0.007428,0.010042,0.007938,0.045339,0.002051,0.00056,2.4e-05,0.34214
4,1,6,0.72997,1.4892,0.34284,0.98755,1.0,0.84482,0.004945,0.004451,0.010487,0.058528,0.003414,0.001125,2.5e-05,0.34068


In [4]:
columns = ['Species-(Class)', 'Specimen-Number', 'Eccentricity', 'Aspect-Ratio', 'Elongation', 'Solidity', 'Stochastic-Convexity', 'Isoperimetric-Factor', 'Maximal-Indentation-Depth', 'Lobedness', 'Average-Intensity', 'Average-Contrast', 'Smoothness', 'Third-moment', 'Uniformity', 'Entropy']

In [5]:
leaf_dataset = pd.read_csv('dataset/leaf.csv', names=columns)

In [6]:
leaf_dataset.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 340 entries, 0 to 339
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   Species-(Class)            340 non-null    int64  
 1   Specimen-Number            340 non-null    int64  
 2   Eccentricity               340 non-null    float64
 3   Aspect-Ratio               340 non-null    float64
 4   Elongation                 340 non-null    float64
 5   Solidity                   340 non-null    float64
 6   Stochastic-Convexity       340 non-null    float64
 7   Isoperimetric-Factor       340 non-null    float64
 8   Maximal-Indentation-Depth  340 non-null    float64
 9   Lobedness                  340 non-null    float64
 10  Average-Intensity          340 non-null    float64
 11  Average-Contrast           340 non-null    float64
 12  Smoothness                 340 non-null    float64
 13  Third-moment               340 non-null    float64

In [7]:
leaf_dataset.head()

Unnamed: 0,Species-(Class),Specimen-Number,Eccentricity,Aspect-Ratio,Elongation,Solidity,Stochastic-Convexity,Isoperimetric-Factor,Maximal-Indentation-Depth,Lobedness,Average-Intensity,Average-Contrast,Smoothness,Third-moment,Uniformity,Entropy
0,1,1,0.72694,1.4742,0.32396,0.98535,1.0,0.83592,0.004657,0.003947,0.04779,0.12795,0.016108,0.005232,0.000275,1.1756
1,1,2,0.74173,1.5257,0.36116,0.98152,0.99825,0.79867,0.005242,0.005002,0.02416,0.090476,0.008119,0.002708,7.5e-05,0.69659
2,1,3,0.76722,1.5725,0.38998,0.97755,1.0,0.80812,0.007457,0.010121,0.011897,0.057445,0.003289,0.000921,3.8e-05,0.44348
3,1,4,0.73797,1.4597,0.35376,0.97566,1.0,0.81697,0.006877,0.008607,0.01595,0.065491,0.004271,0.001154,6.6e-05,0.58785
4,1,5,0.82301,1.7707,0.44462,0.97698,1.0,0.75493,0.007428,0.010042,0.007938,0.045339,0.002051,0.00056,2.4e-05,0.34214


In [8]:
#leaf_dataset[leaf_dataset.isnull().any(axis=1)].head()

In [40]:
X = leaf_dataset.iloc[:, 2:16] #The independent variable also known as predictors
y = leaf_dataset.iloc[:, 0] #The dependent variable also known as response variables

In [41]:
X.head()

Unnamed: 0,Eccentricity,Aspect-Ratio,Elongation,Solidity,Stochastic-Convexity,Isoperimetric-Factor,Maximal-Indentation-Depth,Lobedness,Average-Intensity,Average-Contrast,Smoothness,Third-moment,Uniformity,Entropy
0,0.72694,1.4742,0.32396,0.98535,1.0,0.83592,0.004657,0.003947,0.04779,0.12795,0.016108,0.005232,0.000275,1.1756
1,0.74173,1.5257,0.36116,0.98152,0.99825,0.79867,0.005242,0.005002,0.02416,0.090476,0.008119,0.002708,7.5e-05,0.69659
2,0.76722,1.5725,0.38998,0.97755,1.0,0.80812,0.007457,0.010121,0.011897,0.057445,0.003289,0.000921,3.8e-05,0.44348
3,0.73797,1.4597,0.35376,0.97566,1.0,0.81697,0.006877,0.008607,0.01595,0.065491,0.004271,0.001154,6.6e-05,0.58785
4,0.82301,1.7707,0.44462,0.97698,1.0,0.75493,0.007428,0.010042,0.007938,0.045339,0.002051,0.00056,2.4e-05,0.34214


In [42]:
y.head()

0    1
1    1
2    1
3    1
4    1
Name: Species-(Class), dtype: int64

In [43]:
X = leaf_dataset.iloc[:, 2:16].values
y = leaf_dataset.iloc[:, 0].values

In [44]:
print(X)

[[7.2694e-01 1.4742e+00 3.2396e-01 ... 5.2323e-03 2.7477e-04 1.1756e+00]
 [7.4173e-01 1.5257e+00 3.6116e-01 ... 2.7080e-03 7.4846e-05 6.9659e-01]
 [7.6722e-01 1.5725e+00 3.8998e-01 ... 9.2068e-04 3.7886e-05 4.4348e-01]
 ...
 [3.5344e-01 1.0329e+00 7.8147e-01 ... 1.6123e-02 4.5288e-04 1.6935e+00]
 [5.9988e-01 1.1427e+00 7.1532e-01 ... 6.1900e-03 2.6454e-04 1.1526e+00]
 [4.7195e-01 1.0901e+00 8.5409e-01 ... 1.3487e-02 3.2855e-04 1.5623e+00]]


In [45]:
print(y)

[ 1  1  1  1  1  1  1  1  1  1  1  1  2  2  2  2  2  2  2  2  2  2  3  3
  3  3  3  3  3  3  3  3  4  4  4  4  4  4  4  4  5  5  5  5  5  5  5  5
  5  5  5  5  6  6  6  6  6  6  6  6  7  7  7  7  7  7  7  7  7  7  8  8
  8  8  8  8  8  8  8  8  8  9  9  9  9  9  9  9  9  9  9  9  9  9  9 10
 10 10 10 10 10 10 10 10 10 10 10 10 11 11 11 11 11 11 11 11 11 11 11 11
 11 11 11 11 12 12 12 12 12 12 12 12 12 12 12 12 13 13 13 13 13 13 13 13
 13 13 13 13 13 14 14 14 14 14 14 14 14 14 14 14 14 15 15 15 15 15 15 15
 15 15 15 22 22 22 22 22 22 22 22 22 22 22 22 23 23 23 23 23 23 23 23 23
 23 23 24 24 24 24 24 24 24 24 24 24 24 24 24 25 25 25 25 25 25 25 25 25
 26 26 26 26 26 26 26 26 26 26 26 26 27 27 27 27 27 27 27 27 27 27 27 28
 28 28 28 28 28 28 28 28 28 28 28 29 29 29 29 29 29 29 29 29 29 29 29 30
 30 30 30 30 30 30 30 30 30 30 30 31 31 31 31 31 31 31 31 31 31 31 32 32
 32 32 32 32 32 32 32 32 32 33 33 33 33 33 33 33 33 33 33 33 34 34 34 34
 34 34 34 34 34 34 34 35 35 35 35 35 35 35 35 35 35

In [46]:
imputer = SimpleImputer(missing_values=np.nan, strategy='mean')
imputer.fit(X[:, :]) #Selecting and filling the missing values from all rows and all the coloumns in X array
X[:, :] = imputer.transform(X[:, :])

In [47]:
print(X)

[[7.2694e-01 1.4742e+00 3.2396e-01 ... 5.2323e-03 2.7477e-04 1.1756e+00]
 [7.4173e-01 1.5257e+00 3.6116e-01 ... 2.7080e-03 7.4846e-05 6.9659e-01]
 [7.6722e-01 1.5725e+00 3.8998e-01 ... 9.2068e-04 3.7886e-05 4.4348e-01]
 ...
 [3.5344e-01 1.0329e+00 7.8147e-01 ... 1.6123e-02 4.5288e-04 1.6935e+00]
 [5.9988e-01 1.1427e+00 7.1532e-01 ... 6.1900e-03 2.6454e-04 1.1526e+00]
 [4.7195e-01 1.0901e+00 8.5409e-01 ... 1.3487e-02 3.2855e-04 1.5623e+00]]


In [17]:
#label_encode = LabelEncoder()
#y = label_encode.fit_transform(y)

In [48]:
#print(y)

In [49]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20)

In [50]:
print(X_train)

[[4.8967e-01 1.1232e+00 4.6175e-01 ... 1.0102e-02 6.8131e-04 2.4808e+00]
 [8.8661e-01 2.2553e+00 5.6472e-01 ... 2.0893e-03 1.6304e-04 6.0779e-01]
 [9.3396e-01 2.9367e+00 6.6689e-01 ... 1.7096e-03 4.8527e-05 4.6762e-01]
 ...
 [8.1957e-01 1.5600e+00 4.9513e-01 ... 1.5521e-02 7.1910e-04 1.5452e+00]
 [8.7259e-01 2.1845e+00 5.6922e-01 ... 6.2705e-03 3.1494e-04 1.4969e+00]
 [8.8965e-01 2.3863e+00 5.9051e-01 ... 1.3364e-02 2.6357e-04 1.0734e+00]]


In [51]:
print(X_test)

[[3.6462e-01 1.0811e+00 6.7755e-01 4.9042e-01 6.8772e-01 1.4118e-01
  1.2430e-01 2.8118e+00 3.7866e-02 1.1692e-01 1.3485e-02 4.6475e-03
  1.7690e-04 9.2290e-01]
 [5.8232e-01 1.3430e+00 2.9271e-01 9.5055e-01 9.9825e-01 7.1499e-01
  2.6876e-02 1.3146e-01 6.8134e-02 1.3204e-01 1.7136e-02 3.6798e-03
  1.4697e-03 1.6315e+00]
 [5.8637e-01 1.1419e+00 3.0339e-01 9.3305e-01 9.2105e-01 5.7323e-01
  4.1282e-02 3.1016e-01 2.2886e-02 9.3704e-02 8.7041e-03 3.2291e-03
  1.0441e-04 5.4481e-01]
 [9.1298e-01 2.4550e+00 6.1097e-01 9.6979e-01 9.9825e-01 5.6210e-01
  9.5958e-03 1.6758e-02 9.0447e-02 1.8710e-01 3.3823e-02 1.1500e-02
  5.6400e-04 1.6879e+00]
 [3.2200e-01 1.0698e+00 1.3626e-01 9.8017e-01 9.9825e-01 8.1953e-01
  1.2913e-02 3.0347e-02 7.6267e-02 1.8383e-01 3.2690e-02 1.4159e-02
  2.6309e-04 1.4955e+00]
 [5.5112e-01 1.2821e+00 3.9683e-01 8.8375e-01 9.9649e-01 4.9492e-01
  3.0576e-02 1.7015e-01 6.2015e-02 1.4621e-01 2.0930e-02 6.3685e-03
  6.7747e-04 1.2659e+00]
 [9.4560e-01 3.1174e+00 7.0912e-01

In [22]:
print(y_train)

[18  1 20  9  2  2  4 26  9 27  0  6 17 12 10 12 25 17 18 21 22  0  6  7
  4 12  5 26 23 22 11 25 16  7 13 11  2 15  5 25 29 18  0 23  4 12 27  1
  9  1  8 17 19  7 17  3 21 20 17 17  5 12 28 16 11 13 10 17  9 16 24 29
  2  6 23 14 15  6 24  3 27 14  7 13 25  0 11 22  8 28 13 17 26 19 27  9
 20 10 27  9 20 24  8 24 12  2  6 19 21  6 11 21  4  8 18 12  4 26 11 22
  9 21 21 16  7 19 21  6 27  2 26 23  5  3 28 24  2  4 10 15 18 22 27 20
 14 14  1 16  8 26 15 22  8  5 28  9 18 19 13 29 24 10 23 10 11 14  7  0
  3 15 20 14  5  1  4 14 17 14 13 26 19 22 20 28  1 20  7  4  8  9 17 24
 19  4 22 11 19 23  5  6  0 27 20 15 25 29 21 10  0 29  8 15  0 25 23  2
 18 25 25 17 11 14  1 29 23 13  4 10 15 10  0 28 24 24  1  1 13  3 15 20
 12  1  8  9 29 22 28 16  7 20 12 22 17  3  8  4  3 10 27 13  7 15 22 26
 21 10 12  2 25 19  9 26]


In [52]:
print(y_test)

[11 24 23 14 26  9 22  8 26  4  1 15 24 34 12 12  1  8 34 35  4  8 10 29
 27 25  3 10 12 15 31 29 24 23  7 29 14  6 32 14 13  9 28 28  8  2  3 10
 15 13 34 12  8 15  5 31  5 14 23 27 11  4  9 26 26 35 29 10]


In [53]:
## I am using feature scaling after the splitting to avoid the information leakage

In [54]:
sc = StandardScaler()
X_train[:, :] = sc.fit_transform(X_train[:, :])
X_test[:, :] = sc.transform(X_test[:, :])

In [55]:
print(X_train)

[[-1.12243825 -0.51348853 -0.28031049 ...  0.77417827  0.71307594
   2.29856706]
 [ 0.81261453 -0.03696943  0.26113291 ... -0.71682103 -0.53697284
  -0.9519075 ]
 [ 1.04344224  0.24984279  0.79836971 ... -0.78747542 -0.81317412
  -1.19516248]
 ...
 [ 0.48579955 -0.32963239 -0.10478965 ...  1.78254314  0.80422408
   0.67490035]
 [ 0.74426808 -0.06677029  0.2847951  ...  0.06121462 -0.17059545
   0.59107916]
 [ 0.82743431  0.01817058  0.39674353 ...  1.38116964 -0.29449806
  -0.14387473]]


In [56]:
print(X_test)

[[-1.73204764e+00 -5.31209099e-01  8.54422797e-01 -3.46518623e+00
  -2.06766068e+00 -1.78036019e+00  2.15235330e+00  2.04695735e+00
  -3.76681777e-01 -1.46254895e-01 -3.05801851e-01 -2.40792422e-01
  -5.03543015e-01 -4.05056686e-01]
 [-6.70776437e-01 -4.20971176e-01 -1.16916738e+00  4.17510546e-01
   4.76860665e-01  8.35042320e-01 -2.82492216e-01 -3.85963382e-01
   4.74417118e-01  1.44378790e-01 -4.04288965e-02 -4.20861564e-01
   2.61464455e+00  8.24667808e-01]
 [-6.51032990e-01 -5.05617401e-01 -1.11300912e+00  2.69840988e-01
  -1.55725713e-01  1.88906039e-01  7.75462189e-02 -2.23758984e-01
  -7.97900934e-01 -5.92508309e-01 -6.53301594e-01 -5.04727600e-01
  -6.78386313e-01 -1.06120477e+00]
 [ 9.41166311e-01  4.70875111e-02  5.04327604e-01  5.79862677e-01
   4.76860665e-01  1.38175954e-01 -7.14363392e-01 -4.90077366e-01
   1.10183121e+00  1.20273135e+00  1.17246579e+00  1.03431743e+00
   4.30128400e-01  9.22545965e-01]
 [-1.93981695e+00 -5.35965451e-01 -1.99182272e+00  6.67451821e-01
  

In [57]:
dtclf = DecisionTreeClassifier() #default criterion is Gini

In [28]:
#dtclf = DecisionTreeClassifier(criterion='entropy', random_state=0)

In [58]:
dtclf.fit(X_train, y_train)

In [59]:
y_pred = dtclf.predict(X_test)

In [60]:
cm = confusion_matrix(y_test, y_pred)

In [61]:
print(cm)

[[2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0]
 [0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1]
 [0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 1 0 0 4 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 2 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 4 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 3 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 4 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 4 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0

In [62]:
acc_score = accuracy_score(y_test, y_pred)

In [63]:
print(acc_score)

0.5882352941176471


In [67]:
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           1       0.67      1.00      0.80         2
           2       0.00      0.00      0.00         1
           3       0.50      0.50      0.50         2
           4       0.00      0.00      0.00         3
           5       0.50      0.50      0.50         2
           6       1.00      1.00      1.00         1
           7       0.00      0.00      0.00         1
           8       1.00      0.80      0.89         5
           9       1.00      0.67      0.80         3
          10       0.67      1.00      0.80         4
          11       1.00      1.00      1.00         2
          12       0.75      0.75      0.75         4
          13       0.33      0.50      0.40         2
          14       0.80      1.00      0.89         4
          15       1.00      1.00      1.00         4
          22       0.00      0.00      0.00         1
          23       0.67      0.67      0.67         3
          24       0.33    

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [71]:
knnclf = KNeighborsClassifier(n_neighbors=5, metric='minkowski', p=2) #default number of neighbors = 5

In [72]:
knnclf.fit(X_train, y_train)

In [79]:
y_pred = knnclf.predict(X_test)

In [80]:
cm = confusion_matrix(y_test, y_pred)

In [81]:
print(cm)

[[2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 1 0 0 1 0 0 0 0 0 0 0]
 [0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 5 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0 2 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 1 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 1 0 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 4 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 

In [82]:
acc_score = accuracy_score(y_test, y_pred)

In [83]:
print(acc_score)

0.6764705882352942


In [84]:
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           1       0.67      1.00      0.80         2
           2       0.50      1.00      0.67         1
           3       0.50      1.00      0.67         2
           4       0.00      0.00      0.00         3
           5       0.67      1.00      0.80         2
           6       1.00      1.00      1.00         1
           7       0.50      1.00      0.67         1
           8       1.00      1.00      1.00         5
           9       0.60      1.00      0.75         3
          10       1.00      0.25      0.40         4
          11       1.00      1.00      1.00         2
          12       0.67      0.50      0.57         4
          13       0.67      1.00      0.80         2
          14       1.00      0.75      0.86         4
          15       1.00      1.00      1.00         4
          22       0.00      0.00      0.00         1
          23       1.00      0.67      0.80         3
          24       0.33    

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [86]:
nbclf = GaussianNB()

In [88]:
nbclf.fit(X_train, y_train)

In [89]:
y_pred = nbclf.predict(X_test)

In [90]:
cm = confusion_matrix(y_test, y_pred)

In [91]:
print(cm)

[[2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 4 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 4 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 1]
 [0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 3 0 0 0 0 0 0 0 1 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 4 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3 0 0 0 0 0 0 0 0 0 0 

In [92]:
acc_score = accuracy_score(y_test, y_pred)

In [93]:
print(acc_score)

0.6911764705882353


In [94]:
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           1       0.67      1.00      0.80         2
           2       1.00      1.00      1.00         1
           3       1.00      1.00      1.00         2
           4       0.50      0.33      0.40         3
           5       1.00      0.50      0.67         2
           6       1.00      1.00      1.00         1
           7       0.00      0.00      0.00         1
           8       1.00      0.80      0.89         5
           9       0.60      1.00      0.75         3
          10       0.80      1.00      0.89         4
          11       1.00      1.00      1.00         2
          12       0.50      0.25      0.33         4
          13       1.00      1.00      1.00         2
          14       0.75      0.75      0.75         4
          15       1.00      1.00      1.00         4
          22       0.25      1.00      0.40         1
          23       1.00      1.00      1.00         3
          24       0.33    

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
