In [1]:
import scipy as sp
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_recall_fscore_support
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

#Importing classifiers
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
df = pd.read_csv("/content/drive/MyDrive/Colab Notebooks/leaf-classification/dataset/leaf.csv")

In [4]:
df.head()

Unnamed: 0,1,1.1,0.72694,1.4742,0.32396,0.98535,1.2,0.83592,0.0046566,0.0039465,0.04779,0.12795,0.016108,0.0052323,0.00027477,1.1756
0,1,2,0.74173,1.5257,0.36116,0.98152,0.99825,0.79867,0.005242,0.005002,0.02416,0.090476,0.008119,0.002708,7.5e-05,0.69659
1,1,3,0.76722,1.5725,0.38998,0.97755,1.0,0.80812,0.007457,0.010121,0.011897,0.057445,0.003289,0.000921,3.8e-05,0.44348
2,1,4,0.73797,1.4597,0.35376,0.97566,1.0,0.81697,0.006877,0.008607,0.01595,0.065491,0.004271,0.001154,6.6e-05,0.58785
3,1,5,0.82301,1.7707,0.44462,0.97698,1.0,0.75493,0.007428,0.010042,0.007938,0.045339,0.002051,0.00056,2.4e-05,0.34214
4,1,6,0.72997,1.4892,0.34284,0.98755,1.0,0.84482,0.004945,0.004451,0.010487,0.058528,0.003414,0.001125,2.5e-05,0.34068


In [5]:
columns = ['Species-(Class)', 'Specimen-Number', 'Eccentricity', 'Aspect-Ratio', 'Elongation', 'Solidity', 'Stochastic-Convexity', 'Isoperimetric-Factor', 'Maximal-Indentation-Depth', 'Lobedness', 'Average-Intensity', 'Average-Contrast', 'Smoothness', 'Third-moment', 'Uniformity', 'Entropy']

In [6]:
leaf_dataset = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/leaf-classification/dataset/leaf.csv', names=columns)

In [7]:
print(leaf_dataset.columns)

Index(['Species-(Class)', 'Specimen-Number', 'Eccentricity', 'Aspect-Ratio',
       'Elongation', 'Solidity', 'Stochastic-Convexity',
       'Isoperimetric-Factor', 'Maximal-Indentation-Depth', 'Lobedness',
       'Average-Intensity', 'Average-Contrast', 'Smoothness', 'Third-moment',
       'Uniformity', 'Entropy'],
      dtype='object')


In [8]:
leaf_dataset.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 340 entries, 0 to 339
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   Species-(Class)            340 non-null    int64  
 1   Specimen-Number            340 non-null    int64  
 2   Eccentricity               340 non-null    float64
 3   Aspect-Ratio               340 non-null    float64
 4   Elongation                 340 non-null    float64
 5   Solidity                   340 non-null    float64
 6   Stochastic-Convexity       340 non-null    float64
 7   Isoperimetric-Factor       340 non-null    float64
 8   Maximal-Indentation-Depth  340 non-null    float64
 9   Lobedness                  340 non-null    float64
 10  Average-Intensity          340 non-null    float64
 11  Average-Contrast           340 non-null    float64
 12  Smoothness                 340 non-null    float64
 13  Third-moment               340 non-null    float64

In [9]:
leaf_dataset.head()

Unnamed: 0,Species-(Class),Specimen-Number,Eccentricity,Aspect-Ratio,Elongation,Solidity,Stochastic-Convexity,Isoperimetric-Factor,Maximal-Indentation-Depth,Lobedness,Average-Intensity,Average-Contrast,Smoothness,Third-moment,Uniformity,Entropy
0,1,1,0.72694,1.4742,0.32396,0.98535,1.0,0.83592,0.004657,0.003947,0.04779,0.12795,0.016108,0.005232,0.000275,1.1756
1,1,2,0.74173,1.5257,0.36116,0.98152,0.99825,0.79867,0.005242,0.005002,0.02416,0.090476,0.008119,0.002708,7.5e-05,0.69659
2,1,3,0.76722,1.5725,0.38998,0.97755,1.0,0.80812,0.007457,0.010121,0.011897,0.057445,0.003289,0.000921,3.8e-05,0.44348
3,1,4,0.73797,1.4597,0.35376,0.97566,1.0,0.81697,0.006877,0.008607,0.01595,0.065491,0.004271,0.001154,6.6e-05,0.58785
4,1,5,0.82301,1.7707,0.44462,0.97698,1.0,0.75493,0.007428,0.010042,0.007938,0.045339,0.002051,0.00056,2.4e-05,0.34214


In [None]:
#leaf_dataset[leaf_dataset.isnull().any(axis=1)].head()

In [None]:
X = leaf_dataset.iloc[:, 2:16] #The independent variable also known as predictors or features
y = leaf_dataset.iloc[:, 0] #The dependent variable also known as response variables or labels

In [None]:
X.head()

Unnamed: 0,Eccentricity,Aspect-Ratio,Elongation,Solidity,Stochastic-Convexity,Isoperimetric-Factor,Maximal-Indentation-Depth,Lobedness,Average-Intensity,Average-Contrast,Smoothness,Third-moment,Uniformity,Entropy
0,0.72694,1.4742,0.32396,0.98535,1.0,0.83592,0.004657,0.003947,0.04779,0.12795,0.016108,0.005232,0.000275,1.1756
1,0.74173,1.5257,0.36116,0.98152,0.99825,0.79867,0.005242,0.005002,0.02416,0.090476,0.008119,0.002708,7.5e-05,0.69659
2,0.76722,1.5725,0.38998,0.97755,1.0,0.80812,0.007457,0.010121,0.011897,0.057445,0.003289,0.000921,3.8e-05,0.44348
3,0.73797,1.4597,0.35376,0.97566,1.0,0.81697,0.006877,0.008607,0.01595,0.065491,0.004271,0.001154,6.6e-05,0.58785
4,0.82301,1.7707,0.44462,0.97698,1.0,0.75493,0.007428,0.010042,0.007938,0.045339,0.002051,0.00056,2.4e-05,0.34214


In [None]:
y.head()

0    1
1    1
2    1
3    1
4    1
Name: Species-(Class), dtype: int64

In [None]:
X = leaf_dataset.iloc[:, 2:16].values
y = leaf_dataset.iloc[:, 0].values

In [None]:
print(X)

[[7.2694e-01 1.4742e+00 3.2396e-01 ... 5.2323e-03 2.7477e-04 1.1756e+00]
 [7.4173e-01 1.5257e+00 3.6116e-01 ... 2.7080e-03 7.4846e-05 6.9659e-01]
 [7.6722e-01 1.5725e+00 3.8998e-01 ... 9.2068e-04 3.7886e-05 4.4348e-01]
 ...
 [3.5344e-01 1.0329e+00 7.8147e-01 ... 1.6123e-02 4.5288e-04 1.6935e+00]
 [5.9988e-01 1.1427e+00 7.1532e-01 ... 6.1900e-03 2.6454e-04 1.1526e+00]
 [4.7195e-01 1.0901e+00 8.5409e-01 ... 1.3487e-02 3.2855e-04 1.5623e+00]]


In [None]:
print(y)

[ 1  1  1  1  1  1  1  1  1  1  1  1  2  2  2  2  2  2  2  2  2  2  3  3
  3  3  3  3  3  3  3  3  4  4  4  4  4  4  4  4  5  5  5  5  5  5  5  5
  5  5  5  5  6  6  6  6  6  6  6  6  7  7  7  7  7  7  7  7  7  7  8  8
  8  8  8  8  8  8  8  8  8  9  9  9  9  9  9  9  9  9  9  9  9  9  9 10
 10 10 10 10 10 10 10 10 10 10 10 10 11 11 11 11 11 11 11 11 11 11 11 11
 11 11 11 11 12 12 12 12 12 12 12 12 12 12 12 12 13 13 13 13 13 13 13 13
 13 13 13 13 13 14 14 14 14 14 14 14 14 14 14 14 14 15 15 15 15 15 15 15
 15 15 15 22 22 22 22 22 22 22 22 22 22 22 22 23 23 23 23 23 23 23 23 23
 23 23 24 24 24 24 24 24 24 24 24 24 24 24 24 25 25 25 25 25 25 25 25 25
 26 26 26 26 26 26 26 26 26 26 26 26 27 27 27 27 27 27 27 27 27 27 27 28
 28 28 28 28 28 28 28 28 28 28 28 29 29 29 29 29 29 29 29 29 29 29 29 30
 30 30 30 30 30 30 30 30 30 30 30 31 31 31 31 31 31 31 31 31 31 31 32 32
 32 32 32 32 32 32 32 32 32 33 33 33 33 33 33 33 33 33 33 33 34 34 34 34
 34 34 34 34 34 34 34 35 35 35 35 35 35 35 35 35 35

In [None]:
imputer = SimpleImputer(missing_values=np.nan, strategy='mean')
imputer.fit(X[:, :]) #Selecting and filling the missing values from all rows and all the coloumns in X array
X[:, :] = imputer.transform(X[:, :])

In [None]:
print(X)

[[7.2694e-01 1.4742e+00 3.2396e-01 ... 5.2323e-03 2.7477e-04 1.1756e+00]
 [7.4173e-01 1.5257e+00 3.6116e-01 ... 2.7080e-03 7.4846e-05 6.9659e-01]
 [7.6722e-01 1.5725e+00 3.8998e-01 ... 9.2068e-04 3.7886e-05 4.4348e-01]
 ...
 [3.5344e-01 1.0329e+00 7.8147e-01 ... 1.6123e-02 4.5288e-04 1.6935e+00]
 [5.9988e-01 1.1427e+00 7.1532e-01 ... 6.1900e-03 2.6454e-04 1.1526e+00]
 [4.7195e-01 1.0901e+00 8.5409e-01 ... 1.3487e-02 3.2855e-04 1.5623e+00]]


In [None]:
#label_encode = LabelEncoder()
#y = label_encode.fit_transform(y)

In [None]:
#print(y)

In [None]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=42)

In [None]:
print(X_train)

[[8.7163e-01 2.0659e+00 5.5339e-01 ... 6.6921e-03 1.6133e-04 1.4016e+00]
 [3.9092e-01 1.0870e+00 6.8174e-01 ... 2.1177e-03 2.1044e-04 9.0082e-01]
 [7.7982e-01 1.6215e+00 3.9222e-01 ... 2.4664e-03 1.4676e-04 6.6975e-01]
 ...
 [3.8564e-01 1.0471e+00 2.3328e-01 ... 2.2000e-02 8.6835e-04 1.9757e+00]
 [4.9634e-01 1.1832e+00 2.2855e-01 ... 5.1731e-03 2.3865e-04 1.1489e+00]
 [2.3041e-01 1.1294e+00 2.5496e-01 ... 2.4180e-02 4.2353e-04 1.6648e+00]]


In [None]:
print(X_test)

[[9.9593e-01 1.0120e+01 9.2461e-01 8.0662e-01 9.0702e-01 1.5053e-01
  1.2257e-01 2.7342e+00 8.2902e-03 5.2333e-02 2.7313e-03 9.0459e-04
  1.5362e-05 2.7303e-01]
 [5.0692e-01 1.1270e+00 6.7203e-01 5.3024e-01 7.5263e-01 1.6792e-01
  1.3006e-01 3.0788e+00 1.5279e-02 5.7592e-02 3.3059e-03 7.2847e-04
  1.0983e-04 6.7289e-01]
 [2.4465e-01 1.0470e+00 6.0511e-01 5.6524e-01 7.9474e-01 2.1788e-01
  1.2522e-01 2.8540e+00 3.7595e-02 1.2700e-01 1.5874e-02 6.5870e-03
  1.0798e-04 8.3310e-01]
 [8.6545e-01 1.8803e+00 6.2039e-01 8.2443e-01 8.5439e-01 3.3077e-01
  4.7000e-02 4.0204e-01 3.9518e-02 1.1570e-01 1.3210e-02 4.2406e-03
  2.0084e-04 1.0136e+00]
 [8.2866e-01 1.9848e+00 5.0917e-01 9.4180e-01 9.9825e-01 5.5942e-01
  2.5524e-02 1.1857e-01 8.0103e-02 1.6692e-01 2.7107e-02 8.6548e-03
  4.2650e-04 1.8038e+00]
 [7.2719e-01 1.4779e+00 3.2980e-01 9.9388e-01 1.0000e+00 8.4230e-01
  2.9668e-03 1.6019e-03 2.6340e-02 8.1903e-02 6.6634e-03 1.7846e-03
  1.9390e-04 9.8050e-01]
 [9.6652e-01 3.8162e+00 7.4273e-01

In [None]:
print(y_train)

[ 7 11  1 13 35 33 33  8 13 24 29  7 10 30  9 24 13  2  3 31 13 26 28 30
 25 28 14 25  2 24  8 35  2 13 35  3 28 27 15 28 13 22  6 11 12  2  1 33
 26  9  3  3 32 33  6 23  1 27 28  9 35 24 27 15 26 22  9  7 27 10 31 10
 22  7  3  4 11  7 30 14 12 13 24  9 32 28 32 14 29 15 27 33 29 12  4 29
 27 22 11 30 26 11 34 35 28 33 15 24 26 14 26  1 11 23 26 11 29 32 12  8
  3  9 31 10 23 31 12 23 28  5 34  7 29  9 23  8 14  2  4  3 15 13 28 12
 31  5 10 29 22 24 11  5  9 30 24 13  3 36 13 31  1  1 33 32 35 10 26 23
 32 26 25 10 22 10  4 25  7 14  5  4 31  1 36 24 33  3 30 28 27 15 32 13
  7 12 12  1  7  2 14  5 34 35 26 32 27 25 25 34 29 36 34 30 24 15  5 26
 23 29 10  6  1  5  8 24  4 30  9  6 30 28  2  9 30 15 36 12 35 28  6  5
 22 23 36 23 15  6  5 27 29  2 34 14 31 23 32 29 34 14 12 14 10  9 36 25
 11 36  2 23  8 10 30 10]


In [None]:
print(y_test)

[31 11 11  5 12 27 22 31 36  3 12 26 29 14 11 22 11 35 30  1 31  6 15 11
 13  9 11 25 13 34 22  4 10 34  8  1 33 35 11  8  9  1 14 27  5 34 32 32
  8  4 35 22 24  9  7 24  6 36 36  5 33 33 34 22 25  8  8  2]


In [None]:
## I am using feature scaling after the splitting to avoid the information leakage

In [None]:
sc = StandardScaler()
X_train[:, :] = sc.fit_transform(X_train[:, :])
X_test[:, :] = sc.transform(X_test[:, :])

In [None]:
print(X_train)

[[ 0.76163594 -0.09999249  0.28756748 ...  0.12964517 -0.54447525
   0.34892358]
 [-1.54720948 -0.48953753  0.96697955 ... -0.75242946 -0.43518234
  -0.48890675]
 [ 0.32067341 -0.27683774 -0.56557506 ... -0.68519017 -0.57690037
  -0.87549857]
 ...
 [-1.57256927 -0.5054154  -1.40691323 ...  3.08144437  1.02897767
   1.30942198]
 [-1.04087825 -0.45125555 -1.43195117 ... -0.1632613  -0.37240178
  -0.07385633]
 [-2.31813743 -0.47266481 -1.29215162 ...  3.50181047  0.0390434
   0.78927052]]


In [None]:
print(X_test)

[[ 1.35864762e+00  3.10506906e+00  2.25259553e+00 -9.70838256e-01
  -3.76868278e-01 -1.89570817e+00  2.37951287e+00  2.26641625e+00
  -1.22313269e+00 -1.42133641e+00 -1.12200236e+00 -9.86351627e-01
  -8.69322882e-01 -1.53923124e+00]
 [-9.90062617e-01 -4.73619866e-01  9.15580323e-01 -3.55530582e+00
  -1.77293671e+00 -1.81221115e+00  2.58400054e+00  2.61172239e+00
  -1.03112097e+00 -1.32002515e+00 -1.07996110e+00 -1.02031258e+00
  -6.59087033e-01 -8.70245192e-01]
 [-2.24974285e+00 -5.05455195e-01  5.61343812e-01 -3.22801598e+00
  -1.39215789e+00 -1.57233126e+00  2.45186165e+00  2.38646174e+00
  -4.18006607e-01  1.70753024e-02 -1.60401773e-01  1.09378897e-01
  -6.63204155e-01 -6.02205740e-01]
 [ 7.31953456e-01 -1.73850449e-01  6.42227463e-01 -8.04294486e-01
  -8.52773983e-01 -1.03029681e+00  3.16344225e-01 -7.05226962e-02
  -3.65173710e-01 -2.00611926e-01 -3.55316362e-01 -3.43073865e-01
  -4.56546865e-01 -3.00220090e-01]
 [ 5.55251447e-01 -1.32265551e-01  5.34918880e-02  2.93248598e-01
  

In [None]:
dtclf = DecisionTreeClassifier(criterion='gini', max_depth=None, random_state=42) #default criterion is Gini

In [None]:
#dtclf = DecisionTreeClassifier(criterion='entropy', random_state=42)

In [None]:
dtclf.fit(X_train, y_train)

DecisionTreeClassifier(random_state=42)

In [None]:
y_pred = dtclf.predict(X_test)

In [None]:
cm = confusion_matrix(y_test, y_pred)

In [None]:
cm.shape

(30, 30)

In [None]:
print(cm)

[[2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0]
 [0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 1 0 0 4 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 2 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 5 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1]
 [0 0 0 0 0 0 1 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 1 0 2 0 0 0 0 0 0 0 1 0 0 0 0 0

In [None]:
acc_score = accuracy_score(y_test, y_pred)
print("Accuracy: {:.2f}%".format(acc_score * 100))

Accuracy: 61.76%


In [None]:
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           1       0.50      0.67      0.57         3
           2       1.00      1.00      1.00         1
           3       0.00      0.00      0.00         1
           4       0.00      0.00      0.00         2
           5       0.75      1.00      0.86         3
           6       1.00      1.00      1.00         2
           7       0.50      1.00      0.67         1
           8       0.80      0.80      0.80         5
           9       0.67      0.67      0.67         3
          10       0.50      1.00      0.67         1
          11       1.00      0.71      0.83         7
          12       0.50      0.50      0.50         2
          13       0.50      0.50      0.50         2
          14       0.25      0.50      0.33         2
          15       0.00      0.00      0.00         1
          22       0.00      0.00      0.00         5
          23       0.00      0.00      0.00         0
          24       1.00    

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [None]:
# Calculate precision, recall, and F-score with zero_division set to 1
precision, recall, fscore, _ = precision_recall_fscore_support(y_test, y_pred, average='micro', zero_division=1)

print("Precision: {:.2f}".format(precision))
print("Recall: {:.2f}".format(recall))
print("F-score: {:.2f}".format(fscore))


Precision: 0.62
Recall: 0.62
F-score: 0.62


In [None]:
knnclf = KNeighborsClassifier(n_neighbors=5, metric='minkowski', p=2) #default number of neighbors = 5

In [None]:
knnclf.fit(X_train, y_train)

KNeighborsClassifier()

In [None]:
y_pred = knnclf.predict(X_test)

In [None]:
cm = confusion_matrix(y_test, y_pred)

In [None]:
print(cm)

[[2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0]
 [0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 5 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 2 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 7 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 1 0 0 1 0 1 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0

In [None]:
acc_score = accuracy_score(y_test, y_pred)

In [None]:
print(acc_score)

0.7058823529411765


In [None]:
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           1       0.40      0.67      0.50         3
           2       0.00      0.00      0.00         1
           3       1.00      1.00      1.00         1
           4       0.00      0.00      0.00         2
           5       0.75      1.00      0.86         3
           6       1.00      0.50      0.67         2
           7       0.50      1.00      0.67         1
           8       1.00      1.00      1.00         5
           9       1.00      0.67      0.80         3
          10       0.00      0.00      0.00         1
          11       1.00      1.00      1.00         7
          12       1.00      1.00      1.00         2
          13       0.33      0.50      0.40         2
          14       1.00      1.00      1.00         2
          15       0.00      0.00      0.00         1
          22       0.50      0.20      0.29         5
          23       0.00      0.00      0.00         0
          24       1.00    

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [None]:
nbclf = GaussianNB()

In [None]:
nbclf.fit(X_train, y_train)

GaussianNB()

In [None]:
y_pred = nbclf.predict(X_test)

In [None]:
cm = confusion_matrix(y_test, y_pred)

In [None]:
print(cm)

[[2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0]
 [0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 5 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 2 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 2 0 0 0 0 4 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1]
 [0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0]
 [1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 2 0 1 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 

In [None]:
acc_score = accuracy_score(y_test, y_pred)

In [None]:
print(acc_score)

0.7205882352941176


In [None]:
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           1       0.67      0.67      0.67         3
           2       1.00      1.00      1.00         1
           3       1.00      1.00      1.00         1
           4       0.00      0.00      0.00         2
           5       1.00      1.00      1.00         3
           6       0.50      1.00      0.67         2
           7       1.00      1.00      1.00         1
           8       1.00      1.00      1.00         5
           9       1.00      0.67      0.80         3
          10       0.50      1.00      0.67         1
          11       1.00      0.57      0.73         7
          12       0.33      0.50      0.40         2
          13       1.00      0.50      0.67         2
          14       0.50      0.50      0.50         2
          15       1.00      1.00      1.00         1
          22       0.67      0.40      0.50         5
          24       0.40      1.00      0.57         2
          25       1.00    

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
