In [2]:
import numpy as np
from sklearn.datasets import load_iris
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import accuracy_score, classification_report

In [3]:
X, y = load_iris(return_X_y=True)

In [4]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=100)

In [5]:
gnb = GaussianNB()
gnb.fit(X_train, y_train)
y_pred = gnb.predict(X_test)

In [6]:
print(f'Accuracy Score: {accuracy_score(y_test, y_pred) * 100} %')

Accuracy Score: 94.73684210526315 %


In [7]:
knn = KNeighborsClassifier() # default k=5
knn.fit(X_train, y_train)
y_pred = knn.predict(X_test)

In [None]:
print(f'Accuracy Score: {accuracy_score(y_test, y_pred) * 100} %')

In [8]:
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        14
           1       0.91      1.00      0.95        10
           2       1.00      0.93      0.96        14

    accuracy                           0.97        38
   macro avg       0.97      0.98      0.97        38
weighted avg       0.98      0.97      0.97        38



In [9]:
dtree = DecisionTreeClassifier() # default criteria='gini'
dtree.fit(X_train, y_train)
y_pred = dtree.predict(X_test)

In [10]:
print(f'Accuracy Score: {accuracy_score(y_test, y_pred) * 100} %')

Accuracy Score: 94.73684210526315 %


In [11]:
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        14
           1       0.90      0.90      0.90        10
           2       0.93      0.93      0.93        14

    accuracy                           0.95        38
   macro avg       0.94      0.94      0.94        38
weighted avg       0.95      0.95      0.95        38



In [12]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=100)

In [13]:
gnb = GaussianNB()
gnb.fit(X_train, y_train)
y_pred = gnb.predict(X_test)

In [14]:
print(f'Accuracy Score: {accuracy_score(y_test, y_pred) * 100} %')

Accuracy Score: 96.0 %


In [15]:
knn = KNeighborsClassifier() # default k=5
knn.fit(X_train, y_train)
y_pred = knn.predict(X_test)

In [16]:
print(f'Accuracy Score: {accuracy_score(y_test, y_pred) * 100} %')

Accuracy Score: 98.0 %


In [17]:
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        20
           1       0.92      1.00      0.96        12
           2       1.00      0.94      0.97        18

    accuracy                           0.98        50
   macro avg       0.97      0.98      0.98        50
weighted avg       0.98      0.98      0.98        50



In [18]:
dtree = DecisionTreeClassifier() # default criteria='gini'
dtree.fit(X_train, y_train)
y_pred = dtree.predict(X_test)

In [19]:
print(f'Accuracy Score: {accuracy_score(y_test, y_pred) * 100} %')

Accuracy Score: 96.0 %


In [20]:
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        20
           1       0.92      0.92      0.92        12
           2       0.94      0.94      0.94        18

    accuracy                           0.96        50
   macro avg       0.95      0.95      0.95        50
weighted avg       0.96      0.96      0.96        50



In [21]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=100)

In [22]:
gnb = GaussianNB()
gnb.fit(X_train, y_train)
y_pred = gnb.predict(X_test)

In [23]:
print(f'Accuracy Score: {accuracy_score(y_test, y_pred) * 100} %')

Accuracy Score: 95.55555555555556 %


In [24]:
knn = KNeighborsClassifier() # default k=5
knn.fit(X_train, y_train)
y_pred = knn.predict(X_test)

In [25]:
print(f'Accuracy Score: {accuracy_score(y_test, y_pred) * 100} %')

Accuracy Score: 97.77777777777777 %


In [26]:
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        16
           1       0.92      1.00      0.96        11
           2       1.00      0.94      0.97        18

    accuracy                           0.98        45
   macro avg       0.97      0.98      0.98        45
weighted avg       0.98      0.98      0.98        45



In [27]:
dtree = DecisionTreeClassifier() # default criteria='gini'
dtree.fit(X_train, y_train)
y_pred = dtree.predict(X_test)

In [28]:
print(f'Accuracy Score: {accuracy_score(y_test, y_pred) * 100} %')

Accuracy Score: 95.55555555555556 %


In [29]:
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        16
           1       0.91      0.91      0.91        11
           2       0.94      0.94      0.94        18

    accuracy                           0.96        45
   macro avg       0.95      0.95      0.95        45
weighted avg       0.96      0.96      0.96        45



In [30]:
from sklearn.model_selection import ShuffleSplit

In [31]:
rs = ShuffleSplit(n_splits=10, test_size=0.25, random_state=100)
accuracy_gnb = []
accuracy_knn = []
accuracy_dtree = []

In [36]:
for train_index, test_index in rs.split(X):
    X_train = np.array([X[index] for index in train_index])
    y_train = np.array([y[index] for index in train_index])
    y_test = np.array([y[index] for index in test_index])
    y_pred = GaussianNB().fit(X_train, y_train).predict(X_test)
    x=accuracy_score(y_test,y_pred)
    accuracy_gnb.append(x)
    y_pred = KNeighborsClassifier().fit(X_train, y_train).predict(X_test)
    x=accuracy_score(y_test, y_pred)
    accuracy_knn.append(x)
    y_pred = DecisionTreeClassifier().fit(X_train, y_train).predict(X_test)
    accuracy_dtree.append(accuracy_score(y_test, y_pred))

ValueError: Found input variables with inconsistent numbers of samples: [38, 45]

In [37]:
print(f'Mean accuracy of Gaussian Naive Bayes: {sum(accuracy_gnb) / len(accuracy_gnb) * 100} %')
print(f'Mean accuracy of K-Nearest Neighbors: {sum(accuracy_knn) / len(accuracy_knn) * 100} %')
print(f'Mean accuracy of Decision Tree Classifier: {sum(accuracy_dtree) / len(accuracy_dtree) * 100} %')

ZeroDivisionError: division by zero

In [None]:
dtree = DecisionTreeClassifier()
knn = KNeighborsClassifier()
gnb = GaussianNB()

In [None]:
accuracy_dtree = cross_val_score(dtree, X, y, cv=5)
accuracy_knn = cross_val_score(knn, X, y, cv=5)
accuracy_gnb = cross_val_score(gnb, X, y, cv=5)

In [None]:
print(f'Mean accuracy of Gaussian Naive Bayes: {sum(accuracy_gnb) / len(accuracy_gnb) * 100} %')
print(f'Mean accuracy of K-Nearest Neighbors: {sum(accuracy_knn) / len(accuracy_knn) * 100} %')
print(f'Mean accuracy of Decision Tree Classifier: {sum(accuracy_dtree) / len(accuracy_dtree) * 100} %')

In [None]:
from sklearn.preprocessing import StandardScaler

In [None]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=100)

In [None]:
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

In [None]:
gnb = GaussianNB()
gnb.fit(X_train, y_train)
y_pred = gnb.predict(X_test)

In [None]:
print(f'Accuracy Score: {accuracy_score(y_test, y_pred) * 100} %')

In [None]:
knn = KNeighborsClassifier() # default k=5
knn.fit(X_train, y_train)
y_pred = knn.predict(X_test)

In [None]:
print(f'Accuracy Score: {accuracy_score(y_test, y_pred) * 100} %')

In [None]:
print(classification_report(y_test, y_pred))

In [None]:
dtree = DecisionTreeClassifier() # default criteria='gini'
dtree.fit(X_train, y_train)

In [None]:
y_pred = dtree.predict(X_test)

In [None]:
print(f'Accuracy Score: {accuracy_score(y_test, y_pred) * 100} %')

In [None]:
print(classification_report(y_test, y_pred))