# **KNN with AdaBoost**

In [None]:
import numpy as np
import adaknn as ka
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, recall_score

# IRIS Dataset

In [None]:
# Load the Iris dataset and print the description that ships with it.
iris = load_iris()
print(iris.DESCR)

# Keep the features (`data`) and the labels (`target`) under separate names
# for the split / fit / evaluate cells that follow.
X_iris = iris.data
y_iris = iris.target

.. _iris_dataset:

Iris plants dataset
--------------------

**Data Set Characteristics:**

    :Number of Instances: 150 (50 in each of three classes)
    :Number of Attributes: 4 numeric, predictive attributes and the class
    :Attribute Information:
        - sepal length in cm
        - sepal width in cm
        - petal length in cm
        - petal width in cm
        - class:
                - Iris-Setosa
                - Iris-Versicolour
                - Iris-Virginica
                
    :Summary Statistics:

                    Min  Max   Mean    SD   Class Correlation
    sepal length:   4.3  7.9   5.84   0.83    0.7826
    sepal width:    2.0  4.4   3.05   0.43   -0.4194
    petal length:   1.0  6.9   3.76   1.76    0.9490  (high!)
    petal width:    0.1  2.5   1.20   0.76    0.9565  (high!)

    :Missing Attribute Values: None
    :Class Distribution: 33.3% for each of 3 classes.
    :Creator: R.A. Fisher
    :Donor: Michael Marshall (MARSHALL%PLU@io.arc.nasa.gov)
    :

In [None]:
# Hold out 20% of the Iris samples for evaluation; the fixed random_state
# makes the shuffle (and therefore the split) reproducible between runs.
X_iris_train, X_iris_test, y_iris_train, y_iris_test = train_test_split(
    X_iris,
    y_iris,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Build an `Adaboost` model from the local `adaknn` module with its default
# constructor arguments and fit it on the Iris training split.
# The `fit` call is deliberately the last expression of the cell, so the
# notebook displays whatever `fit` returns.
clf = ka.Adaboost()
clf.fit(X_iris_train, y_iris_train)

<__main__.Adaboost at 0x7fd94a5d9630>

In [None]:
# Predict labels for the held-out Iris samples.
# NOTE: `clf` is rebound in every dataset section of this notebook, so this
# cell must run right after the Iris fit cell, not after a later fit.
y_iris_pred = clf.predict(X_iris_test)

In [None]:
# Share of held-out Iris samples whose predicted label equals the true label.
accuracy = accuracy_score(y_true=y_iris_test, y_pred=y_iris_pred)
# Bare expression so the notebook echoes the value.
accuracy

1.0

In [None]:
# Micro-averaged recall pools true positives and false negatives across all
# classes before dividing. For single-label multiclass predictions this is
# the same number as accuracy, so expect it to match the cell above.
recall = recall_score(
    y_true=y_iris_test,
    y_pred=y_iris_pred,
    average="micro",
)
# Bare expression so the notebook echoes the value.
recall

1.0

# Breast Cancer Dataset

In [None]:
from sklearn.datasets import load_breast_cancer

In [None]:
# Load the breast cancer dataset and print the description that ships with it.
cancer = load_breast_cancer()
print(cancer.DESCR)

# Keep the features (`data`) and the labels (`target`) under separate names
# for the split / fit / evaluate cells that follow.
X_cancer = cancer.data
y_cancer = cancer.target

.. _breast_cancer_dataset:

Breast cancer wisconsin (diagnostic) dataset
--------------------------------------------

**Data Set Characteristics:**

    :Number of Instances: 569

    :Number of Attributes: 30 numeric, predictive attributes and the class

    :Attribute Information:
        - radius (mean of distances from center to points on the perimeter)
        - texture (standard deviation of gray-scale values)
        - perimeter
        - area
        - smoothness (local variation in radius lengths)
        - compactness (perimeter^2 / area - 1.0)
        - concavity (severity of concave portions of the contour)
        - concave points (number of concave portions of the contour)
        - symmetry
        - fractal dimension ("coastline approximation" - 1)

        The mean, standard error, and "worst" or largest (mean of the three
        worst/largest values) of these features were computed for each image,
        resulting in 30 features.  For instance, field 0 is Mean Radi

In [None]:
# Hold out 20% of the breast cancer samples for evaluation; the fixed
# random_state makes the split reproducible between runs.
X_cancer_train, X_cancer_test, y_cancer_train, y_cancer_test = train_test_split(
    X_cancer,
    y_cancer,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Rebind `clf` to a fresh `Adaboost` model (default constructor arguments)
# and fit it on the breast cancer training split. The previously fitted
# model is discarded by this assignment.
# The `fit` call is the last expression of the cell, so the notebook
# displays whatever `fit` returns.
clf = ka.Adaboost()
clf.fit(X_cancer_train, y_cancer_train)

<__main__.Adaboost at 0x7fd91ef449a0>

In [None]:
# Predict labels for the held-out breast cancer samples.
# NOTE: `clf` is rebound in every dataset section of this notebook, so this
# cell must run right after the breast cancer fit cell.
y_cancer_pred = clf.predict(X_cancer_test)

In [None]:
# Share of held-out breast cancer samples whose predicted label equals the
# true label.
accuracy = accuracy_score(y_true=y_cancer_test, y_pred=y_cancer_pred)
# Bare expression so the notebook echoes the value.
accuracy

0.9473684210526315

In [None]:
# Binary recall using recall_score's defaults, i.e. recall of the class
# labelled 1.
# NOTE(review): check `cancer.target_names` to confirm which diagnosis
# label 1 stands for before reading this number as clinical sensitivity.
recall = recall_score(y_true=y_cancer_test, y_pred=y_cancer_pred)
# Bare expression so the notebook echoes the value.
recall

1.0

# Wine Dataset

In [None]:
from sklearn.datasets import load_wine

In [None]:
# Load the wine dataset and print the description that ships with it.
wine = load_wine()
print(wine.DESCR)

# Keep the features (`data`) and the labels (`target`) under separate names
# for the split / fit / evaluate cells that follow.
X_wine = wine.data
y_wine = wine.target

.. _wine_dataset:

Wine recognition dataset
------------------------

**Data Set Characteristics:**

    :Number of Instances: 178
    :Number of Attributes: 13 numeric, predictive attributes and the class
    :Attribute Information:
 		- Alcohol
 		- Malic acid
 		- Ash
		- Alcalinity of ash  
 		- Magnesium
		- Total phenols
 		- Flavanoids
 		- Nonflavanoid phenols
 		- Proanthocyanins
		- Color intensity
 		- Hue
 		- OD280/OD315 of diluted wines
 		- Proline

    - class:
            - class_0
            - class_1
            - class_2
		
    :Summary Statistics:
    
                                   Min   Max   Mean     SD
    Alcohol:                      11.0  14.8    13.0   0.8
    Malic Acid:                   0.74  5.80    2.34  1.12
    Ash:                          1.36  3.23    2.36  0.27
    Alcalinity of Ash:            10.6  30.0    19.5   3.3
    Magnesium:                    70.0 162.0    99.7  14.3
    Total Phenols:                0.98  3.88    2.29  0.63
    Fl

In [None]:
# Hold out 20% of the wine samples for evaluation; the fixed random_state
# makes the split reproducible between runs.
X_wine_train, X_wine_test, y_wine_train, y_wine_test = train_test_split(
    X_wine,
    y_wine,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Rebind `clf` to a fresh `Adaboost` model (default constructor arguments)
# and fit it on the wine training split. The previously fitted model is
# discarded by this assignment.
# The `fit` call is the last expression of the cell, so the notebook
# displays whatever `fit` returns.
# NOTE(review): the wine features span very different numeric ranges (see
# the summary statistics printed above); if the model is distance-based,
# feature scaling may be worth trying — confirm against `adaknn`.
clf = ka.Adaboost()
clf.fit(X_wine_train, y_wine_train)

<__main__.Adaboost at 0x7fd94a5d9480>

In [None]:
# Predict labels for the held-out wine samples.
# NOTE: `clf` is rebound in every dataset section of this notebook, so this
# cell must run right after the wine fit cell.
y_wine_pred = clf.predict(X_wine_test)

In [None]:
# Share of held-out wine samples whose predicted label equals the true label.
accuracy = accuracy_score(y_true=y_wine_test, y_pred=y_wine_pred)
# Bare expression so the notebook echoes the value.
accuracy

0.6944444444444444

In [None]:
# Micro-averaged recall pools true positives and false negatives across all
# classes before dividing. For single-label multiclass predictions this is
# the same number as accuracy, so expect it to match the cell above.
recall = recall_score(
    y_true=y_wine_test,
    y_pred=y_wine_pred,
    average="micro",
)
# Bare expression so the notebook echoes the value.
recall

0.6944444444444444