In [None]:
# type: ignore
from sklearn.datasets import load_iris
import pandas as pd
import numpy as np
import mglearn


def printIrisMetaData(meta_name: str, dataset=None):
    """Print one entry of a dataset mapping together with its Python type.

    Args:
        meta_name: Key to look up in the dataset (e.g. ``'target_names'``).
        dataset: Mapping to read from. When omitted, the notebook-level
            ``iris_dataset`` is used, so one-argument calls keep working.

    Raises:
        Whatever the mapping raises for a missing key (``KeyError`` for
        dict-like objects).
    """
    if dataset is None:
        # Looked up at call time: the global only has to exist before the
        # first call, not before this definition.
        dataset = iris_dataset
    meta_data = dataset[meta_name]
    print(f"---- {meta_name}: {type(meta_data)}\n{meta_data}\n\n")

# Load the iris dataset once; the calls below and later cells index into it.
iris_dataset = load_iris()

print(f"Keys = {iris_dataset.keys()}\n\n")

# Show each entry of interest: descriptive fields first, then the raw arrays.
for meta_key in (
    'target_names',
    'feature_names',
    'filename',
    'data_module',
    'data',
    'target',
):
    printIrisMetaData(meta_key)


In [None]:
# type: ignore
import mglearn

from sklearn.model_selection import train_test_split 

# Split the iris samples into train/test parts. No test_size is passed, so
# train_test_split's default proportion applies; random_state pins the shuffle.
X_train, X_test, y_train, y_test = train_test_split(
    iris_dataset['data'],
    iris_dataset['target'],
    random_state=0,
)

# Label the training columns with the feature names so the plot axes are readable.
X_train_df = pd.DataFrame(data=X_train, columns=iris_dataset['feature_names'])

# Pairwise scatter plots of the training features, coloured by class label.
pd.plotting.scatter_matrix(
    X_train_df,
    figsize=(12, 12),
    c=y_train,
    cmap=mglearn.cm3,
    marker='o',
    s=60,
    alpha=0.8,
    hist_kwds={'bins': 20},
)

In [None]:
# type: ignore
from sklearn.neighbors import KNeighborsClassifier

# Fit a 1-nearest-neighbour classifier on the training split from the cell above.
knn = KNeighborsClassifier(n_neighbors=1).fit(X_train, y_train)

# Classify the held-out samples and print them next to the true labels.
y_prediction = knn.predict(X_test)
print("True labels {}\n".format(y_test))
print("Prediction: {}\n".format(y_prediction))
# Index target_names with the predicted class ids to show names instead of ids.
print("Predicted labels: {}\n".format(iris_dataset['target_names'][y_prediction]))

# Element-wise agreement between prediction and truth.
print("y_prediction == y_test: {}\n".format(y_prediction == y_test))

# Accuracy twice: by hand as the mean of the agreement mask, then via knn.score().
print("Test score: {:.2f}\n".format((y_prediction == y_test).mean()))
print("score: {:.2f}\n".format(knn.score(X_test, y_test)))


In [None]:
# type: ignore

import matplotlib.pylab
import matplotlib.pyplot
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

# Compare k-NN training vs. test accuracy on the breast-cancer data for k = 1..10.
cancer = load_breast_cancer()

# NOTE: this rebinds X_train / X_test / y_train / y_test (and `knn` below), which
# earlier cells bound to the iris split; re-run those cells before reusing them.
# 20% of the samples are held out, stratified on the target, with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    cancer.data,
    cancer.target,
    test_size=0.2,
    stratify=cancer.target,
    random_state=11
)

neighbors = range(1, 11)
train_accuracy = []
test_accuracy = []

# Fit one classifier per neighbour count and record its score on both splits.
for n in neighbors:
    knn = KNeighborsClassifier(n_neighbors=n)
    knn.fit(X_train, y_train)

    train_accuracy.append(knn.score(X_train, y_train))
    test_accuracy.append(knn.score(X_test, y_test))

# Legend label fixed: it previously read 'traing accuracy'.
plt.plot(neighbors, train_accuracy, label='training accuracy')
plt.plot(neighbors, test_accuracy, label='test accuracy')
plt.ylabel('Accuracy')
plt.xlabel('# of Neighbors')
plt.legend()

