# 14.7 Case Study: Unsupervised Machine Learning, Part 2—k-Means Clustering
### Iris Dataset

## 14.7.1 Loading the Iris Dataset
**We added `%matplotlib inline` to enable Matplotlib in this notebook.**

In [None]:
%matplotlib inline
from sklearn.datasets import load_iris

In [None]:
# Load the Iris dataset; later cells read this object's DESCR, data,
# target, target_names and feature_names attributes.
iris = load_iris()

In [None]:
# Print the text stored in the dataset's DESCR attribute.
print(iris.DESCR)

### Checking the Numbers of Samples, Features and Targets

In [None]:
# Shape of iris.data, the array of feature values that is fitted and
# plotted in the rest of the notebook.
iris.data.shape

In [None]:
# Shape of iris.target, whose values are used to index iris.target_names.
iris.target.shape

In [None]:
# Names that the target values are translated into when the 'species'
# column is built.
iris.target_names

In [None]:
# Names used as the column labels when iris.data is loaded into a DataFrame.
iris.feature_names

## 14.7.2 Exploring the Iris Dataset: Descriptive Statistics with Pandas

In [None]:
import pandas as pd

In [None]:
# Limit DataFrame displays to five columns. The option name is spelled out
# in full because the short pattern 'max_columns' also matches
# 'styler.render.max_columns' in recent pandas releases, which makes
# set_option raise OptionError ("Pattern matched multiple keys").
pd.set_option('display.max_columns', 5)

In [None]:
# Clear the fixed display width (None) via the attribute-style options API.
pd.options.display.width = None

In [None]:
# Build a DataFrame from the feature values, labelling each column with
# the corresponding feature name.
iris_df = pd.DataFrame(data=iris.data, columns=iris.feature_names)

In [None]:
# Translate every target value into its species name and store the names
# in a new 'species' column.
iris_df['species'] = [iris.target_names[target] for target in iris.target]

In [None]:
# Preview the first rows of the DataFrame, including the 'species' column.
iris_df.head()

In [None]:
# Show floating-point values with two digits after the decimal point.
# The fully qualified name is required: in recent pandas releases the bare
# pattern 'precision' also matches 'styler.format.precision', so
# set_option raises OptionError instead of changing the display setting.
pd.set_option('display.precision', 2)

In [None]:
# Descriptive statistics for the DataFrame's numeric columns.
iris_df.describe()

In [None]:
# Descriptive statistics for the 'species' column on its own.
iris_df.species.describe()

## 14.7.3 Visualizing the Dataset with a Seaborn `pairplot` 

In [None]:
import seaborn as sns

In [None]:
# Scale seaborn's font sizes by a factor of 1.1 for the plots that follow.
sns.set(font_scale=1.1)

In [None]:
# Select seaborn's 'whitegrid' style for the plots that follow.
sns.set_style('whitegrid')

In [None]:
# Pairwise plots of the first four columns (the features), with the
# points colored according to the 'species' column.
grid = sns.pairplot(
    data=iris_df,
    vars=iris_df.columns[:4],
    hue='species',
)

### Displaying the pairplot in One Color

In [None]:
# The same pairwise plots without a hue column, so the points are not
# distinguished by species.
grid = sns.pairplot(
    data=iris_df,
    vars=iris_df.columns[:4],
)

## 14.7.4 Using a `KMeans` Estimator
### Creating the Estimator

In [None]:
from sklearn.cluster import KMeans

In [None]:
# Three clusters, seeded with random_state=11 for reproducibility.
# n_init is pinned explicitly: scikit-learn 1.4 changed its default from 10
# to 'auto' (1.2 and 1.3 emit a FutureWarning about it), so relying on the
# default gives version-dependent clusterings.
kmeans = KMeans(n_clusters=3, n_init=10, random_state=11)

### Fitting the Model

In [None]:
# Fit on the feature values only; iris.target is not passed to the estimator.
kmeans.fit(iris.data)

### Comparing the Computer Cluster Labels to the Iris Dataset’s Target Values

In [None]:
# Cluster labels assigned to samples 0-49.
print(kmeans.labels_[:50])

In [None]:
# Cluster labels assigned to samples 50-99.
print(kmeans.labels_[50:100])

In [None]:
# Cluster labels assigned to samples 100-149.
print(kmeans.labels_[100:150])

## 14.7.5 Dimensionality Reduction with Principal Component Analysis
### Creating the PCA Object

In [None]:
from sklearn.decomposition import PCA

In [None]:
# Reduce to two components; random_state=11 is the same seed given to KMeans.
pca = PCA(n_components=2, random_state=11)

### Transforming the Iris Dataset’s Features into Two Dimensions

In [None]:
# Fit the PCA object on the full set of feature values.
pca.fit(iris.data)

In [None]:
# Apply the fitted transformation to the same data it was fit on.
iris_pca = pca.transform(iris.data)

In [None]:
# Shape of the transformed array.
iris_pca.shape

### Visualizing the Reduced Data

In [None]:
# Wrap the transformed array in a DataFrame with one named column per
# component.
iris_pca_df = pd.DataFrame(
    data=iris_pca,
    columns=['Component1', 'Component2'],
)

In [None]:
# Carry the species names over from iris_df so the reduced data can be
# colored by species.
iris_pca_df['species'] = iris_df['species']

In [None]:
import matplotlib.pyplot as plt

# Map the KMeans cluster centers through the same fitted PCA transformation
# as the samples so they can be drawn on the same plot.
iris_centers = pca.transform(kmeans.cluster_centers_)

# Scatter the reduced samples, colored by species.
axes = sns.scatterplot(
    data=iris_pca_df,
    x='Component1',
    y='Component2',
    hue='species',
    legend='brief',
)

# Overlay the transformed centers as black dots of size 100.
dots = plt.scatter(
    iris_centers[:, 0], iris_centers[:, 1], s=100, c='k')

In [None]:
# This placeholder cell was added because we had to combine
# snippets 39-42 into a single cell for the visualization to work in
# Jupyter, and we wanted the subsequent snippet numbers to match the book.

In [None]:
# placeholder cell 

In [None]:
# placeholder cell 

## 14.7.6 Choosing the Best Clustering Estimator

In [None]:
from sklearn.cluster import DBSCAN, MeanShift,\
     SpectralClustering, AgglomerativeClustering

In [None]:
# Clustering estimators to compare, keyed by the name printed in the report.
# 'KMeans' reuses the estimator created earlier in the notebook.
estimators = {
    'KMeans': kmeans,
    'DBSCAN': DBSCAN(),
    'MeanShift': MeanShift(),
    # Seeded like KMeans and PCA (random_state=11): SpectralClustering's
    # default label assignment uses a randomly initialized K-Means step,
    # so without a seed its labels can differ from run to run.
    'SpectralClustering':
        SpectralClustering(n_clusters=3, random_state=11),
    'AgglomerativeClustering':
        AgglomerativeClustering(n_clusters=3)
}

In [None]:
import numpy as np

In [None]:
# Fit every estimator on the feature values, then report how many samples
# in each consecutive block of 50 received each cluster label.
for name, estimator in estimators.items():
    estimator.fit(iris.data)
    print(f'\n{name}:')

    for i in (0, 50, 100):  # start offsets of the three 50-sample blocks
        labels, counts = np.unique(
            estimator.labels_[i:i+50],
            return_counts=True,
        )
        print(f'{i}-{i+50}:')

        for label, count in zip(labels, counts):
            print(f'   label={label}, count={count}')
             

In [None]:
##########################################################################
# (C) Copyright 2019 by Deitel & Associates, Inc. and                    #
# Pearson Education, Inc. All Rights Reserved.                           #
#                                                                        #
# DISCLAIMER: The authors and publisher of this book have used their     #
# best efforts in preparing the book. These efforts include the          #
# development, research, and testing of the theories and programs        #
# to determine their effectiveness. The authors and publisher make       #
# no warranty of any kind, expressed or implied, with regard to these    #
# programs or to the documentation contained in these books. The authors #
# and publisher shall not be liable in any event for incidental or       #
# consequential damages in connection with, or arising out of, the       #
# furnishing, performance, or use of these programs.                     #
##########################################################################
