In [None]:
# The ``inline`` flag will use the appropriate backend to make figures appear inline in the notebook.  
%matplotlib inline

import pandas as pd
import numpy as np

# `plt` is an alias for the `matplotlib.pyplot` module
import matplotlib.pyplot as plt

# import seaborn library (wrapper of matplotlib)
import seaborn as sns

# Using the scikit-learn library to load a dataset
from sklearn.datasets import load_digits

### Create Data (not shown in video)

We are going to load the digits dataset from scikit-learn, but there isn't time to teach how to load data with the library, so this section just saves the dataset to a CSV file.

In [None]:
# load_digits() is scikit-learn's loader function for the digits dataset;
# the .data and .target attributes of the returned object are used in the next cells
digits = load_digits()

In [None]:
# Check what kind of object load_digits() returned
type(digits)

In [None]:
# Each row of digits.data is one image stored as a flat vector of 64 pixel
# values (an 8x8 image). The array is already 2-D, so it can be handed to
# DataFrame directly without converting it to a list of rows first.
df = pd.DataFrame(digits.data)

In [None]:
# One-column frame holding the label of each image; the 1-D target array can
# be passed straight to DataFrame, no list() conversion needed.
label_df = pd.DataFrame(digits.target, columns = ['label'])

In [None]:
# Attach the labels as the last column. Any existing 'label' column is dropped
# first so the cell is idempotent: re-running it does not pile up duplicate
# 'label' columns (errors='ignore' makes the drop a no-op on the first run).
df = pd.concat([df.drop(columns = 'label', errors = 'ignore'), label_df], axis = 1)

In [None]:
# to_csv does not create missing folders and raises OSError when 'data/' is
# absent, so make sure the output directory exists before writing.
import os

os.makedirs('data', exist_ok = True)

# index = False keeps the row index out of the file, so the CSV contains only
# the pixel columns and the label column.
df.to_csv('data/digitsDataset.csv', index = False)

In [None]:
# (rows, columns) of the combined frame: the pixel columns plus the label column
df.shape

## Subplots

It is often useful to compare different subsets of your data side by side. To demonstrate this, we are going to visualize images side by side.

In [None]:
# The ``inline`` flag will use the appropriate backend to make figures appear inline in the notebook.  
%matplotlib inline

import pandas as pd
import numpy as np

# `plt` is an alias for the `matplotlib.pyplot` module
import matplotlib.pyplot as plt

### Load Data

The dataset is the digits dataset (from scikit-learn) that I arranged into a csv file for convenience. The data consists of pixel intensity values for 1797 images that are 8 by 8 pixels. This means that the dataset has 64 total values per image. Each image is labeled with a number from 0-9.

In [None]:
# Load the digits dataset from the CSV file
# (the path is relative to the notebook's working directory)
filename = 'data/digitsDataset.csv'
df = pd.read_csv(filename)

In [None]:
# Columns 0 to 63 hold the pixel intensity values of one 8 by 8 image.
# The label column holds the digit the image is supposed to show.
df.head()

### Show image

In [None]:
# Every column name except the last one (the label column), i.e. the pixel columns
pixel_colnames = df.columns[:-1]

In [None]:
# Display the pixel column names
pixel_colnames

In [None]:
# Get all columns except the label column for the first image
# (the row with index label 0), as a NumPy array
image_values = df.loc[0, pixel_colnames].values

In [None]:
# A flat array with one entry per pixel column:
# this is not the correct format for viewing images
image_values.shape

The images are 8 pixels by 8 pixels. It is important to keep in mind that just because a dataset is stored in a certain way, doesnt mean it was meant to be viewed that way. 

In [None]:
# Arrange the values into 8 rows of 8 pixels; the result is only displayed,
# image_values itself keeps its flat shape
image_values.reshape(8,8)

As it is not easy to understand pixel intensity values by looking at an array, lets visualize the image. 

In [None]:
# Draw the 8x8 array as an image using the gray colormap
plt.imshow(image_values.reshape(8,8), cmap ='gray')

#### Not the correct way to format your data

In [None]:
# The same values drawn as a single 64x1 column:
# this is not the format the image should be in.
plt.imshow(image_values.reshape(64, 1), cmap = 'gray')

### Subplot Creation
We are going to create a 1 by 5 grid of subplots (1 row, 5 columns). 

In [None]:
# There is a large amount of replicated code: the same four steps are repeated
# once per image, and only the row index and the subplot position change.

plt.figure(figsize=(10, 2))

# The first image
plt.subplot(1, 5, 1)
image_label = df.loc[0, 'label']
image_values = df.loc[0, pixel_colnames].values
plt.imshow(image_values.reshape(8, 8), cmap='gray')
plt.title(f'Label: {image_label}')

# The second image
plt.subplot(1, 5, 2)
image_label = df.loc[1, 'label']
image_values = df.loc[1, pixel_colnames].values
plt.imshow(image_values.reshape(8, 8), cmap='gray')
plt.title(f'Label: {image_label}')

# The third image
plt.subplot(1, 5, 3)
image_label = df.loc[2, 'label']
image_values = df.loc[2, pixel_colnames].values
plt.imshow(image_values.reshape(8, 8), cmap='gray')
plt.title(f'Label: {image_label}')

# The fourth image
plt.subplot(1, 5, 4)
image_label = df.loc[3, 'label']
image_values = df.loc[3, pixel_colnames].values
plt.imshow(image_values.reshape(8, 8), cmap='gray')
plt.title(f'Label: {image_label}')

# The fifth image
plt.subplot(1, 5, 5)
image_label = df.loc[4, 'label']
image_values = df.loc[4, pixel_colnames].values
plt.imshow(image_values.reshape(8, 8), cmap='gray')
plt.title(f'Label: {image_label}')

#### Using a for loop

In [None]:
# range(0,5) produces a sequence of integers from 0
# up to but not including 5, i.e. 0, 1, 2, 3, 4;
# list() is used here only so the values are displayed
list(range(0,5))

In [None]:
# This is a lot less code: one loop body handles every image

plt.figure(figsize=(10, 2))
for index in range(0, 5):
    # subplot positions count from 1, row indices count from 0
    plt.subplot(1, 5, index + 1)
    image_label = df.loc[index, 'label']
    image_values = df.loc[index, pixel_colnames].values
    plt.imshow(image_values.reshape(8, 8), cmap='gray')
    plt.title(f'Label: {image_label}')