# t-SNE: t-DISTRIBUTED STOCHASTIC NEIGHBOR EMBEDDING

**File:** tSNE.ipynb

**Course:** Data Science Foundations: Data Mining in Python


# IMPORT LIBRARIES

In [None]:
import pandas as pd                # For dataframe
import matplotlib.pyplot as plt    # For plotting data
import seaborn as sns              # For plotting data
from sklearn.manifold import TSNE  # For tSNE

# LOAD AND PREPARE DATA

Load the complete dataset (as opposed to the training or testing data) from the CSV file in the data directory into `df`. Separate the data matrix from the class variable. Name the data matrix `X` and the column with class labels `y`.

In [None]:
# Imports the complete dataset
df = pd.read_csv('data/optdigits.csv')

# Separates the attributes P0-P63 into X by keeping every column whose
# name contains a digit (the raw string stops `\d` from being parsed as
# an invalid string escape, which newer Python versions warn about)
X = df.filter(regex=r'\d')

# Separates the class variable into y
y = df.y

In [None]:
# Previews the first five rows of the data
df.head(n=5)

# t-SNE: MODEL DATA

This phase applies t-SNE to the complete dataset with various values of the perplexity parameter and displays the resulting two-dimensional embeddings of the data.

In [None]:
# Creates a 2-component t-SNE object, leaving every other parameter at
# its default (the fixed random_state makes runs repeatable)
tsne = TSNE(n_components=2, random_state=1)

In [None]:
# Lists the parameters of the t-SNE object and their current values
tsne.get_params(deep=True)

## PERPLEXITY = 1

In [None]:
# Builds a 2-component t-SNE model with perplexity = 1
# (the fixed random_state makes the embedding repeatable)
tsne = TSNE(n_components=2, perplexity=1, random_state=1)

# Fits the model to the attribute data and returns the 2D embedding
X_tf = tsne.fit_transform(X)

# Plots the two embedding dimensions against each other; the class
# label in y sets both the color and the marker style of each point
# NOTE(review): the three-color palette presumes y has three classes
sns.scatterplot(
    x=X_tf[:, 0], y=X_tf[:, 1],
    hue=y, style=y,
    palette=['red', 'green', 'blue'])

## PERPLEXITY = 2

In [None]:
# Builds a 2-component t-SNE model with perplexity = 2
# (the fixed random_state makes the embedding repeatable)
tsne = TSNE(n_components=2, perplexity=2, random_state=1)

# Fits the model to the attribute data and returns the 2D embedding
X_tf = tsne.fit_transform(X)

# Plots the two embedding dimensions against each other; the class
# label in y sets both the color and the marker style of each point
# NOTE(review): the three-color palette presumes y has three classes
sns.scatterplot(
    x=X_tf[:, 0], y=X_tf[:, 1],
    hue=y, style=y,
    palette=['red', 'green', 'blue'])

## PERPLEXITY = 5

In [None]:
# Builds a 2-component t-SNE model with perplexity = 5
# (the fixed random_state makes the embedding repeatable)
tsne = TSNE(n_components=2, perplexity=5, random_state=1)

# Fits the model to the attribute data and returns the 2D embedding
X_tf = tsne.fit_transform(X)

# Plots the two embedding dimensions against each other; the class
# label in y sets both the color and the marker style of each point
# NOTE(review): the three-color palette presumes y has three classes
sns.scatterplot(
    x=X_tf[:, 0], y=X_tf[:, 1],
    hue=y, style=y,
    palette=['red', 'green', 'blue'])

## PERPLEXITY = 10

In [None]:
# Builds a 2-component t-SNE model with perplexity = 10
# (the fixed random_state makes the embedding repeatable)
tsne = TSNE(n_components=2, perplexity=10, random_state=1)

# Fits the model to the attribute data and returns the 2D embedding
X_tf = tsne.fit_transform(X)

# Plots the two embedding dimensions against each other; the class
# label in y sets both the color and the marker style of each point
# NOTE(review): the three-color palette presumes y has three classes
sns.scatterplot(
    x=X_tf[:, 0], y=X_tf[:, 1],
    hue=y, style=y,
    palette=['red', 'green', 'blue'])

## PERPLEXITY = 50

In [None]:
# Builds a 2-component t-SNE model with perplexity = 50
# (the fixed random_state makes the embedding repeatable)
tsne = TSNE(n_components=2, perplexity=50, random_state=1)

# Fits the model to the attribute data and returns the 2D embedding
X_tf = tsne.fit_transform(X)

# Plots the two embedding dimensions against each other; the class
# label in y sets both the color and the marker style of each point
# NOTE(review): the three-color palette presumes y has three classes
sns.scatterplot(
    x=X_tf[:, 0], y=X_tf[:, 1],
    hue=y, style=y,
    palette=['red', 'green', 'blue'])

# CLEAN UP

- If desired, clear the results with Cell > All Output > Clear. 
- Save your work by selecting File > Save and Checkpoint.
- Shut down the Python kernel and close the file by selecting File > Close and Halt.