## t-SNE

This notebook implements the t-SNE mapping for visualising high-dimensional datasets in a lower-dimensional space.

In [9]:
# import libraries 

import numpy as np
import pandas as pd 
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA

In [10]:

def reduceDimensionality(data, n_components = 30):
    """
    Project the feature columns of ``data`` onto their leading principal components.

    Every column except the last is treated as a feature; the last column is
    carried through unchanged (this notebook stores the class label there).
    The features are standardised with ``StandardScaler`` and then reduced
    with ``PCA``.

    Parameters
    ----------
    data : 2-D numpy array
        Feature columns followed by one trailing column.
    n_components : optional
        Forwarded to ``PCA(n_components=...)``; defaults to 30.

    Returns
    -------
    2-D numpy array
        The PCA-projected columns followed by the untouched trailing column.
    """
    features, trailing = data[:, :-1], data[:, -1]

    # Standardise first, then project with PCA.
    standardised = StandardScaler().fit_transform(features)
    projected = PCA(n_components = n_components).fit_transform(standardised)

    # Re-attach the trailing column as a single extra column on the right.
    return np.concatenate((projected, trailing.reshape(-1, 1)), axis = 1)


In [11]:
def plotPCA(data,title):
    """
    Draw a scatter plot of the first two columns of ``data``.

    Points are coloured by the last column of ``data`` and a colour bar is
    added for that scale. The figure is displayed immediately and nothing
    is returned.

    Parameters
    ----------
    data : 2-D array
        Columns 0 and 1 give the x/y coordinates; the last column supplies
        the colour values.
    title : str
        Text placed above the plot.
    """
    fig, ax = plt.subplots(figsize=(10, 10))
    points = ax.scatter(data[:, 0], data[:, 1], c=data[:, -1], alpha=0.5)
    # Attach the colour bar explicitly to this scatter and these axes.
    fig.colorbar(points, ax=ax)
    ax.set_xlabel('First Principal Component')
    ax.set_ylabel('Second Principal Component')
    ax.set_title(title)
    plt.show()

## Load MNIST Dataset

- Load the MNIST dataset and reduce it to a lower-dimensional space using PCA with 30 components.

In [12]:
import seaborn as sns
from scipy.linalg import eigh

In [13]:
# Read the MNIST training CSV, then report its dimensions and preview the first rows.
df = pd.read_csv('./data/mnist_train.csv')
print(df.shape)
df.head()

(42000, 785)


Unnamed: 0,label,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,...,pixel774,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783
0,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,4,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [14]:
# Keep the digit labels aside, then rebuild one array whose last column is the
# label -- reduceDimensionality treats the last column as the pass-through target.
labels = df['label']
pixel_values = df.drop(columns='label').values
label_column = labels.values.reshape(-1, 1)
data = reduceDimensionality(np.concatenate((pixel_values, label_column), axis=1))

# Quick sanity check on the reduced array.
print(data.shape)
print(data[:5])


(42000, 784)


: 

: 

In [None]:
# Scatter the first two columns of the PCA-reduced `data`, coloured by its last (label) column.
plotPCA(data, 'PCA')