In [1]:
import numpy as np
import pandas as pd
import time
from sklearn.manifold import TSNE


import matplotlib.pyplot as plt
import matplotlib.patheffects as PathEffects
%matplotlib inline
%matplotlib notebook
from mpl_toolkits.mplot3d import Axes3D
import seaborn as sns

In [2]:
# Load the forest-fires table; the path is relative to the notebook's working directory.
df = pd.read_csv('forestfires.csv')

In [3]:
# Display the loaded frame (the rendered output elides the middle rows).
df

Unnamed: 0,X,Y,month,day,FFMC,DMC,DC,ISI,temp,RH,wind,rain,area
0,7,5,mar,fri,86.2,26.2,94.3,5.1,8.2,51,6.7,0.0,0.00
1,7,4,oct,tue,90.6,35.4,669.1,6.7,18.0,33,0.9,0.0,0.00
2,7,4,oct,sat,90.6,43.7,686.9,6.7,14.6,33,1.3,0.0,0.00
3,8,6,mar,fri,91.7,33.3,77.5,9.0,8.3,97,4.0,0.2,0.00
4,8,6,mar,sun,89.3,51.3,102.2,9.6,11.4,99,1.8,0.0,0.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...
512,4,3,aug,sun,81.6,56.7,665.6,1.9,27.8,32,2.7,0.0,6.44
513,2,4,aug,sun,81.6,56.7,665.6,1.9,21.9,71,5.8,0.0,54.29
514,7,4,aug,sun,81.6,56.7,665.6,1.9,21.2,70,6.7,0.0,11.16
515,1,4,aug,sat,94.4,146.0,614.7,11.3,25.6,42,4.0,0.0,0.00


In [4]:
# Re-type every object-dtype (string) column as a pandas categorical.
object_columns = df.select_dtypes(include=['object']).columns
df[object_columns] = df[object_columns].astype('category')

In [5]:
# Replace each categorical column with its integer category codes.
# NOTE(review): codes follow the category order, which pandas infers lexically
# for strings, so month/day codes are presumably not in calendar order - confirm
# that is acceptable for the downstream embedding.
cat_columns = df.select_dtypes(include=['category']).columns
for col in cat_columns:
    df[col] = df[col].cat.codes

In [6]:
# Log-transform the burned area; the +1 shift keeps zero-area rows finite (log(1) == 0).
df['area_log'] = np.log(df['area'] + 1)

In [7]:
# Cut the log-area into two quantile-based classes (0 = lower half, 1 = upper half),
# then remove the raw and log-transformed area so only the class label remains.
df['area_bins'] = pd.qcut(df['area_log'], q=2, labels=[0, 1])
df.drop(columns=['area', 'area_log'], inplace=True)

In [8]:
# Inspect the binary class label produced by the quantile cut.
df['area_bins']

0      0
1      0
2      0
3      0
4      0
      ..
512    1
513    1
514    1
515    0
516    0
Name: area_bins, Length: 517, dtype: category
Categories (2, int64): [0 < 1]

In [9]:
# Separate the class label (y) from the feature columns (X).
# Selecting and dropping leaves `df` untouched, so this cell can be re-run;
# popping the column removed it from `df` and a second run raised KeyError.
y = df['area_bins']
X = df.drop(columns='area_bins')

In [10]:
# Inspect the label series that was split off from the features.
y

0      0
1      0
2      0
3      0
4      0
      ..
512    1
513    1
514    1
515    0
516    0
Name: area_bins, Length: 517, dtype: category
Categories (2, int64): [0 < 1]

In [11]:
# Embed the feature table into two dimensions with t-SNE and report the wall-clock time.
RS = 123  # seed handed to TSNE as random_state
time_start = time.time()
tsne_2d = TSNE(
    n_components=2,
    perplexity=40,
    early_exaggeration=40,
    n_iter=500,
    verbose=1,
    random_state=RS,
)
fashion_tsne_2d = tsne_2d.fit_transform(X)
print('t-SNE Time elapsed: {} seconds'.format(time.time()-time_start))


[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 517 samples in 0.001s...
[t-SNE] Computed neighbors for 517 samples in 0.011s...
[t-SNE] Computed conditional probabilities for sample 517 / 517
[t-SNE] Mean sigma: 13.910009
[t-SNE] KL divergence after 250 iterations with early exaggeration: 257.285828
[t-SNE] KL divergence after 500 iterations: 0.177266
t-SNE Time elapsed: 0.6323418617248535 seconds


In [12]:
# Global seaborn appearance: dark grid, muted palette, enlarged notebook fonts, thicker lines.
sns.set_style('darkgrid')
sns.set_palette('muted')
sns.set_context('notebook', font_scale=1.5, rc={'lines.linewidth': 2.5})

In [13]:

RS = 123  # same seed value that is passed to the t-SNE runs


def fashion_scatter_2d(x, colors):
    """Scatter-plot a 2-D embedding with one colour per class and a label per class.

    Parameters
    ----------
    x : array-like of shape (n_samples, 2)
        Embedded coordinates; column 0 goes on the x-axis, column 1 on the y-axis.
    colors : array-like of length n_samples
        Class label of every sample. Labels are cast to ``int``; they do not
        have to be the contiguous range ``0..n_classes-1``.

    Returns
    -------
    tuple
        ``(figure, axes, scatter PathCollection, list of label Text artists)``.
    """
    x = np.asarray(x)
    # The builtin int replaces the np.int alias, which NumPy 1.24 removed.
    labels = np.asarray(colors).astype(int).ravel()
    # class_idx maps each sample to a row of the palette, so arbitrary integer
    # labels (not only 0..n-1) index the palette safely.
    classes, class_idx = np.unique(labels, return_inverse=True)
    palette = np.array(sns.color_palette("hls", len(classes)))

    # create a scatter plot.
    f = plt.figure(figsize=(24, 6))
    ax = plt.subplot(aspect='equal')
    sc = ax.scatter(x[:, 0], x[:, 1], lw=0, s=40, c=palette[class_idx])
    plt.xlim(-25, 25)
    plt.ylim(-25, 25)
    ax.axis('off')
    # NOTE(review): axis('tight') presumably re-fits the limits to the data, which
    # would override the fixed +/-25 window set above - confirm which is intended.
    ax.axis('tight')

    # Put each class label at the median position of that class's points.
    txts = []
    for cls in classes:
        xtext, ytext = np.median(x[labels == cls, :], axis=0)
        txt = ax.text(xtext, ytext, str(cls), fontsize=24)
        # White outline keeps the label readable on top of the markers.
        txt.set_path_effects([
            PathEffects.Stroke(linewidth=5, foreground="w"),
            PathEffects.Normal()])
        txts.append(txt)

    return f, ax, sc, txts

In [14]:
# Select the interactive notebook backend, then draw the 2-D embedding coloured by class.
%matplotlib notebook
fashion_scatter_2d(fashion_tsne_2d, y)

<IPython.core.display.Javascript object>

(<Figure size 1728x432 with 1 Axes>,
 <AxesSubplot:>,
 <matplotlib.collections.PathCollection at 0x12a1ea0a0>,
 [Text(-0.39586335, -1.1037307, '0'), Text(0.51139, -1.0212464, '1')])

In [15]:
# Same t-SNE settings as the 2-D run, but embedding into three dimensions.
time_start = time.time()
tsne_3d = TSNE(
    n_components=3,
    perplexity=40,
    early_exaggeration=40,
    n_iter=500,
    verbose=1,
    random_state=RS,
)
fashion_tsne_3d = tsne_3d.fit_transform(X)
print('t-SNE Time : {} seconds'.format(time.time()-time_start))

[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 517 samples in 0.001s...
[t-SNE] Computed neighbors for 517 samples in 0.010s...
[t-SNE] Computed conditional probabilities for sample 517 / 517
[t-SNE] Mean sigma: 13.910009
[t-SNE] KL divergence after 250 iterations with early exaggeration: 368.438812
[t-SNE] KL divergence after 500 iterations: 2.642489
t-SNE Time : 1.146885871887207 seconds


In [16]:
import matplotlib.animation as animation
from numpy.random import normal as normal


In [17]:
# 3D animation demo: Gaussian point clouds centred on 50 whose spread grows with the frame index.

nfr = 30  # Number of frames
fps = 10  # Frame per sec
ss = np.arange(1, nfr, 0.5)  # one standard deviation per generated cloud

# For each spread draw x, then y, then z (200 samples each), mirroring a plain loop.
# Note: ss holds 58 spreads but the animation below only plays the first nfr of them.
clouds = [(normal(50, s, 200), normal(50, s, 200), normal(50, s, 200)) for s in ss]
xs = [cloud[0] for cloud in clouds]
ys = [cloud[1] for cloud in clouds]
zs = [cloud[2] for cloud in clouds]

fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
sct, = ax.plot([], [], [], "o", markersize=2)


def update(ifrm, xa, ya, za):
    """Move the markers of the module-level line `sct` to the cloud stored for frame `ifrm`."""
    sct.set_data(xa[ifrm], ya[ifrm])
    sct.set_3d_properties(za[ifrm])


ax.set_xlim(0, 100)
ax.set_ylim(0, 100)
ax.set_zlim(0, 100)
ani = animation.FuncAnimation(
    fig, update, nfr, fargs=(xs, ys, zs), interval=1000/fps
)

<IPython.core.display.Javascript object>

In [18]:
# xs is a list of 58 NumPy arrays (one per value in ss), each holding 200 samples.
xs

[array([50.6767831 , 51.43890471, 49.63661901, 49.7838153 , 51.64908924,
        51.15658459, 49.21085241, 49.72651938, 48.30485129, 49.27639343,
        48.23047175, 50.66152363, 50.13561165, 49.19439625, 50.21978007,
        48.34619949, 50.65540426, 51.58815762, 49.43193253, 49.89285285,
        50.22798682, 49.53090662, 49.6018962 , 48.87544515, 50.27655906,
        49.33802948, 50.1143969 , 50.38864102, 49.15884351, 49.60498793,
        50.21236989, 51.23763892, 50.52467744, 51.14164257, 50.64894966,
        50.74261977, 50.95414041, 50.22061372, 49.58016006, 51.0057183 ,
        50.31015628, 51.39221353, 50.92253167, 49.8009305 , 48.95626988,
        50.61902244, 48.88163611, 50.25105829, 50.30609262, 49.33078766,
        49.37147906, 50.52689649, 48.70661867, 50.2187601 , 52.15977404,
        49.88420054, 47.73237297, 48.9255529 , 49.21754101, 49.98080587,
        51.44240183, 49.64339666, 51.23798317, 49.50162609, 49.536965  ,
        51.1312131 , 48.68691301, 49.18922388, 50.8

In [19]:
# Check the embedding layout: one row per sample, one column per embedding dimension.
fashion_tsne_3d.shape

(517, 3)

In [20]:
# Length of the first row, i.e. the number of embedding dimensions.
len(fashion_tsne_3d[0])

3

In [21]:
# A single column slice is 1-D, with one entry per sample.
fashion_tsne_3d[:,0].shape

(517,)

In [22]:
# 3D animation of the t-SNE embedding: all samples are drawn once, then each
# frame shows a single embedded sample (sample index == frame index).
# NOTE(review): the embedding is static, so one-sample-per-frame may not be the
# intended effect, and nfr (300) is smaller than the number of samples - confirm.

nfr = 300  # Number of frames
fps = 10   # Frame per sec
xs = fashion_tsne_3d[:, 0]
ys = fashion_tsne_3d[:, 1]
zs = fashion_tsne_3d[:, 2]


# Defined before FuncAnimation is built so this cell no longer depends on an
# `update` left behind by an earlier cell.
def update(ifrm, xa, ya, za):
    """Show only sample `ifrm` of the coordinate arrays on the module-level line `sct`."""
    # Length-1 slices instead of scalars: newer Matplotlib releases reject
    # non-sequence data in Line2D.set_data.
    point = slice(ifrm, ifrm + 1)
    sct.set_data(xa[point], ya[point])
    sct.set_3d_properties(za[point])


fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
sct, = ax.plot(xs, ys, zs, "o", markersize=2)
# interval is derived from fps (1000 / 10 == 100 ms, the value previously hard-coded).
ani = animation.FuncAnimation(fig, update, nfr, fargs=(xs, ys, zs), interval=1000 / fps)
    


<IPython.core.display.Javascript object>

In [23]:
# Number of entries in the third embedding column (one per sample).
len(fashion_tsne_3d[:,2])

517

In [24]:
# First x-coordinate of the embedding; xs was rebound to fashion_tsne_3d[:, 0]
# by the animation cell above, so this is a single float rather than an array.
xs[0]

-40.79756

In [25]:
def fashion_scatter_3d(x, colors):
    """Scatter-plot a 3-D embedding with one colour per class.

    Parameters
    ----------
    x : array-like of shape (n_samples, 3)
        Embedded coordinates; columns 0, 1 and 2 are drawn on the x, y and z axes.
    colors : array-like of length n_samples
        Class label of every sample. Labels are cast to ``int``; they do not
        have to be the contiguous range ``0..n_classes-1``.

    Returns
    -------
    tuple
        ``(figure, 3-D axes, scatter collection)``.
    """
    x = np.asarray(x)
    # The builtin int replaces the np.int alias, which NumPy 1.24 removed.
    labels = np.asarray(colors).astype(int).ravel()
    # class_idx maps each sample to a palette row regardless of the label values.
    classes, class_idx = np.unique(labels, return_inverse=True)
    palette = np.array(sns.color_palette("hls", len(classes)))

    # create a scatter plot.
    f = plt.figure(figsize=(12, 6))
    ax = f.add_subplot(111, projection='3d')
    sc = ax.scatter(x[:, 0], x[:, 1], x[:, 2], lw=0, s=40, c=palette[class_idx])
    plt.xlim(-15, 15)
    plt.ylim(-15, 15)
    ax.axis('off')
    # NOTE(review): axis('tight') presumably re-fits the limits to the data, which
    # would override the fixed +/-15 window set above - confirm which is intended.
    ax.axis('tight')

    return f, ax, sc

In [26]:
# Draw the 3-D embedding coloured by class; the returned (figure, axes, collection) tuple is echoed.
fashion_scatter_3d(fashion_tsne_3d, y)

<IPython.core.display.Javascript object>

(<Figure size 864x432 with 1 Axes>,
 <Axes3DSubplot:>,
 <mpl_toolkits.mplot3d.art3d.Path3DCollection at 0x12a227e80>)

In [37]:
# Animated 3-D view of the t-SNE embedding, coloured by class.
num_classes = len(np.unique(y))
palette = np.array(sns.color_palette("hls", num_classes))

nfr = 300  # Number of frames
fps = 10   # Frame per sec
xs = fashion_tsne_3d[:, 0]
ys = fashion_tsne_3d[:, 1]
zs = fashion_tsne_3d[:, 2]

# create a scatter plot.
f = plt.figure(figsize=(12, 6))
ax = f.add_subplot(111, projection='3d')
# Draw on the 3-D axes with all three coordinates; the pyplot call with only
# xs/ys left every point at z == 0. The builtin int replaces np.int, which
# NumPy 1.24 removed.
sc = ax.scatter(xs, ys, zs, lw=0, s=40, c=palette[np.asarray(y).astype(int)])


def animate(i, xa, ya, scat):
    """Advance the animation to frame `i` by rotating the camera around the z-axis.

    The embedding is static, so instead of indexing coordinates per frame (the
    earlier np.hstack of two 1-D arrays produced a flat vector, making
    ``data[i]`` a scalar that ``set_offsets`` cannot use) the view makes one
    full turn over `nfr` frames. `xa` and `ya` are accepted only so the
    existing ``fargs`` tuple stays valid.

    NOTE(review): rotation is an assumption about the intended effect - confirm.
    """
    ax.view_init(elev=30, azim=360.0 * i / nfr)
    return scat,


plt.xlim(-15, 15)
plt.ylim(-15, 15)
ax.axis('off')
ax.axis('tight')

# interval is derived from fps (1000 / 10 == 100 ms, the value previously hard-coded).
animasa = animation.FuncAnimation(f, animate, nfr, fargs=(xs, ys, sc), interval=1000 / fps)


<IPython.core.display.Javascript object>