# TSNE.jl

## Plot examples

In [None]:
using Plots

In [1]:
function pippo()
    println("pipp8ne")
end
pippo()

pipp8ne


In [None]:
# define the Lorenz attractor
Base.@kwdef mutable struct Lorenz
    dt::Float64 = 0.02
    σ::Float64 = 10
    ρ::Float64 = 28
    β::Float64 = 8/3
    x::Float64 = 1
    y::Float64 = 1
    z::Float64 = 1
end

function step!(l::Lorenz)
    dx = l.σ * (l.y - l.x)
    dy = l.x * (l.ρ - l.z) - l.y
    dz = l.x * l.y - l.β * l.z
    l.x += l.dt * dx
    l.y += l.dt * dy
    l.z += l.dt * dz
end

attractor = Lorenz()


# initialize a 3D plot with 1 empty series
plt = plot3d(
    1,
    xlim = (-30, 30),
    ylim = (-30, 30),
    zlim = (0, 60),
    title = "Lorenz Attractor",
    marker = 2,
)

# build an animated gif by pushing new points to the plot, saving every 10th frame
@gif for i=1:1500
    step!(attractor)
    push!(plt, attractor.x, attractor.y, attractor.z)
end every 10

## TSNE code

In [1]:
using LinearAlgebra

In [365]:
N = 200
X_dim = 100
Y_dim = 2

2

In [366]:
data = randn(N,X_dim);
data_red = randn(N,Y_dim);

In [287]:
function eu_dist(data::Array{Float64,2})
    dist_mat2 = zeros(size(data)[1],size(data)[1])
    for i in 1:size(data)[1], j in i:size(data)[1]
        dist_mat2[i,j] = sum((data[i,:] - data[j,:]).^2)
        dist_mat2[j,i] = dist_mat2[i,j]
    end
    dist_mat2
end

eu_dist (generic function with 1 method)

In [288]:
dist_mat = eu_dist(data);
dist_mat_red = eu_dist(data_red);

In [289]:
sum(data.^2,dims=2)

100×1 Matrix{Float64}:
 79.05295015545049
 62.35224067970594
 65.12244644667453
 71.28543405207274
 63.831674578345826
 63.93422430440896
 58.217344993046645
 64.77157730476466
 66.5554146308845
 70.72229747722163
 62.4244769109867
 66.05896326054142
 61.785244363219135
  ⋮
 65.12829331505108
 63.940568696177955
 71.13158422547735
 72.63132232054015
 68.79931318069642
 71.55815032835218
 70.67233913026904
 62.28972371968171
 65.49462731729449
 65.47031388076084
 65.60768642793053
 64.12169547561928

In [290]:
function cond_gauss(data::Matrix{Float64};sigma::Float64=1.0)
    exp_mat = exp.(-eu_dist(data)/(2*sigma^2))
    for i in 1:size(exp_mat)[1]
        exp_mat[i,i] = 0.
    end
    prob = max.(exp_mat/sum(exp_mat),1e-12)
    prob, exp_mat
end

cond_gauss (generic function with 1 method)

In [291]:
function cond_t(data::Matrix{Float64};df::Int64=1)
    sum_x = sum(data.^2,dims=2)
    num = -2. * data * transpose(data)
    num = 1. ./ (1. .+ (transpose(num .+ sum_x) .+ sum_x))
    for i in 1:size(num)[1]
        num[i,i] = 0.
    end
    max.(num/sum(num),1e-12), num
end

cond_t (generic function with 1 method)

In [292]:
p_distr, exp_mat = cond_gauss(data)
q_distr_g, _ = cond_gauss(data_red,sigma=1/(sqrt(2)));
q_distr_t, num = cond_t(data_red);


In [274]:
cost_KL(p_distr::Matrix{Float64},q_distr::Matrix{Float64}) = sum(p_distr.*(log.(p_distr./q_distr)))

cost_KL (generic function with 1 method)

In [293]:
function shannon_entropy(p_distr::Matrix{Float64})
    sh = p_distr.* log2.(p_distr)
    for i in 1:size(sh)[1]
        sh[i,i] = 0.
    end
    -sum(sh,dims=2)
end

perplexity(p_distr::Matrix{Float64}) = 2 .^shannon_entropy(p_distr)


perplexity (generic function with 1 method)

In [295]:
perplexity(p_distr);

In [188]:
size(data_red)

(5, 2)

In [298]:
PQ = p_distr - q_distr_t
sum(repeat(PQ[:,1] .* num[:,1],1,2) .* (transpose(data_red[1,:]) .- data_red),dims=1)

1×2 Matrix{Float64}:
 -0.000590371  0.00103943

In [361]:
function grad_KL(data::Matrix{Float64},data_red::Matrix{Float64})
    q_distr, num = cond_t(data_red)
    PQ = cond_gauss(data)[1] - q_distr
    dy = zeros(size(data_red))
    for i in 1:size(dy)[1]
        dy[i,:] = sum(repeat(PQ[:,i] .* num[:,i],1,size(data_red)[2]) .* (transpose(data_red[i,:]) .- data_red),dims=1)
    end
    dy
end

grad_KL (generic function with 1 method)

In [302]:
dr = copy(data_red);

In [303]:
for t in 1:1000
    dy = grad_KL(data,dr)
    dr = dr - 1*dy
    if t%10 == 0
        println(t," ",cost_KL(cond_gauss(data)[1],cond_t(dr)[1]))
    end
end

10 0.8898280649831637
20 0.8812140840332808
30 0.8734504572896833
40 0.8664554705484617
50 0.8601532159482199
60 0.8544702873703078
70 0.8493349106676045
80 0.844677669968315
90 0.8404329089162555
100 0.8365401142968704
110 0.8329449026091166
120 0.829599494635922
130 0.8264627273010492
140 0.8234997227440395
150 0.8206813434225616
160 0.817983540963172
170 0.8153866768079585
180 0.8128748652736788
190 0.8104353682812284
200 0.8080580559887818
210 0.8057349377592828
220 0.8034597619287867
230 0.8012276795049028
240 0.7990349652919108
250 0.7968787893622904
260 0.7947570318413388
270 0.7926681343666665
280 0.790610982158227
290 0.7885848112857676
300 0.7865891363896395
310 0.7846236947627299
320 0.7826884033151422
330 0.7807833255069248
340 0.778908645841044
350 0.7770646499564295
360 0.7752517087500108
370 0.7734702652903936
380 0.7717208235690087
390 0.7700039383728533
400 0.7683202057621562
410 0.7666702538017933
420 0.7650547333316777
430 0.7634743086723492
440 0.7619296482505211
45

In [337]:
dr .- repeat(sum(dr,dims=1)/size(dr)[1],size(dr)[1],1)

100×2 Matrix{Float64}:
  0.89899    -1.72432
 -0.1887     -0.175039
 -1.16986    -0.0769383
  0.1186      0.119816
 -0.11212    -0.495481
 -1.40859    -1.23151
  0.0657225  -0.0811027
  0.0364917  -0.0586603
  1.02339    -1.87048
  0.0179117  -0.315377
  0.226072    0.0978493
  0.405802   -0.977637
 -0.783667   -0.128462
  ⋮          
  0.0983338   0.0634371
 -0.148654   -0.521593
 -0.0605545  -0.0199561
  0.915385   -0.92417
 -1.25054     0.850166
  0.013457    0.930403
  2.21889    -1.01548
  1.07697    -1.43864
 -0.0381682   0.0763653
 -0.0435397   0.110315
 -0.231848    0.00158013
 -0.0151336  -0.152494

In [362]:
function gradient_descent(T::Int64,data::Matrix{Float64},data_red::Matrix{Float64},lr::Float64)
    # Start iterating
    dr = copy(data_red)
    for t in 1:T
        # Compute gradient
        dy = grad_KL(data,dr)
        # Update values
        dr = dr - lr * dy
        # Compute loss
        if t % 10 == 0
            println(t," ",cost_KL(cond_gauss(data)[1],cond_t(dr)[1]))
        end
        # Center in zero
        dr = dr - repeat(sum(dr,dims=1)/size(dr)[1],size(dr)[1],1)
    end
    dr
end

gradient_descent (generic function with 3 methods)

In [363]:
function tsne(X::Matrix{Float64},emb_size::Int64,T::Int64;lr::Float64=1.)
    # Create an initial embedding
    Y = randn(size(X)[1],emb_size)
    # Start the iteration
    Y_new = gradient_descent(T,X,Y,lr)
end

tsne (generic function with 1 method)

In [367]:
tsne(data,2,100;lr=2.)

10 8.183569759167924
20 8.073026550683077
30 8.040579133674095
40 8.00891358537357
50 7.974479710151515
60 7.936002698139684
70 7.891903599962814
80 7.841450674903781
90 7.791637614996068
100 7.759366336835661


200×2 Matrix{Float64}:
  0.627148   -0.614059
 -1.10413     0.0657984
 -0.486111    1.13798
 -0.131729   -0.408703
  0.943873   -2.07824
  0.291096    0.72165
 -0.38065     1.85284
  1.31349     0.691577
 -0.173786    1.10805
 -0.786114   -1.83113
  0.571455    0.287831
  1.508       1.93521
  0.10362    -1.11015
  ⋮          
 -0.0468275   2.51032
  0.90184     1.80113
 -0.116323    1.86613
 -1.50972    -1.89269
 -0.0249173   1.81965
 -0.816542   -2.18481
  0.874543    0.676234
 -1.65291     0.927422
 -1.43433    -0.401058
 -0.988598   -1.55601
  1.80441    -1.96405
  0.135701   -0.339527