# AEFIT

This is a first attempt at running the unsupervised VAE network to learn how to characterize a 1D profile with added noise and missing input.


In [106]:
import numpy as np
import tensorflow as tf

# %matplotlib inline
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import matplotlib.colors as colors 

import ipysh
import Hunch_utils  as Htls
import Hunch_lsplot as Hplt

#%aimport Dummy_g1data
#import Dummy_g1data as dummy
%aimport Dummy_dsx3
import Dummy_dsx3 as dummy

%aimport models.base
%aimport models.AEFIT
# ipysh.Bootstrap_support.debug()

## Data and Model
The model and data generator are set:
The dummy data generator draws from a set of 5 kinds of curves, with a dataset cardinality of 10K samples.

All the shapes are generated from a dictionary array that defines the mean, sigma and gain of a sum of Gaussians.
This table can be printed from the variable `ds.kinds`.

>NOTE: 
> The actual generator produces curves at random, so it does not reproduce the very same samples on each epoch.
> To constrain the dataset to an exact, fixed set of samples, the buffer can be used.

By default the model uses an input of 40 values, which are the (x,y) tuple values of 20 points taken from the generated shapes.
If the command **buffer()** is used, all shapes are stored in a buffer and the generator always yields the same set of curves.


In [107]:
# Build the dummy dataset generator (counts=10000, size=20).
# Alternative generator, kept for reference:
#ds = dummy.Dummy_g1data(counts=10000, size=20)
ds = dummy.Dummy_dsx3(counts=10000, size=20)

# Optional: restrict the generator to its first two kinds of curve and inspect them.
#ds.kinds=ds.kinds[:2]
#ds.kinds

# Store the generated shapes in a buffer so the generator always yields the same curves.
# As the last expression of the cell, the returned object is what the cell displays.
ds.buffer()

<Dummy_dsx3.Dummy_dsx3 at 0x7f4125c35b50>

In [108]:
# Convert the dataset to a mirrored (data, data) form suitable to be fed into the VAE:
# each (xy, l) element becomes (xy, xy), i.e. the second element `l` is dropped and
# the input is reused as the training target.
dds = ds.ds_array.map(lambda xy,l: (xy,xy) )

In [109]:
# Create a VAE model from the AEFIT prototype.
# feature_dim=40 corresponds to the (x,y) values of 20 points per curve; the latent space is 2D.
# NOTE(review): beta is presumably the weight of the KL term in the loss - confirm in models.AEFIT.
m3 = models.AEFIT.AEFIT(feature_dim=40,latent_dim=2, scale=0.5, 
                        #beta=10**(-6),
                        beta=0,
                        geometry=[20,20,10])

AEFIT5 ready:


In [110]:
# NOTE(review): bare attribute access that is not the last expression of the cell,
# so nothing is displayed; it looks like a leftover inspection line.
m3.latent_dim
# NOTE(review): presumably bypasses the relevance layer of the model - confirm in models.AEFIT.
m3.set_rlv_bypass()
# m3.gamma = 0.01

In [113]:
m3.beta.assign(1e-6)


def fit():
    """Train m3 for 39 epochs on the mirrored dataset.

    The first 3000 elements of dds are used for validation (batches of 100);
    everything after them is used for training (batches of 20, incomplete
    last batch dropped).
    """
    training_batches = dds.skip(3000).batch(20, drop_remainder=True)
    validation_batches = dds.take(3000).batch(100)
    return m3.fit(training_batches, validation_data=validation_batches, epochs=39, shuffle=False)


# Interactive notebook: hand the training function to fn_thread and show its control panel
models.base.fn_thread(m3, fit).control_panel()

# Standard notebook: call the training function directly instead
# fit()

Button(button_style='success', description='start', style=ButtonStyle())

Button(button_style='primary', description='watch', style=ButtonStyle())

Button(description='clear log', style=ButtonStyle())

Output()

In [25]:
# Reset beta to 0 (alternative value kept for reference on the next line).
# NOTE(review): this cell carries an older execution count than the cells around it,
# so it was not run in document order - confirm whether it belongs in a fresh top-to-bottom run.
#m3.beta.assign(10**(-1))
m3.beta.assign(0)



<tf.Variable 'UnreadVariable' shape=() dtype=float32, numpy=0.0>

Epoch 6/39
Epoch 7/39
Epoch 8/39

In [111]:
# Start a viewer of the latent space for model m3, using 1000 samples from ds.
p = Hplt.LSPlotBokeh()
p.set_model(m3)
p.set_data(ds, counts=1000)
# NOTE(review): notebook_url is hard-coded to one specific host and port; presumably it has to
# match the address this notebook is served from - adjust when running elsewhere.
p.plot(notebook_url='http://rat2.rfx.local:8889')

In [112]:
# Same model and data as the previous viewer, shown with the LSPlotViolin plot class.
# Note that this rebinds `p`, replacing the viewer created in the previous cell.
p = Hplt.LSPlotViolin()
p.set_model(m3)
p.set_data(ds, counts=1000)
# NOTE(review): hard-coded notebook host and port - adjust when running elsewhere.
p.plot(notebook_url='http://rat2.rfx.local:8889')

-0.039723657
-0.08683379


In [None]:
# Draw 1000 random latent-space points, uniformly in [-2.5, 2.5) on each axis.
# (Nothing is plotted here; the points are drawn in the next cell.)
x_ls, y_ls = (np.random.uniform(low=-2.5, high=2.5, size=1000) for _ in range(2))

In [None]:

# Pick one of the sampled latent points at random.
# randint is called without a size so it returns a scalar: converting a 1-element
# array to int is deprecated in recent NumPy. The upper bound follows the actual
# number of sampled points instead of repeating the literal 1000.
i = int(np.random.randint(0, len(x_ls)))

# Decode the selected latent point into a curve.
pt_i = [x_ls[i], y_ls[i]]
xy = m3.decode(tf.convert_to_tensor([pt_i]), training=False)

# The decoded vector is split into two equal halves, used as x and y values.
x, y = tf.split(xy[0], 2)
x, y = (x.numpy(), y.numpy())

f = plt.figure(figsize=(18, 6))
f1 = f.add_subplot(121)
f2 = f.add_subplot(122)

# Left: all sampled latent points, with the selected one highlighted in red.
f1.set_title('Latent space')
f1.scatter(x_ls, y_ls)
f1.scatter(x_ls[i], y_ls[i], c='#FF0000')

# Right: the curve decoded from the selected point.
f2.set_title('Decoded curve')
f2.plot(x, y, c='#FF0000')


In [None]:
# Take the first layer of the generative network (the relevance layer) and display its weights.
# Nothing is plotted: the weights are shown as the cell's output value.
relevance = m3.generative_net.layers[0]
relevance.weights

## Missing data simulation

Now we want to test the network against particular shapes within the latent main paths, but with added noise and simulated missing data. The function simulate_missing_data reduces the number of available inputs by simply overwriting each missing point with the value of the point that follows it, so that the two become duplicates.

Additional Gaussian noise is also applied to the curve.

In [None]:

def simulate_missing_data(m, pt=(0.5, -1.6), noise_var=0.05, arr=()):
    """Decode a latent point, corrupt the curve, re-encode it and plot the outcome.

    The curve decoded from ``pt`` is perturbed with Gaussian noise, then every
    index listed in ``arr`` is treated as missing and overwritten with the value
    of the sample that follows it. The corrupted curve is encoded again and the
    resulting latent point is decoded, so the original, the corrupted and the
    reconstructed curves can be compared.

    Parameters
    ----------
    m : model exposing ``decode(tensor, training=False)`` and
        ``encode(tensor, training=False)``; ``encode`` must return two values,
        the first of which is used as the guessed latent point.
    pt : sequence of two floats
        Latent-space point to decode.
    noise_var : float
        Passed as the ``scale`` argument of ``np.random.normal``, i.e. it is
        used as a standard deviation despite its name.
    arr : sequence of int
        Indices of the samples to treat as missing. They are processed in the
        given order, so a run of consecutive missing samples should be listed
        in descending order (e.g. ``[3, 2, 1]``) for all of them to receive the
        value of the first available sample after the run.

    Raises
    ------
    IndexError
        If an index in ``arr`` has no following sample to copy from.

    Returns
    -------
    None. Two panels are drawn (latent space on the left, curves on the right)
    and the guessed latent point is printed.
    """
    xy = m.decode(tf.convert_to_tensor([pt]), training=False)
    # the decoded vector is split into two equal halves, used as x and y values
    x, y = tf.split(xy[0], 2)
    x, y = (x.numpy(), y.numpy())

    # Fail early, before anything is drawn, when a missing index has no successor.
    last_index = len(x) - 1
    for idx in arr:
        if idx >= last_index:
            raise IndexError(
                f"missing-data index {idx} has no following sample "
                f"(the curve has {len(x)} points)")

    fig = plt.figure('gen_missing_curve', figsize=(18, 6))
    ax1 = fig.add_subplot(121)
    ax2 = fig.add_subplot(122)

    ax1.set_xlim(-2., 2.)
    ax1.set_ylim(-2., 2.)

    # requested latent point and the clean decoded curve
    ax1.scatter(pt[0], pt[1], s=80)
    ax2.scatter(x, y, s=40)

    # apply noise
    x += np.random.normal(0, noise_var, len(x))
    y += np.random.normal(0, noise_var, len(y))

    # apply missing data simulation: each missing sample takes the value of the next one
    for idx in arr:
        x[idx] = x[idx + 1]
        y[idx] = y[idx + 1]

    # corrupted curve
    ax2.scatter(x, y, s=80)

    # encode the corrupted curve as a single row
    me, va = m.encode(tf.reshape(tf.concat([x, y], 0), shape=[1, -1]), training=False)
    print("Guessed Latent point = ", me.numpy())
    gpt = me[0].numpy()
    ax1.scatter(gpt[0], gpt[1])

    # curve reconstructed from the guessed latent point
    XY = m.decode(me, training=False)
    X, Y = tf.split(XY[0], 2)
    X, Y = (X.numpy(), Y.numpy())
    ax2.scatter(X, Y, s=40)
    # plt.plot(X,Y)



We start by generating close to the shape {'mean': [0.5], 'sigma': [0.2], 'gain': [0.5]}

This is in the middle of the central cluster

In [None]:
# generate from latent point: -0.400, 0.593
pt = [-0.400,0.593]
# passed as the scale of the Gaussian noise added to the decoded curve
noise_var = 0.1
# indices of the samples treated as missing (each is overwritten with the following sample)
arr = [3,2,1,5,8,7,6,9,12,11,14,13,18]
simulate_missing_data(m3, pt,noise_var,arr)


Now we want to check if the network can simulate a point in the middle of two clusters

In [None]:
# generate from latent point: 0.666, -0.278
pt = [0.666,-0.278]
# passed as the scale of the Gaussian noise added to the decoded curve
noise_var = 0.05
# indices of the samples treated as missing (each is overwritten with the following sample)
arr = [3,2,1,5,8,7,6,9,12,11,14,13,18]
simulate_missing_data(m3, pt,noise_var,arr)



In [None]:
# generate from latent point: -1.283, 0.541
pt = [-1.283,0.541]
# zero noise and no missing-sample indices: the decoded curve is re-encoded unmodified
noise_var = 0.0
#arr = [3,2,1,5,8,7,6,9,12,11,14,13,18]
simulate_missing_data(m3, pt,noise_var)



In [None]:
# -*- coding: utf-8 -*-
"""
Created on Tue Apr  4 15:21:20 2023

@author: Gabriel

Stand-alone Bokeh prototype: one sideways histogram per latent-space dimension,
each with a slider that moves a horizontal marker line over the histogram.
The page is written to LS.html.
"""

import itertools  # itertools handles the colour cycling

import numpy as np

from bokeh.layouts import row, column
from bokeh.plotting import figure, output_file, show
from bokeh.palettes import Category20
from bokeh.models import CustomJS, Slider, HoverTool, WheelZoomTool, ColumnDataSource


# Placeholder distributions used to build the plots, later to be replaced by
# the distribution of the values of each dimension of the latent space.
# Each entry is (mean, standard deviation). They are drawn in a comprehension so that
# no single-letter globals (f, x, y, ...) overwrite variables from earlier cells.
placeholder_params = [(-0.7, 0.3), (1, 0.1), (0, 0.6), (0.5, 0.2), (2, 0.1),
                      (0.8, 0.8), (0, 0.1), (1.5, 0.5), (-2, 0.9)]

# list of all the distributions that we iterate on
data_uniform = [np.random.normal(mean, std, size=10000) for mean, std in placeholder_params]
n_dims = len(data_uniform)

# Colour iterator, one colour per dimension. It is deliberately not called `colors`,
# which would overwrite the `matplotlib.colors` module imported at the top of the notebook.
# Not essential: colours should eventually come from the kinds, which give colours to the
# different clusters in EACH distribution plot.
palette_cycle = itertools.cycle(Category20[n_dims])

output_file("LS.html")

# list storing the (histogram + slider) column of each dimension
fig = []

# Parameters of a reference Gaussian drawn on every dimension plot, just for comparison
# (later this could be the current value, for example)
ref_mean = -0.7
ref_sigma = 0.3

# Number of histogram bins, also used as the length of every plotted line
# so that all arrays have the same size
points = 1000

# Selection tools made available on each plot
select_tools = ['pan', 'box_select', 'tap', 'reset']

# Placeholder for the per-dimension selected values (not used by this cell yet)
gen_list = np.full(n_dims, 0)

# Build one plot per distribution
for dim_index, samples in enumerate(data_uniform):

    # NOTE(review): plot_width / plot_height are the older Bokeh names for width / height;
    # confirm they are still accepted by the installed Bokeh version.
    panel = figure(title=f"dimension {dim_index}", plot_width=200, plot_height=300,
                   toolbar_location='left', tools=select_tools, y_axis_location="left")

    # Histogram of the samples, drawn sideways: bin edges run along y, counts along x.
    # edges[0] is the lowest sampled value of the dimension, edges[-1] the highest.
    hist, edges = np.histogram(a=samples, bins=points)
    panel.quad(top=edges[1:], bottom=edges[:-1], left=0, right=hist, color=next(palette_cycle))
    panel.y_range.flipped = True

    # Reference probability density function, scaled by 20 to be visible next to the counts
    x0 = np.linspace(edges[0] - 1, edges[-1] + 1, points)
    y0 = np.exp(-0.5 * ((x0 - ref_mean) / ref_sigma) ** 2) / (np.sqrt(2 * np.pi) * ref_sigma)
    panel.line(x=20 * y0, y=x0, line_width=2, line_color="black")

    panel.add_tools(HoverTool(), WheelZoomTool())

    # Horizontal marker line representing the value selected with the slider.
    # This value will later be used to generate a new curve with the decoder.
    midpoint = (edges[0] + edges[-1]) / 2
    source_slider = ColumnDataSource(data=dict(x=np.linspace(0, 50, points),
                                               y=np.full(points, midpoint)))
    panel.line(x='x', y='y', source=source_slider, line_width=4, line_color="black")

    slider = Slider(start=edges[0] - 1, end=edges[-1] + 1, value=midpoint, step=.01,
                    title="Selected value ", width=200)

    # Browser-side callback: move the marker line to the slider value.
    # The y array is sized from x rather than from a hard-coded 1000.
    callback_slider = CustomJS(args=dict(source=source_slider, slider=slider),
                               code="""
                               const f = cb_obj.value
                               const x = source.data.x
                               const y = Array(x.length).fill(f)
                               source.data = { x, y }
                               """)
    slider.js_on_change('value', callback_slider)

    # column of the distribution plot + slider (= one figure), stored in the fig list
    fig.append(column(panel, slider))


# Empty placeholder figure shown under the row of histograms
test = figure()

show(column(row(fig), test))  # row of all the figures

    

In [None]:
# -*- coding: utf-8 -*-
"""
Created on Tue Apr  4 15:21:20 2023

@author: Gabriel

Notebook-embedded Bokeh app: one sideways histogram per latent-space dimension,
each with a slider. Moving a slider moves a marker line on its histogram (in the
browser) and updates the matching entry of a shared scatter plot (from Python).
"""

import itertools  # itertools handles the colour cycling

import numpy as np

from bokeh.layouts import row, column
from bokeh.plotting import figure, output_file, show
from bokeh.palettes import Category20
from bokeh.models import CustomJS, Slider, HoverTool, WheelZoomTool, ColumnDataSource, TextInput
from bokeh.io import output_notebook
output_notebook()


# Placeholder distributions used to build the plots, later to be replaced by
# the distribution of the values of each dimension of the latent space.
# Each entry is (mean, standard deviation). They are drawn in a comprehension so that
# no single-letter globals (f, i, x, y, ...) overwrite variables from earlier cells.
placeholder_params = [(-0.7, 0.3), (1, 0.1), (0, 0.6), (0.5, 0.2), (2, 0.1), (0.8, 0.8),
                      (-0.7, 0.3), (-2, 0.9), (0, 0.6), (0, 0.1), (1.5, 0.5), (-2, 0.9)]

# list of all the distributions that we iterate on
data_uniform = [np.random.normal(mean, std, size=10000) for mean, std in placeholder_params]
n_dims = len(data_uniform)

# Colour iterator, one colour per dimension. It is deliberately not called `colors`,
# which would overwrite the `matplotlib.colors` module imported at the top of the notebook.
# Not essential: colours should eventually come from the kinds, which give colours to the
# different clusters in EACH distribution plot.
palette_cycle = itertools.cycle(Category20[n_dims])

# Parameters of a reference Gaussian drawn on every dimension plot, just for comparison
# (later this could be the current value, for example)
ref_mean = -0.7
ref_sigma = 0.3

# Number of histogram bins, also used as the length of every plotted line
# so that all arrays have the same size
points = 1000

# Selection tools made available on each plot
select_tools = ['pan', 'box_select', 'tap', 'reset']

# Selected value of every dimension. It must have its final size from the start because
# entries are overwritten in place, never appended. It is a float array: an integer
# array would truncate the slider values written into it.
gen_list = np.zeros(n_dims)
gen_x_list = list(range(n_dims))
source_gen = ColumnDataSource(data=dict(x=gen_x_list, y=gen_list))

# Bokeh document of the running app; set by plot() once the app is started
document = None


def updatey(index, value):
    """Store `value` as the selection of dimension `index` and refresh the scatter source."""
    gen_list[index] = value
    source_gen.data = dict(x=gen_x_list, y=gen_list)


def build_dimension_panel(index, samples, color):
    """Return a column holding the histogram of `samples` and its selection slider.

    index   -- position of the dimension; used in the title and as the entry of
               gen_list that the slider updates
    samples -- 1D array of values of that dimension
    color   -- fill colour of the histogram bars
    """
    # NOTE(review): plot_width / plot_height are the older Bokeh names for width / height;
    # confirm they are still accepted by the installed Bokeh version.
    panel = figure(title=f"dimension {index}", plot_width=200, plot_height=300,
                   toolbar_location='left', tools=select_tools, y_axis_location="left")

    # Histogram of the samples, drawn sideways: bin edges run along y, counts along x.
    # edges[0] is the lowest sampled value of the dimension, edges[-1] the highest.
    hist, edges = np.histogram(a=samples, bins=points)
    panel.quad(top=edges[1:], bottom=edges[:-1], left=0, right=hist, color=color)
    panel.y_range.flipped = True

    # Reference probability density function, scaled by 20 to be visible next to the counts
    x0 = np.linspace(edges[0] - 1, edges[-1] + 1, points)
    y0 = np.exp(-0.5 * ((x0 - ref_mean) / ref_sigma) ** 2) / (np.sqrt(2 * np.pi) * ref_sigma)
    panel.line(x=20 * y0, y=x0, line_width=2, line_color="black")

    panel.add_tools(HoverTool(), WheelZoomTool())

    # Horizontal marker line representing the value selected with the slider.
    # This value will later be used to generate a new curve with the decoder.
    midpoint = (edges[0] + edges[-1]) / 2
    source_slider = ColumnDataSource(data=dict(x=np.linspace(0, 50, points),
                                               y=np.full(points, midpoint)))
    panel.line(x='x', y='y', source=source_slider, line_width=4, line_color="black")

    slider = Slider(start=edges[0] - 1, end=edges[-1] + 1, value=midpoint, step=.01,
                    title="Selected value ", width=200)

    # Browser-side callback: move the marker line to the slider value.
    # The y array is sized from x rather than from a hard-coded 1000.
    callback_slider = CustomJS(args=dict(source=source_slider, slider=slider),
                               code="""
                               const f = cb_obj.value
                               const x = source.data.x
                               const y = Array(x.length).fill(f)
                               source.data = { x, y }
                               """)

    def update(attr, old, new):
        # Python-side callback. `index` is this function's own argument, so every slider
        # updates its own dimension (a closure over a shared loop variable would make all
        # sliders write to the last dimension).
        if document is not None:
            document.add_next_tick_callback(lambda: updatey(index, new))

    slider.js_on_change('value', callback_slider)
    slider.on_change('value', update)

    return column(panel, slider)


# One (histogram + slider) column per dimension
fig = [build_dimension_panel(index, samples, next(palette_cycle))
       for index, samples in enumerate(data_uniform)]

# Scatter plot of the value currently selected for every dimension
test = figure()
test.scatter(x='x', y='y', source=source_gen)

layout2 = column(row(fig), test)  # row of all the figures, scatter plot underneath


def plot(doc):
    """Bokeh app entry point: attach the layout and remember the document for the callbacks."""
    # Without `global`, the assignment below would only bind a local name and the
    # module-level `document` would stay None, so the slider callbacks would never run.
    global document
    doc.add_root(layout2)
    document = doc


# NOTE(review): notebook_url is hard-coded to one specific host and port - adjust when
# running elsewhere.
show(plot, notebook_url='http://rat2.rfx.local:8889', notebook_handle=True)


In [None]:
pip install rise --user

In [None]:
jupyter-nbextension install rise

In [None]:
pip install -U rise --pre