# Dashboard Understanding


***Data Flow***

--> Select Points $(x, Q^2)$ 

--> add noise to cross section ***(created from concatenation of obs_p and obs_n of corresponding $(x,Q^2)$)***

--> send the cross-section data to the model 

--> generates output (parameters) 

--> Parameters are used to generate PDF UP and PDF DOWN 


***Equation to add Noise***

$(xsec)' = xsec + (\sigma * f * R)$

- uncertainty ($\sigma$)

- noise ($f$)

- Random value ($R$), drawn uniformly from $[0, 1)$

***To generate PDFs***

$x$ = an array of 100 evenly spaced points from 0.01 to 0.99

$Q^2$ = constant value

In [22]:
# libraries
import numpy as np #--- pdf,mlutils
import pandas as pd
import plotly.graph_objs as go
import dash
import matplotlib.pyplot as plt

#---mlutils
from tensorflow.keras.models import load_model
#from keras.models import load_model
import tensorflow as tf
import warnings
warnings.filterwarnings('ignore')
import h5py

#---pdf libs
from scipy.special import gamma
import sys,os

#--- only for jupyter
from ipywidgets import interactive, HBox, VBox

In [3]:
#-- Load the toy dataset from disk
# One np.load call per file, in the order X, Q2, obs_p, obs_n.
X, Q2, obs_p, obs_n = map(
    np.load,
    ("data/X.npy", "data/Q2.npy", "data/obs_p.npy", "data/obs_n.npy"),
)
#-- toy data: obs_p and obs_n are of shape (100,101)
obs_p.shape, obs_n.shape, X.shape, Q2.shape

((100, 101), (100, 101), (101,), (101,))

In [4]:
# Working table: one row per entry of X / Q2.
data = pd.DataFrame(X, columns=['X'], index=np.arange(len(X)))
data['Q2'] = Q2
#--- currently only the first sample (row 0) of each observable is used
data['obs_p'] = obs_p[0]
data['obs_n'] = obs_n[0]
#--- selection flag, switched on per point by the selection callback
data['is_selected'] = False
#--- (X, Q2) stored as a tuple so a selected point can be matched to its row
data['co-ord'] = [(x_val, q2_val) for x_val, q2_val in zip(X, Q2)]
data.head()

Unnamed: 0,X,Q2,obs_p,obs_n,is_selected,co-ord
0,0.012055,1.0,2.627395,0.727704,False,"(0.012055362337673993, 1.0)"
1,0.019467,1.0,2.862549,0.816804,False,"(0.019466967101596316, 1.0)"
2,0.019467,1.260221,2.798435,0.799198,False,"(0.019466967101596316, 1.260220615891982)"
3,0.019467,1.588156,2.739728,0.783056,False,"(0.019466967101596316, 1.5881560007191666)"
4,0.031435,1.0,3.108782,0.920222,False,"(0.031435206800076304, 1.0)"


In [32]:
#--- becomes True the first time the selection callback fires
dataSelected = False


def selectedData(trace, points, selector):
    """Selection callback: flag the selected (X, Q2) points and print them.

    Marks every selected point in the global ``data`` frame by setting its
    ``is_selected`` entry to True, then prints a summary frame with the
    columns X, Q2, obs_p and obs_n for the selection.

    Parameters
    ----------
    trace : the trace the selection was made on (unused).
    points : either a ``plotly.callbacks.Points`` object, which is what
        ``FigureWidget`` traces pass to ``on_selection`` callbacks and which
        exposes ``xs`` / ``ys``, or a Dash-style ``selectedData`` dict of the
        form ``{'points': [{'x': ..., 'y': ...}, ...]}``.
    selector : the box/lasso selector object (unused).
    """
    # Imported locally because a later cell rebinds the module-level name
    # `pd` to a NumPy array, which would break `pd.DataFrame` in this callback.
    import pandas

    global dataSelected
    dataSelected = True
    print(points)

    # Accept both payload shapes; the original only handled the dict form and
    # therefore failed on the Points object delivered by on_selection.
    if hasattr(points, 'xs') and hasattr(points, 'ys'):
        coords = list(zip(points.xs, points.ys))
    else:
        coords = [(point['x'], point['y']) for point in points['points']]

    rows = []
    for x_val, q2_val in coords:
        # Compute the row mask once per point instead of three times.
        mask = data['co-ord'] == (x_val, q2_val)
        #--- update the is_selected column
        data.loc[mask, 'is_selected'] = True
        matched = data.loc[mask, ['obs_p', 'obs_n']]
        if matched.empty:
            # No row carries exactly this coordinate; keep the point visible
            # in the summary with missing observables.
            obs_p_val = obs_n_val = float('nan')
        else:
            # Store scalars rather than one-element Series objects.
            obs_p_val, obs_n_val = matched.iloc[0]
        rows.append((x_val, q2_val, obs_p_val, obs_n_val))

    df = pandas.DataFrame(rows, columns=["X", "Q2", "obs_p", "obs_n"])
    print(df)

In [38]:
#--- scatter plot of every (X, Q2) point
figure = go.FigureWidget(
    data=go.Scattergl(x=data['X'], y=data['Q2'], mode='markers'),
    layout=go.Layout(xaxis_title="X", yaxis_title="Q2"),
)
#figure.show()
input_data = figure.data[0]

#--- for jupyter notebook
# Table widget shown under the scatter plot, listing the four data columns
t = go.FigureWidget([
    go.Table(
        header={
            'values': ['X', 'Q2', 'obs_p', 'obs_n'],
            'fill': {'color': '#C2D4FF'},
            'align': ['left'] * 5,
        },
        cells={
            'values': [data['X'], data['Q2'], data['obs_p'], data['obs_n']],
            'fill': {'color': '#F5F8FF'},
            'align': ['left'] * 5,
        },
    )
])

# Call selectedData whenever points are selected in the scatter plot
input_data.on_selection(selectedData)

# Put everything together ---- jupyter
VBox((figure, t))

False


VBox(children=(FigureWidget({
    'data': [{'mode': 'markers',
              'type': 'scattergl',
            …

#### Constant Values for Uncertainty and Noise (F-value)


In [7]:
# sigma in the noise formula xsec' = xsec + sigma * f * R
uncertainity_value = 5e-2
# f in the noise formula (noise scale)
f_value = 1

#### Adding noise to the cross-section values

In [8]:
def addNoiseSelected(data, n_value, uncertainity_value, validation_column, data_column, num_features, num_samples=1000):
    """Generate noisy replicas of one data column.

    For every replica, entries whose ``validation_column`` value equals True
    become ``value + uncertainity_value * R * n_value``, where ``R`` is a fresh
    ``np.random.rand`` draw (uniform on [0, 1)) per entry; all other entries
    are copied unchanged.

    Parameters
    ----------
    data : mapping of column name to 1-D values (e.g. a pandas DataFrame).
    n_value : noise scale factor, converted with ``float``.
    uncertainity_value : uncertainty factor, converted with ``float``.
    validation_column : name of the boolean selection column.
    data_column : name of the column to add noise to.
    num_features : number of entries in the column (length of each replica).
    num_samples : number of replicas to generate; defaults to 1000, the value
        that was previously hard-coded.

    Returns
    -------
    numpy.ndarray of shape ``(num_samples, num_features)``.
    """
    base = np.asarray(data[data_column])
    selected = np.asarray(data[validation_column]) == True  # noqa: E712 - element-wise comparison

    #--- formula for the selected data: xsec + uncertainity * random number * noise_value
    # A single (num_samples, num_features) draw replaces the per-replica loop.
    random_draws = np.random.rand(num_samples, num_features)
    noised = base + (float(uncertainity_value) * random_draws * float(n_value))

    #--- unselected entries keep their original value in every replica
    return np.where(selected, noised, base)


In [9]:
def calculate_xsec(p_Data, n_Data):
    """Join the two observable arrays side by side.

    Both inputs must be at least 2-D; they are concatenated along axis 1, so
    each output row holds the ``p_Data`` row followed by the ``n_Data`` row.
    """
    joined = np.concatenate([p_Data, n_Data], axis=1)
    return joined

In [10]:
#--- for now every point is treated as selected, so noise is added everywhere
data['is_selected'] = True

#--- noisy replicas of each observable, generated for obs_p first and then obs_n
obs_p_noised, obs_n_noised = (
    addNoiseSelected(data, f_value, uncertainity_value, 'is_selected', column, len(data[column]))
    for column in ('obs_p', 'obs_n')
)

#--- concatenate the replicas to get the cross-sectional data
xsec = calculate_xsec(obs_p_noised, obs_n_noised)

#### Running the model to generate the parameters

In [11]:
#def load_model(model):
#    model = load_model('mldata/%s.h5'%model) 
#    return model

In [14]:
def backwardPredict(fname, model, xsec_noised):
    """Predict parameters from the noised cross sections and save them.

    Loads the model stored at ``mldata/<model>.h5``, calls its ``predict``
    on ``xsec_noised``, saves the result to ``data/<fname>-par.npy`` and
    returns it.
    """
    model_path = 'mldata/%s.h5' % model
    network = load_model(model_path)

    # make the prediction
    pred = network.predict(xsec_noised)
    #pred = par_scaler.inverse_transform(pred)

    output_path = 'data/%s-par.npy' % fname
    np.save(output_path, pred)
    return pred

In [16]:
#--- output file stem and saved-model name used for the backward prediction
fname = 'test_backward'
model_select = 'my_model'
nn_pred = backwardPredict(fname=fname, model=model_select, xsec_noised=xsec)

#### Calculate PDFs from generated parameters

In [17]:
#--- need to know the significance of each term
Q02 = 4.0
lam2 = 0.2 ** 2
# Ten coefficients per flavour: entries 0-4 start at 1 and are overwritten by
# set_params; entries 5-9 are fixed at 0.1.
pu = np.concatenate((np.ones(5), np.full(5, 0.1)))
# NOTE(review): this rebinds `pd`, which the imports cell bound to pandas;
# any later `pd.DataFrame(...)` call will fail once this cell has run.
pd = np.concatenate((np.ones(5), np.full(5, 0.1)))

In [18]:
def set_params(par):
    """Copy ``par`` into the global coefficient arrays.

    ``par[:5]`` overwrites the first five entries of ``pu`` and ``par[5:]``
    the first five entries of ``pd``; both arrays are modified in place.
    """
    leading = slice(None, 5)
    pu[leading] = par[leading]
    pd[leading] = par[5:]
  
def get_s(Q2):
    """Return ``s = log( log(Q2/lam2) / log(Q02/lam2) )`` using the global ``lam2`` and ``Q02``."""
    log_scale = np.log(Q2 / lam2)
    log_reference = np.log(Q02 / lam2)
    return np.log(log_scale / log_reference)

def _get_shape(x, p):
    """Evaluate ``N * x**a * (1-x)**b * (1 + c*x + d*x**2)`` for ``p = [N, a, b, c, d]``."""
    norm, a, b, c, d = p[0], p[1], p[2], p[3], p[4]
    polynomial = 1 + c * x + d * x**2
    return norm * x**a * (1 - x)**b * polynomial


def get_shape(x, p, s):
    """Evaluate the shape function with coefficients that vary linearly in ``s``.

    ``p`` holds ten numbers: for k = 0..4 the effective coefficient is
    ``p[k] + p[k + 5] * s``, and the five results are passed to ``_get_shape``
    as ``[N, a, b, c, d]``.
    """
    shifted = [p[k] + p[k + 5] * s for k in range(5)]
    return _get_shape(x, shifted)

def get_pdf(x, Q2, flav):
    """Evaluate the shape function for flavour ``flav`` at ``x`` and ``Q2``.

    ``'u'`` uses the global ``pu`` coefficients and ``'d'`` the global ``pd``
    coefficients. Any other label returns None.
    """
    s = get_s(Q2)
    if flav == 'u':
        coefficients = pu
    elif flav == 'd':
        coefficients = pd
    else:
        # NOTE(review): unknown flavours yield None rather than raising.
        return None
    return get_shape(x, coefficients, s)
    
def calculate_pdf(data_par, Q2=4.0, num_points=100):
    """Evaluate the 'u' and 'd' shapes for every parameter set in ``data_par``.

    Each row of ``data_par`` is installed with ``set_params`` (which overwrites
    the leading entries of the global ``pu`` / ``pd`` arrays), and both
    flavours are then evaluated on a common x grid.

    Parameters
    ----------
    data_par : 2-D array with one parameter set per row.
    Q2 : scale at which the shapes are evaluated; defaults to 4.0, the value
        that was previously hard-coded.
    num_points : number of evenly spaced x values between 0.01 and 0.99;
        defaults to 100, the value that was previously hard-coded.

    Returns
    -------
    dict with keys ``'u'`` and ``'d'`` (arrays with one row per parameter set)
    and ``'x-axis'`` (the x grid).
    """
    x = np.linspace(0.01, 0.99, num_points)
    u = []
    d = []
    for par in data_par:
        # set_params mutates the globals read by get_pdf, so both flavours
        # must be evaluated before the next row is installed.
        set_params(par)
        u.append(get_pdf(x, Q2, 'u'))
        d.append(get_pdf(x, Q2, 'd'))
    u = np.array(u)
    d = np.array(d)
    print('--> (RUNLOG) - Up data shape : ',u.shape)
    print('--> (RUNLOG) - Down data Shape : ',d.shape)
    return {'u' : u, 'd' : d, 'x-axis' : x}

In [19]:
#--- 'u' / 'd' curves for every predicted parameter set, plus the shared x grid
pdf_dict = calculate_pdf(data_par=nn_pred)

--> (RUNLOG) - Up data shape :  (1000, 100)
--> (RUNLOG) - Down data Shape :  (1000, 100)


In [21]:
#--- Plot the figure Ratio Plot
figure_g = go.Figure()
#--- PDF UP shown as a ratio to its own mean: central value 1 at every x
pdf_up_trace = go.Scattergl(
                        x = pdf_dict['x-axis'],
                        y = np.ones(len(pdf_dict['x-axis'])),#pdf_dict['u'].mean(axis=0),
                        name='PDF UP',
                        showlegend=True,
                        error_y=dict(
                                type='data',
                                color='orange',
                                # Relative spread of PDF UP. The band must be normalised by
                                # the UP mean (it was divided by the DOWN mean before) so it
                                # is consistent with the central value of 1.
                                array=pdf_dict['u'].std(axis=0)/pdf_dict['u'].mean(axis=0),
                                #array=pdf_dict['u'].std(axis=0)*5,
                                visible=True
                                )
                        )
#--- Plotting pdf_up
figure_g.add_trace(pdf_up_trace)