In [1]:
import scipy
import numpy as np
from sklearn.neighbors import KernelDensity
from sklearn.decomposition import PCA
from sklearn.model_selection import GridSearchCV
from sklearn.cluster import estimate_bandwidth
from sklearn.cluster import MeanShift, estimate_bandwidth

import pandas as pd

from scipy import stats
from scipy.stats import beta
from math import sin
from random import randint
from IPython.display import clear_output
import matplotlib.pyplot as plt
import itertools as it

import plotly
import plotly.graph_objs as go
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot

from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets
init_notebook_mode(connected=True)

import collections

def recursively_default_dict():
        return collections.defaultdict(recursively_default_dict)

from matplotlib.collections import BrokenBarHCollection
import re

from structure_tools.Modules_tools import return_fsts

PCA_color_ref= ['darkseagreen','crimson', 'darkorange', 'darkblue', 'darkcyan',
            'darkgoldenrod', 'darkgray', 'darkgrey', 'darkgreen',
            'darkkhaki', 'darkmagenta', 'darkolivegreen', 'darkorange',
            'darkorchid', 'darkred', 'darksalmon', 'darkseagreen',
            'darkslateblue', 'darkslategray', 'darkslategrey',
            'darkturquoise', 'darkviolet', 'deeppink']

## vcf analysis
Jupyter notebook for the local analysis of genetic data stored in .vcf format.

Perform analysis of structure across data set, followed by a more detailed study of variation across local genomic windows.

### Input

In [2]:
from structure_tools.vcf_geno_tools import simple_read_vcf

vcf_file= 'data_cleanRefs_Gap01_Admx.vcf'

genotype, summary, info_save= simple_read_vcf(vcf_file,row_info= 5,header_info= 9,phased= True)

print('Number of markers: {}'.format(genotype.shape[1]))
print('Number of individuals: {}'.format(genotype.shape[0]))

Number of markers: 40000
Number of individuals: 130


In [3]:
summary.head()


Unnamed: 0,CHROM,POS,ID,REF,ALT,QUAL,FILTER,INFO,FORMAT
0,1,37,1,A,T,.,PASS,.,GT:AD:DP
1,1,135,2,A,T,.,PASS,.,GT:AD:DP
2,1,149,3,A,T,.,PASS,.,GT:AD:DP
3,1,252,4,A,T,.,PASS,.,GT:AD:DP
4,1,293,5,A,T,.,PASS,.,GT:AD:DP


### Global variation

Perform PCA across data set.

Perform Mean shift clustering to attempt to extract genetically coherent groups of accessions.

These will later be used for supervised analysis.

In [4]:
from structure_tools.Tutorial_subplots import plot_global_pca

## Perform PCA
n_comp= 3
pca = PCA(n_components=n_comp, whiten=False,svd_solver='randomized')

feats= pca.fit_transform(genotype)

In [5]:
## perform MeanShift clustering.
bandwidth = estimate_bandwidth(feats, quantile=0.2)

ms = MeanShift(bandwidth=bandwidth, bin_seeding=False, cluster_all=True, min_bin_freq=45)
ms.fit(feats)
labels1 = ms.labels_
label_select = {y:[x for x in range(len(labels1)) if labels1[x] == y] for y in sorted(list(set(labels1)))}
###

In [6]:
###
plot_global_pca(feats,label_select,PCA_color_ref,title= 'global_pca',height= 500,width= 950)

In [7]:
select_refs= [0,1,2,4]
label_vector= [[max(select_refs)+1,labels1[x]][int(labels1[x] in select_refs)] for x in range(genotype.shape[0])]

Whose= list(range(genotype.shape[0]))


In [8]:
summary.shape

(40000, 9)

In [9]:
SequenceStore= {}

Nwindows= 100
Wsizes= 100
chrom= 1
wst= np.random.randint(0,genotype.shape[1] - Wsizes,size= Nwindows)
wst= np.linspace(0,genotype.shape[1] - Wsizes,Nwindows,dtype= int)
SequenceStore= {
    chrom: {summary.POS[st]: genotype[:,st:(st+Wsizes-1)] for st in wst}
}

Out= {chrom: {summary.POS[st]: summary.POS[st+ Wsizes - 1]for st in wst}}



In [10]:
supervised= True

Bandwidth_split= 30 # grid split for kde 
KDE_comps= 4 # PCA components to retain
clsize= 15 # minimum cluster size to retain during ms clustering
control_sampling= False
control_N= 100
amova= True # whether to calculate amova.

In [11]:
from structure_tools.StructE_tools import findPhiPT, Structure_profiles, Distance_profiles

from structure_tools.AMOVA_func import amova_cofactor, AMOVA_FM42
from structure_tools.mstutorial_tools import Windows_KDE_amova

### Perform Distance and association analysis on the data sets generated
ref_gps= [0,1,2]

refs_lib= {v:g for v,g in label_select.items() if v in ref_gps}
admx_lib= {v:g for v,g in label_select.items() if v not in ref_gps}
admx_lib.update(refs_lib)
import itertools as it

Results, Construct, PC_var= Windows_KDE_amova(SequenceStore,
                                              admx_lib,
                                              refs_lib,
                                              supervised= True,
                                              amova= True,
                                              ncomps= KDE_comps,
                                              clsize= clsize,
                                              Bandwidth_split= Bandwidth_split)

chr 1, where: 1995078, supervised: True, n clusters: 3
old: ; jaccard: 0.10551612751901025; PCA euc: 0.4007341339187106; nHam: 0.20029415642391907


In [12]:
AMOVA_stats= [[[Chr,wind,*Results[Chr][wind]] for wind in Results[Chr].keys()] for Chr in Results.keys()]
AMOVA_stats= np.array([y for y in it.chain(*AMOVA_stats)])

Names= ['updt jaccard','updt euc','updt hamming']

fig_data= [go.Scatter(
    x= AMOVA_stats[:,1],
    y= AMOVA_stats[:,x],
    mode= 'markers',
    name= Names[x - 3]
) for x in range(3,6)]

layout = go.Layout(
    title= 'Stats',
    yaxis=dict(
        title='AMOVA'),
    xaxis=dict(
        title='Windows')
)

fig= go.Figure(data=fig_data, layout=layout)
iplot(fig)

In [13]:
select_refs= [0,1,2,4]
label_vector= [[len(select_refs),labels1[x]][int(labels1[x] in select_refs)] for x in range(genotype.shape[0])]

Whose= list(range(genotype.shape[0]))


In [14]:
Names=['id' + str(x) for x in range(len(Whose))]
Fam= {
    Names[x]:x for x in range(len(Names))
}

Fam.update({
    x:Names[x] for x in range(len(Names))
})

###
Dr_dim= 3

###
focus_subset= False
Geneo= admx_lib
Focus_group= 0

Focus= [Names[x] for x in Geneo[Focus_group]]

###
Dr_var= 'all'
target_var= [0]

##

In [15]:
from sklearn.cluster import DBSCAN
from sklearn.cluster import AgglomerativeClustering
from sklearn.cluster import KMeans

Method= 'MeanShift'

Cl_store= {
    'MeanShift':{
        'Clusterfunc': MeanShift,
        'cluster_kwargs': {
            'bin_seeding': False,
            'cluster_all': True,
            'min_bin_freq': 15
        }
    },
    'DBscan':{
        'Clusterfunc': DBSCAN,
        'cluster_kwargs': {
            'min_samples': 15
        }
    },
    'Ward':{
        'Clusterfunc': AgglomerativeClustering,
        'cluster_kwargs': {
            'linkage': 'ward',
            'n_clusters': 4
        }
    },
    'Kmeans':{
        'Clusterfunc': KMeans,
        'cluster_kwargs': {
            'random_state': 0,
            'n_clusters': 3
        }
    }
}

In [16]:
from structure_tools.mstutorial_tools import MAC_process

preProc_Clover, Cameo, Coordinates, COMPS, X_se, label_select, Subset, labels1= MAC_process(Construct,
                                                                             Out,
                                                                             Cl_store,
                                                                             refs_lib,
                                                                             Fam,
                                                                             Names= Names,
                                                                             target_var= target_var,
                                                                             Dr_var= Dr_var,
                                                                             focus_subset= focus_subset,
                                                                             Focus= Focus,
                                                                             Dr_dim= Dr_dim,
                                                                             Method= Method)

Clover shape:  (280, 130)
Clover shape:  (280, 130)
focusing Dr on all


In [17]:
from plotly import tools
from structure_tools.mstutorial_tools import KDE_pca

KDE_pca(feats= feats,Cameo= Cameo,label_vector= label_vector,Subset= Subset, 
       Col_vec= PCA_color_ref)

['Global', 'Global', 'cluster 1', 'cluster 1', 'cluster 2', 'cluster 2', 'cluster 3', 'cluster 3', 'cluster 4', 'cluster 4']



plotly.tools.make_subplots is deprecated, please use plotly.subplots.make_subplots instead



### Reconstruct actual tree. 

Chose nodes to construct the tree with.

In [18]:
cluster_include= [0,1,2]

label_keep= {z:g for z,g in label_select.items() if z in cluster_include}

In [74]:


def MS_get_norm(Sequences,refs_lib,ncomps= 4,clsize= 15,Bandwidth_split= 20,
               pca_qtl= 0.2):
    '''
    Perform PCA + Mean Shift across windows. Extract Meanshift p-value vectors. Perform amova (optional).
    '''

    pca = PCA(n_components=ncomps, whiten=False,svd_solver='randomized').fit(Sequences)
    data = pca.transform(Sequences)

    params = {'bandwidth': np.linspace(np.min(data), np.max(data),Bandwidth_split)}
    grid = GridSearchCV(KernelDensity(algorithm = "ball_tree",breadth_first = False), params,verbose=0,cv= 3, iid= False)

    ######################################
    ####### TEST global Likelihood #######
    ######################################
    Focus_labels = [z for z in it.chain(*refs_lib.values())]

    #### Mean Shift approach
    ## from sklearn.cluster import MeanShift, estimate_bandwidth

    bandwidth = estimate_bandwidth(data, quantile= pca_qtl, n_samples=len(Focus_labels))
    if bandwidth <= 1e-3:
        bandwidth = 0.1

    ms = MeanShift(bandwidth=bandwidth, cluster_all=False, min_bin_freq=clsize)
    ms.fit(data[Focus_labels,:])
    labels = ms.labels_


    Tree = {x:[Focus_labels[y] for y in range(len(labels)) if labels[y] == x] for x in [g for g in list(set(labels)) if g != -1]}
    Keep= [x for x in Tree.keys() if len(Tree[x]) > clsize]

    Tree= {x:Tree[x] for x in Keep}
    Ngps= len(Tree)

    ### Extract MScluster likelihood by sample

    dist_store= {}

    for hill in Tree.keys():

        grid.fit(data[Tree[hill],:])

        # use the best estimator to compute the kernel density estimate
        kde = grid.best_estimator_

        # normalize kde derived log-likelihoods, derive sample p-values
        P_dist = kde.score_samples(data[Tree[hill],:])
        Dist = kde.score_samples(data)
        P_dist= np.nan_to_num(P_dist)
        Dist= np.nan_to_num(Dist)
        if np.std(P_dist) == 0:
            Dist= np.array([int(Dist[x] in P_dist) for x in range(len(Dist))])
        else:
            Dist = scipy.stats.norm(np.mean(P_dist),np.std(P_dist)).cdf(Dist)
            Dist= np.nan_to_num(Dist)
            dist_store[hill]= Dist
    
    return Tree,dist_store,data


def kde_gen_dict(data,label_dict):
    '''
    create dictionary of group kde generators in data space.
    '''
    
    params = {'bandwidth': np.linspace(np.min(data), np.max(data),Bandwidth_split)}
    grid = GridSearchCV(KernelDensity(algorithm = "ball_tree",breadth_first = False), params,verbose=0,cv= 3, iid= False)

    ref_gens= {}

    for hill in label_dict.keys():

        grid.fit(data[label_dict[hill],:])
        # use the best estimator to compute the kernel density estimate
        kde = grid.best_estimator_
        ref_gens[hill]= kde
    
    return ref_gens

def gen_class(samples,ref_generators,lb= 1e-3,out_code= -1):
    '''
    use kde generators in dictionary to score and classify samples.
    '''
    
    ref_keys= list(ref_generators.keys())
    score_dict= {z: g.score_samples(samples) for z,g in ref_generators.items()}
    
    score_array= [score_dict[z] for z in ref_keys]
    score_array= np.array(score_array)
    score_array= np.exp(score_array)
    
    maxl= np.argmax(score_array,axis= 1)
    maxl= np.array(ref_keys)[maxl]
    
    maxs= np.max(score_array,axis= 1)
    maxs= maxs < lb
    
    maxl[maxs]= out_code
    #print(maxl)
    return maxl



def clustClass(ms_local,pca_obj,ref_gens,out_code= -1, 
               return_mean= True,lb= 1e-2):
    '''
    ms_local= distances by cluster.
    '''
    
    mskeys= list(ms_local.keys())

    ## 
    dist_array= [ms_local[g] for g in mskeys]
    dist_array= np.array(dist_array)
    qtl_dist= pca_obj.transform(dist_array)

    ## Classify kde profiles. 
    cluster_class= gen_class(qtl_dist,ref_gens,lb= lb, 
                             out_code= out_code)

    if len(list(set(cluster_class[cluster_class != out_code]))) <= 1:
        return {}

    cluster_found= {z: [x for x in range(len(cluster_class)) if cluster_class[x] == z] for z in list(set(cluster_class)) if z != -1}

    for v,g in cluster_found.items():
        dist_foud= qtl_dist[g]
        if dist_foud.shape[0] > 1:
            dist_foud= np.mean(g,axis= 1)

        g= dist_foud    
    
    return cluster_found



In [153]:
Nwindows= 80
Wsizes= 100
chrom= 1
wst= np.random.randint(0,genotype.shape[1] - Wsizes,size= Nwindows)
wst= np.linspace(0,genotype.shape[1] - Wsizes,Nwindows,dtype= int)
SequenceStore= {
    chrom: {summary.POS[st]: genotype[:,st:(st+Wsizes-1)] for st in wst}
}

Out= {chrom: {summary.POS[st]: summary.POS[st+ Wsizes - 1]for st in wst}}


In [162]:
from IPython.display import clear_output
from sklearn.metrics import pairwise_distances

pca_qtl= 0.2
ncomps= 2
clsize= 15
Bandwidth_split= 20
out_code= -1
metric= 'euclidean'
lb= 1e-3
cl_samp= 50

Geneo= admx_lib
Geneo_order= list(Geneo.keys())
ref_order= list(refs_lib.keys())

Whose= list(range(sum([len(x) for x in Geneo.values()])))
Sup_labels= list(np.repeat(Geneo_order,[len(Geneo[x]) for x in Geneo_order]))

### Define parameters and libraries of analyses.

Results = {x:recursively_default_dict() for x in SequenceStore.keys()}

###
###
clov_pca= PCA(n_components=ncomps, whiten=False,svd_solver='randomized').fit(preProc_Clover)
data_clov= clov_pca.transform(preProc_Clover)


ref_gens= kde_gen_dict(data_clov,label_keep)
dists_dict= {z:{y:[] for y in ref_gens.keys()} for z in ref_gens.keys()}


for CHR in SequenceStore.keys():
    print('going on CHR: '+ str(CHR))
    for c in SequenceStore[CHR].keys():

        ### PCA and MeanShift of information from each window copied from *FM36_Galaxy.py.
        Sequences= [SequenceStore[CHR][c][x] for x in Whose]
        Sequences= np.array(Sequences) 
        Sequences= np.nan_to_num(Sequences)
        
        clust_acc, ms_local,feat_seq= MS_get_norm(Sequences,refs_lib,ncomps= ncomps,clsize= clsize,Bandwidth_split= Bandwidth_split,
               pca_qtl= pca_qtl)
        
        mskeys= list(ms_local.keys())
        
        cluster_found= clustClass(ms_local,clov_pca,ref_gens,out_code= out_code)
        
        if not cluster_found:
            continue
        
        cluster_found= {mskeys[z]:g for z,g in cluster_found.items()}
        cluster_keys= list(cluster_found.keys())
        lclust_gens= kde_gen_dict(feat_seq,clust_acc)
        
        
        lclust_samp= {z:g.sample(cl_samp) for z,g in lclust_gens.items()}
        lclust_means= {z: np.mean(g,axis= 0) for z,g in lclust_samp.items()}
        
        #print([x.shape for x in lclust_samp.values()])
        hills= [lclust_means[z] for z in cluster_keys]
        hills= np.array(hills)
        hill_dists= pairwise_distances(hills,metric= metric)
        
        for idx in range(len(cluster_keys)):
            for idx1 in range(len(cluster_keys)):
                if idx != idx1:
                    cd1= cluster_keys[idx]
                    cd2= cluster_keys[idx1]
                    dists_dict[cd1][cd2].append(hill_dists[idx,idx1])

    

going on CHR: 1


In [163]:
lclust_means.keys()

dict_keys([0, 1, 2])

In [164]:
cluster_keys

[0, 1, 2]

In [165]:
hill_dists

array([[0.        , 7.37801283, 7.52259053],
       [7.37801283, 0.        , 7.31567097],
       [7.52259053, 7.31567097, 0.        ]])

In [167]:
from plotly import subplots

def D1_kdegen(dists_dict,kernel='gaussian', bandwidth=0.05):
    '''
    '''
    gen_dict= {}
    for gp,data in dists_dict.items():
        
        if not data:
            gen_dict[gp]= data
            continue
        data= np.array(data).reshape(-1,1)
        
        kde = KernelDensity(kernel=kernel, bandwidth=bandwidth).fit(data)
        gen_dict[gp]= kde

    return gen_dict



def plot_distances(dists_dict,gp,range_dists,height= 500,width= 900):
    Ncols= 1
    
    keys_get= sorted([v for v,g in dists_dict[gp].items() if len(g)])
    titles= ['cl: {}'.format(g) for g in keys_get]

    fig_subplots = subplots.make_subplots(rows= int(len(titles) / float(Ncols)) + (len(titles) % Ncols > 0), cols=Ncols,
                             subplot_titles=tuple(titles))
    
    for idx in range(len(titles)):
        print(idx)
        ref= keys_get[idx]
        pos1= int(float(idx) / Ncols) + 1
        pos2= idx - (pos1-1)*Ncols + 1

        title= titles[idx]
        
        data= dists_dict[gp][ref]
        data= np.array(data).reshape(-1,1)
        kde = KernelDensity(kernel='gaussian', bandwidth=.5).fit(data)
        scor_dist= kde.score_samples(range_dists)
        scor_dist= np.exp(scor_dist)
        trace1= go.Scatter(
            y= scor_dist,
            x= range_dists.T[0],
            mode= 'markers',
            name= titles[idx]
        )
        
        fig_subplots.append_trace(trace1, pos1, pos2)
        
        fig_subplots['layout']['yaxis' + str(idx + 1)].update(title= 'L')
        fig_subplots['layout']['yaxis' + str(idx + 1)].update(range= [0,max(scor_dist) + max(scor_dist)/10])
        fig_subplots['layout']['xaxis' + str(idx + 1)].update(title= 'pca dist')

    layout = go.Layout(
        title= title,
    )

    fig= go.Figure(data=fig_subplots, layout=layout)
    
    fig['layout'].update(height= height,width= width)
    
    
    iplot(fig)



range_dists= np.linspace(0,12,100)
range_dists= range_dists.reshape(-1,1)
gp= 0

for gp in sorted(dists_dict.keys()):
    plot_distances(dists_dict,gp,range_dists,height= 500,width= 900)

0
1


0
1


0
1


In [168]:
### Compare Full data and inferred true distances:


In [169]:
print('full data set shape: {}'.format(genotype.shape))

nan_n= 1

xnan= np.random.randint(0,genotype.shape[1],size= nan_n)
ynan= np.random.randint(0,genotype.shape[0],size= nan_n)

nan_coords= [ynan,xnan]
nan_coords= np.array(nan_coords).T

print(nan_coords)


full data set shape: (130, 40000)
[[   81 19478]]


In [170]:
nan_idx= 0

nan_obs= nan_coords[nan_idx]
nan_acc= nan_obs[0]
nan_pos= nan_obs[1]

wind_sizes= 100
Nreps= 400
ncomps= 5
dimN= 2
metric= 'euclidean'


In [173]:
local_l= genotype[:,(nan_pos-int(wind_sizes/2)):(nan_pos+int(wind_sizes/2))]
coords= {z:[x for x in range(len(label_vector)) if label_vector[x] == z] for z in list(set(label_vector))}

pca2 = PCA(n_components=ncomps, whiten=False,svd_solver='randomized')
featl= pca2.fit_transform(local_l)

figwl= [go.Scatter(
    x= feat_seq[coords[i],0],
    y= feat_seq[coords[i],1],
    mode= 'markers',
    name= str(i)
) for i in coords.keys()]

figwl.append(go.Scatter(
    mode='markers',
    x=[feat_seq[nan_acc,0]],
    y=[feat_seq[nan_acc,1]],
    marker=dict(
        color='rgba(135, 206, 250, 0)',
        size=25,
        opacity= 1,
        line=dict(
            color='red',
            width=5
        )
    ),
    showlegend=False
))

layout= go.Layout()

Figure_wl= go.Figure(data= figwl, layout= layout)

iplot(Figure_wl)

In [174]:
### Apply to coordinate.

In [175]:
Sequences.shape

(130, 99)

In [176]:
lb= 1e-2

clust_acc, ms_local, feat_seq= MS_get_norm(local_l,refs_lib,ncomps= ncomps,clsize= clsize,Bandwidth_split= Bandwidth_split,
       pca_qtl= pca_qtl)

mskeys= list(ms_local.keys())

cluster_found= clustClass(ms_local,clov_pca,ref_gens,lb= lb,
                          out_code= out_code)


cluster_found= {mskeys[z]:g for z,g in cluster_found.items()}
clustidx_keep= {z:g for z,g in clust_acc.items() if z in cluster_found.keys()}

cluster_keys= list(cluster_found.keys())
lclust_gens= kde_gen_dict(featl,clustidx_keep)

lclust_samp= {z:g.sample(cl_samp) for z,g in lclust_gens.items()}
lclust_means= {z: np.mean(g,axis= 1) for z,g in lclust_samp.items()}

In [185]:
from impute_tools.impute_tools import get_bg_grid


P= 50
dimN= 2
expand= 3

Quanted_set= np.array(featl) * expand

background= get_bg_grid(Quanted_set, P= P, dimN= dimN)

In [207]:

def comb_score(background,lclust_samp= {},dists_gens= {},select_missing= 0,dimN= 2, metric= "euclidean"):
    dist_refs= {}
    dist_refs= {
        z: pairwise_distances(background,g[:,:dimN],metric= metric) for z,g in lclust_samp.items()
    }


    dist_refMeans= {z: np.mean(g,axis= 1) for z,g in dist_refs.items()}
    
    select_gens= {}

    for gp,g in dists_gens.items():
        if gp == select_missing:
            continue

        if g[select_missing]:
            select_gens[len(select_gens)]= g[select_missing]


    ##
    bg_score= {z: g.score_samples(dist_refMeans[z].reshape(-1,1)) for z,g in select_gens.items()}
    bg_scores= np.array(list(bg_score.values()))
    bg_scores= np.exp(bg_scores)
    bg_scof= np.prod(bg_scores,axis= 0)
    ##
    return bg_scof

##
dists_gens= {z:D1_kdegen(g) for z,g in dists_dict.items()}

##

select_missing= 0

bg_scof= comb_score(background,lclust_samp= lclust_samp,dists_gens= dists_gens,
                    select_missing= 0,dimN= dimN, metric= metric)

In [208]:


figwl= [go.Scatter(
    mode='markers',
    x=background[:,0],
    y=background[:,1],
    marker= {
    'color':bg_scof,
    'colorbar': go.scatter.marker.ColorBar(
        title= 'ColorBar'
    ),
    'colorscale':'Viridis',
    'line': {'width': 0},
    'size': 25,
    'symbol': 'circle',
  "opacity": 1
  }
)]

layout= go.Layout()

Figure_wl= go.Figure(data= figwl, layout= layout)

iplot(Figure_wl)

In [210]:
from impute_tools.impute_tools import (
    nBg_MS, nBg_grid,
    gridWalk
)


P= 20
dimN= 2
N_samps= P**dimN
dist_comps= 10
Bandwidth_split = 30
kernel= 'gaussian'



select_missing= 0

BG_func= comb_score
BG_args= {
    'lclust_samp': lclust_samp,
    'dists_gens': dists_gens,
    'select_missing': 0,
    'dimN': dimN, 
    'metric': metric
}


granted, grid_likes= gridWalk(featl,dist_ref,BG_func, BG_args= BG_args, std_gp_use= std_gp_use,
            P= P,
            dimN= dimN,
            N_samps= N_samps,
            dist_comps= dist_comps,
            Bandwidth_split = Bandwidth_split,
            metric= metric,
            kernel= kernel)


NameError: name 'dist_ref' is not defined

In [None]:
from plotly import tools

title= 'coords'
fig_subplots = tools.make_subplots(rows=1, cols=2,subplot_titles=tuple([title]*2))

for trace in figwl:
    fig_subplots.append_trace(trace, 1, 1)
    

trace= go.Scatter(
    x= granted[:,0],
    y= granted[:,1],
    #z= grid_likes,
    mode= 'markers',
    marker= {
        'color':grid_likes,
        'colorbar': go.scatter.marker.ColorBar(
            title= 'ColorBar'
        ),
        'colorscale':'Viridis',
        'line': {'width': 0},
        'size': 5,
        'symbol': 'circle',
      "opacity": 1
      }
)

fig_subplots.append_trace(trace, 1,2)

iplot(fig_subplots)