In [2]:
import pandas as pd
import numpy as np
import os
import itertools as it

import plotly
import plotly.plotly as py
import plotly.graph_objs as go
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
from plotly.graph_objs import *

import collections

def recursively_default_dict():
    """Return an arbitrarily nested ``defaultdict``.

    The returned mapping uses this very function as its default factory, so
    looking up a missing key creates another nested ``defaultdict`` of the
    same kind. This allows assignments such as ``d[a][b] = value`` without
    creating the intermediate levels by hand.
    """
    nested_factory = recursively_default_dict
    return collections.defaultdict(nested_factory)

# Initialise plotly's offline notebook mode so that figures passed to `iplot`
# are rendered inline in this notebook.
init_notebook_mode(connected=True)

Classification summary across thresholds used. This notebook analyses classification proportions across specific groups following supervised local kernel density analysis. The analyses should have been run using one or more classification thresholds. Then, for each threshold, a classification summary across specific groups must have been produced using the notebook [Summary_classifications](https://nbviewer.jupyter.org/github/SantosJGND/Galaxy_KDE_classifier/blob/master/Summary_functions/Summary_output_examples/Summary_classification/Summary_exploration.ipynb). This notebook combines the output of the script `Summary_analyses.py`, which performs classification for a predetermined set of thresholds and outputs the genome proportion assigned to each class by individual, with passport information, allowing the user to parse this output using the global classifications of each individual.

In this notebook, we analyse the average proportion assigned to each class across a range of *p*-value comparison thresholds for the groups Japonica, Indica, cAus (labelled `Aus` in the code below) and cBasmati.

In [9]:
# Comparison thresholds analysed; each has its own `Summary_th<threshold>` directory.
th_stock= [1,1.2,1.4,1.7,2,3,4,5]

# Groups to summarise; one `<group>_summary_MEAN.txt` file is read per group
# and per threshold.
groups= ['Japonica','Aus','Indica','cBasmati']

# Gpstats[group][threshold] -> Series of `length_%` values, positionally
# aligned with `Colz` (row i of the Series belongs to colour Colz[i]).
Gpstats= recursively_default_dict()

# Reference order of the `color` column, taken from the first file read.
# Every other file is aligned to it, so a single positional index selects the
# same class in every Series regardless of the row order on disk.
Colz= None

for thresh in th_stock:

    for gp in groups:
        filename= 'Summary_th' + str(thresh) + '/Summary_CSVs/' + gp + '_summary_MEAN.txt'

        df= pd.read_csv(filename,sep= '\t')

        if Colz is None:
            Colz= df.color.reset_index(drop=True)

        # Look values up by colour rather than by row number. If a colour is
        # repeated in a file its first row is used; a colour missing from a
        # file yields NaN at that position.
        by_color= df.drop_duplicates(subset='color').set_index('color')['length_%']

        Gpstats[gp][thresh]= by_color.reindex(Colz).reset_index(drop=True)
        
        

In [8]:
from plotly import tools

# ---- Plot configuration ----
Ncols= 2             # number of subplot columns in the grid
titles= groups       # one subplot per group, titled with the group name
yaxis= '% gen'       # y-axis label; a falsy value skips the labelling
xaxis= 'threshold'   # x-axis label; a falsy value skips the labelling

print(titles)

# Colours, as found in the `color` column of the summary files, to be plotted.
Color_select= ['red','blue','yellow','purple','green','orange','silver']

# Row position of each selected colour in the summary tables.
# `list.index` raises ValueError if a selected colour is absent from `Colz`.
color_order= list(Colz)
Select_indx= [color_order.index(x) for x in Color_select]

# Legend label shown for each colour.
Color_class= {
    'red': 'Indica',
    'yellow': 'cAus',
    'blue': 'Japonica',
    'purple': 'Indica-Japonica',
    'green': 'Japonica-cAus',
    'orange': 'Indica-cAus',
    'silver': 'Indica-Japonica-cAus'
}

# Ceiling division: enough rows to hold every subplot.
Nrows= -(-len(titles) // Ncols)

fig_subplots = tools.make_subplots(rows= Nrows, cols=Ncols,
                         subplot_titles=tuple(titles))

#####
for gp, Gp in enumerate(titles):

    # 1-based (row, column) of this group's subplot, filling the grid row by row.
    pos1= gp // Ncols + 1
    pos2= gp % Ncols + 1

    for colour, row_idx in zip(Color_select, Select_indx):
        class_name= Color_class[colour]

        trace1= go.Scatter(
                x = th_stock,
                y = [Gpstats[Gp][z][row_idx] for z in th_stock],
                mode= 'lines+markers',
                name= class_name,
                # The same class is drawn once per subplot: group those traces
                # so they toggle together, and list the class only once in the
                # legend instead of once per subplot.
                legendgroup= class_name,
                showlegend= (gp == 0),
                marker= dict(
                    color= colour
                )
            )

        fig_subplots.append_trace(trace1, pos1, pos2)

    if yaxis:
        fig_subplots['layout']['yaxis' + str(gp + 1)].update(title=yaxis)
    if xaxis:
        fig_subplots['layout']['xaxis' + str(gp + 1)].update(title=xaxis)


# `fig` is an alias of the figure that is displayed, so both names refer to
# the same subplot grid.
fig= fig_subplots
iplot(fig)



['Japonica', 'Aus', 'Indica', 'cBasmati']
This is the format of your plot grid:
[ (1,1) x1,y1 ]  [ (1,2) x2,y2 ]
[ (2,1) x3,y3 ]  [ (2,2) x4,y4 ]

