## Random Forest classification (Figure 4)


In [20]:
%matplotlib inline
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from ateamopt.utils import utility
import man_opt.utils as man_utils
import os
import matplotlib as mpl
from floweaver import *
import feather
import warnings
warnings.filterwarnings('ignore')

### Data paths

In [27]:
# Every input file sits in assets/aggregated_data, located two directory
# levels above the current working directory.
_up_two_levels = os.path.join(os.pardir, os.pardir)
data_path = os.path.join(os.getcwd(), _up_two_levels, 'assets', 'aggregated_data')


def _in_data_dir(filename):
    """Return the path of `filename` inside `data_path`."""
    return os.path.join(data_path, filename)


# Colour lookup for the broad subclasses (pickle file).
broad_subclass_colors_filename = _in_data_dir('broad_subclass_colors.pkl')
# Annotation table (feather file).
annotation_datapath = _in_data_dir('anno.feather')
# Saved classifier prediction records, one pickle per label granularity.
subclass_clf_results_path = _in_data_dir('Subclass_pred_record.pkl')
broad_subclass_clf_results_path = _in_data_dir('Broad_subclass_pred_record.pkl')

### Read the data

In [28]:
# Labels used throughout the notebook; the list order is also the order in
# which the colour dictionaries below are built.
subclasses = ["Vip", "Sst", "Pvalb", "L2/3 IT", "L4", "L5 PT", "L5 IT"]
broad_subclasses = ["Vip", "Sst", "Pvalb", "Pyr"]

# Annotation table: only the label/colour pair of columns is needed here.
annotation_df = feather.read_dataframe(annotation_datapath)
anno_subclass_colors = annotation_df.loc[:, ['subclass_label', 'subclass_color']]

# Collapse the table to one row per distinct (label, colour) pair and turn
# it into a {subclass_label: subclass_color} mapping.
subclass_colors = (
    anno_subclass_colors
    .drop_duplicates()
    .set_index('subclass_label')['subclass_color']
    .to_dict()
)

# NOTE(review): the three files below are .pkl files read through
# utility.load_pickle -- presumably plain unpickling, so only load files
# from a trusted source.
broad_subclass_colors = utility.load_pickle(broad_subclass_colors_filename)
subclass_clf_results = utility.load_pickle(subclass_clf_results_path)
broad_subclass_clf_results = utility.load_pickle(broad_subclass_clf_results_path)

# Restrict each colour lookup to the labels listed above; a label missing
# from the lookup raises KeyError here.
subclass_color_dict = {
    label: subclass_colors[label] for label in subclasses
}
broad_subclass_color_dict = {
    label: broad_subclass_colors[label] for label in broad_subclasses
}

### Flow Diagram

In [23]:
%%html
<style>
/* Set the font size of elements with class "node" inside a "sankey"
   container to 100% of the inherited size.
   NOTE(review): presumably targets the node labels of the Sankey widgets
   rendered further down -- confirm against the widget's markup. */
.sankey .node {
    font-size:100%;
}
</style>

In [24]:
# Prediction record for the broad-subclass classifier trained on the
# 'Morph+Model Parameters' feature combination.
broad_subclass_clf_mp = broad_subclass_clf_results['Morph+Model Parameters', 'Broad_subclass']

# Group on every column of the record and count the rows in each group.
# NOTE(review): the rename and the displayed table imply the columns are
# 'true' and 'predicted' -- confirm the record has no other columns.
broad_subclass_grp = broad_subclass_clf_mp.groupby(list(broad_subclass_clf_mp))

# One row per group with its count in 'value'; the label columns are
# renamed to the source/target names used by the flow diagram.
broad_subclass_flow = (
    broad_subclass_grp.size()
    .to_frame('value')
    .reset_index()
    .rename(columns={'true': 'source', 'predicted': 'target'})
)
broad_subclass_flow

Unnamed: 0,source,target,value
0,Pvalb,Pvalb,10
1,Pvalb,Vip,1
2,Pyr,Pyr,33
3,Sst,Pvalb,5
4,Sst,Sst,1
5,Vip,Sst,3


In [26]:
# Sankey diagram of true vs. predicted broad subclass.
#
# Both node groups select the same broad-subclass labels and are partitioned
# on the 'process' dimension.
partition_broad_subclass_true = Partition.Simple('process', broad_subclasses)
partition_broad_subclass_predicted = Partition.Simple('process', broad_subclasses)

# Two node groups, titled 'True' and 'Predicted'. The ids (including the
# 'Prdicted' spelling) are used as-is in `bundles` and `ordering`, so they
# must stay in sync.
nodes = {
    'True_Broad_Subclass': ProcessGroup(
        broad_subclasses, partition_broad_subclass_true, title='True'),
    'Prdicted_Broad_Subclass': ProcessGroup(
        broad_subclasses, partition_broad_subclass_predicted, title='Predicted'),
}

# A single bundle from the 'True' group to the 'Predicted' group.
bundles = [
    Bundle('True_Broad_Subclass', 'Prdicted_Broad_Subclass'),
]
# One layer per group: 'True' first, 'Predicted' second.
ordering = [
    ['True_Broad_Subclass'],
    ['Prdicted_Broad_Subclass'],
]
# Flows are partitioned by their 'source' value.
# NOTE(review): together with the palette below this presumably colours each
# link by its true label -- confirm against the rendered figure.
flow_by_broad_subclass = Partition.Simple('source', broad_subclasses)

sdd = SankeyDefinition(nodes, bundles, ordering, flow_partition=flow_by_broad_subclass)
size = dict(width=500, height=280)

# The SVG goes into a relative 'figures' directory; create it first so the
# save does not fail on a fresh checkout where the directory is missing.
broad_subclass_svg_path = 'figures/broad_subclass_clf_sankey.svg'
os.makedirs(os.path.dirname(broad_subclass_svg_path), exist_ok=True)

# Keep the widget expression last so the notebook displays it.
(
    weave(sdd, broad_subclass_flow, palette=broad_subclass_color_dict)
    .to_widget(**size)
    .auto_save_svg(broad_subclass_svg_path)
)

SankeyWidget(groups=[{'id': 'True_Broad_Subclass', 'type': 'process', 'title': 'True', 'nodes': ['True_Broad_S…

In [29]:
# Prediction record for the subclass classifier trained on the
# 'Morph+Model Parameters' feature combination.
subclass_clf_mp = subclass_clf_results['Morph+Model Parameters', 'Subclass']

# Group on every column of the record and count the rows in each group.
# NOTE(review): the rename and the displayed table imply the columns are
# 'true' and 'predicted' -- confirm the record has no other columns.
subclass_grp = subclass_clf_mp.groupby(list(subclass_clf_mp))

# One row per group with its count in 'value'; the label columns are
# renamed to the source/target names used by the flow diagram.
subclass_flow = (
    subclass_grp.size()
    .to_frame('value')
    .reset_index()
    .rename(columns={'true': 'source', 'predicted': 'target'})
)
subclass_flow

Unnamed: 0,source,target,value
0,L2/3 IT,L2/3 IT,1
1,L2/3 IT,L5 IT,2
2,L4,L4,12
3,L4,L5 IT,1
4,L5 IT,L5 IT,6
5,L5 IT,L5 PT,1
6,L5 PT,L5 PT,7
7,Pvalb,Pvalb,11
8,Sst,Pvalb,2
9,Sst,Sst,2


In [32]:
# Sankey diagram of true vs. predicted subclass.
#
# Both node groups select the same subclass labels and are partitioned on
# the 'process' dimension.
partition_subclass_true = Partition.Simple('process', subclasses)
partition_subclass_predicted = Partition.Simple('process', subclasses)

# Two node groups, titled 'True' and 'Predicted'. The ids (including the
# 'Prdicted' spelling) are used as-is in `bundles` and `ordering`, so they
# must stay in sync.
nodes = {
    'True_Subclass': ProcessGroup(
        subclasses, partition_subclass_true, title='True'),
    'Prdicted_Subclass': ProcessGroup(
        subclasses, partition_subclass_predicted, title='Predicted'),
}

# A single bundle from the 'True' group to the 'Predicted' group.
bundles = [
    Bundle('True_Subclass', 'Prdicted_Subclass'),
]
# One layer per group: 'True' first, 'Predicted' second.
ordering = [
    ['True_Subclass'],
    ['Prdicted_Subclass'],
]
# Flows are partitioned by their 'source' value.
# NOTE(review): together with the palette below this presumably colours each
# link by its true label -- confirm against the rendered figure.
flow_by_subclass = Partition.Simple('source', subclasses)

sdd = SankeyDefinition(nodes, bundles, ordering, flow_partition=flow_by_subclass)
size = dict(width=500, height=350)

# The SVG goes into a relative 'figures' directory; create it first so the
# save does not fail on a fresh checkout where the directory is missing.
subclass_svg_path = 'figures/subclass_clf_sankey.svg'
os.makedirs(os.path.dirname(subclass_svg_path), exist_ok=True)

# Keep the widget expression last so the notebook displays it.
(
    weave(sdd, subclass_flow, palette=subclass_color_dict)
    .to_widget(**size)
    .auto_save_svg(subclass_svg_path)
)

SankeyWidget(groups=[{'id': 'True_Subclass', 'type': 'process', 'title': 'True', 'nodes': ['True_Subclass^Vip'…