# Notebook to create the component definition dataframe

Combines multiple JSON files into a single dataframe.

In [14]:
import pandas as pd
from decouple import config
from nfact_id.functions.component_def_functions import (
    rename_dict, 
    get_max_value_from_dict, 
    clean_and_group, 
    most_frequent
    )
from conilab.data_functions.data import load_json
import os
import re

## Get file path

In [4]:
# Base directory for this notebook, read from the `component_def` setting via
# python-decouple's `config`. The input JSONs are loaded from a sub-folder of it
# and the final CSV is written into it.
component_df_dir = config("component_def")

## Load JSONs

In [5]:
# Every input JSON sits in the same sub-folder, so resolve that folder once.
json_dir = os.path.join(component_df_dir, "jsons_to_build_df")

intra_hemi = load_json(os.path.join(json_dir, "intra_hemi.json"))
yeo = load_json(os.path.join(json_dir, "yeo_17_dice_scores.json"))
cole = load_json(os.path.join(json_dir, "cole_dice_scores.json"))
gemini = load_json(os.path.join(json_dir, "gemini_defintion.json"))

## Clean JSONs

This involves:
1) making network naming consistent across atlases
2) getting the maximum Dice score from the JSONs

In [None]:
# Step 1: run `clean_and_group` on each component's entry, then rename the
# atlas-specific network label so Cole and Yeo share the same vocabulary.
cole_grouped = {component: clean_and_group(scores) for component, scores in cole.items()}
cole_cleaned = rename_dict(cole_grouped, {'Cingulo-Opercular': 'Salience'})

yeo_grouped = {component: clean_and_group(scores) for component, scores in yeo.items()}
yeo_cleaned = rename_dict(yeo_grouped, {'Control': 'Frontoparietal'})

# Step 2: reduce each cleaned dictionary with `get_max_value_from_dict`.
cole_def = get_max_value_from_dict(cole_cleaned)
yeo_def = get_max_value_from_dict(yeo_cleaned)


## Process Gemini dictionary

This involves keeping only the `network` field of each entry in the JSON.

In [10]:
# Keep only the 'network' field of every Gemini entry, keyed as in the JSON.
gemini_def = {
    component: entry['network']
    for component, entry in gemini.items()
}

## Combine into one dataframe

In [11]:
# Build the table keyed on the Cole component ids and look every other source
# up by that same id. Pairing the dictionaries by position (via `.values()`)
# would silently attach labels to the wrong component whenever one JSON lists
# its components in a different order.
component_ids = list(cole_def)
other_sources = {"Yeo17": yeo_def, "Gemini": gemini_def}
for source_name, source_def in other_sources.items():
    if set(source_def) != set(component_ids):
        # Fail loudly instead of producing a misaligned or partially filled table.
        mismatched = sorted(set(source_def) ^ set(component_ids), key=str)
        raise ValueError(
            f"{source_name} components do not match the Cole components: {mismatched}"
        )

comp_df = pd.DataFrame(
    data={
        "Components": component_ids,
        "Cole": [cole_def[component] for component in component_ids],
        "Yeo17": [yeo_def[component] for component in component_ids],
        "Gemini": [gemini_def[component] for component in component_ids],
    }
)

In [13]:
# Preview the combined table; as the cell's last expression it is rendered as
# rich output (long frames are shown truncated).
comp_df

Unnamed: 0,Components,Cole,Yeo17,Gemini
0,0,Default,Limbic,Limbic
1,1,Auditory,Frontoparietal,Default
2,2,Somatomotor,Somatomotor,Somatomotor
3,3,Somatomotor,Salience,Somatomotor
4,4,Salience,Salience,Language
...,...,...,...,...
95,95,Salience,Salience,Salience
96,96,Somatomotor,Dorsal Attention,Dorsal Attention
97,97,Ventral Multimodal,Dorsal Attention,Visual
98,98,Visual,Visual,Visual


## Get a consensus (most frequent) definition

In [16]:
# Apply `most_frequent` to each component's three labels, passed in the order
# Yeo17, Cole, Gemini. Transposing turns every component into a column so the
# default column-wise `apply` visits one component at a time.
label_columns = ['Yeo17', "Cole", "Gemini"]
labels_by_component = comp_df[label_columns].transpose()
comp_df['definition'] = labels_by_component.apply(most_frequent)

## Add whether each component is an intra-hemispheric component

In [19]:
# Look the flag up by component id instead of relying on `intra_hemi.values()`
# being in the same order as the rows of `comp_df`; a differently ordered JSON
# would otherwise mislabel components without any error.
row_components = comp_df['Components'].tolist()
if set(intra_hemi) != set(row_components):
    # Same exception type pandas raises for a length mismatch, with a clearer message.
    unmatched = sorted(set(intra_hemi) ^ set(row_components), key=str)
    raise ValueError(
        f"intra_hemi components do not match the dataframe components: {unmatched}"
    )
comp_df['IntraHemi'] = [intra_hemi[component] for component in row_components]

## Save CSV

In [22]:
# Write the finished table into the component-definition directory; the row
# index is not written to the file.
output_csv = os.path.join(component_df_dir, "Component_defintions.csv")
comp_df.to_csv(output_csv, index=False)