In [2]:
# --- Interactive plotting for basic dyslexia center data overview --- #
# Import libraries
import sys
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import getpass
import ipywidgets as widgets
from IPython.display import display, Markdown, Javascript
from IPython.core.display import display, HTML
import regex as re
import clipboard

sys.path.append('/Users/mkersey/code/REDCap_tools/')
from redcap_tools.import_data import (import_data_redcap, import_redcap_metadata, import_redcap_report)
from IPython.core.display import display, HTML

sys.path.append('/Users/mkersey/code/neuro_predictor/code')
from neuro_predictor.data_curation import get_diagnosis, get_mri, get_batteries

from ucsfneuroviz.importer import import_dataframe, read_csv_as_list
from ucsfneuroviz.interactive_behav_plots import interactive_heatmap, interactive_radar, interactive_line_plot, interactive_individual_line_plot
from ucsfneuroviz.interactive_brain_plots import interactive_brain_zscore_plot, extract_dc_diagnoses, activate_selected_font
from ucsfneuroviz.fc_vars import FC_vars, FC_vars_select

display(HTML('<link rel="stylesheet" href="style.css">'))

In [3]:
# # Pull REDCap date from the token
# token = getpass.getpass("Enter Redcap API token:")
# redcap_labels = import_data_redcap(token, data='label', headers='label')
# redcap_raw = import_data_redcap(token, data='raw', headers='raw')
# # Zip the dataframe column names and labels together
# dict_labels = dict(zip(redcap_raw.columns.values.tolist(), redcap_labels.columns.values.tolist()))
# redcap_metadata = import_redcap_metadata(token)
# del token

# UCSF Dyslexia Center

In [4]:
# # Add columns to the redcap dataframe
# # redcap_labels = pd.read_csv('/Users/mkersey/Library/CloudStorage/Box-Box/math_cognition_team/projects/ucsfneuroviz/data/DyslexiaProject_VISITONE-PHENO_DATA_LABELS_2023-09-11.csv')

# df = redcap_raw.copy()
# df = get_diagnosis(df)
# df = get_mri(df)
# df = get_batteries(df)
# display(df)
# display(redcap_raw)

# # dict_labels
# # dict_labels['diagnosis_overall'] = 'Diagnosis Overall:'

# # display(redcap_labels.columns)

In [5]:
# # Identify columns in df that are not in redcap_raw
# new_cols = df.columns.difference(redcap_raw.columns)
# # Add these columns to redcap_labels
# for col in new_cols:
#     redcap_labels[col] = df[col]
# display(redcap_labels)

In [6]:
class REDCapAnalysis:
    activate_selected_font('EB Garamond', 'EBGaramond-Regular.ttf')
    def __init__(self, diagnosis_columns, additional_columns, battery_breakdown_path):
        self.token_input = widgets.Password(
            value='',
            placeholder='Enter REDCap token',
            description='Token:',
            disabled=False,
            layout=widgets.Layout(width='400px')
        )
        self.submit_token = widgets.Button(
            description='Submit',
            disabled=False,
            # button_style='info',
            tooltip='Submit REDCap token',
            icon='check'
        )
        # Create a progress bar for the token submission
        self.progress_description = widgets.Label(
            value='',
            layout=widgets.Layout(width='100px')  # Adjust as necessary
        )
        self.progress = widgets.IntProgress(
            value=0,
            min=0,
            max=10,
            step=1,
            bar_style='',
            orientation='horizontal'
        )
        # To display them side by side
        self.progress_container = [self.progress_description, self.progress]

        self.diagnosis_dropdown = widgets.Dropdown(
            options=diagnosis_columns,
            description='Diagnosis Type:',
            disabled=False,
        )
        self.checkboxes = []  # Initialize the checkboxes list
        self.additional_checkboxes = {}  # Key: column name, Value: list of checkboxes
        self.checkboxes_initialized = False  # Add this flag
        self.additional_columns = additional_columns

        # Set the event handler for the dropdown widget
        self.diagnosis_dropdown.observe(self.update_checkboxes, names='value')

        # Set the event handler for the submit button
        self.submit_token.on_click(self.get_redcap_data)

        # Initially, call update_checkboxes with the default value
        self.redcap_labels = None  # Initialize before calling update_checkboxes
        # Define the output widget
        self.out_checkbox = widgets.Output()
        self.out_plot = widgets.Output()
        # self.update_checkboxes({'new': diagnosis_columns[0]})

        self.redcap_labels = None
        self.redcap_raw = None
        self.redcap_metadata = None
        self.dict_labels = None

        self.battery_breakdown = pd.read_excel(battery_breakdown_path, sheet_name='Sheet1')
        self.all_checkbox_tasks = self.battery_breakdown[self.battery_breakdown['Checkbox']=='YES']['Redcap Variable Name'].values.tolist()
        self.primary_battery_pheno_tasks = self.battery_breakdown[self.battery_breakdown['Primary Battery (Pediatric Phenotyping)']=='YES']['Redcap Variable Name'].values.tolist()
        self.primary_battery_control_tasks = self.battery_breakdown[self.battery_breakdown['Primary Battery (Pediatric Controls)']=='YES']['Redcap Variable Name'].values.tolist()

    def get_redcap_data(self, button):
        # Pull REDCap data from the token
        try:
            # Initialize progress bar
            self.progress.value = 0
            self.progress.max = 10
            self.progress_description.value = 'Starting...'

            # Step 1: Get token and initialize progress
            token = self.token_input.value
            self.progress.value += 1
            self.progress_description.value = 'Token received...'

            # Step 2: Fetch REDCap label data
            self.redcap_labels = import_data_redcap(token, data='label', headers='label')
            self.progress.value += 3
            self.progress_description.value = 'Label data fetched...'

            # Step 3: Fetch REDCap raw data
            self.redcap_raw = import_data_redcap(token, data='raw', headers='raw')
            self.progress.value += 3
            self.progress_description.value = 'Raw data fetched...'

            # I don't think we need the metadata honestly
            # self.redcap_metadata = import_redcap_metadata(token)
            
            self.progress.value = self.progress.max
            self.progress_description.value = 'All done. Nice!'
            del token

        except Exception as e:
            self.progress.bar_style = 'danger'
            self.progress_description.value = 'Failed'
            
            # If REDCap data cannot be loaded from the token, load in csv files from BOX
            self.redcap_labels = pd.read_csv('/Users/mkersey/Library/CloudStorage/Box-Box/math_cognition_team/projects/ucsfneuroviz/data/DyslexiaProject_VISITONE-PHENO_DATA_LABELS_2023-09-11.csv')
            self.redcap_raw = pd.read_csv('/Users/mkersey/Library/CloudStorage/Box-Box/math_cognition_team/projects/ucsfneuroviz/data/DyslexiaProject_VISITONE-PHENO_DATA_RAW_2023-09-11.csv')

            # HTML display "REDCap API data load failed - Loading data from BOX instead."
            display(HTML(f'<h3 style="color: #052049;">REDCap API data load failed - Loading data from BOX instead.<br></h3>'))

        # Create a dictionary to map column names to labels
        self.dict_labels = dict(zip(self.redcap_raw.columns.values.tolist(), self.redcap_labels.columns.values.tolist()))

        # Add meaningful columns for diagnosis, MRI, and batteries
        df_tmp = self.redcap_raw.copy()
        df_tmp = get_diagnosis(df_tmp)
        df_tmp = get_mri(df_tmp)
        df_tmp = get_batteries(df_tmp)

        # Identify columns in df_tmp that are not in self.redcap_raw
        new_cols = df_tmp.columns.difference(self.redcap_raw.columns)
        
        # Add these columns to self.redcap_labels
        for col in new_cols:
            self.redcap_labels[col] = df_tmp[col]
        # Filter to phenotyping study participants only, and visit one only
        self.redcap_labels = self.redcap_labels[self.redcap_labels[self.dict_labels['project_category']]=='Pediatric Phenotyping']
        self.redcap_labels = self.redcap_labels[self.redcap_labels[self.dict_labels['redcap_event_name']]=='Visit One']

        # Add col for Primary Battery (Pediatric Phenotyping) Completed and Primary Battery (Pediatric Controls) Completed
        cols_for_pheno_tasks = []
        cols_for_control_tasks = []

        # Get the column names for primary_battery_pheno_tasks from dict_labels
        for task in self.primary_battery_pheno_tasks:
            if task in self.dict_labels:  # Check if the key exists in the dictionary
                cols_for_pheno_tasks.append(self.dict_labels[task])

        # Get the column names for primary_battery_control_tasks from dict_labels
        for task in self.primary_battery_control_tasks:
            if task in self.dict_labels:  # Check if the key exists in the dictionary
                cols_for_control_tasks.append(self.dict_labels[task])

        # Add new columns based on the condition
        # For Primary Battery (Pediatric Phenotyping)
        self.redcap_labels['Primary Battery (Pediatric Phenotyping) Completed'] = \
            self.redcap_labels[cols_for_control_tasks].apply(lambda row: 'Yes' if row.notna().all() else 'No', axis=1)

        # For Primary Battery (Pediatric Controls)
        self.redcap_labels['Primary Battery (Pediatric Controls) Completed'] = \
            self.redcap_labels[cols_for_control_tasks].apply(lambda row: 'Yes' if row.notna().all() else 'No', axis=1)

        # Update plot when data is loaded
        self.plot_stats(self.diagnosis_dropdown.value)

        # Final step: Update plot and checkboxes
        self.update_checkboxes({'new': self.diagnosis_dropdown.value})
        self.plot_stats(self.diagnosis_dropdown.value)


    def update_checkboxes(self, change):
        selected_diagnosis_column = change['new'] if change else self.diagnosis_dropdown.value
        self.out_checkbox.clear_output(wait=True)  # Clear existing checkboxes

        all_columns_boxes = []

        if selected_diagnosis_column != "All Children":
            if self.redcap_labels is not None:
                unique_values = self.redcap_labels[selected_diagnosis_column].dropna().unique()
                self.checkboxes = [widgets.Checkbox(value=False, description=str(val), disabled=False) for val in unique_values]
                for checkbox in self.checkboxes:
                    checkbox.observe(self.plot_stats, names='value')
            diagnosis_label = widgets.Label(value="Diagnosis")
            diagnosis_checkboxes = widgets.VBox(self.checkboxes)
            diagnosis_box = widgets.VBox([diagnosis_label, diagnosis_checkboxes])
            all_columns_boxes.append(diagnosis_box)

        for col in self.additional_columns:
            unique_values = self.redcap_labels[col].dropna().unique()
            checkboxes = [widgets.Checkbox(value=False, description=str(val), disabled=False) for val in unique_values]
            for checkbox in checkboxes:
                checkbox.observe(self.plot_stats, names='value')
            self.additional_checkboxes[col] = checkboxes
            col_label = widgets.Label(value=f"{col}")
            col_checkboxes = widgets.VBox(checkboxes)
            col_box = widgets.VBox([col_label, col_checkboxes])
            all_columns_boxes.append(col_box)

        with self.out_checkbox:
            display(widgets.HBox(all_columns_boxes))

    def plot_stats(self, *args):
        with self.out_plot:
            self.out_plot.clear_output(wait=True)

            if self.diagnosis_dropdown.value == "All Children":
                df_filtered = self.redcap_labels.copy()
            else:
                selected_diagnoses = [cb.description for cb in self.checkboxes if cb.value]
                if not selected_diagnoses:
                    return
                df_filtered = self.redcap_labels[self.redcap_labels[self.diagnosis_dropdown.value].isin(selected_diagnoses)]
                df_filtered = df_filtered[df_filtered[self.diagnosis_dropdown.value].notna()]

            for col, checkboxes in self.additional_checkboxes.items():
                selected_values = [cb.description for cb in checkboxes if cb.value]
                if selected_values:
                    df_filtered = df_filtered[df_filtered[col].isin(selected_values)]
                    df_filtered = df_filtered[df_filtered[col].notna()]

            # Check if there are any subjects left
            if df_filtered.shape[0] == 0:
                # If not, display a message and return
                display(HTML(f'<h3 style="color: #052049;">No subjects found with the selected criteria.<br></h3>'))
                return

            fig, ax = plt.subplots(1, 4, figsize=(20, 5))

            # add a suptitle to the figure: Total Participants: {df_filtered.shape[0]}
            fig.suptitle(f'Total Participants Selected: {df_filtered.shape[0]}', fontsize=22)
            # add some padding under the suptitle
            fig.subplots_adjust(top=.8)

            # Plot the counts in each diagnosis group
            if self.diagnosis_dropdown.value == 'All Children':
                # Simply count the number of rows and plot a single bar
                sns.barplot(x=['All Children'], y=[len(df_filtered)], ax=ax[0], zorder=1)
                ax[0].set_title('Total Children', fontsize=20)
                ax[0].set_xlabel('All Children', fontsize=16)
            else:
                # Use sns.countplot to count the occurrences of each unique value in the selected column
                sns.countplot(data=df_filtered, x=self.diagnosis_dropdown.value, ax=ax[0], zorder=1)
                ax[0].set_title('Diagnosis', fontsize=20)
                ax[0].set_xlabel('Diagnosis', fontsize=16)

            # Age (neuropsych)
            df_filtered['age_neuropsych_int'] = df_filtered[self.dict_labels['age_neuropsych']].apply(lambda x: int(x) if not pd.isna(x) else x)
            sns.countplot(data=df_filtered, x='age_neuropsych_int', ax=ax[1], zorder=1)
            ax[1].set_title('Age', fontsize=20)
            ax[1].set_xlabel('Age', fontsize=16)
            ax[1].set_ylabel('Count', fontsize=16)
            ax[1].set_xticklabels([int(float(tick.get_text())) for tick in ax[1].get_xticklabels()])
            # add counts above bars

            # Sex
            # change 1 to Male, 2 to Female, 3 to Other
            # df_filtered.loc[:, 'sex'] = df_filtered['sex'].apply(lambda x: 'Male' if x == 1 else 'Female' if x == 2 else 'Other' if x == 3 else x)
            sns.countplot(data=df_filtered, x=self.dict_labels['sex'], ax=ax[2], order=['Male', 'Female', 'Other'], zorder=1)
            ax[2].set_title('Sex', fontsize=20)
            ax[2].set_xlabel('Sex', fontsize=16)
            ax[2].set_ylabel('Count', fontsize=16)

            # Handedness
            # df_filtered.loc[:, 'handedness'] = df_filtered['handedness'].apply(lambda x: 'Non-right' if x == 1 else 'Right' if x == 2 else 'Unknown' if x == 3 else x)
            sns.countplot(data=df_filtered, x=self.dict_labels['handedness'], ax=ax[3], order=['Right', 'Non-Right', 'Unknown'], zorder=1)
            ax[3].set_title('Handedness', fontsize=20)
            ax[3].set_xlabel('Handedness', fontsize=16)
            ax[3].set_ylabel('Count', fontsize=16)

            # add counts on top of bars in the middle of the bar
            for subplot in ax:
                for p in subplot.patches:
                    subplot.annotate(
                        str(p.get_height()), 
                        (p.get_x() + p.get_width() / 2., p.get_height()), 
                        ha='center', 
                        va='center', 
                        xytext=(0, 8), 
                        textcoords='offset points'
                    )

            # add horizontal lines behind the bars at each y-tick
            for subplot in ax:
                for ytick in subplot.get_yticks():
                    subplot.axhline(ytick, linestyle='-', alpha=0.25, color='grey', zorder=0)

            plt.show()

        # Connect checkboxes to plot_stats function
        for checkbox in self.checkboxes:
            checkbox.observe(self.plot_stats, names='value')

# List of diagnosis columns to choose from
diagnosis_columns = ['All Children', 'Diagnosis Overall', 'Dyslexia and ADHD', 'Dyslexia Phenotype', 'Dyslexia Phenotype Short', 'Dyslexia and Dyscalculia', 'Dyslexia and MLD', 'MLD Subtype Primary', 'MLD Subtype Secondary']
additional_columns = ['MRI Acquired', 'Primary Battery (Pediatric Phenotyping) Completed', 'Primary Battery (Pediatric Controls) Completed']
#'Math Battery Completed'

# Load in battery breakdown spreadsheet
battery_breakdown_path = '/Users/mkersey/Library/CloudStorage/Box-Box/Dyslexia RC Documents/Administering, Testing Stimuli, Scoring/Full_List_of_Batteries_and_Tests.xlsx'

# Create an instance of REDCapAnalysis with the list of diagnosis columns
redcap_analysis = REDCapAnalysis(diagnosis_columns, additional_columns, battery_breakdown_path)
# Link the progress bar to the submit button
redcap_analysis.submit_token.on_click(redcap_analysis.progress)

# Display the widgets
display(widgets.HBox([redcap_analysis.token_input, redcap_analysis.submit_token, redcap_analysis.progress_container[0], redcap_analysis.progress_container[1]]))
display(widgets.HBox([redcap_analysis.diagnosis_dropdown]))
display(redcap_analysis.out_checkbox)  # Add MRI checkboxes here
display(redcap_analysis.out_plot)  # Use 'self.out' as the output widget

HBox(children=(Password(description='Token:', layout=Layout(width='400px'), placeholder='Enter REDCap token'),…

HBox(children=(Dropdown(description='Diagnosis Type:', options=('All Children', 'Diagnosis Overall', 'Dyslexia…

Output()

Output()

HTTP Status: 200
HTTP Status: 200


KeyError: "None of [Index(['wj_lw_per', 'wj_spelling_per', 'wj_calc_per', 'wj_wordattack_per',\n       'wj_spellingsounds_per', 'wj_seg_per', 'wj_rapnaming_per',\n       'wj_sentrep_perc', 'wj_soundblend_per', 'wj_soundawareness_per',\n       'wj_oral_perc', 'wj_nonwordrep_per', 'wj_memwords_perc',\n       'towre_sde_per', 'towre_pde_per', 'rate_percentile',\n       'accuracy_percentile', 'fluency_percentile', 'comp_percentile',\n       'sentrep_per', 'mserep', 'morph_tot', 'ppt25_pic', 'wabpic', 'frogsto',\n       'rowpvt_percentile', 'benton_total', 'beery_percentile',\n       'jolo_percentile', 'mat_r_per', 'sym_search_perc', 'coding_percentile',\n       'digitforward_percentile', 'digitbackward_percentile',\n       'spatialforward_percentile', 'spatialbackward_percentile',\n       'color_ta_per', 'color_tb_per', 'cvlt_trial1_percentile',\n       'cvlt_trial5_percentile', 'cvltc_sdfr_percentile',\n       'cvltc_ldfr_percentile', 'cvlt2_recog_percentile',\n       'rey_delay_percentile', 'naming_per', 'inhib_perc', 'switching_perc',\n       'semanticfluency_percentile', 'phonfluency_percentile',\n       'dkefs_df_filled_perc'],\n      dtype='object')] are in the [columns]"

TypeError: 'IntProgress' object is not callable