# Randomization

This tab shows how patients are distributed across the selected metadata and lets you check the balance between groups. First, select the metadata to be monitored in this tab (the list of selected metadata is saved in the config file). Second, the selected metadata are listed in a table together with the size of the corresponding subgroups. An indicator of balance is also provided for each metadata; see below for details.

In [1]:
import os
import sys
sys.path.append("C:\\Users\\Fabien Boux\\Code\\ClinLib")

from functions.config import Config
from clinlib.database import Database

# 'init.txt' holds, on its first line, the name of the configuration file
# stored in the 'ini' folder.
with open('init.txt') as f:
    lines = f.readlines()
# readlines() keeps the end-of-line character; strip it so that it does not
# end up inside the configuration file path.
config = Config(os.path.join('ini', lines[0].strip()))
config.read()

# Open the database, using the identifier length from the configuration when
# the 'id_length' key exists and 3 otherwise, then register the metadata
# resource located relative to the database folder.
database_path = config.get_value('database', section='PATH')
id_length = int(config.get_value('id_length', section='OTHER')) if config.is_key('id_length') else 3
database = Database(database_path, idlength=id_length)
database.add_resource({'metadata': os.path.join(database_path, config.get_value('metadata', section='PATH'))})

import ipywidgets as widgets
from ipywidgets import interact, interactive
import matplotlib.pyplot as plt
from IPython.display import display

In [2]:
import ipywidgets as widgets

# Available metadata: read from the configuration when the key exists,
# otherwise extracted and written back to the configuration file.
if not config.is_key('list_metadata'):
    list_metadata = config.extract_config_values('list_metadata')
    config.write()
else:
    list_metadata = config.get_value('list_metadata')

# Monitored metadata: the saved selection when there is one, otherwise the
# first available metadata only.
list_selected = (
    config.get_value('monitored_metadata')
    if config.is_key('monitored_metadata')
    else list_metadata[:1]
)

# Left-hand list: every available metadata.
all_metadata = widgets.Select(
    options=list_metadata,
    value=list_metadata[0],
    description='Metadata:',
    disabled=False,
)

# Right-hand list: the metadata currently monitored.
selected_metadata = widgets.Select(
    options=list_selected,
    value=list_selected[0],
    description='Monitored:',
    disabled=False,
)

def on_button_add_clicked(var):
    """Add the metadata highlighted in the 'Metadata' list to the monitored ones.

    Nothing happens when the entry is already monitored. Otherwise the
    monitored list is extended in place, kept sorted, and pushed to the
    'Monitored' widget.

    `var` is the argument supplied by the click callback; it is not used.
    """
    candidate = all_metadata.value
    if candidate in list_selected:
        return
    list_selected.append(candidate)
    list_selected.sort()
    selected_metadata.options = list_selected

def on_button_remove_clicked(var):
    """Remove the entry highlighted in the 'Monitored' list, if there is one.

    The monitored list is modified in place and pushed back to the widget.
    When the highlighted value is not in the list (for instance nothing is
    selected because the list is already empty), the click is ignored instead
    of raising ValueError from list.remove.

    `var` is the argument supplied by the click callback; it is not used.
    """
    current = selected_metadata.value
    if current not in list_selected:
        return
    list_selected.remove(current)
    selected_metadata.options = list_selected

def on_button_save_clicked(var):
    """Store the monitored metadata list in the configuration and write it out.

    `var` is the argument supplied by the click callback; it is not used.
    """
    key, section = 'monitored_metadata', 'METADATA'
    config.set_value(list_selected, key, section=section)
    config.write()
    
# ipywidgets buttons have no `command` argument (that is a Tk idiom); passing
# it only triggers an "unrecognized arguments" deprecation warning. Handlers
# are attached with `on_click` below.
button_add = widgets.Button(description='Add')
button_remove = widgets.Button(description='Remove')
button_save = widgets.Button(description='Save')

button_add.on_click(on_button_add_clicked)
button_remove.on_click(on_button_remove_clicked)
button_save.on_click(on_button_save_clicked)

# Last expression of the cell, so the notebook displays it:
# available metadata | Add / Remove / Save | monitored metadata
widgets.HBox([all_metadata, widgets.VBox([button_add, button_remove, button_save]), selected_metadata])

HBox(children=(Select(description='Metadata:', options=('Phase', 'Arm', 'Cancer', 'Center', 'Gender', 'Age', '…

## 1. Balance

The next table reports the selected metadata. The indicator of balance is the coefficient of variation (ratio between standard deviation and mean):
- in range 0-10% (*green*): subgroups are considered balanced,
- around 20% (*orange*): risk of imbalance (< 20% low risk and > 20% high risk),
- and from 30% and above (*red*): subgroups are considered unbalanced.

To go further, a p-value testing statistical significance (two-sided test) is also provided (only when there are exactly 2 groups), using:
- a Fisher exact test for discrete variables (indicated in the table by (F)),
- a Mann-Whitney U test for continuous variables (indicated in the table by (MW)).

In [3]:
import pandas as pd
import numpy as np
import matplotlib
from scipy.stats import fisher_exact, mannwhitneyu

# Fetch every metadata column once, then derive from it:
#   - `group`: the 'Group' column,
#   - `group_labels`: its distinct non-missing values,
#   - `metadata`: only the monitored columns.
full_metadata = database.get_metadata(which='all')
group = full_metadata['Group']
group_labels = group.dropna().unique()
metadata = full_metadata[list_selected]

def _contingency(in_subgroup_masks):
    """Return [[count inside subgroup per group], [count outside subgroup per group]]."""
    return [[mask.sum() for mask in in_subgroup_masks],
            [(~mask).sum() for mask in in_subgroup_masks]]


def _testable(table):
    """A p-value is reported only for exactly two groups and a table without empty cells."""
    return len(group_labels) == 2 and bool((np.array(table) > 0).all())


# One row per subgroup of each monitored metadata: the subgroup size in every
# group, followed by a p-value cell (NaN when no test is reported).
df = pd.DataFrame([], columns=list(group_labels) + ['P-value'], index=[])
for m in list_selected:
    met = metadata[m].dropna()
    labels = met.unique()
    # Values of this metadata split by group, computed once per metadata.
    per_group = [met[group == g] for g in group_labels]

    # More than 10 distinct non-string values: treated as continuous and
    # summarised by quartile bins. Everything else is treated as discrete.
    if len(labels) > 10 and not isinstance(labels[0], str):
        # Edges include the maximum so that the upper quartile is reported too.
        edges = [np.quantile(met, q) for q in (0, 0.25, 0.50, 0.75, 1)]
        mw_cell = None  # Mann-Whitney compares the whole variable: same for all bins
        for k in range(1, len(edges)):
            low, high = edges[k - 1], edges[k]
            is_last_bin = k == len(edges) - 1
            # Bins are [low, high), except the last one which is closed so
            # that the maximum value is counted.
            masks = [(low <= sub) & ((sub <= high) if is_last_bin else (sub < high))
                     for sub in per_group]
            table = _contingency(masks)
            if _testable(table):
                if mw_cell is None:
                    mw_cell = '{:.2f}(MW)'.format(
                        mannwhitneyu(per_group[0], per_group[1], alternative='two-sided')[1])
                p_cell = mw_cell
            else:
                p_cell = np.nan
            df.loc['{}: {:.2f}-{:.2f}'.format(m, low, high)] = table[0] + [p_cell]
    else:
        # Discrete variable: one row per label (every label, including the
        # first one), with a Fisher exact test of "label vs. any other label".
        for label in labels:
            table = _contingency([sub == label for sub in per_group])
            if _testable(table):
                p_cell = '{:.2f}(F)'.format(fisher_exact(table, alternative="two-sided")[1])
            else:
                p_cell = np.nan
            df.loc['{}: {}'.format(m, str(label))] = table[0] + [p_cell]


# Balance indicator: coefficient of variation (in %, one decimal) of the group
# sizes of each row. It stays NaN unless every group size is above 4.
balance = pd.Series([np.nan] * len(df.index), index=df.index)
for row_label in df.index:
    # Every column except the last one ('P-value') holds a group size.
    if not (df.loc[row_label][:-1] > 4).all():
        continue
    sizes = df.loc[row_label].values[:-1]
    balance[row_label] = round(100 * sizes.std() / sizes.mean(), 1)

# Append the indicator as the right-most column.
df.insert(loc=len(df.columns), column='Deviation', value=balance)

# RGBA table of N colours going from green (0, .9, 0) through orange
# (1, .5, 0) to red (1, 0, 0), fully opaque.
N = 200
n_half = N // 2
red_channel = np.concatenate([np.linspace(0, 1, n_half), np.ones(n_half)])
green_channel = np.concatenate([np.linspace(.9, .5, n_half), np.linspace(.5, 0, n_half)])
blue_channel = np.zeros(N)
alpha_channel = np.ones(N)
vals = np.column_stack([red_channel, green_channel, blue_channel, alpha_channel])
# Discrete colormap built from the RGBA rows of `vals`.
my_cmap = matplotlib.colors.ListedColormap(colors=vals)

def make_pretty(styler, column):
    """Colour `column` with the `my_cmap` gradient and show values with one decimal.

    The gradient is applied between 10 and 30 on the given column subset.
    The same `styler` object is returned.
    """
    gradient_options = dict(axis=None, vmin=10, vmax=30, cmap=my_cmap, subset=column)
    styler.background_gradient(**gradient_options)
    styler.format(precision=1)
    return styler

# Convert every group size to a built-in int before display.
for group_name in group_labels:
    df[group_name] = list(map(int, df[group_name]))

# Last expression of the cell: the styled table is what the notebook displays.
df.style.pipe(make_pretty, column='Deviation')

Unnamed: 0,75 mg/kg,P-value,Deviation
Cancer: LAPC,8,,0.0
Cancer: cNSCLC,4,,
Center: Brigham and Women's Hospital,11,,0.0
Center: Dana-Farber Cancer Institute,1,,
Phase: 1,12,,0.0
