# Follow-up

This tab provides an overview of the clinical trial status, i.e., the number of patients, median follow-up time, total amount of data, missing data, etc.

In [1]:
import os
import sys
# Location of the ClinLib package. It can be overridden with the CLINLIB_PATH
# environment variable; the historical hard-coded location is the fallback.
clinlib_path = os.environ.get("CLINLIB_PATH", "C:\\Users\\Fabien Boux\\Code\\ClinLib")
if clinlib_path not in sys.path:  # do not stack duplicates when the cell is re-run
    sys.path.append(clinlib_path)

# Project configuration, loaded by Config.read().
from functions.config import Config
config = Config()
config.read()

# Database handle rooted at the configured 'database' path; the metadata
# resource path is given in the configuration relative to that folder.
from clinlib.database import Database
database_path = config.get_value('database', section='PATH')
database = Database(database_path, idlength=3)
database.add_resource({'metadata': os.path.join(database_path, config.get_value('metadata', section='PATH'))})

In [None]:
import pandas as pd
import numpy as np

# Patient-level metadata table. This cell reads its 'Group', 'Start', 'End' and
# 'Patient' columns and adds three columns to it: 'File', 'Visits', 'Missing'.
metadata = database.get_metadata(which='all')

group = metadata['Group']
group_labels = group.unique()


def _per_group(reducer):
    """Return reducer(rows) for each label in group_labels, followed by reducer(all rows)."""
    return [reducer(metadata[group == g]) for g in group_labels] + [reducer(metadata)]


# Summary table: one column per group plus 'Overall', one row per statistic.
df = pd.DataFrame([], columns=list(group_labels) + ['Overall'])

df.loc['Number of patients'] = _per_group(len)
df.loc['Censored'] = _per_group(lambda rows: rows['End'].isna().sum())
# NOTE(review): rows with a missing 'End' yield a missing duration, which median()
# skips, so censored patients do not contribute to this figure - confirm intended.
df.loc['Median follow-up (days)'] = _per_group(lambda rows: (rows['End'] - rows['Start']).median().days)

# Map each workbook's stem to its path. Only .xlsx files are considered, so an
# unrelated file sharing a patient's stem cannot trigger the opening of a
# workbook that does not exist.
data_folder = database.folders['data']
workbooks = {}
for f in os.listdir(data_folder):
    stem, extension = os.path.splitext(f)
    path = os.path.join(data_folder, f)
    if extension.lower() == '.xlsx' and os.path.isfile(path):
        workbooks[stem] = path
list_files = list(workbooks)

# For every patient: is there a workbook, and how many distinct 'Session'
# values appear across all of its sheets?
has_file = []
n_visits = []
for patient in metadata['Patient']:
    path = workbooks.get(patient)
    if path is None:
        has_file.append(False)
        n_visits.append(0)
        continue

    # `visits` stays a plain list at notebook scope in case other cells read it.
    visits = []
    with pd.ExcelFile(path) as xls:  # the context manager closes the file handle
        for sheet_name in xls.sheet_names:
            visits.extend(xls.parse(sheet_name)['Session'].unique())
    has_file.append(True)
    n_visits.append(len(set(visits)))

metadata['File'] = has_file
metadata['Visits'] = n_visits
# Missing-visit counting is not implemented: the column is 0 for every patient.
metadata['Missing'] = 0

df.loc['Patient with data'] = _per_group(lambda rows: rows['File'].sum())
df.loc['Number of visits'] = _per_group(lambda rows: rows['Visits'].sum())
df.loc['Missing visits'] = _per_group(lambda rows: rows['Missing'].sum())

def df_style(val):
    """Return the CSS declaration for bold text, regardless of the cell value `val`."""
    bold_css = "font-weight: bold"
    return bold_css

# Show every statistic as a whole number.
df = df.astype(int)

# Styler.applymap was renamed to Styler.map in pandas 2.1 (applymap is deprecated
# since then); use whichever this pandas version provides.
styler = df.style
apply_cellwise = styler.map if hasattr(styler, 'map') else styler.applymap

# Last expression of the cell: the styled table with the 'Overall' column in bold.
apply_cellwise(df_style, subset='Overall')

The following plot is a *swimmer plot*. It gives a quick overview of all the data by combining time-to-event information (patient's treatment, death or end of follow-up) with imaging (lesion evolution, if available).
Note that some of the variables used in this graph can be defined in the `Configuration` tab (the follow-up time, the list of potential visits, etc.).

In [None]:
%matplotlib widget

from functions.graph import swimmer_plot

# Optional plot settings read from the configuration; None when a key is not defined.
followup_time = config.get_value('followup_time') if config.is_key('followup_time') else None
followup_visits = config.get_value('followup_visits') if config.is_key('followup_visits') else None

# Pass only the configured values, so the plot does not depend on variables
# left over from other cells.
swimmer_plot(database, followup_time=followup_time, followup_visits=followup_visits);