In [83]:
import pandas as pd
import matplotlib.pyplot as plt
import datetime
# Bokeh visualization tools
from bokeh.models import ColumnDataSource, Range1d
from bokeh.models.tools import HoverTool
from bokeh.plotting import figure, show, output_notebook, output_file
from bokeh.palettes import brewer
# configure Bokeh so that show() displays figures inline in the notebook
output_notebook()

In [84]:
# load the already-cleaned trials table from the CSV next to this notebook
df = pd.read_csv(filepath_or_buffer='./Alzheimer Commercial Trials.csv')

In [85]:
# check data types: list every column with its dtype and non-null count
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 128 entries, 0 to 127
Data columns (total 8 columns):
sponsor_name       128 non-null object
drug_name          128 non-null object
fixed_name         128 non-null object
nct_id             128 non-null object
start_date         128 non-null object
completion_date    126 non-null object
phase              127 non-null object
overall_status     128 non-null object
dtypes: object(8)
memory usage: 8.1+ KB


In [86]:
# both date columns were loaded with object dtype; convert them to datetime
# so they can be compared and drawn on a datetime axis
for date_col in ('start_date', 'completion_date'):
    df[date_col] = pd.to_datetime(df[date_col])

In [87]:
# select company: keep only the rows whose normalized sponsor name is Pfizer,
# then renumber the rows 0..n-1 (the old index labels are discarded)
df = df.loc[df['fixed_name'] == 'Pfizer'].reset_index(drop=True)

In [136]:
# create a color map: one Set2 color per distinct trial phase
# rows with a missing phase are skipped, they cannot be matched by an equality lookup
phases = df['phase'].dropna().unique()
# brewer["Set2"] only provides palettes for 3 to 8 colors, so clamp the requested
# size to that range, then trim (fewer than 3 phases) or cycle (more than 8 phases)
palette = brewer["Set2"][min(max(len(phases), 3), 8)]
colors = [palette[i % len(palette)] for i in range(len(phases))]
color_map = pd.DataFrame({'phase': phases,
                          'color': colors})
color_map

Unnamed: 0,phase,color
0,Phase 1,#66c2a5
1,Phase 2,#fc8d62
2,Phase 3,#8da0cb


In [143]:
# bokeh configure: one horizontal bar per trial, spanning start date to completion date
# NOTE(review): the bars are positioned by row number (ID/ID1) while y_range is a
# categorical list of drug names; this presumably relies on every row having a
# distinct drug_name -- confirm before reusing with other companies.
G = figure(title='Clinical Trials Alzheimer', x_axis_type='datetime', width=800, height=400,
           y_range=df['drug_name'].tolist(),
           # x axis runs from the earliest start to one year past the latest completion
           x_range=Range1d(df['start_date'].min(),
                           df['completion_date'].max() + datetime.timedelta(days=365)),
           tools='save')

# info to show on hover
hover = HoverTool(
    tooltips=[('trial_id', '@nct_id'),
              ('phase', '@phase'),
              ('start_date', '@start_date{%F}'),
              ('completion_date', '@completion_date{%F}')],
    # formatter keys must be the full field spec (with the leading '@') for the
    # {%F} date format in the tooltips above to be applied
    formatters={'@start_date': 'datetime', '@completion_date': 'datetime'})
G.add_tools(hover)

# vertical extent of each bar: from row+0.4 up to row+1, which leaves a gap
# between consecutive bars
df['ID'] = df.index + 0.4
df['ID1'] = df.index + 1

# map color by phase; a phase with no entry in color_map (e.g. a missing phase)
# gets a neutral grey instead of aborting the whole cell
phase_colors = dict(zip(color_map['phase'], color_map['color']))
df['color'] = df['phase'].map(phase_colors).fillna('#b3b3b3')

CDS = ColumnDataSource(df[['nct_id', 'drug_name', 'phase', 'start_date', 'completion_date',
                           'color', 'ID', 'ID1']])
G.quad(left='start_date', right='completion_date', bottom='ID', top='ID1',
       source=CDS, color='color', legend_group='phase')
G.legend.location = "bottom_left"
show(G)