In [None]:
import sqlalchemy
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import folium
import os
import squarify

# Database URI and output directory can be overridden through environment
# variables so that credentials and machine-specific paths do not have to be
# edited into the notebook. The fallbacks keep the original values.
# NOTE(review): the fallback URI embeds a password; set UK_DATA_DB_URI and remove
# the literal before sharing or committing this notebook.
uri = os.environ.get('UK_DATA_DB_URI', 'mysql://uk-project:rchi2019@localhost/uk-data')
path = os.environ.get('UK_DATA_OUTPUT_DIR', 'C:/Users/jbutl20/Desktop/')

# Heatmap x-axis labels: one "Mon YY" entry per month of 2017 and 2018, in
# chronological order (24 labels in total).
month_labels = [
    "{} {}".format(month, year)
    for year in ['17', '18']
    for month in ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
]

In [None]:
# Font sizes used for every figure in this notebook.
SMALL_SIZE, MEDIUM_SIZE, BIGGER_SIZE = 8, 10, 12

# Apply all font-size defaults in a single rcParams update.
plt.rcParams.update({
    'font.size': SMALL_SIZE,           # default text size
    'axes.titlesize': SMALL_SIZE,      # axes title
    'axes.labelsize': MEDIUM_SIZE,     # x and y axis labels
    'xtick.labelsize': SMALL_SIZE,     # x tick labels
    'ytick.labelsize': SMALL_SIZE,     # y tick labels
    'legend.fontsize': SMALL_SIZE,     # legend text
    'figure.titlesize': BIGGER_SIZE,   # figure title
})

In [None]:
 def treemap (df, maxnum, index):
    """Draw a squarify treemap of the first ``maxnum`` rows of ``df``.

    Each tile is sized by the ``index`` column and labelled with the
    title-cased ``name``, the ``index`` value (thousands separators, no
    decimals) and the ``ratio`` column, each on its own line.

    Parameters
    ----------
    df : DataFrame
        Must contain ``name``, ``ratio`` and the column named by ``index``.
        Rows are taken in their existing order, so sort before calling.
    maxnum : int
        Maximum number of rows (tiles) to draw.
    index : str
        Name of the numeric column that determines tile size.

    Returns
    -------
    None. The figure is displayed with ``plt.show()``.
    """
    # Truncate once so that sizes, labels and colours all describe the same
    # rows; previously labels and colours were built from the full frame.
    top = df.iloc[:maxnum]

    sizes = top[index].tolist()
    labels = [
        "{}\n ({:,.0f})\n{}".format(str(name).lower().title(), value, ratio)
        for name, value, ratio in zip(top['name'], top[index], top['ratio'])
    ]
    # Spread the Spectral colormap evenly over the tiles actually drawn.
    colors = [plt.cm.Spectral(i / float(len(sizes))) for i in range(len(sizes))]

    # Draw Plot
    plt.figure(figsize=(15, 10), dpi=80)
    squarify.plot(sizes=sizes, label=labels, color=colors, alpha=.8)

    # Decorate
    plt.axis('off')
    plt.show()

In [None]:
# Prescription totals per practice and month.
sql = 'select * from total_rx_by_practice_month'
df = pd.read_sql(sql=sql, con=uri)

# Practice attributes with the practice-setting name, indexed by org_code.
sql = (
    'select org_code, a.name, status_code, b.name as practice_setting, practice_setting_id '
    'from practice a '
    'left join practice_setting b on a.practice_setting_id=b.id'
)
practice_df = pd.read_sql(sql=sql, con=uri, index_col='org_code')

# Rounded average GP count and patient count per practice, indexed by practice.
sql = (
    'SELECT practice, ROUND(AVG(gp_count)) as avg_gp_count, ROUND(AVG(num_patients)) AS avg_num_patients '
    'FROM patient_count '
    'GROUP BY practice'
)
gp_patient_count_df = pd.read_sql(sql=sql, con=uri, index_col='practice')

# Unaggregated patient_count rows.
sql = 'select * from patient_count'
gp_patient_count_full_df = pd.read_sql(sql=sql, con=uri)

In [None]:
def _monthly_average_pivot(values):
    """Pivot ``gp_patient_count_full_df`` to one row per practice and one column per period.

    ``values`` names the column to aggregate. ``margins=True`` adds an
    'Average' column (and row) holding the mean across periods. The string
    'mean' is used instead of ``np.mean`` because passing the NumPy callable
    as ``aggfunc`` is deprecated in current pandas.
    """
    return gp_patient_count_full_df.pivot_table(
        index='practice',
        columns='period',
        values=values,
        margins=True,
        margins_name='Average',
        aggfunc='mean',
    )


# Attach the per-period values to the practice attributes (left join on the
# practice_df index, so practices without counts are kept with NaN).
tmp_df = _monthly_average_pivot('num_patients')
patient_count_df = practice_df.join(tmp_df)
tmp_df = _monthly_average_pivot('gp_count')
gp_count_df = practice_df.join(tmp_df)
patient_count_df.head()

In [None]:
# Print the column dtypes and non-null counts of the per-practice patient-count table.
patient_count_df.info()

# Number of Registered Patients by Month

In [None]:
# Rank practices by their average number of registered patients and keep the
# 100 largest, labelled by practice name for plotting.
tmp_df = patient_count_df.set_index('name')
top100_patient_count_df = tmp_df.sort_values(by='Average', ascending=False).head(100)

# Keep only the per-period columns and 'Average' for the heatmap. The
# descriptive columns are dropped by name rather than by position so a change
# in column order cannot silently remove a data column.
top100_patient_count_heatmap_df = top100_patient_count_df.drop(
    columns=['status_code', 'practice_setting', 'practice_setting_id']
)

# Export the full table. The org_code index is written to the file (as the GP
# count export does); with index=False the practice identifier was lost.
patient_count_df.to_csv(os.path.join(path, r'patients_count_by_months.csv'))
top100_patient_count_df.head()

# Heatmap: Number of Patients by Month

In [None]:
# Heatmap of registered patients per month for the 100 largest practices,
# one row per practice, ordered alphabetically by name.
fig, ax = plt.subplots(figsize=(8, 25))
sns.heatmap(
    top100_patient_count_heatmap_df.drop(columns=['Average']).sort_values(by='name'),
    robust=True,
    cmap='Oranges',
    linecolor='black',
    linewidths=0.3,  # seaborn's documented parameter for cell borders
    xticklabels=month_labels,
    cbar_kws={'label': 'Number of Registered Patients'},
    ax=ax,
)
# The colorbar is the last axes added to the figure; enlarge its label.
ax.figure.axes[-1].yaxis.label.set_size(10)

# Number of Providers by Month

In [None]:
# Select the same 100 practices as the patient-count ranking, matching on the
# org_code index instead of the practice name: names are not guaranteed to be
# unique, and a name-based .loc lookup returns every practice sharing a name.
# NOTE(review): assumes org_code is unique in practice_df - confirm.
top100_codes = patient_count_df.sort_values(by='Average', ascending=False).head(100).index
tmp_df = gp_count_df.loc[top100_codes].set_index('name')
top100_gp_count_df = tmp_df.sort_values(by='Average', ascending=False).head(100)

# Keep only the per-period columns and 'Average' for the heatmap, dropping the
# descriptive columns of this frame by name.
top100_gp_count_heatmap_df = top100_gp_count_df.drop(
    columns=['status_code', 'practice_setting', 'practice_setting_id']
)

# Export the full GP-count table, including the org_code index.
gp_count_df.to_csv(os.path.join(path, r'gp_count_by_months.csv'))
top100_gp_count_df.head()

# Heatmap: Number of Providers by Month

In [None]:
# Heatmap of GP providers per month for the selected top-100 practices,
# one row per practice, ordered alphabetically by name.
fig, ax = plt.subplots(figsize=(8, 25))
sns.heatmap(
    top100_gp_count_heatmap_df.drop(columns=['Average']).sort_values(by='name'),
    robust=True,
    cmap='Oranges',
    linecolor='black',
    linewidths=0.3,  # seaborn's documented parameter for cell borders
    xticklabels=month_labels,
    cbar_kws={'label': 'Number of GP Providers'},
    ax=ax,
)
# The colorbar is the last axes added to the figure; enlarge its label.
ax.figure.axes[-1].yaxis.label.set_size(10)

In [None]:
# Total items per practice and period; margins add a 'Total' column (and row)
# with the sum over all periods. The string 'sum' replaces np.sum because
# passing the NumPy callable as aggfunc is deprecated in current pandas.
wide_df = df.pivot_table(index='practice', columns='period', values='total_items', margins=True, margins_name='Total', aggfunc='sum')
tmp_df = gp_patient_count_df.join(wide_df, how='inner', on='practice')
final_df = practice_df.join(tmp_df, how='inner')

# Denominators for the per-capita figures. Zero or missing averages become NaN
# so the divisions below yield NaN instead of raising (round() on NaN or on a
# division by zero aborted the original row-wise apply).
patients = final_df['avg_num_patients'].where(final_df['avg_num_patients'] > 0)
gps = final_df['avg_gp_count'].where(final_df['avg_gp_count'] > 0)

# Rounded totals per registered patient and per GP (floats, NaN where undefined).
final_df['num_per_patient'] = (final_df['Total'] / patients).round()
final_df['num_per_gp'] = (final_df['Total'] / gps).round()

# Provider-to-patient ratio formatted as "1:N"; left as NaN where undefined.
final_df['ratio'] = (patients / gps).round().map(lambda n: "1:{:d}".format(int(n)), na_action='ignore')
final_df.head()

In [None]:
# Order practices from the highest to the lowest num_per_gp value and preview
# the first ten rows.
sorted_final_df = final_df.sort_values('num_per_gp', ascending=False)
sorted_final_df.iloc[:10]

In [None]:
# Export the ranked table. The org_code index is written to the file; with
# index=False the practice identifier was dropped from the CSV.
sorted_final_df.to_csv(os.path.join(path, r'total_rx_by_practice_months.csv'))

In [None]:
# Print the column dtypes and non-null counts of the ranked per-practice table.
sorted_final_df.info()

# Number of prescriptions prescribed per patient

The ratio shown in each label (1:N) is the number of GP providers to registered patients.

In [None]:
# Take the 100 rows with the highest num_per_patient and draw the first 15 of
# them as a treemap.
top100_practice = final_df.sort_values('num_per_patient', ascending=False).iloc[:100]
treemap(top100_practice, maxnum=15, index='num_per_patient')

# Number of prescriptions prescribed per GP

In [None]:
# Take the 100 rows with the highest num_per_gp and draw the first 30 of them
# as a treemap.
top100_practice = final_df.sort_values('num_per_gp', ascending=False).iloc[:100]
treemap(top100_practice, maxnum=30, index='num_per_gp')