In [None]:
import numpy as np
import pandas as pd
import matplotlib as mpl
import seaborn as sb
import matplotlib.pyplot as plt
import math

import os
import matplotlib as mpl
# Store fonts in saved PDFs as Type 42 (TrueType) instead of matplotlib's
# default Type 3, for every PDF figure written later in this notebook.
mpl.rcParams['pdf.fonttype'] = 42


In [None]:


# Workspace configuration, read from environment variables.
# os.getenv returns None when a variable is unset; in that case the f-string
# paths built from `my_bucket` in later cells would start with "None/".
my_bucket = os.getenv('WORKSPACE_BUCKET')
# Bare expression: it is not the last statement of the cell, so it presumably
# displays nothing (NOTE(review): likely a leftover from interactive inspection).
my_bucket
# Not referenced anywhere else in the visible part of this notebook.
CDR_version=os.getenv("WORKSPACE_CDR")



In [None]:
#annotate results with descriptions
# Phecode annotation table, read from the workspace bucket. It is merged onto
# the PheWAS results in a later cell.
phecode_info = pd.read_csv(f'{my_bucket}/data/phewas/pheinfo.csv')

In [None]:
# Phecode mapping table, read from the workspace bucket.
phecodes_up = pd.read_csv(f'{my_bucket}/data/phewas/phecode_icd10.csv')
# Keep only the phecode-level columns and collapse repeated rows.
# The number of unique `PheCode` values in this frame is the Bonferroni
# denominator used by Manhattan_Plot_Plus.
phecodes = phecodes_up[['PheCode','Phenotype','Excl. Phecodes','Excl. Phenotypes']].drop_duplicates()

In [None]:
# Load the raw PheWAS results first, then annotate them.
# The merge used to live in its own cell *above* this read, so on a fresh
# kernel it raised NameError (`logit_Phecode_results` did not exist yet), and
# when run top-to-bottom with a stale variable its result was immediately
# overwritten by the read.
logit_Phecode_results=pd.read_csv(f'{my_bucket}/data/phewas/logit_phecode_results.csv')

# Attach the phecode_info annotation columns. pd.merge defaults to an inner
# join on the columns both frames share. Skip it when the file already carries
# a `description` column: joining again would also key on that column and
# could drop rows whose description is still missing (filled in a later cell).
# NOTE(review): assumes `description` is one of the phecode_info columns -
# confirm against pheinfo.csv.
if 'description' not in logit_Phecode_results.columns:
    logit_Phecode_results=pd.merge(phecode_info,logit_Phecode_results)

In [None]:
#phewas results with updated phecode descriptions
# Join against the de-duplicated `phecodes` table, reduced to one row per
# PheCode, rather than the raw `phecodes_up` mapping. `phecodes_up` needs a
# drop_duplicates() to become phecode-level, so it presumably holds several
# rows per PheCode; a left join against it would repeat every PheWAS result
# row once per matching mapping row.
# NOTE(review): this drops any `phecodes_up` columns other than PheCode,
# Phenotype, Excl. Phecodes and Excl. Phenotypes from `logit2` - confirm none
# of them are needed downstream.
logit2 = pd.merge(
    logit_Phecode_results,
    phecodes.drop_duplicates(subset='PheCode'),
    left_on='phecode',
    right_on='PheCode',
    how='left',
)

In [None]:
# Back-fill: rows that have no `description` take the `Phenotype` text from the
# joined mapping columns. pandas aligns the right-hand Series on the row index,
# so only the masked rows are written.
logit2.loc[logit2['description'].isna(), 'description'] = logit2['Phenotype']

In [None]:
# Export the annotated results. The path is relative, so the file lands in the
# kernel's current working directory rather than in the workspace bucket, and
# the DataFrame index is written as the first column (to_csv default).
logit2.to_csv('logit_results_to_update.csv')

In [None]:
#updated colors and groups for previously undescribed phecodes
# Rebinds `logit_Phecode_results`: from here on the name refers to this file,
# not to the frame loaded and merged in the earlier cells. This is the frame
# passed to Manhattan_Plot_Plus below.
logit_Phecode_results=pd.read_csv(f'{my_bucket}/data/phewas/phecode_results_full_v2_2.csv')

In [None]:
def _top_labelled_hits(hits, p_threshold, n_labels):
    """Return the `n_labels` strongest finite hits with p_value below `p_threshold`."""
    significant = hits[hits["p_value"] < p_threshold]
    # Drop infinite -log10(p) values BEFORE ranking. They sort to the top, so
    # removing them after the cut (as the code used to) let them consume label
    # slots and left fewer than `n_labels` annotations on the plot.
    finite = significant[~np.isinf(significant["neg_p_log_10"])]
    ranked = finite.sort_values("neg_p_log_10", ascending=False)
    return ranked.head(n_labels)[["code_val", "neg_p_log_10", "description"]]


def Manhattan_Plot_Plus(results, group = "all", n_tests = None):
    """
    Draw a PheWAS Manhattan plot on a new 15x8 matplotlib figure.

    Points with ``beta_ind >= 0`` are drawn as '^' and points with
    ``beta_ind < 0`` as 'v'. Horizontal lines mark p = 0.05 (red) and the
    Bonferroni threshold 0.05 / n_tests (green). The 15 strongest positive and
    10 strongest negative associations that pass the Bonferroni threshold are
    annotated with their description.
    ======================================================================================================
    results: DataFrame with the columns "color", "group", "beta_ind",
             "code_val", "neg_p_log_10", "p_value" and "description".
             It is not modified; the function works on a copy.
    group:   "all" (default) to plot everything, a single group name
             (e.g. "neoplasms"), or a list/tuple/set of group names.
    n_tests: number of tests used for the Bonferroni correction. Defaults to
             the number of unique PheCode values in the notebook-level
             `phecodes` frame.

    Returns None. The figure stays the current pyplot figure, so a following
    plt.savefig(...) writes it.

    Raises ValueError if the selection contains no rows.
    """
    # Imported here so the cell works even if the notebook's import cell does
    # not provide Line2D.
    from matplotlib.lines import Line2D

    # Colour names that the plotting code translates to matplotlib names.
    color_translation = {
        "darkorange1": "orange",
        "darkseagreen4": "darkgreen",
        "coral4": "coral",
        "chartreuse4": "chartreuse",
        "royalblue4": "royalblue",
        "gray50": "gray",
    }

    # Work on a copy: the function used to alias `results` and rewrite its
    # "color" column in place, silently changing the caller's DataFrame.
    plot_df = results.copy()
    for source_name, mpl_name in color_translation.items():
        plot_df.loc[plot_df["color"] == source_name, "color"] = mpl_name

    # subset to particular group(s)
    if isinstance(group, str):
        if group != "all":
            plot_df = plot_df[plot_df["group"] == group]
    else:
        plot_df = plot_df[plot_df["group"].isin(list(group))]

    if plot_df.empty:
        # Fail early with a readable message instead of a NaN axis-limit error.
        raise ValueError(f"No PheWAS results to plot for group={group!r}")

    if n_tests is None:
        # Count NaN as a value too, matching Series.unique().shape[0].
        n_tests = phecodes["PheCode"].nunique(dropna=False)
    benf_corr = .05 / n_tests

    fig, ax = plt.subplots(figsize=(15,8))
    pos_beta = plot_df[plot_df["beta_ind"] >= 0]
    neg_beta = plot_df[plot_df["beta_ind"] < 0]

    ax.scatter(pos_beta["code_val"], pos_beta["neg_p_log_10"], c=pos_beta["color"], marker='^')
    ax.scatter(neg_beta["code_val"], neg_beta["neg_p_log_10"], c=neg_beta["color"], marker='v')

    x_min = plot_df["code_val"].min()
    x_max = plot_df["code_val"].max()
    ax.hlines(-np.log10(.05), 0, x_max + 1, colors='r', label="0.05")
    # The label reports the denominator actually used (it was hard-coded to 1847).
    ax.hlines(-np.log10(benf_corr), 0, x_max + 1, colors='g',
              label=f"Bonferroni Threshold (0.05/{n_tests})")

    # One x tick per group, placed at the mean code_val of that group.
    group_ticks = plot_df[["code_val", "group"]].groupby("group", as_index=False).mean()

    # reshape the final plot to just fit the phecodes in the subgroup
    ax.set_xlim(x_min, x_max)
    ax.set_xticks(group_ticks["code_val"])
    ax.set_xticklabels(group_ticks["group"], rotation=45, ha="right")

    pos_beta_top = _top_labelled_hits(pos_beta, benf_corr, 15)
    neg_beta_top = _top_labelled_hits(neg_beta, benf_corr, 10)
    for top_hits in (pos_beta_top, neg_beta_top):
        for _, hit in top_hits.iterrows():
            ax.annotate(hit["description"], (hit["code_val"], hit["neg_p_log_10"]))

    # add legend elements
    legend_elements = [
        Line2D([0], [0], color='g', lw=4, label='Bonferroni Correction'),
        Line2D([0], [0], color='r', lw=4, label='Nominal Significance Level'),
        Line2D([0], [0], marker='v', label='Protective Effect',
               markerfacecolor='b', markersize=15),
        Line2D([0], [0], marker='^', label='Non-Protective Effect',
               markerfacecolor='b', markersize=15),
    ]
    ax.legend(handles=legend_elements, loc='upper right')
    ax.set_ylabel(r'$-\log_{10}$(p-value)')

In [None]:
# Draw the Manhattan plot for all groups. The function creates the figure with
# plt.subplots, so it is the current pyplot figure when savefig runs below;
# keep both statements in the same cell.
Manhattan_Plot_Plus(logit_Phecode_results)
# Write the current figure as a PDF into the kernel's working directory
# (relative path), with the bounding box tightened around the drawn content.
plt.savefig('phewas_plot.pdf', bbox_inches='tight',format="pdf")