In [None]:
import glob

import matplotlib
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib import font_manager

# matplotlib.cm.get_cmap was removed in matplotlib 3.9. Fall back to the pyplot
# function of the same name so this cell still runs on newer versions.
try:
    from matplotlib.cm import get_cmap
except ImportError:
    from matplotlib.pyplot import get_cmap

from ukbb_recessive.regression.regressions import sci_notation, get_plot_data, plot_errorbar_grouped
from ukbb_recessive.data_collection.variants import VariantFeatures

# Global seaborn theme; individual axes are restyled later in the notebook
sns.set_style("whitegrid")


In [None]:
# Register the fonts shipped in the project's data directory with matplotlib
font_dirs = ['../../../../data/fonts']
# Sorted so fonts are registered (and logged) in the same order on every run
font_files = sorted(font_manager.findSystemFonts(fontpaths=font_dirs))

if not font_files:
    # The directory is relative to the working directory, so an empty result
    # usually means the notebook was started from a different location.
    print("WARNING: no font files found in", font_dirs)

for font_file in font_files:
    font_manager.fontManager.addfont(font_file)
    print ("Added:", font_file)

In [None]:
# Font sizes (in points) shared by all figure elements
SMALL_SIZE = 5
MEDIUM_SIZE = 6
BIGGER_SIZE = 7

# Apply every global matplotlib setting in one place
matplotlib.rcParams.update({
    'font.size': BIGGER_SIZE,    # default text size
    'font.family': 'Arimo',      # default font family
    'text.usetex': False,        # render text with matplotlib, not LaTeX
    'pdf.fonttype': 42,          # font type used when writing PDF output
    'ps.fonttype': 42,           # font type used when writing PostScript output
})

In [None]:

def configure_axis(ax, ytick_size=MEDIUM_SIZE, xtick_size=MEDIUM_SIZE, xlabel_size=MEDIUM_SIZE, ylabel_size=MEDIUM_SIZE, x_label=None, y_label=None):
    """Apply the notebook's common axis styling to a matplotlib Axes.

    Disables the grid, keeps only black left/bottom spines, sets tick label
    sizes and tick mark dimensions, and sets both axis labels.

    Parameters
    ----------
    ax : matplotlib Axes to style (modified in place).
    ytick_size, xtick_size : font sizes of the y/x tick labels.
    xlabel_size, ylabel_size : font sizes of the x/y axis labels.
    x_label, y_label : axis label texts; both labels are always set, also
        when left at the default ``None``.
    """
    # No background grid
    ax.grid(False)

    # Visible black spines on the left and bottom, nothing on the top and right
    for visible_side in ('left', 'bottom'):
        ax.spines[visible_side].set_color('black')
    for hidden_side in ('top', 'right'):
        ax.spines[hidden_side].set_visible(False)

    # Tick label sizes per axis
    for axis_name, label_size in (('y', ytick_size), ('x', xtick_size)):
        ax.tick_params(axis=axis_name, labelsize=label_size)

    # Tick marks only on the visible spines
    ax.yaxis.set_ticks_position('left')
    ax.xaxis.set_ticks_position('bottom')
    for tick_kind, tick_width, tick_length in (('major', 1.00, 2.5), ('minor', 0.75, 1.25)):
        ax.tick_params(which=tick_kind, width=tick_width, length=tick_length)

    # Axis labels
    ax.set_xlabel(x_label, size=xlabel_size)
    ax.set_ylabel(y_label, size=ylabel_size)

# Load data

In [None]:
# Read the raw regression table. The workbook is opened as a context manager
# so the file handle is closed as soon as the sheet has been read.
with pd.ExcelFile('../../../../data/tables/table_panel_regressions_on_s_het_panels.xlsx') as reader:
    # Two header rows form a (dataset, field) column MultiIndex; the row right
    # after the header is skipped (presumably the index-name row - TODO confirm).
    all_results_df = pd.read_excel(reader, sheet_name="Raw data", header=[0, 1], skiprows=[2])

# The first column is dropped (presumably the saved row index - TODO confirm)
all_results_df = all_results_df.drop(all_results_df.columns[0], axis=1)

# Prettify p-values: one 'p_value_pretty' column per top-level column group.
# The new keys are derived from the selected p_value columns themselves, so the
# positional assignment below cannot pair a value with the wrong group.
p_value_columns = all_results_df.loc[:, (slice(None), 'p_value')]
new_columns = [(level0, 'p_value_pretty') for level0, _ in p_value_columns.columns]
# Element-wise formatting via Series.map (DataFrame.applymap is deprecated since pandas 2.1)
all_results_df[new_columns] = p_value_columns.apply(lambda column: column.map(sci_notation))

# Leave s_het effects only; rows with a missing feature name are dropped
# instead of raising on a non-boolean mask.
is_s_het_row = all_results_df[all_results_df.columns[0]].str.contains('s_het', na=False)
all_results_df = all_results_df[is_s_het_row]

# Keep only the 'Weghorn' column group; this drops the top column level
all_results_df = all_results_df['Weghorn']

In [None]:
# Display names for the regression targets shown in the figure
renaming_dict_target = {
    'any_education_including_none': "Any education",
    'is_blond': 'Hair color',
    'childlessness': 'Childlessness'
}

# Display names for the gene-panel s_het features
renaming_dict_panel = {
    's_het_recessive_Blindness': 'Blindness',
    's_het_recessive_Cilia_Kidney': 'Cilia + Kidney',
    's_het_recessive_Deafness': 'Deafness',
    's_het_recessive_Derm': 'Dermatologic',
    's_het_recessive_Endocrine': 'Endocrine',
    's_het_recessive_Hematologic': 'Hematologic',
    's_het_recessive_ID_total': 'ID',
    's_het_recessive_Immune_system': 'Immune system',
    's_het_recessive_Metabolic': 'Metabolic',
    's_het_recessive_Metabolic_ID': 'Metabolic-ID',
    's_het_recessive_Neuromuscular': 'Neuromuscular',
    's_het_recessive_Overlaps': 'Multi-system',
    's_het_recessive_Skeletal_Craniofacial': 'Skeletal'
}

# NOTE: this cell overwrites `all_results_df` (filtered rows, log-scaled values,
# renamed labels), so it is not idempotent - re-run the loading cell before
# running it a second time.

# Keep the targets that have a display name, restricted to gender == 'all'.
# .copy() yields an independent frame, so the column assignments below do not
# write into a slice of the loaded table (avoids SettingWithCopyWarning).
rows_to_keep = (
    all_results_df['target'].isin(list(renaming_dict_target))
    & (all_results_df['gender'] == 'all')
)
all_results_df = all_results_df[rows_to_keep].copy()

# Move the odds ratio and its bounds to the log scale
ratio_columns = ['odds_ratio', 'odds_ratio_lower', 'odds_ratio_upper']
all_results_df[ratio_columns] = np.log(all_results_df[ratio_columns].to_numpy())

# Replace the bounds by their distance from the point estimate
all_results_df['odds_ratio_lower'] = all_results_df['odds_ratio'] - all_results_df['odds_ratio_lower']
all_results_df['odds_ratio_upper'] = all_results_df['odds_ratio_upper'] - all_results_df['odds_ratio']

# Human-readable labels; values without a mapping are kept unchanged
all_results_df['target'] = all_results_df['target'].map(lambda name: renaming_dict_target.get(name, name))
all_results_df['feature'] = all_results_df['feature'].map(lambda name: renaming_dict_panel.get(name, name))

all_results_df = all_results_df.sort_values(by='feature')

In [None]:
# Figure dimensions are given in centimetres and converted to inches
cm = 1/2.54  # inches per centimetre
k = 1.       # overall scale factor applied to the figure size

# Single-panel figure using constrained layout
fig, ax = plt.subplots(1, 1, constrained_layout=True, figsize=(8.9*cm*k, 12*cm*k))

# Order of the target groups and the colour assigned to each of them
group_order = ['Childlessness', 'Any education', 'Hair color']
colors = ["#003f5c", "#bc5090", "#ffa600"]

# Legend options forwarded to the plotting helper
legend_kwargs = dict(
    bbox_to_anchor=(0, 1, 1., .102),
    frameon=False,
    mode="expand",
    ncol=3,
    labelspacing=0.1,
    markerfirst=False,
    fontsize=MEDIUM_SIZE,
)

# One row per feature, with one error bar per target group
plot_errorbar_grouped(
    df=all_results_df,
    axis=ax,
    y_column='feature',
    group_column='target',
    title='',
    ymargin=0.01,
    legend_loc='lower right',
    group_scale=0.2,
    vertical_loc=0,
    group_order=group_order,
    colors=colors,
    legend_kwargs=legend_kwargs,
)

# Apply the shared axis styling and label the x axis
configure_axis(ax, x_label="Effect size (99% CI)")

# Write the figure to disk as a PDF
plt.savefig("../../../../data/plots/figure_2.pdf", format="pdf", bbox_inches="tight")