In [None]:
import pandas as pd
import numpy as np
import seaborn as sns

import matplotlib.pyplot as plt

from ukbb_recessive.regression.regressions import sci_notation, plot_errorbar_grouped, plot_errorbar_grouped_transposed

from matplotlib import font_manager
import matplotlib
from matplotlib.cm import get_cmap

In [None]:
# Seaborn theme: white-grid background with thinner-than-default lines.
sns.set(rc={"lines.linewidth": 0.7}, style='whitegrid')

# Register every font file found under the project font directory with
# matplotlib's font manager so it can be selected by family name afterwards.
font_dirs = ['../../../../data/fonts']
font_files = font_manager.findSystemFonts(fontpaths=font_dirs)

for font_path in font_files:
    font_manager.fontManager.addfont(font_path)
    print(f"Added: {font_path}")

In [None]:
# Font sizes (points) used throughout the figure.
SMALL_SIZE = 5
MEDIUM_SIZE = 6
BIGGER_SIZE = 7

# All matplotlib defaults for this notebook are set in one place.
plt.rcParams.update({
    # default text size and font family
    'font.size': SMALL_SIZE,
    'font.family': 'Arimo',
    # axes title and x/y label sizes
    'axes.titlesize': SMALL_SIZE,
    'axes.labelsize': SMALL_SIZE,
    # tick label sizes
    'xtick.labelsize': SMALL_SIZE,
    'ytick.labelsize': SMALL_SIZE,
    # legend and figure-title sizes
    'legend.fontsize': SMALL_SIZE,
    'figure.titlesize': MEDIUM_SIZE,
    # render text with matplotlib's own engine rather than LaTeX
    'text.usetex': False,
    # font type 42 for PDF/PS output (TrueType embedding per the matplotlib
    # docs, which keeps text editable in vector editors)
    'pdf.fonttype': 42,
    'ps.fonttype': 42,
})

# Load data

In [None]:
# Per-panel counts for the two cohorts (tab-separated despite the .csv suffix).
cr_pie = pd.read_csv("../../../../data/tables/CR_pie_data.csv", sep='\t')

# Ratio of first-cousin counts to non-consanguineous counts, used only to
# order the rows (largest ratio first).
# NOTE(review): the 2.1 added to the denominator is undocumented — presumably
# a pseudo-count that also guards against division by zero; confirm.
non_consanguineous_offset = cr_pie['Non-consanguineous'] + 2.1
cr_pie['rel'] = cr_pie['First cousins'] / non_consanguineous_offset

cr_pie = cr_pie.sort_values('rel', ascending=False)

cr_pie

In [None]:
# Flag rows belonging to the 'ID' panel (1) versus every other panel (0).
is_id_panel = cr_pie['Panel'] == 'ID'
cr_pie['ID'] = is_id_panel.astype(int)

# Collapse the table to two rows (ID vs. non-ID) by summing the remaining
# columns, with the ID row listed first.
cr_pie_id = (
    cr_pie
    .drop(columns='Panel')
    .groupby('ID')
    .sum()
    .reset_index()
    .sort_values(by='ID', ascending=False)
)

cr_pie_id

In [None]:
def print_vals(pct, allvals):
    """Convert a pie-chart percentage back into an absolute count label.

    Parameters
    ----------
    pct : float
        Percentage (0-100) of the total represented by one wedge.
    allvals : array-like of numbers
        All values shown in the pie; their sum is the total.

    Returns
    -------
    str
        The wedge's absolute value, rounded to the nearest integer and
        formatted as a decimal string.
    """
    total = np.sum(allvals)
    rounded = np.round(pct / 100. * total)
    return str(int(rounded))

In [None]:
# Allele-frequency table behind the scatter plot.
AF_TABLE_PATH = "../../../../data/tables/figure_3b.csv"
af_df = pd.read_csv(AF_TABLE_PATH)

# Rename the columns positionally to the labels used on the plot axes
# (raises if the file does not have exactly three columns).
af_df.columns = ['Gene set', 'UK Biobank AF', 'Dutch cohort AF']

af_df.head(2)

# Plot

In [None]:
# Diverging 13-step palette, written here from blue through light grey to red.
_BLUE_TO_RED = (
    "#176d8f", "#49829f", "#6d97af", "#8fadbf", "#afc3cf", "#d0d9e0",
    "#f1f1f1",
    "#f1d4d4", "#f0b8b8", "#ec9c9d", "#e67f83", "#de6069", "#d43d51",
)

# The plots consume the palette in the opposite order (red end first).
colours = list(reversed(_BLUE_TO_RED))

In [None]:
import matplotlib.ticker as ticker  # NOTE(review): unused in this cell; kept in case other cells rely on it

# Figure 4 layout: the top sub-figure holds two donut charts plus a legend
# column, the bottom sub-figure holds the allele-frequency comparison.
# Every drawing call below targets an explicit Axes/Figure object rather than
# pyplot's implicit "current" axes, so the cell does not depend on hidden state.
cm = 1/2.54  # centimeters in inches
k = 1.       # scale factor applied to both figure dimensions
fig = plt.figure(constrained_layout=True, figsize=(12*cm*k, 9*cm*k))

subfigs = fig.subfigures(nrows=2, ncols=1, height_ratios=[1,0.8],  wspace=0.07)

# ---------------------------------------------------------------- top row ---
pie_axes = subfigs[0].subplots(1, 3, gridspec_kw={'width_ratios': [1, 1, 0.4]})
size = 0.4  # ring width shared by the inner and outer donuts

# Outer ring colouring: only the first row of cr_pie is coloured, every other
# wedge is white, so the ring acts as a highlight for that first row.
id_colors = colours[:1] + ['white']*(cr_pie.shape[0]-1)


def _draw_donut(target_ax, column):
    """Draw the highlight ring and the per-panel donut for one cr_pie column.

    The column name doubles as the axes title. Returns the
    (wedges, texts, autotexts) tuple of the inner, count-labelled donut.
    """
    counts = cr_pie[column]
    target_ax.pie(counts, radius=1.1, colors=id_colors, counterclock=False, startangle=90,
                  wedgeprops=dict(width=size, edgecolor='w'))
    inner = target_ax.pie(counts.values, radius=1, colors=colours, counterclock=False, startangle=90,
                          wedgeprops=dict(width=size, edgecolor='w'),
                          # show absolute counts instead of percentages
                          autopct=lambda pct: print_vals(pct, counts.values), pctdistance=0.85)
    target_ax.set_title(column)
    return inner


patches, texts, _autotexts = _draw_donut(pie_axes[0], 'Non-consanguineous')
_draw_donut(pie_axes[1], 'First cousins')

# The third axes only hosts the legend (built from the first donut's wedges).
legend_kwargs = {
    "bbox_to_anchor": (0, 0.2, 1, .102),
    "frameon": False,
    "mode": "expand",
    "ncol": 1,
    "labelspacing": 0.1,
    "markerfirst": False,
    'fontsize': SMALL_SIZE
}

pie_axes[2].legend(patches, cr_pie['Panel'].values, loc="lower left", **legend_kwargs)
pie_axes[2].axis('off')

# ------------------------------------------------------------- bottom row ---
ax = subfigs[1].subplots(1, 1)

sns.regplot(data=af_df, x='UK Biobank AF', y='Dutch cohort AF', marker='D',
            scatter_kws={"color": colours[1], 's': 2}, color=colours[-2], ax=ax)

# Hand-tuned label nudges that keep annotations from overlapping.
# Each entry is (substring of the gene-set label, shift in UK Biobank AF,
# shift in Dutch cohort AF); every matching entry is applied, in order.
label_nudges = [
    ('Blindness',    0.0,     -0.000005),
    ('Cilia',        0.00001,  0.00001),
    ('Multi',        0.00001,  0.00003),
    ('Derma',        0.0,      0.000005),
    ('Metabolic-ID', 0.0,      0.000005),
    ('Deafness',     0.0,      0.000025),
]

for gene_set, ukb_af, dutch_af in af_df.values:
    for fragment, ukb_shift, dutch_shift in label_nudges:
        if fragment in gene_set:
            ukb_af += ukb_shift
            dutch_af += dutch_shift

    # Horizontal label offset; the x-axis is inverted further down, so a
    # positive offset places the label to the left of its marker.
    if (ukb_af >= 0.00045) and ('Deafness' not in gene_set):
        x_offset = 0.0001
    elif "ID (2.9" in gene_set:
        x_offset = 0.00006
    else:
        x_offset = -0.000005

    ax.annotate(gene_set, (ukb_af + x_offset, dutch_af - 0.000005))

ax.set_xlim([0.0001, 0.0009])

# Both axes run from high to low allele frequency.
ax.invert_xaxis()
ax.invert_yaxis()

ax.tick_params(axis='both', labelsize=MEDIUM_SIZE)
ax.grid(False)

# Customize spines: keep only black left/bottom spines.
ax.spines['left'].set_color('black')
ax.spines['bottom'].set_color('black')
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)

# Add ticks on the visible spines only.
ax.yaxis.set_ticks_position('left')
ax.xaxis.set_ticks_position('bottom')
ax.tick_params(which='major', width=1.00, length=2.5)
ax.tick_params(which='minor', width=0.75, length=1.25)

fig.savefig("../../../../data/plots/figure_4.pdf", format="pdf", bbox_inches="tight")