# Pan-genomes meta analysis
This notebook contains a meta-analysis of previously published pan-genomes.  
Gene PAV matrices were downloaded and parsed in a uniform way to allow direct comparison.

In [None]:
import os
import pandas as pd
import plotly.express as px
import plotly.io as pio
from scipy.stats import ttest_ind

In [None]:
# Use the white-background Plotly template for every figure created below.
pio.templates.default = "plotly_white"
# Colour palette.
# NOTE(review): not referenced by any of the cells reviewed here — confirm before removing.
colors = ['grey','purple','darkgreen','lightblue','orange']

## Paths

In [None]:
# Root directory of the input data: one sub-directory per pan-genome plus PG_meta.tsv.
pg_meta_dir = "/groups/itay_mayrose_nosnap/liorglic/Projects/PGCM/data/PG_meta"

In [None]:
# Output directory for the PDF figures exported by this notebook.
figs_path = "/groups/itay_mayrose_nosnap/liorglic/Projects/PGCM/figs/FINAL"

## Data loading
The input files were downloaded, and in some cases modified, manually.  
The end goal is a simple DF for each PG : | gene | % presence |

In [None]:
def load_soybean_205():
    """Load the soybean (205) gene presence table.

    Reads ``soybean_205/soybean_PAV.csv`` under ``pg_meta_dir``, keeping only
    the ``Gene/Accession`` and ``%`` columns and skipping the last line of
    the file.

    Returns:
        DataFrame indexed by ``gene`` with a single ``presence_perc`` column
        holding the values of the file's ``%`` column.
    """
    in_csv = os.path.join(pg_meta_dir, "soybean_205/soybean_PAV.csv")
    df = pd.read_csv(
        in_csv,
        # Select the index by name so it cannot be confused with '%',
        # whatever the column order in the file is.
        index_col='Gene/Accession',
        usecols=['Gene/Accession', '%'],
        skipfooter=1,
        # skipfooter is only implemented by the Python parser; requesting it
        # explicitly avoids the ParserWarning of the implicit fallback.
        engine='python',
    )
    df.index.name = 'gene'
    df.columns = ['presence_perc']
    return df

def load_B_napus_9():
    """Load the B. napus (9) presence matrix and reduce it to % presence.

    Reads the tab-separated ``B_napus_9/geneclusters_presence.txt`` (first
    column as index) and, per row, divides the row sum by the number of
    columns.

    Returns:
        DataFrame indexed by ``gene`` with a single ``presence_perc`` column.
    """
    path = os.path.join(pg_meta_dir, "B_napus_9/geneclusters_presence.txt")
    pav = pd.read_csv(path, sep='\t', index_col=0)
    n_columns = pav.shape[1]
    result = pd.DataFrame({'presence_perc': pav.sum(axis=1) / n_columns * 100})
    result.index.name = 'gene'
    return result

def load_sunflower():
    """Load the sunflower matrix and reduce it to % presence per gene.

    Reads the tab-separated ``sunflower/mat_CV3.txt`` (first column as
    index). A cell counts as present when its value is greater than 0;
    missing values count as absent.

    Returns:
        DataFrame indexed by ``gene`` with a single ``presence_perc`` column.
    """
    in_tsv = os.path.join(pg_meta_dir, "sunflower/mat_CV3.txt")
    raw = pd.read_csv(in_tsv, sep='\t', index_col=0)
    # Vectorised replacement of the per-cell applymap lambda
    # (DataFrame.applymap is deprecated since pandas 2.1).
    present = raw.fillna(0) > 0
    result = pd.DataFrame(
        {'presence_perc': present.sum(axis=1) / present.shape[1] * 100}
    )
    result.index.name = 'gene'
    return result

def load_tomato():
    """Load the tomato pan-genome from its core and variable gene tables.

    Genes in ``tomato/tomato_variable.csv`` get the row sum divided by the
    number of columns (as a percentage); every gene listed in
    ``tomato/tomato_core.csv`` is assigned 100.

    Returns:
        DataFrame indexed by ``gene`` with a single ``presence_perc`` column,
        core genes first, followed by the variable genes.
    """
    variable_csv = os.path.join(pg_meta_dir, "tomato/tomato_variable.csv")
    core_csv = os.path.join(pg_meta_dir, "tomato/tomato_core.csv")
    var_df = pd.read_csv(variable_csv, index_col=0)
    var_df['presence_perc'] = var_df.sum(axis=1)/var_df.shape[1]*100
    core_df = pd.read_csv(core_csv, index_col=0)
    core_df['presence_perc'] = 100
    # Reduce BOTH tables to presence_perc before concatenating, so that any
    # additional column of the core file cannot leak into the result.
    df = pd.concat([core_df[['presence_perc']], var_df[['presence_perc']]])
    df.index.name = 'gene'
    return df

def load_B_distachyon():
    """Load the B. distachyon matrix and reduce it to % presence per gene.

    Reads the tab-separated ``B_distachyon/pangenome_matrix_published.tsv``
    (first column as index). A cell counts as present when its value is
    greater than 0.

    Returns:
        DataFrame indexed by ``gene`` with a single ``presence_perc`` column.
    """
    in_tsv = os.path.join(pg_meta_dir, "B_distachyon/pangenome_matrix_published.tsv")
    raw = pd.read_csv(in_tsv, sep='\t', index_col=0)
    # Vectorised replacement of the per-cell applymap lambda
    # (DataFrame.applymap is deprecated since pandas 2.1).
    present = raw > 0
    result = pd.DataFrame(
        {'presence_perc': present.sum(axis=1) / present.shape[1] * 100}
    )
    result.index.name = 'gene'
    return result
    
def load_B_napus_53():
    """Load the B. napus (53) PAV table and reduce it to % presence per gene.

    Reads ``B_napus_53/BnaPan.PAV.table.csv`` using the file's fourth column
    as index, discards the first three remaining columns and counts a cell
    as present only when it equals the string ``PRESENT``.

    Returns:
        DataFrame indexed by ``gene`` with a single ``presence_perc`` column.
    """
    in_csv = os.path.join(pg_meta_dir, "B_napus_53/BnaPan.PAV.table.csv")
    raw = pd.read_csv(in_csv, index_col=3)
    calls = raw[raw.columns[3:]]
    # Vectorised replacement of the per-cell applymap lambda
    # (DataFrame.applymap is deprecated since pandas 2.1).
    present = calls == "PRESENT"
    result = pd.DataFrame(
        {'presence_perc': present.sum(axis=1) / present.shape[1] * 100}
    )
    result.index.name = 'gene'
    return result

def load_B_oleracea():
    """Load the B. oleracea PAV VCF and reduce it to % presence per gene.

    Reads the tab-separated ``B_oleracea/BOLEPan.pav.13062016.vcf`` using the
    file's third column as index, discards the first eight remaining columns
    and counts a cell as present only when it equals ``1/1``.

    Returns:
        DataFrame indexed by ``gene`` with a single ``presence_perc`` column.
    """
    in_vcf = os.path.join(pg_meta_dir, "B_oleracea/BOLEPan.pav.13062016.vcf")
    raw = pd.read_csv(in_vcf, sep='\t', index_col=2)
    calls = raw[raw.columns[8:]]
    # Vectorised replacement of the per-cell applymap lambda
    # (DataFrame.applymap is deprecated since pandas 2.1).
    present = calls == "1/1"
    result = pd.DataFrame(
        {'presence_perc': present.sum(axis=1) / present.shape[1] * 100}
    )
    result.index.name = 'gene'
    return result

def load_rice_67():
    """Load the rice (67) table and convert its last column to % presence.

    Reads ``rice_67/rice_PAV.csv`` and divides the values of the file's last
    column by 67, expressed as a percentage.

    Returns:
        DataFrame with a single ``presence_perc`` column. The index is the
        default row index produced by ``read_csv``, named ``gene``.
    """
    table = pd.read_csv(os.path.join(pg_meta_dir, "rice_67/rice_PAV.csv"))
    counts = table[table.columns[-1]]
    result = pd.DataFrame({'presence_perc': counts / 67 * 100})
    result.index.name = 'gene'
    return result

def load_rice_453():
    """Load the rice (453) PAV matrix and reduce it to % presence per gene.

    Reads the tab-separated ``rice_453/GenePAV.txt`` (first column as index)
    and, per row, divides the row sum by the number of columns.

    Returns:
        DataFrame indexed by ``gene`` with a single ``presence_perc`` column.
    """
    path = os.path.join(pg_meta_dir, "rice_453/GenePAV.txt")
    pav = pd.read_csv(path, sep='\t', index_col=0)
    n_columns = pav.shape[1]
    result = pd.DataFrame({'presence_perc': pav.sum(axis=1) / n_columns * 100})
    result.index.name = 'gene'
    return result

def load_pigeon_pea():
    """Load the pigeon pea table and convert accession counts to % presence.

    Reads ``pigeon_pea/pigeon_pea_PAV.csv`` (first column as index) and
    divides the "Number of accessions containing the gene" column by 89,
    expressed as a percentage.

    Returns:
        DataFrame indexed by ``gene`` with a single ``presence_perc`` column.
    """
    table = pd.read_csv(
        os.path.join(pg_meta_dir, "pigeon_pea/pigeon_pea_PAV.csv"), index_col=0
    )
    counts = table["Number of accessions containing the gene"]
    result = pd.DataFrame({'presence_perc': counts / 89 * 100})
    result.index.name = 'gene'
    return result

def load_eggplant():
    """Load the eggplant table and convert accession counts to % presence.

    Reads ``eggplant/eggplant_PAV.csv`` (first column as index) and divides
    the "N accessions present" column by 26, expressed as a percentage.

    Returns:
        DataFrame indexed by ``gene`` with a single ``presence_perc`` column.
    """
    table = pd.read_csv(
        os.path.join(pg_meta_dir, "eggplant/eggplant_PAV.csv"), index_col=0
    )
    counts = table["N accessions present"]
    result = pd.DataFrame({'presence_perc': counts / 26 * 100})
    result.index.name = 'gene'
    return result
    
def load_apple():
    """Load the apple pan-genome from its core and variable gene tables.

    Genes in ``apple/apple_variable.csv`` get the row sum divided by the
    number of columns (as a percentage); every gene listed in
    ``apple/apple_core.csv`` is assigned 100.

    Returns:
        DataFrame indexed by ``gene`` with a single ``presence_perc`` column,
        core genes first, followed by the variable genes.
    """
    variable_csv = os.path.join(pg_meta_dir, "apple/apple_variable.csv")
    core_csv = os.path.join(pg_meta_dir, "apple/apple_core.csv")
    var_df = pd.read_csv(variable_csv, index_col=0)
    var_df['presence_perc'] = var_df.sum(axis=1)/var_df.shape[1]*100
    core_df = pd.read_csv(core_csv, index_col=0)
    core_df['presence_perc'] = 100
    # Reduce BOTH tables to presence_perc before concatenating, so that any
    # additional column of the core file cannot leak into the result.
    df = pd.concat([core_df[['presence_perc']], var_df[['presence_perc']]])
    df.index.name = 'gene'
    return df

def load_M_truncatula():
    """Load the M. truncatula syntelog table and reduce it to % presence.

    Reads the tab-separated ``M_truncatula/21.pan16.syntelog.tsv`` (first
    column as index). A cell counts as absent only when it equals ``-``;
    every other value counts as present.

    Returns:
        DataFrame indexed by ``gene`` with a single ``presence_perc`` column.
    """
    in_tsv = os.path.join(pg_meta_dir, "M_truncatula/21.pan16.syntelog.tsv")
    raw = pd.read_csv(in_tsv, sep='\t', index_col=0)
    # Vectorised replacement of the per-cell applymap lambda
    # (DataFrame.applymap is deprecated since pandas 2.1).
    present = raw != '-'
    result = pd.DataFrame(
        {'presence_perc': present.sum(axis=1) / present.shape[1] * 100}
    )
    result.index.name = 'gene'
    return result

def load_maize():
    """Load the maize pan-gene matrix and reduce it to % presence per gene.

    Reads columns 3-29 (0-based) of ``maize/pan_gene_matrix_v3_cyverse.csv``,
    using the first of them as index. A cell counts as absent when it is
    missing or equal to 0; every other value counts as present.

    Returns:
        DataFrame indexed by ``gene`` with a single ``presence_perc`` column.
    """
    in_csv = os.path.join(pg_meta_dir, "maize/pan_gene_matrix_v3_cyverse.csv")
    raw = pd.read_csv(in_csv, index_col=0, usecols=list(range(3,30)))
    # Vectorised replacement of the per-cell applymap lambda
    # (DataFrame.applymap is deprecated since pandas 2.1).
    present = raw.fillna(0) != 0
    result = pd.DataFrame(
        {'presence_perc': present.sum(axis=1) / present.shape[1] * 100}
    )
    result.index.name = 'gene'
    return result

def load_cucumber():
    """Load the cucumber table and reduce it to % presence per gene.

    Reads ``cucumber/331122_1_data_set_6054985_r2g6vg.csv`` (first column as
    index). A cell counts as absent when it equals ``-`` or ``UA``; every
    other value counts as present.

    Returns:
        DataFrame indexed by ``gene`` with a single ``presence_perc`` column.
    """
    in_csv = os.path.join(pg_meta_dir, "cucumber/331122_1_data_set_6054985_r2g6vg.csv")
    raw = pd.read_csv(in_csv, index_col=0)
    # Vectorised replacement of the per-cell applymap lambda
    # (DataFrame.applymap is deprecated since pandas 2.1).
    present = ~raw.isin(['-', 'UA'])
    result = pd.DataFrame(
        {'presence_perc': present.sum(axis=1) / present.shape[1] * 100}
    )
    result.index.name = 'gene'
    return result

In [None]:
# Metadata table with one row per pan-genome. The cells below read its
# 'Pan-genome', 'Approach', 'Accessions', 'Nonreference_pan_genes' and
# 'Total_pan_genes' columns.
pg_meta_df = pd.read_csv(os.path.join(pg_meta_dir, "PG_meta.tsv"), sep='\t')

## Pan-genome stats
Calculate statistics describing the composition of each pan-genome.

In [None]:
# Map each pan-genome name to its presence DF. The loader is looked up by
# name: a pan-genome called X in PG_meta.tsv is loaded by load_X().
pg_dict = {}
for pg in pg_meta_df['Pan-genome']:
    pg_dict[pg] = globals()['load_' + pg]()

In [None]:
def perc_core(pg_df, c=95):
    """Return the % of rows of pg_df whose presence_perc is at least c.

    c is the minimal presence % for a gene to count as core.
    """
    is_core = pg_df['presence_perc'] >= c
    n_core = int(is_core.sum())
    n_total = len(pg_df)
    return n_core / n_total * 100

def mean_presence(pg_df):
    """Return the mean of the presence_perc column (mean % presence across genes)."""
    presence = pg_df['presence_perc']
    return presence.mean()

In [None]:
# Overall occupancy of every pan-genome, in the row order of pg_meta_df.
pg_meta_df['mean_presence'] = [mean_presence(pg_dict[name]) for name in pg_meta_df['Pan-genome']]

In [None]:
# One column per core threshold: core80, core85, core90, core95 and core100.
for c in range(80,105,5):
    pg_meta_df['core'+str(c)] = [perc_core(pg_dict[name], c=c) for name in pg_meta_df['Pan-genome']]

In [None]:
# Replace the approach codes by the labels used in the queries and figures below.
# NOTE: Series.map turns every value that is not a key of the dict into NaN, so
# running this cell a second time (on already-mapped labels) blanks the column.
pg_meta_df['Approach'] = pg_meta_df['Approach'].map({'DN': 'De novo', 'MTP': 'Map-to-pan'})
pg_meta_df

## Analyze

### Number of accessions

In [None]:
# Number of accessions per approach: group values and group means.
dn_vals = pg_meta_df.query('Approach == "De novo"')['Accessions']
mean_dn = dn_vals.mean()
mtp_vals = pg_meta_df.query('Approach == "Map-to-pan"')['Accessions']
mean_mtp = mtp_vals.mean()
print("Mean MTP: %s\nMean DN: %s" %(mean_mtp, mean_dn))
# Two-sample t-test between the groups, using scipy's default settings.
print(ttest_ind(a=mtp_vals, b=dn_vals))

In [None]:
# Strip plot of the number of accessions per approach, with a dotted line at
# each group mean.
fig = px.strip(pg_meta_df, x='Approach', y="Accessions",
               hover_data=pg_meta_df.columns, color='Approach',
              color_discrete_sequence=['lightsalmon','mediumseagreen'])
# The mean markers are centred on the categorical x positions 0 and 1, each
# extending 0.07 to either side.
# NOTE(review): this assumes Map-to-pan is drawn at x=0 and De novo at x=1 — confirm.
fig.add_shape(type='line', x0=-0.07, x1=0.07, y0=mean_mtp, y1=mean_mtp, line=dict(dash='dot'))
fig.add_shape(type='line', x0=0.93, x1=1.07, y0=mean_dn, y1=mean_dn, line=dict(dash='dot'))

fig.update_xaxes(mirror=True, showline=True, linecolor='black', title='')
fig.update_yaxes(mirror=True, showline=True, linecolor='black', showgrid=False)
fig.update_layout(autosize=False, width=400, legend_title_text='')
fig.show()

In [None]:
# Save the current figure as fig4a.pdf under figs_path.
fig4_a = os.path.join(figs_path, 'fig4a.pdf')
fig.write_image(fig4_a)

### Pan-genome stats

In [None]:
# % core genes against overall occupancy, with a single OLS trendline fitted
# across both approaches.
fig = px.scatter(pg_meta_df, x='mean_presence', y='core95', color='Approach',
                 trendline="ols", trendline_scope="overall",
                 trendline_color_override='black',
                color_discrete_sequence=['lightsalmon','mediumseagreen'])

# Thin the trendline and hide it from the legend. The trendline is addressed
# as the last trace rather than by a fixed index, which would be wrong (or
# raise IndexError) whenever the number of approach groups is not exactly two.
fig.data[-1].update(line_width=1, showlegend=False)

fig.update_xaxes(mirror=True, showline=True, linecolor='black', showgrid=False, title='Overall occupancy (%)')
fig.update_yaxes(mirror=True, showline=True, linecolor='black', showgrid=False, title='% core pan-genes', range=(0,100))
fig.update_layout(legend_title_text='')

fig.show()

In [None]:
# Save the current figure as figS3a.pdf under figs_path.
fig3s_a = os.path.join(figs_path, 'figS3a.pdf')
fig.write_image(fig3s_a)

In [None]:
# R^2 of the OLS trendline of the current figure; the bare expression on the
# last line is shown as the cell output.
print("R^2:")
px.get_trendline_results(fig).px_fit_results.iloc[0].rsquared

In [None]:
# % core genes (core95) per approach: group values and group means.
dn_vals = pg_meta_df.query('Approach == "De novo"')['core95']
mean_dn = dn_vals.mean()
mtp_vals = pg_meta_df.query('Approach == "Map-to-pan"')['core95']
mean_mtp = mtp_vals.mean()

#### Core pan-genes

In [None]:
# Report the group means and a two-sample t-test (scipy defaults) for the
# dn_vals / mtp_vals currently in memory.
print("Mean MTP: %s\nMean DN: %s" %(mean_mtp, mean_dn))
print(ttest_ind(a=mtp_vals, b=dn_vals))

In [None]:
# Strip plot of % core genes per approach, with a dotted line at each group mean.
fig = px.strip(pg_meta_df, x='Approach', y="core95",
               hover_data=pg_meta_df.columns, color='Approach',
              color_discrete_sequence=['lightsalmon','mediumseagreen'])
# Mean markers are centred on the categorical x positions 0 and 1.
# NOTE(review): this assumes Map-to-pan is drawn at x=0 and De novo at x=1 — confirm.
fig.add_shape(type='line', x0=-0.05, x1=0.05, y0=mean_mtp, y1=mean_mtp, line=dict(dash='dot'))
fig.add_shape(type='line', x0=0.95, x1=1.05, y0=mean_dn, y1=mean_dn, line=dict(dash='dot'))

fig.update_xaxes(mirror=True, showline=True, linecolor='black', showgrid=False, title='')
fig.update_yaxes(mirror=True, showline=True, linecolor='black', showgrid=False, title='% core pan-genes', range=(0,100))
fig.update_layout(legend_title_text='', autosize=False, width=400)

fig.show()

In [None]:
# Save the current figure as fig4b.pdf under figs_path.
fig4_b = os.path.join(figs_path, 'fig4b.pdf')
fig.write_image(fig4_b)

In [None]:
# % core genes against the number of accessions, with a single OLS trendline
# fitted across both approaches.
fig = px.scatter(pg_meta_df, x='Accessions',y='core95', color='Approach',
                trendline="ols", trendline_scope="overall",
                 trendline_color_override='black',
                color_discrete_sequence=['lightsalmon','mediumseagreen'])

# Thin the trendline and hide it from the legend. The trendline is addressed
# as the last trace rather than by a fixed index, which would be wrong (or
# raise IndexError) whenever the number of approach groups is not exactly two.
fig.data[-1].update(line_width=1, showlegend=False)

fig.update_xaxes(mirror=True, showline=True, linecolor='black', showgrid=False, title='Accessions', zeroline=False)
fig.update_yaxes(mirror=True, showline=True, linecolor='black', showgrid=False, title='% core pan-genes', range=(0,100))
fig.update_layout(legend_title_text='',)

fig.show()

In [None]:
# Save the current figure as figS3b.pdf under figs_path.
fig3s_b = os.path.join(figs_path, 'figS3b.pdf')
fig.write_image(fig3s_b)

In [None]:
# R^2 of the OLS trendline of the current figure; the bare expression on the
# last line is shown as the cell output.
print("R^2:")
px.get_trendline_results(fig).px_fit_results.iloc[0].rsquared

#### Overall occupancy

In [None]:
# Overall occupancy (mean_presence) per approach: group values and group means.
dn_vals = pg_meta_df.query('Approach == "De novo"')['mean_presence']
mean_dn = dn_vals.mean()
mtp_vals = pg_meta_df.query('Approach == "Map-to-pan"')['mean_presence']
mean_mtp = mtp_vals.mean()

print("Mean MTP: %s\nMean DN: %s" %(mean_mtp, mean_dn))
# Two-sample t-test between the groups, using scipy's default settings.
print(ttest_ind(a=mtp_vals, b=dn_vals))

In [None]:
# Strip plot of overall occupancy per approach, with a dotted line at each group mean.
fig = px.strip(pg_meta_df, x='Approach', y="mean_presence",
               hover_data=pg_meta_df.columns, color='Approach',
              color_discrete_sequence=['lightsalmon','mediumseagreen'])
# Mean markers are centred on the categorical x positions 0 and 1.
# NOTE(review): this assumes Map-to-pan is drawn at x=0 and De novo at x=1 — confirm.
fig.add_shape(type='line', x0=-0.05, x1=0.05, y0=mean_mtp, y1=mean_mtp, line=dict(dash='dot'))
fig.add_shape(type='line', x0=0.95, x1=1.05, y0=mean_dn, y1=mean_dn, line=dict(dash='dot'))

fig.update_xaxes(mirror=True, showline=True, linecolor='black', showgrid=False, title='')
fig.update_yaxes(mirror=True, showline=True, linecolor='black', showgrid=False, title='Overall occupancy (%)', range=(0,100))
fig.update_layout(legend_title_text='', autosize=False, width=400)

fig.show()

In [None]:
# Save the current figure as fig4c.pdf under figs_path.
fig4_c = os.path.join(figs_path, 'fig4c.pdf')
fig.write_image(fig4_c)

In [None]:
# Overall occupancy against the number of accessions, with a single OLS
# trendline fitted across both approaches.
fig = px.scatter(pg_meta_df, x='Accessions',y='mean_presence', color='Approach',
                trendline="ols", trendline_scope="overall",
                 trendline_color_override='black',
                color_discrete_sequence=['lightsalmon','mediumseagreen'])

# Thin the trendline and hide it from the legend. The trendline is addressed
# as the last trace rather than by a fixed index, which would be wrong (or
# raise IndexError) whenever the number of approach groups is not exactly two.
fig.data[-1].update(line_width=1, showlegend=False)

fig.update_xaxes(mirror=True, showline=True, linecolor='black', showgrid=False, title='Accessions', zeroline=False)
fig.update_yaxes(mirror=True, showline=True, linecolor='black', showgrid=False, title='Overall occupancy (%)', range=(0,100))
fig.update_layout(legend_title_text='',)

fig.show()

In [None]:
# Save the current figure as figS3c.pdf under figs_path.
fig3s_c = os.path.join(figs_path, 'figS3c.pdf')
fig.write_image(fig3s_c)

In [None]:
# R^2 of the OLS trendline of the current figure; the bare expression on the
# last line is shown as the cell output.
print("R^2:")
px.get_trendline_results(fig).px_fit_results.iloc[0].rsquared

#### Nonreference genes

In [None]:
# Nonreference pan-genes as a percentage of all pan-genes, per pan-genome.
pg_meta_df['perc_nonref'] = pg_meta_df['Nonreference_pan_genes']/pg_meta_df['Total_pan_genes']*100

In [None]:
# % nonreference genes (perc_nonref) per approach: group values and group means.
dn_vals = pg_meta_df.query('Approach == "De novo"')['perc_nonref']
mean_dn = dn_vals.mean()
mtp_vals = pg_meta_df.query('Approach == "Map-to-pan"')['perc_nonref']
mean_mtp = mtp_vals.mean()

print("Mean MTP: %s\nMean DN: %s" %(mean_mtp, mean_dn))
# Two-sample t-test between the groups, using scipy's default settings.
print(ttest_ind(a=mtp_vals, b=dn_vals))

In [None]:
# Strip plot of % nonreference genes per approach, with a dotted line at each group mean.
fig = px.strip(pg_meta_df, x='Approach', y="perc_nonref",
               hover_data=pg_meta_df.columns, color='Approach',
              color_discrete_sequence=['lightsalmon','mediumseagreen'])
# Mean markers are centred on the categorical x positions 0 and 1.
# NOTE(review): this assumes Map-to-pan is drawn at x=0 and De novo at x=1 — confirm.
fig.add_shape(type='line', x0=-0.05, x1=0.05, y0=mean_mtp, y1=mean_mtp, line=dict(dash='dot'))
fig.add_shape(type='line', x0=0.95, x1=1.05, y0=mean_dn, y1=mean_dn, line=dict(dash='dot'))

fig.update_xaxes(mirror=True, showline=True, linecolor='black', showgrid=False, title='')
fig.update_yaxes(mirror=True, showline=True, linecolor='black', showgrid=False, title='% Nonreference genes', range=(0,100))
fig.update_layout(legend_title_text='', autosize=False, width=400)

fig.show()

In [None]:
# Save the current figure as fig4d.pdf under figs_path.
fig4_d = os.path.join(figs_path, 'fig4d.pdf')
fig.write_image(fig4_d)

## Pairwise comparison
For rice and _B. napus_, two pan-genomes are available, each constructed using a different approach. Here we compare the two pan-genomes of each species.

In [None]:
# Full gene-by-accession 0/1 matrices (not just % presence), as needed for
# subsampling accessions.

# B. napus: same parsing as load_B_napus_53 — the fourth column is the gene
# index, the first three remaining columns are dropped, and a cell is present
# only when it equals "PRESENT".
b_napus_53_in_csv = os.path.join(pg_meta_dir, "B_napus_53/BnaPan.PAV.table.csv")
b_napus_53_pav_df = pd.read_csv(b_napus_53_in_csv, index_col=3)
b_napus_53_pav_df.index.name = 'gene'
b_napus_53_pav_df = b_napus_53_pav_df[b_napus_53_pav_df.columns[3:]]
# Vectorised replacement of the per-cell applymap lambda
# (DataFrame.applymap is deprecated since pandas 2.1).
b_napus_53_pav_df = (b_napus_53_pav_df == "PRESENT").astype('int64')

# Rice: the file is used as-is.
rice_453_in_tsv = os.path.join(pg_meta_dir, "rice_453/GenePAV.txt")
rice_453_pav_df = pd.read_csv(rice_453_in_tsv, sep='\t', index_col=0)
rice_453_pav_df.index.name = 'gene'

In [None]:
def subsample_acc(pav_df, acc, n=100, ref_acc=None):
    """
    Subsample acc accessions (columns) from the PAV DF
    and calculate PG stats. Repeat n times.

    In every repeat, genes (rows) absent from all sampled
    accessions are dropped; then the % of core genes
    (perc_core with its default threshold) and the mean
    presence % (mean_presence) are calculated.

    If the name of the reference accession is provided,
    it is always included - so acc-1 other accessions are
    drawn - and the % of genes absent from the reference
    (perc_nonref) is calculated as well.

    pav_df is assumed to hold 0/1 values, genes as rows
    and accessions as columns.

    Returns a DF with one row per repeat and the columns
    core95, mean_presence and perc_nonref (the latter is
    missing in all rows when ref_acc is not provided).
    """
    if ref_acc:
        ref_col = pav_df[ref_acc]
        # Draw the acc-1 companions from the NON-reference accessions only.
        # Sampling from the full matrix could pick the reference column
        # itself, which is then overwritten by the assignment below, leaving
        # a subsample of acc-1 instead of acc accessions.
        candidates = pav_df.drop(columns=ref_acc)
    res = []
    for _ in range(n):
        if ref_acc:
            samp_pav_df = candidates.sample(acc-1, axis=1)
            samp_pav_df[ref_acc] = ref_col
        else:
            samp_pav_df = pav_df.sample(acc, axis=1)
        # Keep only genes present in at least one sampled accession
        samp_pav_df = samp_pav_df.loc[samp_pav_df.sum(axis=1) > 0]
        # Presence is kept in its own DF instead of being added as a column
        # to the row-filtered slice (avoids SettingWithCopyWarning and keeps
        # the accession count in the denominator unambiguous).
        samp_presence = pd.DataFrame(
            {'presence_perc': samp_pav_df.sum(axis=1)/samp_pav_df.shape[1]*100}
        )
        samp_perc_core = perc_core(samp_presence)
        samp_mean_presence = mean_presence(samp_presence)
        if ref_acc:
            n_nonref = samp_pav_df.loc[samp_pav_df[ref_acc] == 0].shape[0]
            samp_perc_nonref = n_nonref / samp_pav_df.shape[0] * 100
        else:
            samp_perc_nonref = None
        res.append(pd.Series([samp_perc_core, samp_mean_presence, samp_perc_nonref]))
    res = pd.concat(res, axis=1).T
    res.columns = ['core95', 'mean_presence', 'perc_nonref']
    return res

In [None]:
# 100 random subsamples of 9 accessions from the 53-accession matrix, with
# 'Darmor' passed as the reference accession.
# NOTE(review): 9 presumably matches the size of the B_napus_9 pan-genome — confirm.
b_napus_53_sub_stats = subsample_acc(b_napus_53_pav_df, 9, 100, ref_acc='Darmor')

In [None]:
# Summary statistics of the three stats across the subsamples.
b_napus_53_sub_stats.describe()

In [None]:
# core95 of the B_napus_9 pan-genome as a plain float. The scalar is extracted
# with .item() (which raises unless exactly one row matches) because calling
# float() directly on a one-element Series is deprecated since pandas 2.1.
b_napus_9_core95 = float(pg_meta_df.query('`Pan-genome` == "B_napus_9"')['core95'].item())

In [None]:
# Distribution of % core genes across the B. napus subsamples.
fig = px.histogram(b_napus_53_sub_stats['core95'], color_discrete_sequence=['black'])

# Red arrow labelled "De novo" pointing at the B_napus_9 value on the x axis.
fig.add_annotation(x=b_napus_9_core95, y=0,
            text="De novo",
            showarrow=True,
            arrowhead=2,
                  arrowcolor='red', arrowsize=2,
                  ax=0, ay=-40,
                  font={'color': 'red'})

fig.update_xaxes(mirror=True, showline=True, linecolor='black', showgrid=False, title='% core genes', range=(0,100))
fig.update_yaxes(mirror=True, showline=True, linecolor='black', showgrid=False)
fig.update_layout(showlegend=False)

fig.show()

In [None]:
# Save the current figure as figS4a.pdf under figs_path.
fig4s_a = os.path.join(figs_path, 'figS4a.pdf')
fig.write_image(fig4s_a)

In [None]:
# mean_presence of the B_napus_9 pan-genome as a plain float. The scalar is
# extracted with .item() (which raises unless exactly one row matches) because
# calling float() directly on a one-element Series is deprecated since pandas 2.1.
b_napus_9_mean_presence = float(pg_meta_df.query('`Pan-genome` == "B_napus_9"')['mean_presence'].item())

In [None]:
# Distribution of overall occupancy across the B. napus subsamples.
fig = px.histogram(b_napus_53_sub_stats['mean_presence'], color_discrete_sequence=['black'])

# Red arrow labelled "De novo" pointing at the B_napus_9 value on the x axis.
fig.add_annotation(x=b_napus_9_mean_presence, y=0,
            text="De novo",
            showarrow=True,
            arrowhead=2,
                  arrowcolor='red', arrowsize=2,
                  ax=0, ay=-40,
                  font={'color': 'red'})

fig.update_xaxes(mirror=True, showline=True, linecolor='black', showgrid=False, title='Overall occupancy (%)', range=(0,100))
fig.update_yaxes(mirror=True, showline=True, linecolor='black', showgrid=False)
fig.update_layout(showlegend=False)

fig.show()

In [None]:
# Save the current figure as figS4b.pdf under figs_path.
fig4s_b = os.path.join(figs_path, 'figS4b.pdf')
fig.write_image(fig4s_b)

In [None]:
# perc_nonref of the B_napus_9 pan-genome as a plain float. The scalar is
# extracted with .item() (which raises unless exactly one row matches) because
# calling float() directly on a one-element Series is deprecated since pandas 2.1.
b_napus_9_perc_nonref = float(pg_meta_df.query('`Pan-genome` == "B_napus_9"')['perc_nonref'].item())

In [None]:
# Distribution of % nonreference genes across the B. napus subsamples.
fig = px.histogram(b_napus_53_sub_stats['perc_nonref'], color_discrete_sequence=['black'])

# Red arrow labelled "De novo" pointing at the B_napus_9 value on the x axis.
fig.add_annotation(x=b_napus_9_perc_nonref, y=0,
            text="De novo",
            showarrow=True,
            arrowhead=2,
                  arrowcolor='red', arrowsize=2,
                  ax=0, ay=-40,
                  font={'color': 'red'})

fig.update_xaxes(mirror=True, showline=True, linecolor='black', showgrid=False, title='% Nonreference pan-genes', range=(0,100))
fig.update_yaxes(mirror=True, showline=True, linecolor='black', showgrid=False)
fig.update_layout(showlegend=False)

fig.show()

In [None]:
# Save the current figure as figS4c.pdf under figs_path.
fig4s_c = os.path.join(figs_path, 'figS4c.pdf')
fig.write_image(fig4s_c)

In [None]:
# Add ref sample Nipponbare
# Add a 'Nipponbare' reference column: 1 for genes whose ID starts with 'Os',
# 0 for all other genes.
# NOTE(review): presumably 'Os'-prefixed IDs are the reference annotation genes — confirm.
rice_453_pav_df['Nipponbare'] = rice_453_pav_df.index.str.startswith('Os').astype(int)

In [None]:
# 100 random subsamples of 67 accessions, with 'Nipponbare' passed as the
# reference accession.
# NOTE(review): 67 presumably matches the size of the rice_67 pan-genome — confirm.
rice_453_sub_stats = subsample_acc(rice_453_pav_df, 67, 100, 'Nipponbare')

In [None]:
# Summary statistics of the three stats across the subsamples.
rice_453_sub_stats.describe()

In [None]:
# core95 of the rice_67 pan-genome as a plain float. The scalar is extracted
# with .item() (which raises unless exactly one row matches) because calling
# float() directly on a one-element Series is deprecated since pandas 2.1.
rice_67_core95 = float(pg_meta_df.query('`Pan-genome` == "rice_67"')['core95'].item())

In [None]:
# Distribution of % core genes across the rice subsamples.
fig = px.histogram(rice_453_sub_stats['core95'], color_discrete_sequence=['black'])

# Red arrow labelled "De novo" pointing at the rice_67 value on the x axis.
fig.add_annotation(x=rice_67_core95, y=0,
            text="De novo",
            showarrow=True,
            arrowhead=2,
                  arrowcolor='red', arrowsize=2,
                  ax=0, ay=-40,
                  font={'color': 'red'})

fig.update_xaxes(mirror=True, showline=True, linecolor='black', showgrid=False, title='% core genes', range=(0,100))
fig.update_yaxes(mirror=True, showline=True, linecolor='black', showgrid=False)
fig.update_layout(showlegend=False)

fig.show()

In [None]:
# Save the current figure as figS4d.pdf under figs_path.
fig4s_d = os.path.join(figs_path, 'figS4d.pdf')
fig.write_image(fig4s_d)

In [None]:
# mean_presence of the rice_67 pan-genome as a plain float. The scalar is
# extracted with .item() (which raises unless exactly one row matches) because
# calling float() directly on a one-element Series is deprecated since pandas 2.1.
rice_67_mean_presence = float(pg_meta_df.query('`Pan-genome` == "rice_67"')['mean_presence'].item())

In [None]:
# Distribution of overall occupancy across the rice subsamples.
fig = px.histogram(rice_453_sub_stats['mean_presence'], color_discrete_sequence=['black'])

# Red arrow labelled "De novo" pointing at the rice_67 value on the x axis.
fig.add_annotation(x=rice_67_mean_presence, y=0,
            text="De novo",
            showarrow=True,
            arrowhead=2,
                  arrowcolor='red', arrowsize=2,
                  ax=0, ay=-40,
                  font={'color': 'red'})

fig.update_xaxes(mirror=True, showline=True, linecolor='black', showgrid=False, title='Overall occupancy (%)', range=(0,100))
fig.update_yaxes(mirror=True, showline=True, linecolor='black', showgrid=False)
fig.update_layout(showlegend=False)

fig.show()

In [None]:
# Save the current figure as figS4e.pdf under figs_path.
fig4s_e = os.path.join(figs_path, 'figS4e.pdf')
fig.write_image(fig4s_e)

In [None]:
# perc_nonref of the rice_67 pan-genome as a plain float. The scalar is
# extracted with .item() (which raises unless exactly one row matches) because
# calling float() directly on a one-element Series is deprecated since pandas 2.1.
rice_67_perc_nonref = float(pg_meta_df.query('`Pan-genome` == "rice_67"')['perc_nonref'].item())

In [None]:
# Distribution of % nonreference genes across the rice subsamples.
fig = px.histogram(rice_453_sub_stats['perc_nonref'], color_discrete_sequence=['black'])

# Red arrow labelled "De novo" pointing at the rice_67 value on the x axis.
fig.add_annotation(x=rice_67_perc_nonref, y=0,
            text="De novo",
            showarrow=True,
            arrowhead=2,
                  arrowcolor='red', arrowsize=2,
                  ax=0, ay=-40,
                  font={'color': 'red'})

fig.update_xaxes(mirror=True, showline=True, linecolor='black', showgrid=False, title='% Nonreference genes', range=(0,100))
fig.update_yaxes(mirror=True, showline=True, linecolor='black', showgrid=False)
fig.update_layout(showlegend=False)

fig.show()

In [None]:
# Save the current figure as figS4f.pdf under figs_path.
fig4s_f = os.path.join(figs_path, 'figS4f.pdf')
fig.write_image(fig4s_f)