Purpose: translate rat GWAS gene-level results into human orthologs (and possibly human to rat once a co-expression network has been made). After ortholog mapping, define the seed gene sets. Ortholog mapping uses the best-match (best-score) ortholog.

# Set-up

In [1]:
import os
import pandas as pd
import statsmodels.stats.multitest
from scipy import stats
import matplotlib.pyplot as plt
import re

In [2]:
# Work from the project directory so that the relative 'magma/...' paths used in later cells resolve.
os.chdir('/tscc/projects/ps-palmer/brittany/SUD_cross_species/')

# define ortholog dictionary

In [5]:
# Directory that holds the orthology reference tables.
# NOTE(review): `path` is not referenced by any cell visible in this notebook; the orthology
# loader hard-codes the same directory instead.
path='/tscc/projects/ps-palmer/brittany/orthology_ref_tbls/'

In [4]:
# Alliance of Genome Resources combined orthology table,
# downloaded from https://www.alliancegenome.org/downloads#orthology on 11 June 2024.
# skiprows=15 skips the first 15 lines of the file so the next line is read as the column header.
ortho=(
    pd.read_csv(
        '/tscc/projects/ps-palmer/brittany/orthology_ref_tbls/ORTHOLOGY-ALLIANCE_COMBINED_2024.tsv',
        sep='\t',
        skiprows=15,
    )
    # keep rat -> human gene pairs only
    .loc[lambda tbl: (tbl['Gene1SpeciesName']=='Rattus norvegicus')
                     &(tbl['Gene2SpeciesName']=='Homo sapiens')]
    # keep only the pairs flagged as the best-scoring match
    .loc[lambda tbl: tbl['IsBestScore']=='Yes']
)

Note: we lose 1006 if we filter for orthologs supported by >2 databases rather than taking the best-scoring ortholog.

# Read in rat genes (MAGMA)

In [13]:
# Analysis label: selects which MAGMA gene-level results file is read in the next cell and
# which annotated output file is written after ortholog mapping.
# Labels handled by those cells: 'meta', 'mega', 'gsem', 'final_cf', 'final_mega',
# 'final_cf_25', 'bmi_rn6', 'body_length_rn6'.
test='final_cf'

In [14]:
# MAGMA gene-level results file for each analysis label (paths relative to the working directory).
# 'meta' and 'mega' both read the same file.
magma_genes_files={
    'meta':'magma/loco_meta_win10.genes.out',
    'mega':'magma/loco_meta_win10.genes.out',
    'gsem':'magma/loco_gsem_7k_win10.genes.out',
    'final_cf':'magma/loco_final_cf_7k_win10.genes.out',
    'final_mega':'magma/loco_final_mega_7k_win10.genes.out',
    'final_cf_25':'magma/loco_final_cf_7k_win25.genes.out',
    'bmi_rn6':'magma/bmi_rn6_win10.genes.out',
    'body_length_rn6':'magma/body_length_rn6_win10.genes.out',
}
if test not in magma_genes_files:
    # Fail loudly rather than leaving rat_magma undefined (or stale from an earlier run).
    raise ValueError(f"Unknown test label {test!r}; expected one of {sorted(magma_genes_files)}")
# A single read call for every label guarantees rat_magma is always a DataFrame
# (a stray trailing comma on one branch would otherwise turn it into a 1-tuple).
# sep=r'\s+' is the documented equivalent of the deprecated delim_whitespace=True.
# skiprows=1 skips the first line of the file so the following line is used as the header.
rat_magma=pd.read_csv(magma_genes_files[test],sep=r'\s+',index_col=None,skiprows=1)

In [None]:
# Display the loaded gene-level table (pandas truncates long frames in the rendered output).
rat_magma

Unnamed: 0,GENE,CHR,START,STOP,NSNPS,NPARAM,N,ZSTAT,P
0,Vom2r1,1,765164,795885,1,1,7679,1.18250,0.118500
1,Vom2r5,1,1151906,1180559,2,1,7679,1.66460,0.047999
2,Vom2r6,1,1204035,1232755,2,1,7679,1.77350,0.038072
3,LOC120098769,1,1954049,1978983,5,1,7679,-0.67169,0.749110
4,Raet1l,1,1969433,1994410,1,1,7679,0.19892,0.421160
...,...,...,...,...,...,...,...,...,...
18966,Ascc3,20,53500137,53805446,347,1,7679,0.82694,0.204130
18967,Sim1,20,53817601,53917219,152,1,7679,0.88070,0.189240
18968,LOC103694460,20,54404724,54428200,56,1,7679,0.85508,0.196250
18969,Cd99,20,54408239,54432882,53,1,7679,0.86448,0.193660


# test cutoffs

## bonferroni

In [16]:
# Bonferroni-corrected p-value threshold: 0.05 divided by the number of rows in rat_magma.
0.05/len(rat_magma)

2.635601707869907e-06

In [18]:
# Number of genes whose MAGMA p-value falls below the Bonferroni threshold (0.05 / number of rows).
bonferroni_cutoff=0.05/len(rat_magma)
rat_magma.loc[rat_magma['P']<bonferroni_cutoff].shape[0]

10

## FDR

In [19]:
# FDR correction of the MAGMA p-values, computed here on the rat genes so the result can be
# compared before and after ortholog mapping.
# fdrcorrection returns (rejected flags, corrected p-values); only the corrected p-values are kept.
rat_rejected,rat_fdr=statsmodels.stats.multitest.fdrcorrection(rat_magma['P'],alpha=0.05,method='indep',is_sorted=False)
rat_magma['FDR_corr']=rat_fdr

In [20]:
# Number of genes with an FDR-corrected p-value below 0.05.
rat_magma.loc[rat_magma['FDR_corr']<0.05].shape[0]

383

# merge in human orthologs

In [19]:
# Attach the human ortholog symbol to each rat gene and keep only the genes that have one.
# `ortho` is the rat->human table already filtered to best-score pairs; it is the only
# orthology table defined in this notebook, so it is used here (no `ortho_best` exists).
magma_out=(
    rat_magma
    .merge(ortho[['Gene1Symbol','Gene2Symbol']],left_on='GENE',right_on='Gene1Symbol',how='left')
    # dropna() removes rows with any missing value, which includes rat genes without an ortholog match
    .dropna()
    .reset_index(drop=True)
    .drop(columns='Gene1Symbol')
    # rename by name rather than by position; errors='raise' fails if either column is missing
    .rename(columns={'FDR_corr':'Q','Gene2Symbol':'HM_ORTHO'},errors='raise')
)
# Pin the output schema explicitly: raises KeyError if an expected MAGMA column is absent
# instead of silently relabelling whatever columns happen to be present.
magma_out=magma_out[['GENE', 'CHR', 'START', 'STOP', 'NSNPS', 'NPARAM', 'N', 'ZSTAT', 'P',
       'Q', 'HM_ORTHO']]

In [20]:
# Output file for the ortholog-annotated table of each analysis label.
seed_gene_files={
    'meta':'magma/seed_genes/loco_win10_annot.tsv',
    'gsem':'magma/seed_genes/loco_gsem_annot.tsv',
    'final_cf':'magma/seed_genes/loco_final_cf_win10_annot.tsv',
    'final_mega':'magma/seed_genes/loco_final_mega_win10_annot.tsv',
    'final_cf_25':'magma/seed_genes/loco_final_cf_win25_annot.tsv',
    'bmi_rn6':'magma/seed_genes/bmi_rn6_annot.tsv',
    'body_length_rn6':'magma/seed_genes/body_length_rn6_annot.tsv',
}
if test not in seed_gene_files:
    # A label without an output file (e.g. 'mega', which the loading cell accepts) would
    # otherwise finish this cell without saving anything and without any message.
    raise ValueError(f"No output file defined for test label {test!r}; expected one of {sorted(seed_gene_files)}")
# Tab-separated, without the pandas index column.
magma_out.to_csv(seed_gene_files[test],sep='\t',index=False)

# externalizing (MAGMA)

In [3]:
# Available versions of the externalizing MAGMA results; `file` selects the one used in the
# cells below. The list is not named `set`, because that would shadow the built-in `set`
# type for every later cell in the kernel.
ext_versions=['munged','orig']
file=ext_versions[1]

In [4]:
# Gene-level MAGMA results for the externalizing analysis; `file` picks the 'munged' or 'orig' version.
# sep=r'\s+' is the documented equivalent of the deprecated delim_whitespace=True.
# No rows are skipped: the first line of the file is the header.
ext_magma=pd.read_csv('magma/ext_'+file+'.genes.out',sep=r'\s+',index_col=None)

In [8]:
# Gene location table (tab-separated, no header row), so the column names are supplied here.
# NOTE(review): 'STAND' is presumably meant to be 'STRAND'; left unchanged because it is a
# runtime column name that other code may rely on.
gene_loc=(
    pd.read_csv('/tscc/projects/ps-palmer/brittany/magma_v1/NCBI38/NCBI38.gene.loc',sep='\t',header=None)
    .set_axis(['ID','CHR','START','STOP','STAND','GENE_NAME'],axis=1)
)

In [6]:
# Add gene names to the externalizing MAGMA results: MAGMA's GENE value is matched against
# gene_loc's ID, and how='right' keeps every MAGMA row even when no gene_loc entry matches.
ext_with_names=gene_loc[['ID','GENE_NAME']].merge(ext_magma,left_on='ID',right_on='GENE',how='right')
ext_magma=ext_with_names.reset_index(drop=True).drop(columns='GENE')
# Positional relabel: the GENE_NAME column becomes GENE; the other columns keep their MAGMA names.
ext_magma.columns=['ID', 'GENE', 'CHR', 'START', 'STOP', 'NSNPS', 'NPARAM', 'N','ZSTAT', 'P']

In [9]:
# FDR-corrected p-values for the externalizing genes, stored in column 'Q'.
# fdrcorrection returns (rejected flags, corrected p-values); only the corrected p-values are kept.
ext_rejected,ext_fdr=statsmodels.stats.multitest.fdrcorrection(ext_magma['P'],alpha=0.05,method='indep',is_sorted=False)
ext_magma['Q']=ext_fdr

In [10]:
# Save the annotated externalizing table as a tab-separated file, without the pandas index column.
ext_annot_path='magma/seed_genes/ext_'+file+'_annot.tsv'
ext_magma.to_csv(ext_annot_path,sep='\t',index=False)