Loading Python libraries

In [1]:
# Use pathlib for file path manipulation
import pathlib

# Install numpy
import numpy as np

# Install Pandas for tabular data
import pandas as pd

# Install plotnine: a ggplot2-compatible Python plotting package
from plotnine import *

# Always show all columns in a Pandas DataFrame
pd.set_option('display.max_columns', None)

Set paths

In [2]:
REL7_PATH = pathlib.Path(pathlib.Path.home(), 'workspace/gp2_tier2_eu_release7_30042024')
!ls -hal {REL7_PATH}

total 61K
-r--r--r-- 1 jupyter jupyter 61K Jul 23  2024 README_release7_30042024.txt
dr-xr-xr-x 1 jupyter jupyter   0 Mar  5 10:22 clinical_data
dr-xr-xr-x 1 jupyter jupyter   0 Mar  5 10:22 imputed_genotypes
dr-xr-xr-x 1 jupyter jupyter   0 Mar  5 10:22 meta_data
dr-xr-xr-x 1 jupyter jupyter   0 Mar  5 10:22 raw_genotypes


Make working directory

In [3]:
WORK_DIR = "~/workspace/ws_files/TMEM175/"

In [4]:
# make sure all tools installed
! ls /home/jupyter/tools

LICENSE		       plink2				rvtests
annovar		       plink2_linux_x86_64_latest.zip	toy.map
annovar.latest.tar.gz  plink_linux_x86_64_20190304.zip	toy.ped
plink		       prettify


In [5]:
# give permission

# chmod to make sure you have permission to run the program
! chmod u+x /home/jupyter/tools/plink
! chmod u+x /home/jupyter/tools/plink2
! chmod 777 /home/jupyter/tools/rvtests/executable/rvtest

In [6]:
%%bash
# making working directory
#Loop over all the ancestries
for ancestry in {'AAC','AFR','AJ','AMR','CAS','EAS','EUR','FIN','MDE','SAS','CAH'} ;
do

#Make a folder for each ancestry
mkdir ~/workspace/ws_files/TMEM175/TMEM175_"$ancestry"

done

Create a covariate file with GP2 data

In [4]:
CLINICAL_DATA_PATH = pathlib.Path(REL7_PATH, 'clinical_data/master_key_release7_final_vwb.csv')

In [8]:
# Let's load the master key
key = pd.read_csv(CLINICAL_DATA_PATH, low_memory=False)
print(key.shape)
key.head()

(58446, 26)


Unnamed: 0,GP2ID,GP2sampleID,study,study_arm,study_type,diagnosis,baseline_GP2_phenotype_for_qc,baseline_GP2_phenotype,biological_sex_for_qc,age_at_sample_collection,age_of_onset,age_at_diagnosis,age_at_death,age_at_last_follow_up,race_for_qc,family_history_for_qc,region_for_qc,manifest_id,Genotyping_site,sample_type,GDPR,monogenic,label,pruned,pruned_reason,related
0,APGS_000193,APGS_000193_s1,APGS,PD,Case(/Control),PD,PD,PD,Male,,,,,,Not Reported,Not Reported,AUS,m1,NIH,Saliva,0,0,EUR,0,,0
1,APGS_000194,APGS_000194_s1,APGS,PD,Case(/Control),PD,PD,PD,Female,,,,,,Not Reported,Not Reported,AUS,m1,NIH,Saliva,0,0,EUR,0,,0
2,APGS_000195,APGS_000195_s1,APGS,PD,Case(/Control),PD,PD,PD,Female,,,,,,Not Reported,Not Reported,AUS,m1,NIH,Saliva,0,0,EUR,0,,0
3,APGS_000196,APGS_000196_s1,APGS,PD,Case(/Control),PD,PD,PD,Male,,,,,,Not Reported,Not Reported,AUS,m1,NIH,Saliva,0,0,EUR,0,,0
4,APGS_000197,APGS_000197_s1,APGS,PD,Case(/Control),PD,PD,PD,Female,,,,,,Not Reported,Not Reported,AUS,m1,NIH,Saliva,0,0,EUR,0,,0


In [9]:
# Subsetting to keep only a few columns 
key = key[['GP2sampleID', 'baseline_GP2_phenotype_for_qc', 'biological_sex_for_qc', 'age_at_sample_collection', 'age_of_onset', 'label']]
# Renaming the columns
key.rename(columns = {'GP2sampleID':'IID',
                                     'baseline_GP2_phenotype_for_qc':'phenotype',
                                     'biological_sex_for_qc':'SEX', 
                                     'age_at_sample_collection':'AGE', 
                                     'age_of_onset':'AAO'}, inplace = True)
key

Unnamed: 0,IID,phenotype,SEX,AGE,AAO,label
0,APGS_000193_s1,PD,Male,,,EUR
1,APGS_000194_s1,PD,Female,,,EUR
2,APGS_000195_s1,PD,Female,,,EUR
3,APGS_000196_s1,PD,Male,,,EUR
4,APGS_000197_s1,PD,Female,,,EUR
...,...,...,...,...,...,...
58441,m-UROMETV_000031_s1,PD,Female,,46.0,EUR
58442,m-UROMETV_000032_s1,PD,Male,,41.0,EUR
58443,m-UROMETV_000033_s1,PD,Female,,38.0,EUR
58444,m-UROMETV_000034_s1,PD,Male,,50.0,EUR


In [10]:
! pwd

/home/jupyter/workspace/ws_files


In [11]:
ancestries = {'AAC','AFR','AJ','AMR','CAS','EAS','EUR','FIN','MDE','SAS','CAH'}

for ancestry in ancestries:
    
    WORK_DIR = f'~/workspace/ws_files/TMEM175/TMEM175_{ancestry}'
    print(f'WORKING ON: {ancestry}')
    
    ## Subset to keep ancestry of interest 
    ancestry_key = key[key['label']==ancestry].copy()
    ancestry_key.reset_index(drop=True)
    
     # Load information about related individuals in the ancestry analyzed
    related_df = pd.read_csv(f'{REL7_PATH}/meta_data/related_samples/{ancestry}_release7_vwb.related')
    print(f'Related individuals: {related_df.shape}')
    
    # Make a list of just one set of related people
    related_list = list(related_df['IID1'])
    
    # Check value counts of related and remove only one related individual
    ancestry_key = ancestry_key[~ancestry_key["IID"].isin(related_list)]
    
    # Check size
    print(f'Unrelated individuals: {ancestry_key.shape}')
    
    # Convert phenotype to binary (1/2)
    ## Assign conditions so case=2 and controls=1, and -9 otherwise (matching PLINK convention)
    # PD = 2; control = 1
    pheno_mapping = {"PD": 2, "Control": 1}
    ancestry_key['PHENO'] = ancestry_key['phenotype'].map(pheno_mapping).astype('Int64')
    
    # Check value counts of pheno
    ancestry_key['PHENO'].value_counts(dropna=False)
    
    ## Get the PCs
    pcs = pd.read_csv(f'{REL7_PATH}/meta_data/qc_metrics/projected_pcs_vwb.csv')
    
     #Select just first 5 PCs
    selected_columns = ['IID', 'PC1', 'PC2', 'PC3', 'PC4', 'PC5']
    pcs = pd.DataFrame(data=pcs.iloc[:, 1:7].values, columns=selected_columns)
    
     # Drop the first row (since it's now the column names)
    pcs = pcs.drop(0)
    
    # Reset the index to remove any potential issues
    pcs = pcs.reset_index(drop=True)
    
    # Check size
    print(f'PCs: {pcs.shape}')
    
    # Check value counts of SEX
    sex_og_values = ancestry_key['SEX'].value_counts(dropna=False)
    print(f'Sex value counts - original:\n {sex_og_values.to_string()}')
    
     # Convert sex to binary (1/2)
    ## Assign conditions so female=2 and men=1, and -9 otherwise (matching PLINK convention)
    # Female = 2; Male = 1
    sex_mapping = {"Female": 2, "Male": 1}
    ancestry_key['SEX'] = ancestry_key['SEX'].map(sex_mapping).astype('Int64')
    
    # Check value counts of SEX after recoding
    sex_recode_values = ancestry_key['SEX'].value_counts(dropna=False)
    print(f'Sex value counts - recoded:\n{sex_recode_values.to_string()}')
    
    ## Make covariate file
    df = pd.merge(pcs, ancestry_key, on='IID', how='left')
    print(f'Check columns for covariate file: {df.columns}')
    
    #Make additional columns - FID, fatid and matid - these are needed for RVtests!!
    #RVtests needs the first 5 columns to be fid, iid, fatid, matid and sex otherwise it does not run correctly
    #Uppercase column name is ok
    #See https://zhanxw.github.io/rvtests/#phenotype-file
    df['FID'] = 0
    df['FATID'] = 0
    df['MATID'] = 0
    
    ## Clean up and keep columns we need 
    final_df = df[['FID','IID', 'FATID', 'MATID', 'SEX', 'AGE', 'PHENO','PC1', 'PC2', 'PC3', 'PC4', 'PC5']].copy()
    
    ##DO NOT replace missing values with -9 as this is misinterpreted by RVtests - needs to be nonnumeric
    #Leave missing values as NA
    
    #Check number of PD cases missing age
    pd_missAge = final_df[(final_df['PHENO']==2)&(final_df['AGE'].isna())]
    print(f'Number of PD cases missing age: {pd_missAge.shape[0]}')
    
    #Check number of controls missing age
    control_missAge = final_df[(final_df['PHENO']==1)&(final_df['AGE'].isna())]
    print(f'Number of controls missing age: {control_missAge.shape[0]}')
    
    ## Make file of sample IDs to keep 
    samples_toKeep = final_df[['FID', 'IID']].copy()
    samples_toKeep.to_csv(f'~/workspace/ws_files/TMEM175/TMEM175_{ancestry}/{ancestry}.samplestoKeep', sep = '\t', index=False, header=None)
    
    ## Make your covariate file
    #Included na_rep to write out missing/NA values explicitly as string/text, not as blank otherwise they are misread in RVtests
    final_df.to_csv(f'~/workspace/ws_files/TMEM175/TMEM175_{ancestry}/{ancestry}_covariate_file.txt', sep = '\t', na_rep='NA', index=False)

WORKING ON: AFR
Related individuals: (228, 9)
Unrelated individuals: (2752, 6)
PCs: (58209, 6)
Sex value counts - original:
 SEX
Male                          1553
Female                        1197
Other/Unknown/Not Reported       2
Sex value counts - recoded:
SEX
1       1553
2       1197
<NA>       2
Check columns for covariate file: Index(['IID', 'PC1', 'PC2', 'PC3', 'PC4', 'PC5', 'phenotype', 'SEX', 'AGE',
       'AAO', 'label', 'PHENO'],
      dtype='object')
Number of PD cases missing age: 835
Number of controls missing age: 809
WORKING ON: AAC
Related individuals: (28, 9)
Unrelated individuals: (1181, 6)
PCs: (58209, 6)
Sex value counts - original:
 SEX
Female                        690
Male                          489
Other/Unknown/Not Reported      2
Sex value counts - recoded:
SEX
2       690
1       489
<NA>      2
Check columns for covariate file: Index(['IID', 'PC1', 'PC2', 'PC3', 'PC4', 'PC5', 'phenotype', 'SEX', 'AGE',
       'AAO', 'label', 'PHENO'],
      dtype='obje

Annotation of the gene

Extract the region using PLINK

Extract TMEM175 gene

TMEM175 coordinates: Chromosome 4:932387-4:958656(GRCh38/hg38)

In [14]:
## extract region using plink
ancestries = {'AAC','AFR','AJ','AMR','CAS','EAS','EUR','FIN','MDE','SAS','CAH'}

for ancestry in ancestries:
    
    WORK_DIR = f'~/workspace/ws_files/TMEM175/TMEM175_{ancestry}'

    ! /home/jupyter/tools/plink2 \
    --pfile {REL7_PATH}/imputed_genotypes/{ancestry}/chr4_{ancestry}_release7_vwb \
    --chr 4 \
    --from-bp 882387 \
    --to-bp 1008656 \
    --make-bed \
    --out {WORK_DIR}/{ancestry}_TMEM175

PLINK v2.00a6LM 64-bit Intel (4 Jul 2024)      www.cog-genomics.org/plink/2.0/
(C) 2005-2024 Shaun Purcell, Christopher Chang   GNU General Public License v3
Logging to /home/jupyter/workspace/ws_files/TMEM175/TMEM175_AFR/AFR_TMEM175.log.
Options in effect:
  --chr 4
  --from-bp 882387
  --make-bed
  --out /home/jupyter/workspace/ws_files/TMEM175/TMEM175_AFR/AFR_TMEM175
  --pfile /home/jupyter/workspace/gp2_tier2_eu_release7_30042024/imputed_genotypes/AFR/chr4_AFR_release7_vwb
  --to-bp 1008656

Start time: Wed Jul 31 09:54:42 2024
52223 MiB RAM detected, ~50711 available; reserving 26111 MiB for main
workspace.
Using up to 8 compute threads.
2643 samples (1163 females, 1480 males; 2643 founders) loaded from
/home/jupyter/workspace/gp2_tier2_eu_release7_30042024/imputed_genotypes/AFR/chr4_AFR_release7_vwb.psam.
4555051 variants loaded from
/home/jupyter/workspace/gp2_tier2_eu_release7_30042024/imputed_genotypes/AFR/chr4_AFR_release7_vwb.pvar.
1 binary phenotype loaded (942 cases, 1679 

In [17]:
# Visualize bim file
WORK_DIR = '/home/jupyter/workspace/ws_files/TMEM175/'
! head {WORK_DIR}/TMEM175_EUR/EUR_TMEM175.bim

4	chr4:882390:C:A	0	882390	A	C
4	chr4:882397:C:T	0	882397	T	C
4	chr4:882422:C:T	0	882422	T	C
4	chr4:882432:G:A	0	882432	A	G
4	chr4:882454:C:T	0	882454	T	C
4	chr4:882471:G:A	0	882471	A	G
4	chr4:882483:C:G	0	882483	G	C
4	chr4:882498:C:G	0	882498	G	C
4	chr4:882507:G:A	0	882507	A	G
4	chr4:882517:C:A	0	882517	A	C


In [18]:
# Visualize bim file
! head {WORK_DIR}/TMEM175_EUR/EUR_TMEM175.fam

0	APGS_000002_s1	0	0	1	2
0	APGS_000003_s1	0	0	1	2
0	APGS_000004_s1	0	0	2	2
0	APGS_000008_s1	0	0	1	2
0	APGS_000011_s1	0	0	1	2
0	APGS_000016_s1	0	0	1	2
0	APGS_000018_s1	0	0	1	2
0	APGS_000023_s1	0	0	1	2
0	APGS_000026_s1	0	0	2	2
0	APGS_000027_s1	0	0	1	2


In [21]:
for ancestry in ancestries:
    
    WORK_DIR = f'~/workspace/ws_files/TMEM175/TMEM175_{ancestry}/'
    
    ! head -n 1 {WORK_DIR}/{ancestry}_TMEM175.fam > {WORK_DIR}/{ancestry}_s1.txt

In [22]:
! head /home/jupyter/workspace/ws_files/TMEM175/TMEM175_EUR/EUR_s1.txt

0	APGS_000002_s1	0	0	1	2


Turn binary files into VCF

In [23]:
for ancestry in ancestries:
    
    WORK_DIR = f'~/workspace/ws_files/TMEM175/TMEM175_{ancestry}'
    
    ## Turn binary files into VCF
    ! /home/jupyter/tools/plink2 \
    --bfile {WORK_DIR}/{ancestry}_TMEM175 \
    --keep {WORK_DIR}/{ancestry}_s1.txt \
    --make-bed \
    --out {WORK_DIR}/{ancestry}_TMEM175_v1

PLINK v2.00a6LM 64-bit Intel (4 Jul 2024)      www.cog-genomics.org/plink/2.0/
(C) 2005-2024 Shaun Purcell, Christopher Chang   GNU General Public License v3
Logging to /home/jupyter/workspace/ws_files/TMEM175/TMEM175_AFR/AFR_TMEM175_v1.log.
Options in effect:
  --bfile /home/jupyter/workspace/ws_files/TMEM175/TMEM175_AFR/AFR_TMEM175
  --keep /home/jupyter/workspace/ws_files/TMEM175/TMEM175_AFR/AFR_s1.txt
  --make-bed
  --out /home/jupyter/workspace/ws_files/TMEM175/TMEM175_AFR/AFR_TMEM175_v1

Start time: Wed Jul 31 10:13:40 2024
52223 MiB RAM detected, ~50482 available; reserving 26111 MiB for main
workspace.
Using up to 8 compute threads.
2643 samples (1163 females, 1480 males; 2643 founders) loaded from
/home/jupyter/workspace/ws_files/TMEM175/TMEM175_AFR/AFR_TMEM175.fam.
3853 variants loaded from
/home/jupyter/workspace/ws_files/TMEM175/TMEM175_AFR/AFR_TMEM175.bim.
1 binary phenotype loaded (942 cases, 1679 controls).
--keep: 1 sample remaining.
1 sample (0 females, 1 male; 1 found

In [24]:
for ancestry in ancestries:
    
    WORK_DIR = f'~/workspace/ws_files/TMEM175/TMEM175_{ancestry}'
    
    ## Turn binary files into VCF
    ! /home/jupyter/tools/plink2 \
    --bfile {WORK_DIR}/{ancestry}_TMEM175_v1 \
    --recode vcf-fid \
    --out {WORK_DIR}/{ancestry}_TMEM175_v1

PLINK v2.00a6LM 64-bit Intel (4 Jul 2024)      www.cog-genomics.org/plink/2.0/
(C) 2005-2024 Shaun Purcell, Christopher Chang   GNU General Public License v3
Logging to /home/jupyter/workspace/ws_files/TMEM175/TMEM175_AFR/AFR_TMEM175_v1.log.
Options in effect:
  --bfile /home/jupyter/workspace/ws_files/TMEM175/TMEM175_AFR/AFR_TMEM175_v1
  --export vcf-fid
  --out /home/jupyter/workspace/ws_files/TMEM175/TMEM175_AFR/AFR_TMEM175_v1

Start time: Wed Jul 31 10:15:08 2024
Note: --export 'vcf-fid' modifier is deprecated.  Use 'vcf' + 'id-paste=fid'.
52223 MiB RAM detected, ~50568 available; reserving 26111 MiB for main
workspace.
Using up to 8 compute threads.
1 sample (0 females, 1 male; 1 founder) loaded from
/home/jupyter/workspace/ws_files/TMEM175/TMEM175_AFR/AFR_TMEM175_v1.fam.
3853 variants loaded from
/home/jupyter/workspace/ws_files/TMEM175/TMEM175_AFR/AFR_TMEM175_v1.bim.
1 binary phenotype loaded (1 case, 0 controls).
--export vcf to
/home/jupyter/workspace/ws_files/TMEM175/TMEM175_

In [25]:
### Bgzip and Tabix (zip and index the file)
for ancestry in ancestries:
    
    WORK_DIR = f'~/workspace/ws_files/TMEM175/TMEM175_{ancestry}'
    ! bgzip -f {WORK_DIR}/{ancestry}_TMEM175_v1.vcf
    ! tabix -f -p vcf {WORK_DIR}/{ancestry}_TMEM175_v1.vcf.gz 

Annotate using ANNOVAR

In [26]:
## annotate using ANNOVAR

for ancestry in ancestries:
    
    WORK_DIR = f'~/workspace/ws_files/TMEM175/TMEM175_{ancestry}'
    
    ! perl /home/jupyter/tools/annovar/table_annovar.pl {WORK_DIR}/{ancestry}_TMEM175_v1.vcf.gz /home/jupyter/tools/annovar/humandb/ -buildver hg38 \
    -out {WORK_DIR}/{ancestry}_TMEM175.annovar \
    -remove -protocol refGene,clinvar_20140902 \
    -operation g,f \
    --nopolish \
    -nastring . \
    -vcfinput


NOTICE: Running with system command <convert2annovar.pl  -includeinfo -allsample -withfreq -format vcf4 /home/jupyter/workspace/ws_files/TMEM175/TMEM175_AFR/AFR_TMEM175_v1.vcf.gz > /home/jupyter/workspace/ws_files/TMEM175/TMEM175_AFR/AFR_TMEM175.annovar.avinput>
NOTICE: Finished reading 3860 lines from VCF file
NOTICE: A total of 3853 locus in VCF file passed QC threshold, representing 3628 SNPs (2760 transitions and 868 transversions) and 225 indels/substitutions
NOTICE: Finished writing allele frequencies based on 3628 SNP genotypes (2760 transitions and 868 transversions) and 225 indels/substitutions for 1 samples

NOTICE: Running with system command </home/jupyter/tools/annovar/table_annovar.pl /home/jupyter/workspace/ws_files/TMEM175/TMEM175_AFR/AFR_TMEM175.annovar.avinput /home/jupyter/tools/annovar/humandb/ -buildver hg38 -outfile /home/jupyter/workspace/ws_files/TMEM175/TMEM175_AFR/AFR_TMEM175.annovar -remove -protocol refGene,clinvar_20140902 -operation g,f --nopolish -nastri

In [29]:
# Read in ANNOVAR multianno file
gene = pd.read_csv(f'{WORK_DIR}/TMEM175_AAC/AAC_TMEM175.annovar.hg38_multianno.txt', sep = '\t')
display(gene)

Unnamed: 0,Chr,Start,End,Ref,Alt,Func.refGene,Gene.refGene,GeneDetail.refGene,ExonicFunc.refGene,AAChange.refGene,clinvar_20140902,Otherinfo1,Otherinfo2,Otherinfo3,Otherinfo4,Otherinfo5,Otherinfo6,Otherinfo7,Otherinfo8,Otherinfo9,Otherinfo10,Otherinfo11,Otherinfo12,Otherinfo13
0,4,882400,882400,G,A,intronic,GAK,.,.,.,.,0.0,.,.,4,882400,chr4:882400:G:A,G,A,.,.,PR,GT,0/0
1,4,882432,882432,G,A,intronic,GAK,.,.,.,.,0.0,.,.,4,882432,chr4:882432:G:A,G,A,.,.,PR,GT,0/0
2,4,882455,882455,G,A,intronic,GAK,.,.,.,.,0.0,.,.,4,882455,chr4:882455:G:A,G,A,.,.,PR,GT,0/0
3,4,882471,882471,G,A,intronic,GAK,.,.,.,.,0.0,.,.,4,882471,chr4:882471:G:A,G,A,.,.,PR,GT,0/0
4,4,882498,882498,C,G,intronic,GAK,.,.,.,.,0.0,.,.,4,882498,chr4:882498:C:G,C,G,.,.,PR,GT,0/0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3444,4,1008499,1008499,C,T,intergenic,IDUA;FGFRL1,dist=3935;dist=3127,.,.,.,0.0,.,.,4,1008499,chr4:1008499:C:T,C,T,.,.,PR,GT,0/0
3445,4,1008515,1008515,C,T,intergenic,IDUA;FGFRL1,dist=3951;dist=3111,.,.,.,0.0,.,.,4,1008515,chr4:1008515:C:T,C,T,.,.,PR,GT,0/0
3446,4,1008535,1008535,G,C,intergenic,IDUA;FGFRL1,dist=3971;dist=3091,.,.,.,0.0,.,.,4,1008535,chr4:1008535:G:C,G,C,.,.,PR,GT,0/0
3447,4,1008649,1008649,C,T,intergenic,IDUA;FGFRL1,dist=4085;dist=2977,.,.,.,0.0,.,.,4,1008649,chr4:1008649:C:T,C,T,.,.,PR,GT,0/0


In [30]:
gene["Func.refGene"].value_counts()

Func.refGene
intronic           2746
exonic              287
intergenic          175
UTR3                102
downstream           58
UTR5                 42
upstream             36
splicing              2
exonic;splicing       1
Name: count, dtype: int64

In [31]:
# Filter exonic variants
coding = gene[gene['Func.refGene'] == 'exonic']
coding.count()

Chr                   287
Start                 287
End                   287
Ref                   287
Alt                   287
Func.refGene          287
Gene.refGene          287
GeneDetail.refGene    287
ExonicFunc.refGene    287
AAChange.refGene      287
clinvar_20140902      287
Otherinfo1            287
Otherinfo2            287
Otherinfo3            287
Otherinfo4            287
Otherinfo5            287
Otherinfo6            287
Otherinfo7            287
Otherinfo8            287
Otherinfo9            287
Otherinfo10           287
Otherinfo11           287
Otherinfo12           287
Otherinfo13           287
dtype: int64

In [32]:
coding["ExonicFunc.refGene"].value_counts()

ExonicFunc.refGene
nonsynonymous SNV         155
synonymous SNV            123
stopgain                    4
nonframeshift deletion      3
frameshift deletion         2
Name: count, dtype: int64

In [33]:
# Read in ANNOVAR multianno file
gene = pd.read_csv(f'{WORK_DIR}/TMEM175_AFR/AFR_TMEM175.annovar.hg38_multianno.txt', sep = '\t')
display(gene)

Unnamed: 0,Chr,Start,End,Ref,Alt,Func.refGene,Gene.refGene,GeneDetail.refGene,ExonicFunc.refGene,AAChange.refGene,clinvar_20140902,Otherinfo1,Otherinfo2,Otherinfo3,Otherinfo4,Otherinfo5,Otherinfo6,Otherinfo7,Otherinfo8,Otherinfo9,Otherinfo10,Otherinfo11,Otherinfo12,Otherinfo13
0,4,882400,882400,G,A,intronic,GAK,.,.,.,.,0,.,.,4,882400,chr4:882400:G:A,G,A,.,.,PR,GT,0/0
1,4,882432,882432,G,A,intronic,GAK,.,.,.,.,0,.,.,4,882432,chr4:882432:G:A,G,A,.,.,PR,GT,0/0
2,4,882454,882454,C,T,intronic,GAK,.,.,.,.,0,.,.,4,882454,chr4:882454:C:T,C,T,.,.,PR,GT,0/0
3,4,882471,882471,G,A,intronic,GAK,.,.,.,.,0,.,.,4,882471,chr4:882471:G:A,G,A,.,.,PR,GT,0/0
4,4,882498,882498,C,G,intronic,GAK,.,.,.,.,0,.,.,4,882498,chr4:882498:C:G,C,G,.,.,PR,GT,0/0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3848,4,1008499,1008499,C,T,intergenic,IDUA;FGFRL1,dist=3935;dist=3127,.,.,.,0,.,.,4,1008499,chr4:1008499:C:T,C,T,.,.,PR,GT,0/0
3849,4,1008524,1008524,G,T,intergenic,IDUA;FGFRL1,dist=3960;dist=3102,.,.,.,0,.,.,4,1008524,chr4:1008524:G:T,G,T,.,.,PR,GT,0/0
3850,4,1008620,1008620,G,T,intergenic,IDUA;FGFRL1,dist=4056;dist=3006,.,.,.,0,.,.,4,1008620,chr4:1008620:G:T,G,T,.,.,PR,GT,0/0
3851,4,1008649,1008649,C,T,intergenic,IDUA;FGFRL1,dist=4085;dist=2977,.,.,.,0,.,.,4,1008649,chr4:1008649:C:T,C,T,.,.,PR,GT,0/0


In [34]:
gene["Func.refGene"].value_counts()

Func.refGene
intronic           3069
exonic              314
intergenic          203
UTR3                118
downstream           63
UTR5                 45
upstream             40
exonic;splicing       1
Name: count, dtype: int64

In [35]:
# Filter exonic variants
coding = gene[gene['Func.refGene'] == 'exonic']
coding.count()

Chr                   314
Start                 314
End                   314
Ref                   314
Alt                   314
Func.refGene          314
Gene.refGene          314
GeneDetail.refGene    314
ExonicFunc.refGene    314
AAChange.refGene      314
clinvar_20140902      314
Otherinfo1            314
Otherinfo2            314
Otherinfo3            314
Otherinfo4            314
Otherinfo5            314
Otherinfo6            314
Otherinfo7            314
Otherinfo8            314
Otherinfo9            314
Otherinfo10           314
Otherinfo11           314
Otherinfo12           314
Otherinfo13           314
dtype: int64

In [36]:
coding["ExonicFunc.refGene"].value_counts()

ExonicFunc.refGene
nonsynonymous SNV          161
synonymous SNV             143
frameshift deletion          4
nonframeshift deletion       3
stopgain                     2
nonframeshift insertion      1
Name: count, dtype: int64

In [37]:
# Read in ANNOVAR multianno file
gene = pd.read_csv(f'{WORK_DIR}/TMEM175_AJ/AJ_TMEM175.annovar.hg38_multianno.txt', sep = '\t')
display(gene)

Unnamed: 0,Chr,Start,End,Ref,Alt,Func.refGene,Gene.refGene,GeneDetail.refGene,ExonicFunc.refGene,AAChange.refGene,clinvar_20140902,Otherinfo1,Otherinfo2,Otherinfo3,Otherinfo4,Otherinfo5,Otherinfo6,Otherinfo7,Otherinfo8,Otherinfo9,Otherinfo10,Otherinfo11,Otherinfo12,Otherinfo13
0,4,882498,882498,C,G,intronic,GAK,.,.,.,.,0.0,.,.,4,882498,chr4:882498:C:G,C,G,.,.,PR,GT,0/0
1,4,882530,882530,C,T,intronic,GAK,.,.,.,.,0.0,.,.,4,882530,chr4:882530:C:T,C,T,.,.,PR,GT,0/0
2,4,882611,882611,T,C,intronic,GAK,.,.,.,.,0.0,.,.,4,882611,chr4:882611:T:C,T,C,.,.,PR,GT,0/0
3,4,882745,882745,G,A,exonic,GAK,.,synonymous SNV,"GAK:NM_001318134:exon11:c.C1242T:p.H414H,GAK:N...",.,0.0,.,.,4,882745,chr4:882745:G:A,G,A,.,.,PR,GT,0/0
4,4,882801,882801,C,T,exonic,GAK,.,nonsynonymous SNV,"GAK:NM_001318134:exon11:c.G1186A:p.A396T,GAK:N...",.,0.0,.,.,4,882801,chr4:882801:C:T,C,T,.,.,PR,GT,0/0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1280,4,1008337,1008337,C,T,intergenic,IDUA;FGFRL1,dist=3773;dist=3289,.,.,.,1.0,.,.,4,1008337,chr4:1008337:C:T,C,T,.,.,PR,GT,1/1
1281,4,1008371,1008371,C,-,intergenic,IDUA;FGFRL1,dist=3807;dist=3255,.,.,.,0.0,.,.,4,1008370,chr4:1008370:TC:T,TC,T,.,.,PR,GT,0/0
1282,4,1008480,1008480,A,G,intergenic,IDUA;FGFRL1,dist=3916;dist=3146,.,.,.,0.0,.,.,4,1008480,chr4:1008480:A:G,A,G,.,.,PR,GT,0/0
1283,4,1008538,1008538,A,G,intergenic,IDUA;FGFRL1,dist=3974;dist=3088,.,.,.,0.0,.,.,4,1008538,chr4:1008538:A:G,A,G,.,.,PR,GT,0/0


In [38]:
gene["Func.refGene"].value_counts()

Func.refGene
intronic           1024
exonic              125
intergenic           60
UTR3                 43
downstream           20
upstream              6
UTR5                  6
exonic;splicing       1
Name: count, dtype: int64

In [39]:
# Filter exonic variants
coding = gene[gene['Func.refGene'] == 'exonic']
coding.count()

Chr                   125
Start                 125
End                   125
Ref                   125
Alt                   125
Func.refGene          125
Gene.refGene          125
GeneDetail.refGene    125
ExonicFunc.refGene    125
AAChange.refGene      125
clinvar_20140902      125
Otherinfo1            125
Otherinfo2            125
Otherinfo3            125
Otherinfo4            125
Otherinfo5            125
Otherinfo6            125
Otherinfo7            125
Otherinfo8            125
Otherinfo9            125
Otherinfo10           125
Otherinfo11           125
Otherinfo12           125
Otherinfo13           125
dtype: int64

In [40]:
coding["ExonicFunc.refGene"].value_counts()

ExonicFunc.refGene
nonsynonymous SNV         66
synonymous SNV            55
frameshift deletion        2
stopgain                   1
nonframeshift deletion     1
Name: count, dtype: int64

In [41]:
# Read in ANNOVAR multianno file
gene = pd.read_csv(f'{WORK_DIR}/TMEM175_AMR/AMR_TMEM175.annovar.hg38_multianno.txt', sep = '\t')
display(gene)

Unnamed: 0,Chr,Start,End,Ref,Alt,Func.refGene,Gene.refGene,GeneDetail.refGene,ExonicFunc.refGene,AAChange.refGene,clinvar_20140902,Otherinfo1,Otherinfo2,Otherinfo3,Otherinfo4,Otherinfo5,Otherinfo6,Otherinfo7,Otherinfo8,Otherinfo9,Otherinfo10,Otherinfo11,Otherinfo12,Otherinfo13
0,4,882397,882397,C,T,intronic,GAK,.,.,.,.,0,.,.,4,882397,chr4:882397:C:T,C,T,.,.,PR,GT,0/0
1,4,882400,882400,G,A,intronic,GAK,.,.,.,.,0,.,.,4,882400,chr4:882400:G:A,G,A,.,.,PR,GT,0/0
2,4,882432,882432,G,A,intronic,GAK,.,.,.,.,0,.,.,4,882432,chr4:882432:G:A,G,A,.,.,PR,GT,0/0
3,4,882454,882454,C,T,intronic,GAK,.,.,.,.,0,.,.,4,882454,chr4:882454:C:T,C,T,.,.,PR,GT,0/0
4,4,882498,882498,C,G,intronic,GAK,.,.,.,.,0,.,.,4,882498,chr4:882498:C:G,C,G,.,.,PR,GT,0/0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1982,4,1008395,1008395,C,T,intergenic,IDUA;FGFRL1,dist=3831;dist=3231,.,.,.,0,.,.,4,1008395,chr4:1008395:C:T,C,T,.,.,PR,GT,0/0
1983,4,1008480,1008480,A,G,intergenic,IDUA;FGFRL1,dist=3916;dist=3146,.,.,.,0,.,.,4,1008480,chr4:1008480:A:G,A,G,.,.,PR,GT,0/0
1984,4,1008515,1008515,C,T,intergenic,IDUA;FGFRL1,dist=3951;dist=3111,.,.,.,0,.,.,4,1008515,chr4:1008515:C:T,C,T,.,.,PR,GT,0/0
1985,4,1008571,1008571,G,A,intergenic,IDUA;FGFRL1,dist=4007;dist=3055,.,.,.,0,.,.,4,1008571,chr4:1008571:G:A,G,A,.,.,PR,GT,0/0


In [42]:
gene["Func.refGene"].value_counts()

Func.refGene
intronic           1596
exonic              178
intergenic          104
UTR3                 56
downstream           23
UTR5                 17
upstream             12
exonic;splicing       1
Name: count, dtype: int64

In [43]:
# Filter exonic variants
coding = gene[gene['Func.refGene'] == 'exonic']
coding.count()

Chr                   178
Start                 178
End                   178
Ref                   178
Alt                   178
Func.refGene          178
Gene.refGene          178
GeneDetail.refGene    178
ExonicFunc.refGene    178
AAChange.refGene      178
clinvar_20140902      178
Otherinfo1            178
Otherinfo2            178
Otherinfo3            178
Otherinfo4            178
Otherinfo5            178
Otherinfo6            178
Otherinfo7            178
Otherinfo8            178
Otherinfo9            178
Otherinfo10           178
Otherinfo11           178
Otherinfo12           178
Otherinfo13           178
dtype: int64

In [44]:
coding["ExonicFunc.refGene"].value_counts()

ExonicFunc.refGene
nonsynonymous SNV          89
synonymous SNV             85
frameshift deletion         2
stopgain                    1
nonframeshift insertion     1
Name: count, dtype: int64

In [45]:
# Read in ANNOVAR multianno file
gene = pd.read_csv(f'{WORK_DIR}/TMEM175_CAH/CAH_TMEM175.annovar.hg38_multianno.txt', sep = '\t')
display(gene)

Unnamed: 0,Chr,Start,End,Ref,Alt,Func.refGene,Gene.refGene,GeneDetail.refGene,ExonicFunc.refGene,AAChange.refGene,clinvar_20140902,Otherinfo1,Otherinfo2,Otherinfo3,Otherinfo4,Otherinfo5,Otherinfo6,Otherinfo7,Otherinfo8,Otherinfo9,Otherinfo10,Otherinfo11,Otherinfo12,Otherinfo13
0,4,882400,882400,G,A,intronic,GAK,.,.,.,.,0,.,.,4,882400,chr4:882400:G:A,G,A,.,.,PR,GT,0/0
1,4,882432,882432,G,A,intronic,GAK,.,.,.,.,0,.,.,4,882432,chr4:882432:G:A,G,A,.,.,PR,GT,0/0
2,4,882454,882454,C,T,intronic,GAK,.,.,.,.,0,.,.,4,882454,chr4:882454:C:T,C,T,.,.,PR,GT,0/0
3,4,882498,882498,C,G,intronic,GAK,.,.,.,.,0,.,.,4,882498,chr4:882498:C:G,C,G,.,.,PR,GT,0/0
4,4,882524,882524,A,G,intronic,GAK,.,.,.,.,0,.,.,4,882524,chr4:882524:A:G,A,G,.,.,PR,GT,0/0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3085,4,1008480,1008480,A,G,intergenic,IDUA;FGFRL1,dist=3916;dist=3146,.,.,.,0,.,.,4,1008480,chr4:1008480:A:G,A,G,.,.,PR,GT,0/0
3086,4,1008515,1008515,C,T,intergenic,IDUA;FGFRL1,dist=3951;dist=3111,.,.,.,0,.,.,4,1008515,chr4:1008515:C:T,C,T,.,.,PR,GT,0/0
3087,4,1008570,1008570,C,T,intergenic,IDUA;FGFRL1,dist=4006;dist=3056,.,.,.,0,.,.,4,1008570,chr4:1008570:C:T,C,T,.,.,PR,GT,0/0
3088,4,1008649,1008649,C,T,intergenic,IDUA;FGFRL1,dist=4085;dist=2977,.,.,.,0,.,.,4,1008649,chr4:1008649:C:T,C,T,.,.,PR,GT,0/0


In [46]:
gene["Func.refGene"].value_counts()

Func.refGene
intronic           2467
exonic              260
intergenic          168
UTR3                 87
downstream           46
UTR5                 31
upstream             30
exonic;splicing       1
Name: count, dtype: int64

In [47]:
# Filter exonic variants
coding = gene[gene['Func.refGene'] == 'exonic']
coding.count()

Chr                   260
Start                 260
End                   260
Ref                   260
Alt                   260
Func.refGene          260
Gene.refGene          260
GeneDetail.refGene    260
ExonicFunc.refGene    260
AAChange.refGene      260
clinvar_20140902      260
Otherinfo1            260
Otherinfo2            260
Otherinfo3            260
Otherinfo4            260
Otherinfo5            260
Otherinfo6            260
Otherinfo7            260
Otherinfo8            260
Otherinfo9            260
Otherinfo10           260
Otherinfo11           260
Otherinfo12           260
Otherinfo13           260
dtype: int64

In [48]:
coding["ExonicFunc.refGene"].value_counts()

ExonicFunc.refGene
nonsynonymous SNV         139
synonymous SNV            113
stopgain                    4
frameshift deletion         2
nonframeshift deletion      1
frameshift insertion        1
Name: count, dtype: int64

In [49]:
# Read in ANNOVAR multianno file
gene = pd.read_csv(f'{WORK_DIR}/TMEM175_CAS/CAS_TMEM175.annovar.hg38_multianno.txt', sep = '\t')
display(gene)

Unnamed: 0,Chr,Start,End,Ref,Alt,Func.refGene,Gene.refGene,GeneDetail.refGene,ExonicFunc.refGene,AAChange.refGene,clinvar_20140902,Otherinfo1,Otherinfo2,Otherinfo3,Otherinfo4,Otherinfo5,Otherinfo6,Otherinfo7,Otherinfo8,Otherinfo9,Otherinfo10,Otherinfo11,Otherinfo12,Otherinfo13
0,4,882429,882429,G,C,intronic,GAK,.,.,.,.,0,.,.,4,882429,chr4:882429:G:C,G,C,.,.,PR,GT,0/0
1,4,882432,882432,G,A,intronic,GAK,.,.,.,.,0,.,.,4,882432,chr4:882432:G:A,G,A,.,.,PR,GT,0/0
2,4,882468,882468,C,G,intronic,GAK,.,.,.,.,0,.,.,4,882468,chr4:882468:C:G,C,G,.,.,PR,GT,0/0
3,4,882498,882498,C,G,intronic,GAK,.,.,.,.,0,.,.,4,882498,chr4:882498:C:G,C,G,.,.,PR,GT,0/0
4,4,882501,882501,C,A,intronic,GAK,.,.,.,.,0,.,.,4,882501,chr4:882501:C:A,C,A,.,.,PR,GT,0/0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1950,4,1008371,1008371,C,-,intergenic,IDUA;FGFRL1,dist=3807;dist=3255,.,.,.,0,.,.,4,1008370,chr4:1008370:TC:T,TC,T,.,.,PR,GT,0/0
1951,4,1008480,1008480,A,G,intergenic,IDUA;FGFRL1,dist=3916;dist=3146,.,.,.,0,.,.,4,1008480,chr4:1008480:A:G,A,G,.,.,PR,GT,0/0
1952,4,1008538,1008538,A,G,intergenic,IDUA;FGFRL1,dist=3974;dist=3088,.,.,.,0,.,.,4,1008538,chr4:1008538:A:G,A,G,.,.,PR,GT,0/0
1953,4,1008570,1008570,C,T,intergenic,IDUA;FGFRL1,dist=4006;dist=3056,.,.,.,0,.,.,4,1008570,chr4:1008570:C:T,C,T,.,.,PR,GT,0/0


In [50]:
gene["Func.refGene"].value_counts()

Func.refGene
intronic           1568
exonic              167
intergenic           96
UTR3                 62
UTR5                 26
downstream           22
upstream             13
exonic;splicing       1
Name: count, dtype: int64

In [51]:
# Filter exonic variants
coding = gene[gene['Func.refGene'] == 'exonic']
coding.count()

Chr                   167
Start                 167
End                   167
Ref                   167
Alt                   167
Func.refGene          167
Gene.refGene          167
GeneDetail.refGene    167
ExonicFunc.refGene    167
AAChange.refGene      167
clinvar_20140902      167
Otherinfo1            167
Otherinfo2            167
Otherinfo3            167
Otherinfo4            167
Otherinfo5            167
Otherinfo6            167
Otherinfo7            167
Otherinfo8            167
Otherinfo9            167
Otherinfo10           167
Otherinfo11           167
Otherinfo12           167
Otherinfo13           167
dtype: int64

In [52]:
coding["ExonicFunc.refGene"].value_counts()

ExonicFunc.refGene
nonsynonymous SNV      89
synonymous SNV         73
stopgain                4
frameshift deletion     1
Name: count, dtype: int64

In [53]:
# Read in ANNOVAR multianno file
gene = pd.read_csv(f'{WORK_DIR}/TMEM175_EAS/EAS_TMEM175.annovar.hg38_multianno.txt', sep = '\t')
display(gene)

Unnamed: 0,Chr,Start,End,Ref,Alt,Func.refGene,Gene.refGene,GeneDetail.refGene,ExonicFunc.refGene,AAChange.refGene,clinvar_20140902,Otherinfo1,Otherinfo2,Otherinfo3,Otherinfo4,Otherinfo5,Otherinfo6,Otherinfo7,Otherinfo8,Otherinfo9,Otherinfo10,Otherinfo11,Otherinfo12,Otherinfo13
0,4,882390,882390,C,G,intronic,GAK,.,.,.,.,0,.,.,4,882390,chr4:882390:C:G,C,G,.,.,PR,GT,0/0
1,4,882432,882432,G,A,intronic,GAK,.,.,.,.,0,.,.,4,882432,chr4:882432:G:A,G,A,.,.,PR,GT,0/0
2,4,882501,882501,C,A,intronic,GAK,.,.,.,.,0,.,.,4,882501,chr4:882501:C:A,C,A,.,.,PR,GT,0/0
3,4,882507,882507,G,A,intronic,GAK,.,.,.,.,0,.,.,4,882507,chr4:882507:G:A,G,A,.,.,PR,GT,0/0
4,4,882530,882530,C,T,intronic,GAK,.,.,.,.,1,.,.,4,882530,chr4:882530:C:T,C,T,.,.,PR,GT,1/1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3166,4,1008570,1008570,C,T,intergenic,IDUA;FGFRL1,dist=4006;dist=3056,.,.,.,0,.,.,4,1008570,chr4:1008570:C:T,C,T,.,.,PR,GT,0/0
3167,4,1008634,1008634,A,G,intergenic,IDUA;FGFRL1,dist=4070;dist=2992,.,.,.,0,.,.,4,1008634,chr4:1008634:A:G,A,G,.,.,PR,GT,0/0
3168,4,1008636,1008636,C,A,intergenic,IDUA;FGFRL1,dist=4072;dist=2990,.,.,.,0,.,.,4,1008636,chr4:1008636:C:A,C,A,.,.,PR,GT,0/0
3169,4,1008644,1008644,T,C,intergenic,IDUA;FGFRL1,dist=4080;dist=2982,.,.,.,0,.,.,4,1008644,chr4:1008644:T:C,T,C,.,.,PR,GT,0/0


In [54]:
gene["Func.refGene"].value_counts()

Func.refGene
intronic           2518
exonic              285
intergenic          174
UTR3                 99
UTR5                 43
downstream           33
upstream             17
exonic;splicing       1
splicing              1
Name: count, dtype: int64

In [55]:
# Filter exonic variants
coding = gene[gene['Func.refGene'] == 'exonic']
coding.count()

Chr                   285
Start                 285
End                   285
Ref                   285
Alt                   285
Func.refGene          285
Gene.refGene          285
GeneDetail.refGene    285
ExonicFunc.refGene    285
AAChange.refGene      285
clinvar_20140902      285
Otherinfo1            285
Otherinfo2            285
Otherinfo3            285
Otherinfo4            285
Otherinfo5            285
Otherinfo6            285
Otherinfo7            285
Otherinfo8            285
Otherinfo9            285
Otherinfo10           285
Otherinfo11           285
Otherinfo12           285
Otherinfo13           285
dtype: int64

In [56]:
coding["ExonicFunc.refGene"].value_counts()

ExonicFunc.refGene
nonsynonymous SNV         164
synonymous SNV            109
stopgain                    6
frameshift deletion         4
nonframeshift deletion      2
Name: count, dtype: int64

In [57]:
# Read in ANNOVAR multianno file
gene = pd.read_csv(f'{WORK_DIR}/TMEM175_MDE/MDE_TMEM175.annovar.hg38_multianno.txt', sep = '\t')
display(gene)

Unnamed: 0,Chr,Start,End,Ref,Alt,Func.refGene,Gene.refGene,GeneDetail.refGene,ExonicFunc.refGene,AAChange.refGene,clinvar_20140902,Otherinfo1,Otherinfo2,Otherinfo3,Otherinfo4,Otherinfo5,Otherinfo6,Otherinfo7,Otherinfo8,Otherinfo9,Otherinfo10,Otherinfo11,Otherinfo12,Otherinfo13
0,4,882432,882432,G,A,intronic,GAK,.,.,.,.,0.0,.,.,4,882432,chr4:882432:G:A,G,A,.,.,PR,GT,0/0
1,4,882471,882471,G,A,intronic,GAK,.,.,.,.,0.0,.,.,4,882471,chr4:882471:G:A,G,A,.,.,PR,GT,0/0
2,4,882498,882498,C,G,intronic,GAK,.,.,.,.,0.0,.,.,4,882498,chr4:882498:C:G,C,G,.,.,PR,GT,0/0
3,4,882517,882517,C,A,intronic,GAK,.,.,.,.,0.0,.,.,4,882517,chr4:882517:C:A,C,A,.,.,PR,GT,0/0
4,4,882530,882530,C,T,intronic,GAK,.,.,.,.,0.5,.,.,4,882530,chr4:882530:C:T,C,T,.,.,PR,GT,0/1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1940,4,1008371,1008371,C,-,intergenic,IDUA;FGFRL1,dist=3807;dist=3255,.,.,.,0.0,.,.,4,1008370,chr4:1008370:TC:T,TC,T,.,.,PR,GT,0/0
1941,4,1008480,1008480,A,G,intergenic,IDUA;FGFRL1,dist=3916;dist=3146,.,.,.,0.5,.,.,4,1008480,chr4:1008480:A:G,A,G,.,.,PR,GT,0/1
1942,4,1008515,1008515,C,T,intergenic,IDUA;FGFRL1,dist=3951;dist=3111,.,.,.,0.0,.,.,4,1008515,chr4:1008515:C:T,C,T,.,.,PR,GT,0/0
1943,4,1008521,1008521,T,C,intergenic,IDUA;FGFRL1,dist=3957;dist=3105,.,.,.,0.0,.,.,4,1008521,chr4:1008521:T:C,T,C,.,.,PR,GT,0/0


In [58]:
gene["Func.refGene"].value_counts()

Func.refGene
intronic           1547
exonic              177
intergenic           94
UTR3                 59
downstream           34
UTR5                 22
upstream             11
exonic;splicing       1
Name: count, dtype: int64

In [59]:
# Filter exonic variants
coding = gene[gene['Func.refGene'] == 'exonic']
coding.count()

Chr                   177
Start                 177
End                   177
Ref                   177
Alt                   177
Func.refGene          177
Gene.refGene          177
GeneDetail.refGene    177
ExonicFunc.refGene    177
AAChange.refGene      177
clinvar_20140902      177
Otherinfo1            177
Otherinfo2            177
Otherinfo3            177
Otherinfo4            177
Otherinfo5            177
Otherinfo6            177
Otherinfo7            177
Otherinfo8            177
Otherinfo9            177
Otherinfo10           177
Otherinfo11           177
Otherinfo12           177
Otherinfo13           177
dtype: int64

In [60]:
coding["ExonicFunc.refGene"].value_counts()

ExonicFunc.refGene
synonymous SNV         91
nonsynonymous SNV      84
frameshift deletion     1
stopgain                1
Name: count, dtype: int64

In [61]:
# Read in ANNOVAR multianno file
gene = pd.read_csv(f'{WORK_DIR}/TMEM175_SAS/SAS_TMEM175.annovar.hg38_multianno.txt', sep = '\t')
display(gene)

Unnamed: 0,Chr,Start,End,Ref,Alt,Func.refGene,Gene.refGene,GeneDetail.refGene,ExonicFunc.refGene,AAChange.refGene,clinvar_20140902,Otherinfo1,Otherinfo2,Otherinfo3,Otherinfo4,Otherinfo5,Otherinfo6,Otherinfo7,Otherinfo8,Otherinfo9,Otherinfo10,Otherinfo11,Otherinfo12,Otherinfo13
0,4,882397,882397,C,T,intronic,GAK,.,.,.,.,0,.,.,4,882397,chr4:882397:C:T,C,T,.,.,PR,GT,0/0
1,4,882530,882530,C,T,intronic,GAK,.,.,.,.,0,.,.,4,882530,chr4:882530:C:T,C,T,.,.,PR,GT,0/0
2,4,882566,882566,G,-,intronic,GAK,.,.,.,.,0,.,.,4,882565,chr4:882565:AG:A,AG,A,.,.,PR,GT,0/0
3,4,882611,882611,T,C,intronic,GAK,.,.,.,.,0,.,.,4,882611,chr4:882611:T:C,T,C,.,.,PR,GT,0/0
4,4,882678,882678,C,G,intronic,GAK,.,.,.,.,0,.,.,4,882678,chr4:882678:C:G,C,G,.,.,PR,GT,0/0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1723,4,1008246,1008246,C,T,intergenic,IDUA;FGFRL1,dist=3682;dist=3380,.,.,.,0,.,.,4,1008246,chr4:1008246:C:T,C,T,.,.,PR,GT,0/0
1724,4,1008337,1008337,C,T,intergenic,IDUA;FGFRL1,dist=3773;dist=3289,.,.,.,0.5,.,.,4,1008337,chr4:1008337:C:T,C,T,.,.,PR,GT,0/1
1725,4,1008371,1008371,C,-,intergenic,IDUA;FGFRL1,dist=3807;dist=3255,.,.,.,0,.,.,4,1008370,chr4:1008370:TC:T,TC,T,.,.,PR,GT,0/0
1726,4,1008480,1008480,A,G,intergenic,IDUA;FGFRL1,dist=3916;dist=3146,.,.,.,0,.,.,4,1008480,chr4:1008480:A:G,A,G,.,.,PR,GT,0/0


In [62]:
gene["Func.refGene"].value_counts()

Func.refGene
intronic           1412
exonic              143
intergenic           72
UTR3                 50
UTR5                 24
downstream           18
upstream              8
exonic;splicing       1
Name: count, dtype: int64

In [63]:
# Filter exonic variants
coding = gene[gene['Func.refGene'] == 'exonic']
coding.count()

Chr                   143
Start                 143
End                   143
Ref                   143
Alt                   143
Func.refGene          143
Gene.refGene          143
GeneDetail.refGene    143
ExonicFunc.refGene    143
AAChange.refGene      143
clinvar_20140902      143
Otherinfo1            143
Otherinfo2            143
Otherinfo3            143
Otherinfo4            143
Otherinfo5            143
Otherinfo6            143
Otherinfo7            143
Otherinfo8            143
Otherinfo9            143
Otherinfo10           143
Otherinfo11           143
Otherinfo12           143
Otherinfo13           143
dtype: int64

In [64]:
coding["ExonicFunc.refGene"].value_counts()

ExonicFunc.refGene
nonsynonymous SNV      75
synonymous SNV         64
stopgain                3
frameshift deletion     1
Name: count, dtype: int64

In [65]:
# Read in ANNOVAR multianno file
gene = pd.read_csv(f'{WORK_DIR}/TMEM175_FIN/FIN_TMEM175.annovar.hg38_multianno.txt', sep = '\t')
display(gene)

Unnamed: 0,Chr,Start,End,Ref,Alt,Func.refGene,Gene.refGene,GeneDetail.refGene,ExonicFunc.refGene,AAChange.refGene,clinvar_20140902,Otherinfo1,Otherinfo2,Otherinfo3,Otherinfo4,Otherinfo5,Otherinfo6,Otherinfo7,Otherinfo8,Otherinfo9,Otherinfo10,Otherinfo11,Otherinfo12,Otherinfo13
0,4,882530,882530,C,T,intronic,GAK,.,.,.,.,0.5,.,.,4,882530,chr4:882530:C:T,C,T,.,.,PR,GT,0/1
1,4,882611,882611,T,C,intronic,GAK,.,.,.,.,0,.,.,4,882611,chr4:882611:T:C,T,C,.,.,PR,GT,0/0
2,4,882849,882849,G,A,intronic,GAK,.,.,.,.,0,.,.,4,882849,chr4:882849:G:A,G,A,.,.,PR,GT,0/0
3,4,883018,883018,G,A,intronic,GAK,.,.,.,.,0,.,.,4,883018,chr4:883018:G:A,G,A,.,.,PR,GT,0/0
4,4,883133,883133,G,A,intronic,GAK,.,.,.,.,0,.,.,4,883133,chr4:883133:G:A,G,A,.,.,PR,GT,0/0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
600,4,1008009,1008009,C,T,intergenic,IDUA;FGFRL1,dist=3445;dist=3617,.,.,.,0.5,.,.,4,1008009,chr4:1008009:C:T,C,T,.,.,PR,GT,0/1
601,4,1008209,1008209,C,T,intergenic,IDUA;FGFRL1,dist=3645;dist=3417,.,.,.,0,.,.,4,1008209,chr4:1008209:C:T,C,T,.,.,PR,GT,0/0
602,4,1008337,1008337,C,T,intergenic,IDUA;FGFRL1,dist=3773;dist=3289,.,.,.,1,.,.,4,1008337,chr4:1008337:C:T,C,T,.,.,PR,GT,1/1
603,4,1008371,1008371,C,-,intergenic,IDUA;FGFRL1,dist=3807;dist=3255,.,.,.,0,.,.,4,1008370,chr4:1008370:TC:T,TC,T,.,.,PR,GT,0/0


In [66]:
gene["Func.refGene"].value_counts()

Func.refGene
intronic           491
exonic              50
intergenic          29
UTR3                20
downstream           6
upstream             5
UTR5                 3
exonic;splicing      1
Name: count, dtype: int64

In [67]:
# Filter exonic variants
coding = gene[gene['Func.refGene'] == 'exonic']
coding.count()

Chr                   50
Start                 50
End                   50
Ref                   50
Alt                   50
Func.refGene          50
Gene.refGene          50
GeneDetail.refGene    50
ExonicFunc.refGene    50
AAChange.refGene      50
clinvar_20140902      50
Otherinfo1            50
Otherinfo2            50
Otherinfo3            50
Otherinfo4            50
Otherinfo5            50
Otherinfo6            50
Otherinfo7            50
Otherinfo8            50
Otherinfo9            50
Otherinfo10           50
Otherinfo11           50
Otherinfo12           50
Otherinfo13           50
dtype: int64

In [68]:
coding["ExonicFunc.refGene"].value_counts()

ExonicFunc.refGene
synonymous SNV       27
nonsynonymous SNV    23
Name: count, dtype: int64

In [74]:
# Read in ANNOVAR multianno file
gene = pd.read_csv(f'{WORK_DIR}/TMEM175_EUR/EUR_TMEM175.annovar.hg38_multianno.txt', sep = '\t')
display(gene)

Unnamed: 0,Chr,Start,End,Ref,Alt,Func.refGene,Gene.refGene,GeneDetail.refGene,ExonicFunc.refGene,AAChange.refGene,clinvar_20140902,Otherinfo1,Otherinfo2,Otherinfo3,Otherinfo4,Otherinfo5,Otherinfo6,Otherinfo7,Otherinfo8,Otherinfo9,Otherinfo10,Otherinfo11,Otherinfo12,Otherinfo13
0,4,882390,882390,C,A,intronic,GAK,.,.,.,.,0,.,.,4,882390,chr4:882390:C:A,C,A,.,.,PR,GT,0/0
1,4,882397,882397,C,T,intronic,GAK,.,.,.,.,0,.,.,4,882397,chr4:882397:C:T,C,T,.,.,PR,GT,0/0
2,4,882422,882422,C,T,intronic,GAK,.,.,.,.,0,.,.,4,882422,chr4:882422:C:T,C,T,.,.,PR,GT,0/0
3,4,882432,882432,G,A,intronic,GAK,.,.,.,.,0,.,.,4,882432,chr4:882432:G:A,G,A,.,.,PR,GT,0/0
4,4,882454,882454,C,T,intronic,GAK,.,.,.,.,0,.,.,4,882454,chr4:882454:C:T,C,T,.,.,PR,GT,0/0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9499,4,1008570,1008570,C,T,intergenic,IDUA;FGFRL1,dist=4006;dist=3056,.,.,.,0,.,.,4,1008570,chr4:1008570:C:T,C,T,.,.,PR,GT,0/0
9500,4,1008571,1008571,G,A,intergenic,IDUA;FGFRL1,dist=4007;dist=3055,.,.,.,0,.,.,4,1008571,chr4:1008571:G:A,G,A,.,.,PR,GT,0/0
9501,4,1008596,1008596,C,T,intergenic,IDUA;FGFRL1,dist=4032;dist=3030,.,.,.,.,.,.,4,1008596,chr4:1008596:C:T,C,T,.,.,PR,GT,./.
9502,4,1008600,1008600,C,T,intergenic,IDUA;FGFRL1,dist=4036;dist=3026,.,.,.,0,.,.,4,1008600,chr4:1008600:C:T,C,T,.,.,PR,GT,0/0


In [75]:
gene["Func.refGene"].value_counts()

Func.refGene
intronic           7451
exonic              951
intergenic          474
UTR3                266
downstream          152
UTR5                114
upstream             90
splicing              5
exonic;splicing       1
Name: count, dtype: int64

In [76]:
# Filter exonic variants
coding = gene[gene['Func.refGene'] == 'exonic']
coding.count()

Chr                   951
Start                 951
End                   951
Ref                   951
Alt                   951
Func.refGene          951
Gene.refGene          951
GeneDetail.refGene    951
ExonicFunc.refGene    951
AAChange.refGene      951
clinvar_20140902      951
Otherinfo1            951
Otherinfo2            951
Otherinfo3            951
Otherinfo4            951
Otherinfo5            951
Otherinfo6            951
Otherinfo7            951
Otherinfo8            951
Otherinfo9            951
Otherinfo10           951
Otherinfo11           951
Otherinfo12           951
Otherinfo13           951
dtype: int64

In [77]:
coding["ExonicFunc.refGene"].value_counts()

ExonicFunc.refGene
nonsynonymous SNV          548
synonymous SNV             367
nonframeshift deletion      12
stopgain                    10
frameshift deletion          9
frameshift insertion         4
nonframeshift insertion      1
Name: count, dtype: int64

In [69]:
#Make lists of variants to keep - all coding, coding nonsynonymous (missense - as they are coded in ANNOVAR), deleterious (CADD_phred > 20)

for ancestry in ancestries:
    
    WORK_DIR = f'~/workspace/ws_files/TMEM175/TMEM175_{ancestry}'
    print(f'WORKING ON: {ancestry}')
    
    # Read in ANNOVAR multianno file
    gene = pd.read_csv(f'{WORK_DIR}/{ancestry}_TMEM175.annovar.hg38_multianno.txt', sep = '\t')
    
    #Print number of variants in the different categories
    results = [] 
    
    utr5 = gene[gene['Func.refGene']== 'UTR5']
    intronic = gene[gene['Func.refGene']== 'intronic']
    exonic = gene[gene['Func.refGene']== 'exonic']
    utr3 = gene[gene['Func.refGene']== 'UTR3']
    coding_nonsynonymous = gene[(gene['Func.refGene'] == 'exonic') & (gene['ExonicFunc.refGene'] == 'nonsynonymous SNV')]
    coding_synonymous = gene[(gene['Func.refGene'] == 'exonic') & (gene['ExonicFunc.refGene'] != 'nonsynonymous SNV')]
    lof = exonic[(exonic['ExonicFunc.refGene'] == 'stopgain') | (exonic['ExonicFunc.refGene'] == 'stoploss') | (exonic['ExonicFunc.refGene'] == 'frameshift deletion') | (exonic['ExonicFunc.refGene'] == 'frameshift insertion')]
    nonsynonymous_lof = pd.concat([coding_nonsynonymous, lof])
    
    print({ancestry})
    print('Total variants: ', len(gene))
    print("Intronic: ", len(intronic))
    print('UTR3: ', len(utr3))
    print('UTR5: ', len(utr5))
    print("Total exonic: ", len(exonic))
    print('  Synonymous: ', len(coding_synonymous))
    print("  Nonsynonymous: ", len(coding_nonsynonymous))
    print("nonsynonymous_lof: ", len(nonsynonymous_lof))
    results.append((gene, intronic, utr3, utr5, exonic, coding_synonymous, coding_nonsynonymous, nonsynonymous_lof))
    print('\n')
    
    # Save in PLINK format - coding nonsynonymous 
    # These are missense variants - other types of nonsynonymous variants (e.g stopgain/loss, or frameshift variants are coded differently in the ExonicFunc.refGene 
    variants_toKeep = nonsynonymous_lof[['Chr', 'Start', 'End', 'Gene.refGene']].copy()
    variants_toKeep.to_csv(f'{WORK_DIR}/{ancestry}_TMEM175.nonsynonymous_lof.variantstoKeep.txt', sep="\t", index=False, header=False)
    
    # Save in PLINK format - all coding variants
    variants_toKeep2 = exonic[['Chr', 'Start', 'End', 'Gene.refGene']].copy()
    variants_toKeep2.to_csv(f'{WORK_DIR}/{ancestry}_TMEM175.exonic.variantstoKeep.txt', sep="\t", index=False, header=False)

WORKING ON: AFR
{'AFR'}
Total variants:  3853
Intronic:  3069
UTR3:  118
UTR5:  45
Total exonic:  314
  Synonymous:  153
  Nonsynonymous:  161
nonsynonymous_lof:  167


WORKING ON: AAC
{'AAC'}
Total variants:  3449
Intronic:  2746
UTR3:  102
UTR5:  42
Total exonic:  287
  Synonymous:  132
  Nonsynonymous:  155
nonsynonymous_lof:  161


WORKING ON: FIN
{'FIN'}
Total variants:  605
Intronic:  491
UTR3:  20
UTR5:  3
Total exonic:  50
  Synonymous:  27
  Nonsynonymous:  23
nonsynonymous_lof:  23


WORKING ON: AMR
{'AMR'}
Total variants:  1987
Intronic:  1596
UTR3:  56
UTR5:  17
Total exonic:  178
  Synonymous:  89
  Nonsynonymous:  89
nonsynonymous_lof:  92


WORKING ON: EAS
{'EAS'}
Total variants:  3171
Intronic:  2518
UTR3:  99
UTR5:  43
Total exonic:  285
  Synonymous:  121
  Nonsynonymous:  164
nonsynonymous_lof:  174


WORKING ON: CAH
{'CAH'}
Total variants:  3090
Intronic:  2467
UTR3:  87
UTR5:  31
Total exonic:  260
  Synonymous:  121
  Nonsynonymous:  139
nonsynonymous_lof:  146




ALL variants

assoc

glm

ASSOC

In [70]:
#Run case-control analysis using plink assoc for all variants, not adjusting for any covariates
ancestries = ['AAC', 'AFR', 'AJ', 'AMR', 'CAS', 'EAS', 'EUR', 'FIN', 'MDE', 'SAS', 'CAH']

for ancestry in ancestries:

    WORK_DIR = f'~/workspace/ws_files/TMEM175/TMEM175_{ancestry}'
    
    ! /home/jupyter/tools/plink \
    --bfile {WORK_DIR}/{ancestry}_TMEM175 \
    --keep {WORK_DIR}/{ancestry}.samplestoKeep \
    --assoc \
    --allow-no-sex \
    --ci 0.95 \
    --maf 0.01 \
    --out {WORK_DIR}/{ancestry}_TMEM175.all

PLINK v1.90b6.9 64-bit (4 Mar 2019)            www.cog-genomics.org/plink/1.9/
(C) 2005-2019 Shaun Purcell, Christopher Chang   GNU General Public License v3
Logging to /home/jupyter/workspace/ws_files/TMEM175/TMEM175_AAC/AAC_TMEM175.all.log.
Options in effect:
  --allow-no-sex
  --assoc
  --bfile /home/jupyter/workspace/ws_files/TMEM175/TMEM175_AAC/AAC_TMEM175
  --ci 0.95
  --keep /home/jupyter/workspace/ws_files/TMEM175/TMEM175_AAC/AAC.samplestoKeep
  --maf 0.01
  --out /home/jupyter/workspace/ws_files/TMEM175/TMEM175_AAC/AAC_TMEM175.all

52223 MB RAM detected; reserving 26111 MB for main workspace.
3449 variants loaded from .bim file.
1111 people (455 males, 656 females) loaded from .fam.
1086 phenotype values loaded from .fam.
--keep: 1111 people remaining.
Using 1 thread (no multithreaded calculations invoked).
Before main variant filters, 1111 founders and 0 nonfounders present.
Calculating allele frequencies... 10111213141516171819202122232425262728293031323334353637383940414243

In [9]:
# test if we can see TMEM175 p.M393T when set maf<0.01 in AFR
WORK_DIR = f'~/workspace/ws_files/TMEM175/TMEM175_AFR'

! /home/jupyter/tools/plink \
--bfile {WORK_DIR}/AFR_TMEM175 \
--keep {WORK_DIR}/AFR.samplestoKeep \
--allow-no-sex \
--max-maf 0.01 \
--ci 0.95 \
--assoc \
--out {WORK_DIR}/AFR_TMEM175.all.test

PLINK v1.90b6.9 64-bit (4 Mar 2019)            www.cog-genomics.org/plink/1.9/
(C) 2005-2019 Shaun Purcell, Christopher Chang   GNU General Public License v3
Logging to /home/jupyter/workspace/ws_files/TMEM175/TMEM175_AFR/AFR_TMEM175.all.test.log.
Options in effect:
  --allow-no-sex
  --assoc
  --bfile /home/jupyter/workspace/ws_files/TMEM175/TMEM175_AFR/AFR_TMEM175
  --ci 0.95
  --keep /home/jupyter/workspace/ws_files/TMEM175/TMEM175_AFR/AFR.samplestoKeep
  --max-maf 0.01
  --out /home/jupyter/workspace/ws_files/TMEM175/TMEM175_AFR/AFR_TMEM175.all.test

12984 MB RAM detected; reserving 6492 MB for main workspace.
3853 variants loaded from .bim file.
2643 people (1480 males, 1163 females) loaded from .fam.
2621 phenotype values loaded from .fam.
--keep: 2643 people remaining.
Using 1 thread (no multithreaded calculations invoked).
Before main variant filters, 2643 founders and 0 nonfounders present.
Calculating allele frequencies... 10111213141516171819202122232425262728293031323334353

In [10]:
WORK_DIR = f'~/workspace/ws_files/TMEM175/TMEM175_AFR'
freq = pd.read_csv(f'{WORK_DIR}/AFR_TMEM175.all.test.assoc', sep='\s+')
freq.to_csv(f'{WORK_DIR}/AFR.all_nonadj.test.csv')

In [81]:
ancestries = ['AAC', 'AFR', 'AJ', 'AMR', 'CAS', 'EAS', 'EUR', 'FIN', 'MDE', 'SAS', 'CAH']

for ancestry in ancestries:
    
    WORK_DIR = f'~/workspace/ws_files/TMEM175/TMEM175_{ancestry}'
    print(f'WORKING ON: {ancestry}')
    
    #Look at assoc results, only variants with nominal p-value < 0.05
    freq = pd.read_csv(f'{WORK_DIR}/{ancestry}_TMEM175.all.assoc', sep='\s+')
    sig_all_nonadj = freq[freq['P']<0.05]
    
    print(f'Variants with p-value < 0.05: {sig_all_nonadj.shape}')
    
    #Save FREQ to csv
    freq.to_csv(f'{WORK_DIR}/{ancestry}.all_nonadj.csv')

WORKING ON: AAC
Variants with p-value < 0.05: (26, 13)
WORKING ON: AFR
Variants with p-value < 0.05: (90, 13)
WORKING ON: AJ
Variants with p-value < 0.05: (115, 13)
WORKING ON: AMR
Variants with p-value < 0.05: (7, 13)
WORKING ON: CAS
Variants with p-value < 0.05: (38, 13)
WORKING ON: EAS
Variants with p-value < 0.05: (200, 13)
WORKING ON: EUR
Variants with p-value < 0.05: (202, 13)
WORKING ON: FIN
Variants with p-value < 0.05: (9, 13)
WORKING ON: MDE
Variants with p-value < 0.05: (84, 13)
WORKING ON: SAS
Variants with p-value < 0.05: (24, 13)
WORKING ON: CAH
Variants with p-value < 0.05: (54, 13)


In [82]:
#Run case-control analysis with covariates
ancestries = ['AAC', 'AFR', 'AJ', 'AMR', 'CAS', 'EAS', 'EUR', 'FIN', 'MDE', 'SAS', 'CAH']

for ancestry in ancestries:

    WORK_DIR = f'~/workspace/ws_files/TMEM175/TMEM175_{ancestry}'

    ! /home/jupyter/tools/plink2 \
    --bfile {WORK_DIR}/{ancestry}_TMEM175 \
    --keep {WORK_DIR}/{ancestry}.samplestoKeep \
    --allow-no-sex \
    --maf 0.01 \
    --ci 0.95 \
    --glm \
    --covar {WORK_DIR}/{ancestry}_covariate_file.txt \
    --covar-name SEX,AGE,PC1,PC2,PC3,PC4,PC5 \
    --covar-variance-standardize \
    --neg9-pheno-really-missing \
    --out {WORK_DIR}/{ancestry}_TMEM175.all_adj

PLINK v2.00a6LM 64-bit Intel (4 Jul 2024)      www.cog-genomics.org/plink/2.0/
(C) 2005-2024 Shaun Purcell, Christopher Chang   GNU General Public License v3
Logging to /home/jupyter/workspace/ws_files/TMEM175/TMEM175_AAC/AAC_TMEM175.all_adj.log.
Options in effect:
  --allow-no-sex
  --bfile /home/jupyter/workspace/ws_files/TMEM175/TMEM175_AAC/AAC_TMEM175
  --ci 0.95
  --covar /home/jupyter/workspace/ws_files/TMEM175/TMEM175_AAC/AAC_covariate_file.txt
  --covar-name SEX,AGE,PC1,PC2,PC3,PC4,PC5
  --covar-variance-standardize
  --glm
  --keep /home/jupyter/workspace/ws_files/TMEM175/TMEM175_AAC/AAC.samplestoKeep
  --maf 0.01
  --neg9-pheno-really-missing
  --out /home/jupyter/workspace/ws_files/TMEM175/TMEM175_AAC/AAC_TMEM175.all_adj

Start time: Wed Jul 31 11:04:25 2024
Note: --allow-no-sex no longer has any effect.  (Missing-sex samples are
automatically excluded from association analysis when sex is a covariate, and
treated normally otherwise.)
52223 MiB RAM detected, ~50540 available

In [6]:
# test if we can see TMEM175 p.M393T when set maf<0.01 in AFR
WORK_DIR = f'~/workspace/ws_files/TMEM175/TMEM175_AFR'

! /home/jupyter/tools/plink2 \
--bfile {WORK_DIR}/AFR_TMEM175 \
--keep {WORK_DIR}/AFR.samplestoKeep \
--allow-no-sex \
--max-maf 0.01 \
--ci 0.95 \
--glm \
--covar {WORK_DIR}/AFR_covariate_file.txt \
--covar-name SEX,AGE,PC1,PC2,PC3,PC4,PC5 \
--covar-variance-standardize \
--neg9-pheno-really-missing \
--out {WORK_DIR}/AFR_TMEM175.all_adj.test

PLINK v2.0.0-a.6.0LM 64-bit Intel (11 Nov 2024)    cog-genomics.org/plink/2.0/
(C) 2005-2024 Shaun Purcell, Christopher Chang   GNU General Public License v3
Logging to /home/jupyter/workspace/ws_files/TMEM175/TMEM175_AFR/AFR_TMEM175.all_adj.test.log.
Options in effect:
  --allow-no-sex
  --bfile /home/jupyter/workspace/ws_files/TMEM175/TMEM175_AFR/AFR_TMEM175
  --ci 0.95
  --covar /home/jupyter/workspace/ws_files/TMEM175/TMEM175_AFR/AFR_covariate_file.txt
  --covar-name SEX,AGE,PC1,PC2,PC3,PC4,PC5
  --covar-variance-standardize
  --glm
  --keep /home/jupyter/workspace/ws_files/TMEM175/TMEM175_AFR/AFR.samplestoKeep
  --max-maf 0.01
  --neg9-pheno-really-missing
  --out /home/jupyter/workspace/ws_files/TMEM175/TMEM175_AFR/AFR_TMEM175.all_adj.test

Start time: Wed Mar  5 10:28:20 2025
Note: --allow-no-sex no longer has any effect.  (Missing-sex samples are
automatically excluded from association analysis when sex is a covariate, and
treated normally otherwise.)
12984 MiB RAM detected, ~1

In [7]:
#Read in plink glm results
WORK_DIR = f'~/workspace/ws_files/TMEM175/TMEM175_AFR'
assoc = pd.read_csv(f'{WORK_DIR}/AFR_TMEM175.all_adj.test.PHENO1.glm.logistic.hybrid', delim_whitespace=True)
assoc_add = assoc[assoc['TEST']=="ADD"]
assoc_add.to_csv(f'{WORK_DIR}/AFR.all_adj.test.csv')



In [83]:
#Process results from plink glm analysis including covariates
ancestries = ['AAC', 'AFR', 'AJ', 'AMR', 'CAS', 'EAS', 'EUR', 'FIN', 'MDE', 'SAS', 'CAH']

for ancestry in ancestries:
    
    WORK_DIR = f'~/workspace/ws_files/TMEM175/TMEM175_{ancestry}'
    print(f'WORKING ON: {ancestry}')
    
    #Read in plink glm results
    assoc = pd.read_csv(f'{WORK_DIR}/{ancestry}_TMEM175.all_adj.PHENO1.glm.logistic.hybrid', delim_whitespace=True)
    
    #Filter for additive test only - this is the variant results
    assoc_add = assoc[assoc['TEST']=="ADD"]
    
    #Check if there are any significant (p < 0.05) variants
    significant = assoc_add[assoc_add['P']<0.05]

    print(f'There are {len(significant)} variants with p-value < 0.05 in glm')
    
    #Check if there are any significant (p < 0.05) variants
    GWsignificant = assoc_add[assoc_add['P']<5e-8]

    print(f'There are {len(GWsignificant)} variants with p-value < 5e-8 in glm')
    
    #Save assoc_add to csv
    assoc_add.to_csv(f'{WORK_DIR}/{ancestry}.all_adj.csv')

WORKING ON: AAC
There are 24 variants with p-value < 0.05 in glm
There are 0 variants with p-value < 5e-8 in glm




WORKING ON: AFR
There are 46 variants with p-value < 0.05 in glm
There are 0 variants with p-value < 5e-8 in glm




WORKING ON: AJ
There are 121 variants with p-value < 0.05 in glm
There are 0 variants with p-value < 5e-8 in glm
WORKING ON: AMR




There are 7 variants with p-value < 0.05 in glm
There are 0 variants with p-value < 5e-8 in glm
WORKING ON: CAS
There are 26 variants with p-value < 0.05 in glm
There are 0 variants with p-value < 5e-8 in glm




WORKING ON: EAS
There are 44 variants with p-value < 0.05 in glm
There are 0 variants with p-value < 5e-8 in glm
WORKING ON: EUR
There are 148 variants with p-value < 0.05 in glm
There are 40 variants with p-value < 5e-8 in glm
WORKING ON: FIN
There are 15 variants with p-value < 0.05 in glm
There are 0 variants with p-value < 5e-8 in glm




WORKING ON: MDE
There are 124 variants with p-value < 0.05 in glm
There are 0 variants with p-value < 5e-8 in glm
WORKING ON: SAS




There are 42 variants with p-value < 0.05 in glm
There are 0 variants with p-value < 5e-8 in glm
WORKING ON: CAH
There are 27 variants with p-value < 0.05 in glm
There are 0 variants with p-value < 5e-8 in glm




Burden Analyses using RVTests

In [84]:
ancestries = ['AAC', 'AFR', 'AJ', 'AMR', 'CAS', 'EAS', 'EUR', 'FIN', 'MDE', 'SAS', 'CAH']
variant_classes = ['exonic', 'nonsynonymous_lof']

#Loop over all the ancestries and the 2 variant classes - run rvtests for all coding and missense variants
for ancestry in ancestries:
    for variant_class in variant_classes:
        
        WORK_DIR = f'~/workspace/ws_files/TMEM175/TMEM175_{ancestry}'
        
        # Print the command to be executed (for debugging purposes)
        print(f'Running plink to extract {variant_class} variants for ancestry: {ancestry}')
        
        #Extract relevant variants
        ! /home/jupyter/tools/plink2 \
        --pfile {REL7_PATH}/imputed_genotypes/{ancestry}/chr4_{ancestry}_release7_vwb \
        --keep {WORK_DIR}/{ancestry}.samplestoKeep \
        --extract range {WORK_DIR}/{ancestry}_TMEM175.{variant_class}.variantstoKeep.txt \
        --recode vcf-iid \
        --out {WORK_DIR}/{ancestry}_TMEM175.{variant_class}
        
        # Print the command to be executed (for debugging purposes)
        print(f'Running bgzip and tabix for {variant_class} variants for ancestry: {ancestry}')
        
        ## Bgzip and Tabix (zip and index the file)
        ! bgzip -f {WORK_DIR}/{ancestry}_TMEM175.{variant_class}.vcf
        ! tabix -f -p vcf {WORK_DIR}/{ancestry}_TMEM175.{variant_class}.vcf.gz

Running plink to extract exonic variants for ancestry: AAC
PLINK v2.00a6LM 64-bit Intel (4 Jul 2024)      www.cog-genomics.org/plink/2.0/
(C) 2005-2024 Shaun Purcell, Christopher Chang   GNU General Public License v3
Logging to /home/jupyter/workspace/ws_files/TMEM175/TMEM175_AAC/AAC_TMEM175.exonic.log.
Options in effect:
  --export vcf-iid
  --extract range /home/jupyter/workspace/ws_files/TMEM175/TMEM175_AAC/AAC_TMEM175.exonic.variantstoKeep.txt
  --keep /home/jupyter/workspace/ws_files/TMEM175/TMEM175_AAC/AAC.samplestoKeep
  --out /home/jupyter/workspace/ws_files/TMEM175/TMEM175_AAC/AAC_TMEM175.exonic
  --pfile /home/jupyter/workspace/gp2_tier2_eu_release7_30042024/imputed_genotypes/AAC/chr4_AAC_release7_vwb

Start time: Wed Jul 31 11:09:28 2024
Note: --export 'vcf-iid' modifier is deprecated.  Use 'vcf' + 'id-paste=iid'.
52223 MiB RAM detected, ~50560 available; reserving 26111 MiB for main
workspace.
Using up to 8 compute threads.
1111 samples (656 females, 455 males; 1111 founder

In [4]:
#Run RVtests
ancestries = ['AAC', 'AFR', 'AJ', 'AMR', 'CAS', 'EAS', 'EUR', 'FIN', 'MDE', 'SAS', 'CAH']
variant_classes = ['exonic', 'nonsynonymous_lof']

for ancestry in ancestries:
    for variant_class in variant_classes:
        
        WORK_DIR = f'~/workspace/ws_files/TMEM175/TMEM175_{ancestry}'
        
        # Print the command to be executed (for debugging purposes)
        print(f'Running RVtests for {variant_class} variants for ancestry: {ancestry}')
        
        ## RVtests with covariates 
        #Make sure the pheno and covariate file starts with the first 5 columsn: fid, iid, fatid, matid, sex
        #The pheno-name flag only works when the pheno/covar file is structured properly
        ! /home/jupyter/tools/rvtests/executable/rvtest --noweb --hide-covar \
        --out {WORK_DIR}/{ancestry}_TMEM175.burden.{variant_class} \
        --kernel skato \
        --inVcf {WORK_DIR}/{ancestry}_TMEM175.{variant_class}.vcf.gz \
        --pheno {WORK_DIR}/{ancestry}_covariate_file.txt \
        --pheno-name PHENO \
        --gene TMEM175 \
        --geneFile ~/workspace/ws_files/TMEM175/refFlat.txt \
        --covar {WORK_DIR}/{ancestry}_covariate_file.txt \
        --covar-name SEX,AGE,PC1,PC2,PC3,PC4,PC5 \
        --freqUpper 0.01
# --burden cmc,zeggini,mb,fp,cmcWald --kernel skat,skato \

Running RVtests for exonic variants for ancestry: AAC
Thank you for using rvtests (version: 20190205, git: c86e589efef15382603300dc7f4c3394c82d69b8)
  For documentations, refer to http://zhanxw.github.io/rvtests/
  For questions and comments, plase send to Xiaowei Zhan <zhanxw@umich.edu>
  For bugs and feature requests, please submit at: https://github.com/zhanxw/rvtests/issues

The following parameters are available.  Ones with "[]" are in effect:

Available Options
      Basic Input/Output:
                          --inVcf [/home/jupyter/workspace/ws_files/TMEM175/TMEM175_AAC/AAC_TMEM175.exonic.vcf.gz]
                          --inBgen [], --inBgenSample [], --inKgg []
                          --out [/home/jupyter/workspace/ws_files/TMEM175/TMEM175_AAC/AAC_TMEM175.burden.exonic]
                          --outputRaw
       Specify Covariate:
                          --covar [/home/jupyter/workspace/ws_files/TMEM175/TMEM175_AAC/AAC_covariate_file.txt]
                          --c

EUR

In [5]:
WORK_DIR = "~/workspace/ws_files/TMEM175"

In [6]:
#Check EUR all_coding variant results
## next step--do burden test
! cat {WORK_DIR}/TMEM175_EUR/EUR_TMEM175.burden.exonic.SkatO.assoc

Gene	RANGE	N_INFORMATIVE	NumVar	NumPolyVar	Q	rho	Pvalue
TMEM175	4:932459-958655,4:932459-958655,4:932459-958655,4:932459-958655,4:932459-958655,4:932386-958655,4:932459-958655	20918	174	109	323773	0.9	0.0304464


In [7]:
#Make sure the pheno and covariate file starts with the first 5 columsn: fid, iid, fatid, matid, sex
        #The pheno-name flag only works when the pheno/covar file is structured properly
! /home/jupyter/tools/rvtests/executable/rvtest --noweb --hide-covar \
--out {WORK_DIR}/TMEM175_EUR/EUR_TMEM175.burdenTEST.exonic \
--burden cmc,zeggini,mb,fp,cmcWald \
--inVcf {WORK_DIR}/TMEM175_EUR/EUR_TMEM175.exonic.vcf.gz \
--pheno {WORK_DIR}/TMEM175_EUR/EUR_covariate_file.txt \
--pheno-name PHENO \
--gene TMEM175 \
--geneFile ~/workspace/ws_files/TMEM175/refFlat.txt \
--covar {WORK_DIR}/TMEM175_EUR/EUR_covariate_file.txt \
--covar-name SEX,AGE,PC1,PC2,PC3,PC4,PC5 \
--freqUpper 0.01

Thank you for using rvtests (version: 20190205, git: c86e589efef15382603300dc7f4c3394c82d69b8)
  For documentations, refer to http://zhanxw.github.io/rvtests/
  For questions and comments, plase send to Xiaowei Zhan <zhanxw@umich.edu>
  For bugs and feature requests, please submit at: https://github.com/zhanxw/rvtests/issues

The following parameters are available.  Ones with "[]" are in effect:

Available Options
      Basic Input/Output:
                          --inVcf [/home/jupyter/workspace/ws_files/TMEM175/TMEM175_EUR/EUR_TMEM175.exonic.vcf.gz]
                          --inBgen [], --inBgenSample [], --inKgg []
                          --out [/home/jupyter/workspace/ws_files/TMEM175/TMEM175_EUR/EUR_TMEM175.burdenTEST.exonic]
                          --outputRaw
       Specify Covariate:
                          --covar [/home/jupyter/workspace/ws_files/TMEM175/TMEM175_EUR/EUR_covariate_file.txt]
                          --covar-name [SEX,AGE,PC1,PC2,PC3,PC4,PC5], --sex
   

In [8]:
# see bureden test in exonic in EUR
! cat {WORK_DIR}/TMEM175_EUR/EUR_TMEM175.burdenTEST.exonic.CMCWald.assoc

Gene	RANGE	N_INFORMATIVE	NumVar	NumPolyVar	NonRefSite	Beta	SE	Pvalue
TMEM175	4:932459-958655,4:932459-958655,4:932459-958655,4:932459-958655,4:932459-958655,4:932386-958655,4:932459-958655	20918	174	109	1500	0.0708554	0.0620621	0.253585
TMEM175	4:932459-958655,4:932459-958655,4:932459-958655,4:932459-958655,4:932459-958655,4:932386-958655,4:932459-958655	20918	174	109	1500	-0.576489	0.0316899	6.02168e-74
TMEM175	4:932459-958655,4:932459-958655,4:932459-958655,4:932459-958655,4:932459-958655,4:932386-958655,4:932459-958655	20918	174	109	1500	0.0309305	0.00133732	2.37789e-118
TMEM175	4:932459-958655,4:932459-958655,4:932459-958655,4:932459-958655,4:932459-958655,4:932386-958655,4:932459-958655	20918	174	109	1500	-0.0224515	0.0086344	0.00931601
TMEM175	4:932459-958655,4:932459-958655,4:932459-958655,4:932459-958655,4:932459-958655,4:932386-958655,4:932459-958655	20918	174	109	1500	0.0140758	0.00750917	0.0608638
TMEM175	4:932459-958655,4:932459-958655,4:932459-958655,4:932459-958655,4:9324

In [12]:
! cat {WORK_DIR}/TMEM175_EUR/EUR_TMEM175.burdenTEST.exonic.CMC.assoc

Gene	RANGE	N_INFORMATIVE	NumVar	NumPolyVar	NonRefSite	Pvalue
TMEM175	4:932459-958655,4:932459-958655,4:932459-958655,4:932459-958655,4:932459-958655,4:932386-958655,4:932459-958655	20918	174	109	1500	0.253509


In [9]:
! cat {WORK_DIR}/TMEM175_EUR/EUR_TMEM175.burdenTEST.exonic.Fp.assoc

Gene	RANGE	N_INFORMATIVE	NumVar	NumPolyVar	Pvalue
TMEM175	4:932459-958655,4:932459-958655,4:932459-958655,4:932459-958655,4:932459-958655,4:932386-958655,4:932459-958655	20918	174	109	0.0365734


In [10]:
! cat {WORK_DIR}/TMEM175_EUR/EUR_TMEM175.burdenTEST.exonic.MadsonBrowning.assoc

Gene	RANGE	N_INFORMATIVE	NumVar	NumPolyVar	NumPerm	ActualPerm	Stat	NumGreater	NumEqual	PermPvalue
TMEM175	4:932459-958655,4:932459-958655,4:932459-958655,4:932459-958655,4:932459-958655,4:932386-958655,4:932459-958655	20918	174	109	10000	6504	6.10068	1000	0	0.153752


In [11]:
! cat {WORK_DIR}/TMEM175_EUR/EUR_TMEM175.burdenTEST.exonic.Zeggini.assoc

Gene	RANGE	N_INFORMATIVE	NumVar	NumPolyVar	Pvalue
TMEM175	4:932459-958655,4:932459-958655,4:932459-958655,4:932459-958655,4:932459-958655,4:932386-958655,4:932459-958655	20918	174	109	0.227047


In [5]:
#Check EUR nonsynonymous_lof variant results
! cat {WORK_DIR}/TMEM175_EUR/EUR_TMEM175.burden.nonsynonymous_lof.Skat.assoc

Gene	RANGE	N_INFORMATIVE	NumVar	NumPolyVar	Q	Pvalue	NumPerm	ActualPerm	Stat	NumGreater	NumEqual	PermPvalue
TMEM175	4:932459-958655,4:932459-958655,4:932459-958655,4:932459-958655,4:932459-958655,4:932386-958655,4:932459-958655	20918	105	65	107233	0.0560423	10000	10000	107233	537	0	0.0537


In [13]:
#Check EUR nonsynonymous_lof variant results
! cat {WORK_DIR}/TMEM175_EUR/EUR_TMEM175.burden.nonsynonymous_lof.SkatO.assoc

Gene	RANGE	N_INFORMATIVE	NumVar	NumPolyVar	Q	rho	Pvalue
TMEM175	4:932459-958655,4:932459-958655,4:932459-958655,4:932459-958655,4:932459-958655,4:932386-958655,4:932459-958655	20918	105	65	53616.4	0	0.101898


AAC

In [7]:
#Check AAC all_coding variant results
! cat {WORK_DIR}/TMEM175_AAC/AAC_TMEM175.burden.exonic.Skat.assoc

Gene	RANGE	N_INFORMATIVE	NumVar	NumPolyVar	Q	Pvalue	NumPerm	ActualPerm	Stat	NumGreater	NumEqual	PermPvalue
TMEM175	4:932459-958655,4:932459-958655,4:932459-958655,4:932459-958655,4:932459-958655,4:932386-958655,4:932459-958655	925	57	45	5938.41	0.692352	10000	1323	5938.41	1000	0	0.755858


In [8]:
#Check AAC all_coding variant results
! cat {WORK_DIR}/TMEM175_AAC/AAC_TMEM175.burden.exonic.SkatO.assoc

Gene	RANGE	N_INFORMATIVE	NumVar	NumPolyVar	Q	rho	Pvalue
TMEM175	4:932459-958655,4:932459-958655,4:932459-958655,4:932459-958655,4:932459-958655,4:932386-958655,4:932459-958655	925	57	45	2977.49	1	0.5655


In [9]:
#Check AAC all_coding variant results
! cat {WORK_DIR}/TMEM175_AAC/AAC_TMEM175.burden.nonsynonymous_lof.Skat.assoc

Gene	RANGE	N_INFORMATIVE	NumVar	NumPolyVar	Q	Pvalue	NumPerm	ActualPerm	Stat	NumGreater	NumEqual	PermPvalue
TMEM175	4:932459-958655,4:932459-958655,4:932459-958655,4:932459-958655,4:932459-958655,4:932386-958655,4:932459-958655	925	38	31	3451.67	0.742505	10000	1275	3451.67	1000	0	0.784314


In [10]:
#Check AAC all_coding variant results
! cat {WORK_DIR}/TMEM175_AAC/AAC_TMEM175.burden.nonsynonymous_lof.SkatO.assoc

Gene	RANGE	N_INFORMATIVE	NumVar	NumPolyVar	Q	rho	Pvalue
TMEM175	4:932459-958655,4:932459-958655,4:932459-958655,4:932459-958655,4:932459-958655,4:932386-958655,4:932459-958655	925	38	31	1792.92	1	0.607447


AFR

In [11]:
#Check AFR all_coding variant results
! cat {WORK_DIR}/TMEM175_AFR/AFR_TMEM175.burden.exonic.Skat.assoc

Gene	RANGE	N_INFORMATIVE	NumVar	NumPolyVar	Q	Pvalue	NumPerm	ActualPerm	Stat	NumGreater	NumEqual	PermPvalue
TMEM175	4:932459-958655,4:932459-958655,4:932459-958655,4:932459-958655,4:932459-958655,4:932386-958655,4:932459-958655	1062	60	43	7846.37	0.311869	10000	2587	7846.37	1000	0	0.386548


In [12]:
#Check AFR all_coding variant results
! cat {WORK_DIR}/TMEM175_AFR/AFR_TMEM175.burden.exonic.SkatO.assoc

Gene	RANGE	N_INFORMATIVE	NumVar	NumPolyVar	Q	rho	Pvalue
TMEM175	4:932459-958655,4:932459-958655,4:932459-958655,4:932459-958655,4:932459-958655,4:932386-958655,4:932459-958655	1062	60	43	3923.18	0	0.495811


In [13]:
#Check AFR all_coding variant results
! cat {WORK_DIR}/TMEM175_AFR/AFR_TMEM175.burden.nonsynonymous_lof.Skat.assoc

Gene	RANGE	N_INFORMATIVE	NumVar	NumPolyVar	Q	Pvalue	NumPerm	ActualPerm	Stat	NumGreater	NumEqual	PermPvalue
TMEM175	4:932459-958655,4:932459-958655,4:932459-958655,4:932459-958655,4:932459-958655,4:932386-958655,4:932459-958655	1062	38	27	6145.09	0.371636	10000	2581	6145.09	1000	0	0.387447


In [14]:
#Check AFR all_coding variant results
! cat {WORK_DIR}/TMEM175_AFR/AFR_TMEM175.burden.nonsynonymous_lof.SkatO.assoc

Gene	RANGE	N_INFORMATIVE	NumVar	NumPolyVar	Q	rho	Pvalue
TMEM175	4:932459-958655,4:932459-958655,4:932459-958655,4:932459-958655,4:932459-958655,4:932386-958655,4:932459-958655	1062	38	27	3072.54	0	0.56138


AJ

In [15]:
#Check AJ all_coding variant results
! cat {WORK_DIR}/TMEM175_AJ/AJ_TMEM175.burden.exonic.Skat.assoc

Gene	RANGE	N_INFORMATIVE	NumVar	NumPolyVar	Q	Pvalue	NumPerm	ActualPerm	Stat	NumGreater	NumEqual	PermPvalue
TMEM175	4:932459-958655,4:932459-958655,4:932459-958655,4:932459-958655,4:932459-958655,4:932386-958655,4:932459-958655	1410	20	15	2660.25	0.548165	10000	1693	2660.25	1000	0	0.590667


In [16]:
#Check AJ all_coding variant results
! cat {WORK_DIR}/TMEM175_AJ/AJ_TMEM175.burden.exonic.SkatO.assoc

Gene	RANGE	N_INFORMATIVE	NumVar	NumPolyVar	Q	rho	Pvalue
TMEM175	4:932459-958655,4:932459-958655,4:932459-958655,4:932459-958655,4:932459-958655,4:932386-958655,4:932459-958655	1410	20	15	594.277	1	0.740559


In [17]:
#Check AJ all_coding variant results
! cat {WORK_DIR}/TMEM175_AJ/AJ_TMEM175.burden.nonsynonymous_lof.Skat.assoc

Gene	RANGE	N_INFORMATIVE	NumVar	NumPolyVar	Q	Pvalue	NumPerm	ActualPerm	Stat	NumGreater	NumEqual	PermPvalue
TMEM175	4:932459-958655,4:932459-958655,4:932459-958655,4:932459-958655,4:932459-958655,4:932386-958655,4:932459-958655	1410	13	10	2189.49	0.52697	10000	1745	2189.49	1000	0	0.573066


In [18]:
#Check AJ all_coding variant results
! cat {WORK_DIR}/TMEM175_AJ/AJ_TMEM175.burden.nonsynonymous_lof.SkatO.assoc

Gene	RANGE	N_INFORMATIVE	NumVar	NumPolyVar	Q	rho	Pvalue
TMEM175	4:932459-958655,4:932459-958655,4:932459-958655,4:932459-958655,4:932459-958655,4:932386-958655,4:932459-958655	1410	13	10	1094.74	0	0.708875


AMR

In [19]:
#Check AMR all_coding variant results
! cat {WORK_DIR}/TMEM175_AMR/AMR_TMEM175.burden.exonic.Skat.assoc

Gene	RANGE	N_INFORMATIVE	NumVar	NumPolyVar	Q	Pvalue	NumPerm	ActualPerm	Stat	NumGreater	NumEqual	PermPvalue
TMEM175	4:932459-958655,4:932459-958655,4:932459-958655,4:932459-958655,4:932459-958655,4:932386-958655,4:932459-958655	447	36	26	4759.54	0.274054	10000	4048	4759.54	1000	0	0.247036


In [20]:
#Check AMR all_coding variant results
! cat {WORK_DIR}/TMEM175_AMR/AMR_TMEM175.burden.exonic.SkatO.assoc

Gene	RANGE	N_INFORMATIVE	NumVar	NumPolyVar	Q	rho	Pvalue
TMEM175	4:932459-958655,4:932459-958655,4:932459-958655,4:932459-958655,4:932459-958655,4:932386-958655,4:932459-958655	447	36	26	2379.77	0	0.433785


In [21]:
#Check AMR all_coding variant results
! cat {WORK_DIR}/TMEM175_AMR/AMR_TMEM175.burden.nonsynonymous_lof.Skat.assoc

Gene	RANGE	N_INFORMATIVE	NumVar	NumPolyVar	Q	Pvalue	NumPerm	ActualPerm	Stat	NumGreater	NumEqual	PermPvalue
TMEM175	4:932459-958655,4:932459-958655,4:932459-958655,4:932459-958655,4:932459-958655,4:932386-958655,4:932459-958655	447	18	14	3656.15	0.188299	10000	6542	3656.15	1000	0	0.152858


In [22]:
#Check AMR all_coding variant results
! cat {WORK_DIR}/TMEM175_AMR/AMR_TMEM175.burden.nonsynonymous_lof.SkatO.assoc

Gene	RANGE	N_INFORMATIVE	NumVar	NumPolyVar	Q	rho	Pvalue
TMEM175	4:932459-958655,4:932459-958655,4:932459-958655,4:932459-958655,4:932459-958655,4:932386-958655,4:932459-958655	447	18	14	1828.07	0	0.311882


CAH

In [23]:
#Check CAH all_coding variant results
! cat {WORK_DIR}/TMEM175_CAH/CAH_TMEM175.burden.exonic.Skat.assoc

Gene	RANGE	N_INFORMATIVE	NumVar	NumPolyVar	Q	Pvalue	NumPerm	ActualPerm	Stat	NumGreater	NumEqual	PermPvalue
TMEM175	4:932459-958655,4:932459-958655,4:932459-958655,4:932459-958655,4:932459-958655,4:932386-958655,4:932459-958655	693	43	34	4011.03	0.742795	10000	1290	4011.03	1000	0	0.775194


In [24]:
#Check CAH all_coding variant results
! cat {WORK_DIR}/TMEM175_CAH/CAH_TMEM175.burden.exonic.SkatO.assoc

Gene	RANGE	N_INFORMATIVE	NumVar	NumPolyVar	Q	rho	Pvalue
TMEM175	4:932459-958655,4:932459-958655,4:932459-958655,4:932459-958655,4:932459-958655,4:932386-958655,4:932459-958655	693	43	34	14226.6	1	0.0312151


In [25]:
#Check CAH all_coding variant results
! cat {WORK_DIR}/TMEM175_CAH/CAH_TMEM175.burden.nonsynonymous_lof.Skat.assoc

Gene	RANGE	N_INFORMATIVE	NumVar	NumPolyVar	Q	Pvalue	NumPerm	ActualPerm	Stat	NumGreater	NumEqual	PermPvalue
TMEM175	4:932459-958655,4:932459-958655,4:932459-958655,4:932459-958655,4:932459-958655,4:932386-958655,4:932459-958655	693	26	22	3545.62	0.531319	10000	1628	3545.62	1000	0	0.614251


In [26]:
#Check CAH all_coding variant results
! cat {WORK_DIR}/TMEM175_CAH/CAH_TMEM175.burden.nonsynonymous_lof.SkatO.assoc

Gene	RANGE	N_INFORMATIVE	NumVar	NumPolyVar	Q	rho	Pvalue
TMEM175	4:932459-958655,4:932459-958655,4:932459-958655,4:932459-958655,4:932459-958655,4:932386-958655,4:932459-958655	693	26	22	10667.7	1	0.0308928


CAS

In [27]:
#Check CAS all_coding variant results
! cat {WORK_DIR}/TMEM175_CAS/CAS_TMEM175.burden.exonic.Skat.assoc

Gene	RANGE	N_INFORMATIVE	NumVar	NumPolyVar	Q	Pvalue	NumPerm	ActualPerm	Stat	NumGreater	NumEqual	PermPvalue
TMEM175	4:932459-958655,4:932459-958655,4:932459-958655,4:932459-958655,4:932459-958655,4:932386-958655,4:932459-958655	658	30	19	589.951	0.984523	10000	1000	589.951	1000	0	1


In [28]:
#Check CAS all_coding variant results
! cat {WORK_DIR}/TMEM175_CAS/CAS_TMEM175.burden.exonic.SkatO.assoc

Gene	RANGE	N_INFORMATIVE	NumVar	NumPolyVar	Q	rho	Pvalue
TMEM175	4:932459-958655,4:932459-958655,4:932459-958655,4:932459-958655,4:932459-958655,4:932386-958655,4:932459-958655	658	30	19	447.27	1	0.767453


In [29]:
#Check CAS all_coding variant results
! cat {WORK_DIR}/TMEM175_CAS/CAS_TMEM175.burden.nonsynonymous_lof.Skat.assoc

Gene	RANGE	N_INFORMATIVE	NumVar	NumPolyVar	Q	Pvalue	NumPerm	ActualPerm	Stat	NumGreater	NumEqual	PermPvalue
TMEM175	4:932459-958655,4:932459-958655,4:932459-958655,4:932459-958655,4:932459-958655,4:932386-958655,4:932459-958655	658	22	11	445.823	0.237621	10000	4284	445.823	1000	0	0.233427


In [30]:
#Check CAS all_coding variant results
! cat {WORK_DIR}/TMEM175_CAS/CAS_TMEM175.burden.nonsynonymous_lof.SkatO.assoc

Gene	RANGE	N_INFORMATIVE	NumVar	NumPolyVar	Q	rho	Pvalue
TMEM175	4:932459-958655,4:932459-958655,4:932459-958655,4:932459-958655,4:932459-958655,4:932386-958655,4:932459-958655	658	22	11	222.912	0	0.351341


EAS

In [31]:
#Check EAS all_coding variant results
! cat {WORK_DIR}/TMEM175_EAS/EAS_TMEM175.burden.exonic.Skat.assoc

Gene	RANGE	N_INFORMATIVE	NumVar	NumPolyVar	Q	Pvalue	NumPerm	ActualPerm	Stat	NumGreater	NumEqual	PermPvalue
TMEM175	4:932459-958655,4:932459-958655,4:932459-958655,4:932459-958655,4:932459-958655,4:932386-958655,4:932459-958655	2523	45	24	6681.36	0.298387	10000	3613	6681.36	1000	0	0.276778


In [32]:
#Check EAS all_coding variant results
! cat {WORK_DIR}/TMEM175_EAS/EAS_TMEM175.burden.exonic.SkatO.assoc

Gene	RANGE	N_INFORMATIVE	NumVar	NumPolyVar	Q	rho	Pvalue
TMEM175	4:932459-958655,4:932459-958655,4:932459-958655,4:932459-958655,4:932459-958655,4:932386-958655,4:932459-958655	2523	45	24	3340.68	0	0.45024


In [33]:
#Check EAS all_coding variant results
! cat {WORK_DIR}/TMEM175_EAS/EAS_TMEM175.burden.nonsynonymous_lof.Skat.assoc

Gene	RANGE	N_INFORMATIVE	NumVar	NumPolyVar	Q	Pvalue	NumPerm	ActualPerm	Stat	NumGreater	NumEqual	PermPvalue
TMEM175	4:932459-958655,4:932459-958655,4:932459-958655,4:932459-958655,4:932459-958655,4:932386-958655,4:932459-958655	2523	31	15	7571.83	0.0863703	10000	10000	7571.83	648	0	0.0648


In [34]:
#Check EAS all_coding variant results
! cat {WORK_DIR}/TMEM175_EAS/EAS_TMEM175.burden.nonsynonymous_lof.SkatO.assoc

Gene	RANGE	N_INFORMATIVE	NumVar	NumPolyVar	Q	rho	Pvalue
TMEM175	4:932459-958655,4:932459-958655,4:932459-958655,4:932459-958655,4:932459-958655,4:932386-958655,4:932459-958655	2523	31	15	3785.91	0	0.141074


FIN

In [35]:
#Check FIN all_coding variant results
! cat {WORK_DIR}/TMEM175_FIN/FIN_TMEM175.burden.exonic.Skat.assoc

Gene	RANGE	N_INFORMATIVE	NumVar	NumPolyVar	Q	Pvalue	NumPerm	ActualPerm	Stat	NumGreater	NumEqual	PermPvalue
TMEM175	4:932459-958655,4:932459-958655,4:932459-958655,4:932459-958655,4:932459-958655,4:932386-958655,4:932459-958655	40	2	0	NA	NA	NA	NA	NA	NA	NA	NA


In [36]:
#Check FIN all_coding variant results
! cat {WORK_DIR}/TMEM175_FIN/FIN_TMEM175.burden.exonic.SkatO.assoc

Gene	RANGE	N_INFORMATIVE	NumVar	NumPolyVar	Q	rho	Pvalue
TMEM175	4:932459-958655,4:932459-958655,4:932459-958655,4:932459-958655,4:932459-958655,4:932386-958655,4:932459-958655	40	2	0	NA	NA	NA


In [37]:
#Check FIN all_coding variant results
! cat {WORK_DIR}/TMEM175_FIN/FIN_TMEM175.burden.nonsynonymous_lof.Skat.assoc

Gene	RANGE	N_INFORMATIVE	NumVar	NumPolyVar	Q	Pvalue	NumPerm	ActualPerm	Stat	NumGreater	NumEqual	PermPvalue
TMEM175	4:932459-958655,4:932459-958655,4:932459-958655,4:932459-958655,4:932459-958655,4:932386-958655,4:932459-958655	40	2	0	NA	NA	NA	NA	NA	NA	NA	NA


In [38]:
#Check FIN all_coding variant results
! cat {WORK_DIR}/TMEM175_FIN/FIN_TMEM175.burden.nonsynonymous_lof.SkatO.assoc

Gene	RANGE	N_INFORMATIVE	NumVar	NumPolyVar	Q	rho	Pvalue
TMEM175	4:932459-958655,4:932459-958655,4:932459-958655,4:932459-958655,4:932459-958655,4:932386-958655,4:932459-958655	40	2	0	NA	NA	NA


MDE

In [39]:
#Check MDE all_coding variant results
! cat {WORK_DIR}/TMEM175_MDE/MDE_TMEM175.burden.exonic.Skat.assoc

Gene	RANGE	N_INFORMATIVE	NumVar	NumPolyVar	Q	Pvalue	NumPerm	ActualPerm	Stat	NumGreater	NumEqual	PermPvalue
TMEM175	4:932459-958655,4:932459-958655,4:932459-958655,4:932459-958655,4:932459-958655,4:932386-958655,4:932459-958655	379	30	13	234.147	0.708651	10000	1101	234.147	1000	0	0.908265


In [40]:
#Check MDE all_coding variant results
! cat {WORK_DIR}/TMEM175_MDE/MDE_TMEM175.burden.exonic.SkatO.assoc

Gene	RANGE	N_INFORMATIVE	NumVar	NumPolyVar	Q	rho	Pvalue
TMEM175	4:932459-958655,4:932459-958655,4:932459-958655,4:932459-958655,4:932459-958655,4:932386-958655,4:932459-958655	379	30	13	149.403	1	0.681695


In [41]:
#Check MDE all_coding variant results
! cat {WORK_DIR}/TMEM175_MDE/MDE_TMEM175.burden.nonsynonymous_lof.Skat.assoc

Gene	RANGE	N_INFORMATIVE	NumVar	NumPolyVar	Q	Pvalue	NumPerm	ActualPerm	Stat	NumGreater	NumEqual	PermPvalue
TMEM175	4:932459-958655,4:932459-958655,4:932459-958655,4:932459-958655,4:932459-958655,4:932386-958655,4:932459-958655	379	15	6	58.3182	1	10000	1013	58.3182	1000	0	0.987167


In [42]:
#Check MDE all_coding variant results
! cat {WORK_DIR}/TMEM175_MDE/MDE_TMEM175.burden.nonsynonymous_lof.SkatO.assoc

Gene	RANGE	N_INFORMATIVE	NumVar	NumPolyVar	Q	rho	Pvalue
TMEM175	4:932459-958655,4:932459-958655,4:932459-958655,4:932459-958655,4:932459-958655,4:932386-958655,4:932459-958655	379	15	6	29.0677	1	0.839732


SAS

In [43]:
#Check SAS all_coding variant results
! cat {WORK_DIR}/TMEM175_SAS/SAS_TMEM175.burden.exonic.Skat.assoc

Gene	RANGE	N_INFORMATIVE	NumVar	NumPolyVar	Q	Pvalue	NumPerm	ActualPerm	Stat	NumGreater	NumEqual	PermPvalue
TMEM175	4:932459-958655,4:932459-958655,4:932459-958655,4:932459-958655,4:932459-958655,4:932386-958655,4:932459-958655	296	17	10	1810.68	0.10187	10000	10000	1810.68	430	0	0.043


In [44]:
#Check SAS all_coding variant results
! cat {WORK_DIR}/TMEM175_SAS/SAS_TMEM175.burden.exonic.SkatO.assoc

Gene	RANGE	N_INFORMATIVE	NumVar	NumPolyVar	Q	rho	Pvalue
TMEM175	4:932459-958655,4:932459-958655,4:932459-958655,4:932459-958655,4:932459-958655,4:932386-958655,4:932459-958655	296	17	10	2890.96	1	0.0183843


In [45]:
#Check SAS all_coding variant results
! cat {WORK_DIR}/TMEM175_SAS/SAS_TMEM175.burden.nonsynonymous_lof.Skat.assoc

Gene	RANGE	N_INFORMATIVE	NumVar	NumPolyVar	Q	Pvalue	NumPerm	ActualPerm	Stat	NumGreater	NumEqual	PermPvalue
TMEM175	4:932459-958655,4:932459-958655,4:932459-958655,4:932459-958655,4:932459-958655,4:932386-958655,4:932459-958655	296	12	8	1859.12	0.0974177	10000	10000	1859.12	397	0	0.0397


In [46]:
#Check EUR all_coding variant results
! cat {WORK_DIR}/TMEM175_SAS/SAS_TMEM175.burden.nonsynonymous_lof.SkatO.assoc

Gene	RANGE	N_INFORMATIVE	NumVar	NumPolyVar	Q	rho	Pvalue
TMEM175	4:932459-958655,4:932459-958655,4:932459-958655,4:932459-958655,4:932459-958655,4:932386-958655,4:932459-958655	296	12	8	2650.87	1	0.0230414


change format of association file in EUR

In [7]:
#If you need to read in the sumstats into python - you can skip this is if your data is already in python
df = pd.read_csv(f'{WORK_DIR}/TMEM175_EUR/EUR.all_adj.csv')

In [8]:
df

Unnamed: 0.1,Unnamed: 0,#CHROM,POS,ID,REF,ALT,PROVISIONAL_REF?,A1,OMITTED,A1_FREQ,FIRTH?,TEST,OBS_CT,OR,LOG(OR)_SE,L95,U95,Z_STAT,P,ERRCODE
0,0,4,882530,chr4:882530:C:T,C,T,Y,T,C,0.164467,N,ADD,20837,1.034410,0.030398,0.974580,1.097910,1.112890,0.265757,.
1,8,4,882611,chr4:882611:T:C,T,C,Y,C,T,0.051185,N,ADD,20885,0.931946,0.050102,0.844780,1.028110,-1.406740,0.159504,.
2,16,4,883018,chr4:883018:G:A,G,A,Y,A,G,0.050599,N,ADD,20880,0.928749,0.050369,0.841441,1.025120,-1.467500,0.142240,.
3,24,4,883133,chr4:883133:G:A,G,A,Y,A,G,0.012734,N,ADD,20811,0.809012,0.096557,0.669523,0.977562,-2.194990,0.028164,.
4,32,4,883483,chr4:883483:G:A,G,A,Y,A,G,0.035357,N,ADD,20901,0.883241,0.058911,0.786926,0.991344,-2.107530,0.035071,.
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
392,3136,4,1007954,chr4:1007954:T:C,T,C,Y,T,C,0.279743,N,ADD,18971,0.913461,0.025919,0.868217,0.961063,-3.492280,0.000479,.
393,3144,4,1008009,chr4:1008009:C:T,C,T,Y,C,T,0.444961,N,ADD,18814,0.949197,0.023666,0.906174,0.994262,-2.203110,0.027587,.
394,3152,4,1008209,chr4:1008209:C:T,C,T,Y,T,C,0.036468,N,ADD,20717,1.002060,0.059756,0.891315,1.126570,0.034499,0.972480,.
395,3160,4,1008337,chr4:1008337:C:T,C,T,Y,C,T,0.279831,N,ADD,18965,0.913397,0.025912,0.868167,0.960983,-3.495880,0.000473,.


In [9]:
#First just select relevant columns: chromosome, bp position and p-value
export_ldassoc = df[['#CHROM', 'POS', 'P']].copy()

In [10]:
export_ldassoc

Unnamed: 0,#CHROM,POS,P
0,4,882530,0.265757
1,4,882611,0.159504
2,4,883018,0.142240
3,4,883133,0.028164
4,4,883483,0.035071
...,...,...,...
392,4,1007954,0.000479
393,4,1008009,0.027587
394,4,1008209,0.972480
395,4,1008337,0.000473


In [11]:
#Rename the #CHROM column to remove the hashtag as I think this might be confusing LDassoc
export_ldassoc = export_ldassoc.rename(columns={'#CHROM': 'CHROM'}) 

In [12]:
export_ldassoc

Unnamed: 0,CHROM,POS,P
0,4,882530,0.265757
1,4,882611,0.159504
2,4,883018,0.142240
3,4,883133,0.028164
4,4,883483,0.035071
...,...,...,...
392,4,1007954,0.000479
393,4,1008009,0.027587
394,4,1008209,0.972480
395,4,1008337,0.000473


In [13]:
#Then export as a tab-separated, not comma-separated file

export_ldassoc.to_csv('EUR.all_adj.formatted.tab', sep = '\t', index=False)

Person and conditional analysis with glm (rs3755958)

In [15]:
%load_ext rpy2.ipython

In [7]:
WORK_DIR = "~/workspace/ws_files/TMEM175/"

command = f"""
/home/jupyter/tools/plink \
--bfile {WORK_DIR}/TMEM175_EUR/EUR_TMEM175 \
--chr 4 \
--from-bp 958159 \
--to-bp 958159 \
--recode A \
--out {WORK_DIR}/TMEM175_EUR/TMEM175_rs34311866_nalls \
--double-id
"""

!{command}

PLINK v1.90b6.9 64-bit (4 Mar 2019)            www.cog-genomics.org/plink/1.9/
(C) 2005-2019 Shaun Purcell, Christopher Chang   GNU General Public License v3
Logging to /home/jupyter/workspace/ws_files/TMEM175//TMEM175_EUR/TMEM175_rs34311866_nalls.log.
Options in effect:
  --bfile /home/jupyter/workspace/ws_files/TMEM175//TMEM175_EUR/EUR_TMEM175
  --chr 4
  --double-id
  --from-bp 958159
  --out /home/jupyter/workspace/ws_files/TMEM175//TMEM175_EUR/TMEM175_rs34311866_nalls
  --recode A
  --to-bp 958159

52223 MB RAM detected; reserving 26111 MB for main workspace.
1 out of 9504 variants loaded from .bim file.
38839 people (23296 males, 15543 females) loaded from .fam.
30412 phenotype values loaded from .fam.
Using 1 thread (no multithreaded calculations invoked).
Before main variant filters, 38839 founders and 0 nonfounders present.
Calculating allele frequencies... 1011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697

In [8]:
WORK_DIR = "~/workspace/ws_files/TMEM175/"

command = f"""
/home/jupyter/tools/plink \
--bfile {WORK_DIR}/TMEM175_EUR/EUR_TMEM175 \
--chr 4 \
--from-bp 969254 \
--to-bp 969254 \
--recode A \
--out {WORK_DIR}/TMEM175_EUR/TMEM175_rs3755958 \
--double-id
"""

!{command}

PLINK v1.90b6.9 64-bit (4 Mar 2019)            www.cog-genomics.org/plink/1.9/
(C) 2005-2019 Shaun Purcell, Christopher Chang   GNU General Public License v3
Logging to /home/jupyter/workspace/ws_files/TMEM175//TMEM175_EUR/TMEM175_rs3755958.log.
Options in effect:
  --bfile /home/jupyter/workspace/ws_files/TMEM175//TMEM175_EUR/EUR_TMEM175
  --chr 4
  --double-id
  --from-bp 969254
  --out /home/jupyter/workspace/ws_files/TMEM175//TMEM175_EUR/TMEM175_rs3755958
  --recode A
  --to-bp 969254

52223 MB RAM detected; reserving 26111 MB for main workspace.
1 out of 9504 variants loaded from .bim file.
38839 people (23296 males, 15543 females) loaded from .fam.
30412 phenotype values loaded from .fam.
Using 1 thread (no multithreaded calculations invoked).
Before main variant filters, 38839 founders and 0 nonfounders present.
Calculating allele frequencies... 101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767

In [9]:
rs34311866 = pd.read_csv(f'{WORK_DIR}/TMEM175_EUR/TMEM175_rs34311866_nalls.raw',sep=" ")
rs3755958 = pd.read_csv(f'{WORK_DIR}/TMEM175_EUR/TMEM175_rs3755958.raw',sep=" ")

In [10]:
total = pd.merge(rs34311866, rs3755958, on="IID")
total

Unnamed: 0,FID_x,IID,PAT_x,MAT_x,SEX_x,PHENOTYPE_x,chr4:958159:T:C_C,FID_y,PAT_y,MAT_y,SEX_y,PHENOTYPE_y,chr4:969254:C:A_A
0,0,APGS_000002_s1,0,0,1,2,0.0,0,0,0,1,2,0.0
1,0,APGS_000003_s1,0,0,1,2,0.0,0,0,0,1,2,0.0
2,0,APGS_000004_s1,0,0,2,2,1.0,0,0,0,2,2,0.0
3,0,APGS_000008_s1,0,0,1,2,1.0,0,0,0,1,2,1.0
4,0,APGS_000011_s1,0,0,1,2,1.0,0,0,0,1,2,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
38834,0,YMS_000043_s1,0,0,1,1,0.0,0,0,0,1,1,0.0
38835,0,YMS_000044_s1,0,0,1,2,,0,0,0,1,2,0.0
38836,0,YMS_000048_s1,0,0,1,-9,0.0,0,0,0,1,-9,0.0
38837,0,YMS_000050_s1,0,0,1,-9,0.0,0,0,0,1,-9,0.0


In [13]:
cov = pd.read_csv(f'{WORK_DIR}/TMEM175_EUR/EUR_covariate_file.txt', sep = '\t')
cov

Unnamed: 0,FID,IID,FATID,MATID,SEX,AGE,PHENO,PC1,PC2,PC3,PC4,PC5
0,0,APGS_000002_s1,0,0,1.0,,2.0,-29.483298,-38.473005,-9.398904,-10.842865,25.744648
1,0,APGS_000003_s1,0,0,1.0,,2.0,-33.068958,-34.288862,-11.116951,-8.056146,22.924043
2,0,APGS_000004_s1,0,0,2.0,,2.0,-28.204441,-30.969481,-15.883093,-7.004351,18.647007
3,0,APGS_000005_s1,0,0,1.0,,2.0,-35.039753,-39.565822,-12.522753,-9.339179,21.956533
4,0,APGS_000006_s1,0,0,1.0,,2.0,-30.279907,-34.988905,-10.664628,-7.407177,26.357548
...,...,...,...,...,...,...,...,...,...,...,...,...
58204,0,YMS_000049_s1,0,0,2.0,60.0,,-33.320502,-37.170809,-9.867466,-6.818637,18.344976
58205,0,YMS_000050_s1,0,0,1.0,67.0,,-33.550740,-38.934634,-13.550840,-11.161991,26.601555
58206,0,YMS_000051_s1,0,0,1.0,69.0,2.0,-36.415950,-36.634710,-9.623981,-8.333470,19.165537
58207,0,YMS_000052_s1,0,0,,,,55.200734,-12.367759,-2.246785,-0.681236,5.281365


In [14]:
total = pd.merge(total, cov, on="IID")
total

Unnamed: 0,FID_x,IID,PAT_x,MAT_x,SEX_x,PHENOTYPE_x,chr4:958159:T:C_C,FID_y,PAT_y,MAT_y,SEX_y,PHENOTYPE_y,chr4:969254:C:A_A,FID,FATID,MATID,SEX,AGE,PHENO,PC1,PC2,PC3,PC4,PC5
0,0,APGS_000002_s1,0,0,1,2,0.0,0,0,0,1,2,0.0,0,0,0,1.0,,2.0,-29.483298,-38.473005,-9.398904,-10.842865,25.744648
1,0,APGS_000003_s1,0,0,1,2,0.0,0,0,0,1,2,0.0,0,0,0,1.0,,2.0,-33.068958,-34.288862,-11.116951,-8.056146,22.924043
2,0,APGS_000004_s1,0,0,2,2,1.0,0,0,0,2,2,0.0,0,0,0,2.0,,2.0,-28.204441,-30.969481,-15.883093,-7.004351,18.647007
3,0,APGS_000008_s1,0,0,1,2,1.0,0,0,0,1,2,1.0,0,0,0,1.0,,2.0,-33.303205,-35.527912,-10.818937,-12.897920,6.851688
4,0,APGS_000011_s1,0,0,1,2,1.0,0,0,0,1,2,0.0,0,0,0,1.0,,2.0,-35.698278,-34.940878,-11.376421,-5.590934,24.578940
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
38833,0,YMS_000043_s1,0,0,1,1,0.0,0,0,0,1,1,0.0,0,0,0,1.0,70.0,1.0,-33.608572,-35.293481,-11.033493,-9.916257,24.680699
38834,0,YMS_000044_s1,0,0,1,2,,0,0,0,1,2,0.0,0,0,0,1.0,73.0,2.0,-32.287712,-35.705842,-11.819036,-6.511252,18.970359
38835,0,YMS_000048_s1,0,0,1,-9,0.0,0,0,0,1,-9,0.0,0,0,0,1.0,76.0,,-34.569818,-33.847077,-6.792226,-3.816187,21.473458
38836,0,YMS_000050_s1,0,0,1,-9,0.0,0,0,0,1,-9,0.0,0,0,0,1.0,67.0,,-33.550740,-38.934634,-13.550840,-11.161991,26.601555


In [15]:
total = total[(total['PHENOTYPE_x']==1) | (total['PHENOTYPE_x']==2)]
total

Unnamed: 0,FID_x,IID,PAT_x,MAT_x,SEX_x,PHENOTYPE_x,chr4:958159:T:C_C,FID_y,PAT_y,MAT_y,SEX_y,PHENOTYPE_y,chr4:969254:C:A_A,FID,FATID,MATID,SEX,AGE,PHENO,PC1,PC2,PC3,PC4,PC5
0,0,APGS_000002_s1,0,0,1,2,0.0,0,0,0,1,2,0.0,0,0,0,1.0,,2.0,-29.483298,-38.473005,-9.398904,-10.842865,25.744648
1,0,APGS_000003_s1,0,0,1,2,0.0,0,0,0,1,2,0.0,0,0,0,1.0,,2.0,-33.068958,-34.288862,-11.116951,-8.056146,22.924043
2,0,APGS_000004_s1,0,0,2,2,1.0,0,0,0,2,2,0.0,0,0,0,2.0,,2.0,-28.204441,-30.969481,-15.883093,-7.004351,18.647007
3,0,APGS_000008_s1,0,0,1,2,1.0,0,0,0,1,2,1.0,0,0,0,1.0,,2.0,-33.303205,-35.527912,-10.818937,-12.897920,6.851688
4,0,APGS_000011_s1,0,0,1,2,1.0,0,0,0,1,2,0.0,0,0,0,1.0,,2.0,-35.698278,-34.940878,-11.376421,-5.590934,24.578940
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
38830,0,YMS_000034_s1,0,0,2,2,0.0,0,0,0,2,2,0.0,0,0,0,2.0,58.0,2.0,-34.720268,-33.255119,-10.532694,-6.846457,23.835050
38831,0,YMS_000037_s1,0,0,2,1,1.0,0,0,0,2,1,1.0,0,0,0,2.0,54.0,1.0,-33.437058,-36.858277,-11.974232,-9.557805,22.390045
38832,0,YMS_000039_s1,0,0,1,2,2.0,0,0,0,1,2,1.0,0,0,0,1.0,62.0,2.0,-33.216739,-40.302510,-11.191126,-9.715245,22.341078
38833,0,YMS_000043_s1,0,0,1,1,0.0,0,0,0,1,1,0.0,0,0,0,1.0,70.0,1.0,-33.608572,-35.293481,-11.033493,-9.916257,24.680699


In [16]:
total = total[["IID",'PHENOTYPE_x',"chr4:958159:T:C_C","chr4:969254:C:A_A", "SEX", "AGE", "PC1","PC2","PC3","PC4","PC5"]]

In [18]:
total.columns = ["IID",'PHENOTYPE',"TMEM175_rs34311866_nalls","TMEM175_rs3755958","SEX", "AGE", "PC1","PC2","PC3","PC4","PC5"]
total

Unnamed: 0,IID,PHENOTYPE,TMEM175_rs34311866_nalls,TMEM175_rs3755958,SEX,AGE,PC1,PC2,PC3,PC4,PC5
0,APGS_000002_s1,2,0.0,0.0,1.0,,-29.483298,-38.473005,-9.398904,-10.842865,25.744648
1,APGS_000003_s1,2,0.0,0.0,1.0,,-33.068958,-34.288862,-11.116951,-8.056146,22.924043
2,APGS_000004_s1,2,1.0,0.0,2.0,,-28.204441,-30.969481,-15.883093,-7.004351,18.647007
3,APGS_000008_s1,2,1.0,1.0,1.0,,-33.303205,-35.527912,-10.818937,-12.897920,6.851688
4,APGS_000011_s1,2,1.0,0.0,1.0,,-35.698278,-34.940878,-11.376421,-5.590934,24.578940
...,...,...,...,...,...,...,...,...,...,...,...
38830,YMS_000034_s1,2,0.0,0.0,2.0,58.0,-34.720268,-33.255119,-10.532694,-6.846457,23.835050
38831,YMS_000037_s1,1,1.0,1.0,2.0,54.0,-33.437058,-36.858277,-11.974232,-9.557805,22.390045
38832,YMS_000039_s1,2,2.0,1.0,1.0,62.0,-33.216739,-40.302510,-11.191126,-9.715245,22.341078
38833,YMS_000043_s1,1,0.0,0.0,1.0,70.0,-33.608572,-35.293481,-11.033493,-9.916257,24.680699


In [19]:
total.to_csv(f'{WORK_DIR}/TMEM175_EUR/TMEM175_rs34311866_nalls_rs3755958.csv')

In [20]:
%%R
total <- read.csv("~/workspace/ws_files/TMEM175/TMEM175_EUR/TMEM175_rs34311866_nalls_rs3755958.csv")

In [21]:
%%R
head(total)

  X            IID PHENOTYPE TMEM175_rs34311866_nalls TMEM175_rs3755958 SEX AGE
1 0 APGS_000002_s1         2                        0                 0   1  NA
2 1 APGS_000003_s1         2                        0                 0   1  NA
3 2 APGS_000004_s1         2                        1                 0   2  NA
4 3 APGS_000008_s1         2                        1                 1   1  NA
5 4 APGS_000011_s1         2                        1                 0   1  NA
6 5 APGS_000016_s1         2                        2                 2  NA  NA
        PC1       PC2        PC3        PC4       PC5
1 -29.48330 -38.47300  -9.398904 -10.842865 25.744648
2 -33.06896 -34.28886 -11.116951  -8.056146 22.924043
3 -28.20444 -30.96948 -15.883093  -7.004351 18.647007
4 -33.30320 -35.52791 -10.818937 -12.897920  6.851688
5 -35.69828 -34.94088 -11.376421  -5.590934 24.578940
6 -30.73116 -35.70957  -8.981066  -8.204220 21.164661


In [22]:
%%R

correlation.model <- lm(TMEM175_rs34311866_nalls ~ TMEM175_rs3755958, data = total)
summary(correlation.model)


Call:
lm(formula = TMEM175_rs34311866_nalls ~ TMEM175_rs3755958, data = total)

Residuals:
    Min      1Q  Median      3Q     Max 
-1.0843 -0.1797 -0.1797 -0.0843  1.8202 

Coefficients:
                  Estimate Std. Error t value Pr(>|t|)    
(Intercept)       0.179750   0.002513   71.52   <2e-16 ***
TMEM175_rs3755958 0.904552   0.004751  190.38   <2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.3814 on 29621 degrees of freedom
  (788 observations deleted due to missingness)
Multiple R-squared:  0.5503,	Adjusted R-squared:  0.5503 
F-statistic: 3.624e+04 on 1 and 29621 DF,  p-value: < 2.2e-16



In [24]:
%%R
dep_corr<- glm(PHENOTYPE ~ TMEM175_rs34311866_nalls + TMEM175_rs3755958 + SEX + AGE + PC1 + PC2 + PC3 + PC4 + PC5, data=total)
summary(dep_corr)


Call:
glm(formula = PHENOTYPE ~ TMEM175_rs34311866_nalls + TMEM175_rs3755958 + 
    SEX + AGE + PC1 + PC2 + PC3 + PC4 + PC5, data = total)

Coefficients:
                           Estimate Std. Error t value Pr(>|t|)    
(Intercept)               1.4640748  0.0670080  21.849  < 2e-16 ***
TMEM175_rs34311866_nalls  0.0444886  0.0080195   5.548 2.93e-08 ***
TMEM175_rs3755958         0.0062393  0.0097750   0.638 0.523293    
SEX                      -0.1143468  0.0062396 -18.326  < 2e-16 ***
AGE                       0.0062552  0.0002632  23.769  < 2e-16 ***
PC1                      -0.0033226  0.0016881  -1.968 0.049046 *  
PC2                       0.0024482  0.0014562   1.681 0.092733 .  
PC3                      -0.0032285  0.0015289  -2.112 0.034733 *  
PC4                       0.0023981  0.0015756   1.522 0.128014    
PC5                      -0.0023372  0.0006426  -3.637 0.000276 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

(Dispersion parameter for gau

Person and conditional analysis with glm (rs11731377)

In [5]:
WORK_DIR = "~/workspace/ws_files/TMEM175/"

command = f"""
/home/jupyter/tools/plink \
--bfile {WORK_DIR}/TMEM175_EUR/EUR_TMEM175 \
--chr 4 \
--from-bp 977478 \
--to-bp 977478 \
--recode A \
--out {WORK_DIR}/TMEM175_EUR/TMEM175_rs11731377 \
--double-id
"""

!{command}

PLINK v1.90b6.9 64-bit (4 Mar 2019)            www.cog-genomics.org/plink/1.9/
(C) 2005-2019 Shaun Purcell, Christopher Chang   GNU General Public License v3
Logging to /home/jupyter/workspace/ws_files/TMEM175//TMEM175_EUR/TMEM175_rs11731377.log.
Options in effect:
  --bfile /home/jupyter/workspace/ws_files/TMEM175//TMEM175_EUR/EUR_TMEM175
  --chr 4
  --double-id
  --from-bp 977478
  --out /home/jupyter/workspace/ws_files/TMEM175//TMEM175_EUR/TMEM175_rs11731377
  --recode A
  --to-bp 977478

52223 MB RAM detected; reserving 26111 MB for main workspace.
1 out of 9504 variants loaded from .bim file.
38839 people (23296 males, 15543 females) loaded from .fam.
30412 phenotype values loaded from .fam.
Using 1 thread (no multithreaded calculations invoked).
Before main variant filters, 38839 founders and 0 nonfounders present.
Calculating allele frequencies... 1011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757

In [6]:
rs34311866 = pd.read_csv(f'{WORK_DIR}/TMEM175_EUR/TMEM175_rs34311866_nalls.raw',sep=" ")
rs11731377 = pd.read_csv(f'{WORK_DIR}/TMEM175_EUR/TMEM175_rs11731377.raw',sep=" ")

In [7]:
total = pd.merge(rs34311866, rs11731377, on="IID")
total

Unnamed: 0,FID_x,IID,PAT_x,MAT_x,SEX_x,PHENOTYPE_x,chr4:958159:T:C_C,FID_y,PAT_y,MAT_y,SEX_y,PHENOTYPE_y,chr4:977478:T:G_G
0,0,APGS_000002_s1,0,0,1,2,0.0,0,0,0,1,2,0.0
1,0,APGS_000003_s1,0,0,1,2,0.0,0,0,0,1,2,0.0
2,0,APGS_000004_s1,0,0,2,2,1.0,0,0,0,2,2,0.0
3,0,APGS_000008_s1,0,0,1,2,1.0,0,0,0,1,2,1.0
4,0,APGS_000011_s1,0,0,1,2,1.0,0,0,0,1,2,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
38834,0,YMS_000043_s1,0,0,1,1,0.0,0,0,0,1,1,0.0
38835,0,YMS_000044_s1,0,0,1,2,,0,0,0,1,2,0.0
38836,0,YMS_000048_s1,0,0,1,-9,0.0,0,0,0,1,-9,0.0
38837,0,YMS_000050_s1,0,0,1,-9,0.0,0,0,0,1,-9,0.0


In [8]:
cov = pd.read_csv(f'{WORK_DIR}/TMEM175_EUR/EUR_covariate_file.txt', sep = '\t')
cov

Unnamed: 0,FID,IID,FATID,MATID,SEX,AGE,PHENO,PC1,PC2,PC3,PC4,PC5
0,0,APGS_000002_s1,0,0,1.0,,2.0,-29.483298,-38.473005,-9.398904,-10.842865,25.744648
1,0,APGS_000003_s1,0,0,1.0,,2.0,-33.068958,-34.288862,-11.116951,-8.056146,22.924043
2,0,APGS_000004_s1,0,0,2.0,,2.0,-28.204441,-30.969481,-15.883093,-7.004351,18.647007
3,0,APGS_000005_s1,0,0,1.0,,2.0,-35.039753,-39.565822,-12.522753,-9.339179,21.956533
4,0,APGS_000006_s1,0,0,1.0,,2.0,-30.279907,-34.988905,-10.664628,-7.407177,26.357548
...,...,...,...,...,...,...,...,...,...,...,...,...
58204,0,YMS_000049_s1,0,0,2.0,60.0,,-33.320502,-37.170809,-9.867466,-6.818637,18.344976
58205,0,YMS_000050_s1,0,0,1.0,67.0,,-33.550740,-38.934634,-13.550840,-11.161991,26.601555
58206,0,YMS_000051_s1,0,0,1.0,69.0,2.0,-36.415950,-36.634710,-9.623981,-8.333470,19.165537
58207,0,YMS_000052_s1,0,0,,,,55.200734,-12.367759,-2.246785,-0.681236,5.281365


In [9]:
total = pd.merge(total, cov, on="IID")
total

Unnamed: 0,FID_x,IID,PAT_x,MAT_x,SEX_x,PHENOTYPE_x,chr4:958159:T:C_C,FID_y,PAT_y,MAT_y,SEX_y,PHENOTYPE_y,chr4:977478:T:G_G,FID,FATID,MATID,SEX,AGE,PHENO,PC1,PC2,PC3,PC4,PC5
0,0,APGS_000002_s1,0,0,1,2,0.0,0,0,0,1,2,0.0,0,0,0,1.0,,2.0,-29.483298,-38.473005,-9.398904,-10.842865,25.744648
1,0,APGS_000003_s1,0,0,1,2,0.0,0,0,0,1,2,0.0,0,0,0,1.0,,2.0,-33.068958,-34.288862,-11.116951,-8.056146,22.924043
2,0,APGS_000004_s1,0,0,2,2,1.0,0,0,0,2,2,0.0,0,0,0,2.0,,2.0,-28.204441,-30.969481,-15.883093,-7.004351,18.647007
3,0,APGS_000008_s1,0,0,1,2,1.0,0,0,0,1,2,1.0,0,0,0,1.0,,2.0,-33.303205,-35.527912,-10.818937,-12.897920,6.851688
4,0,APGS_000011_s1,0,0,1,2,1.0,0,0,0,1,2,0.0,0,0,0,1.0,,2.0,-35.698278,-34.940878,-11.376421,-5.590934,24.578940
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
38833,0,YMS_000043_s1,0,0,1,1,0.0,0,0,0,1,1,0.0,0,0,0,1.0,70.0,1.0,-33.608572,-35.293481,-11.033493,-9.916257,24.680699
38834,0,YMS_000044_s1,0,0,1,2,,0,0,0,1,2,0.0,0,0,0,1.0,73.0,2.0,-32.287712,-35.705842,-11.819036,-6.511252,18.970359
38835,0,YMS_000048_s1,0,0,1,-9,0.0,0,0,0,1,-9,0.0,0,0,0,1.0,76.0,,-34.569818,-33.847077,-6.792226,-3.816187,21.473458
38836,0,YMS_000050_s1,0,0,1,-9,0.0,0,0,0,1,-9,0.0,0,0,0,1.0,67.0,,-33.550740,-38.934634,-13.550840,-11.161991,26.601555


In [10]:
total = total[(total['PHENOTYPE_x']==1) | (total['PHENOTYPE_x']==2)]
total

Unnamed: 0,FID_x,IID,PAT_x,MAT_x,SEX_x,PHENOTYPE_x,chr4:958159:T:C_C,FID_y,PAT_y,MAT_y,SEX_y,PHENOTYPE_y,chr4:977478:T:G_G,FID,FATID,MATID,SEX,AGE,PHENO,PC1,PC2,PC3,PC4,PC5
0,0,APGS_000002_s1,0,0,1,2,0.0,0,0,0,1,2,0.0,0,0,0,1.0,,2.0,-29.483298,-38.473005,-9.398904,-10.842865,25.744648
1,0,APGS_000003_s1,0,0,1,2,0.0,0,0,0,1,2,0.0,0,0,0,1.0,,2.0,-33.068958,-34.288862,-11.116951,-8.056146,22.924043
2,0,APGS_000004_s1,0,0,2,2,1.0,0,0,0,2,2,0.0,0,0,0,2.0,,2.0,-28.204441,-30.969481,-15.883093,-7.004351,18.647007
3,0,APGS_000008_s1,0,0,1,2,1.0,0,0,0,1,2,1.0,0,0,0,1.0,,2.0,-33.303205,-35.527912,-10.818937,-12.897920,6.851688
4,0,APGS_000011_s1,0,0,1,2,1.0,0,0,0,1,2,0.0,0,0,0,1.0,,2.0,-35.698278,-34.940878,-11.376421,-5.590934,24.578940
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
38830,0,YMS_000034_s1,0,0,2,2,0.0,0,0,0,2,2,0.0,0,0,0,2.0,58.0,2.0,-34.720268,-33.255119,-10.532694,-6.846457,23.835050
38831,0,YMS_000037_s1,0,0,2,1,1.0,0,0,0,2,1,1.0,0,0,0,2.0,54.0,1.0,-33.437058,-36.858277,-11.974232,-9.557805,22.390045
38832,0,YMS_000039_s1,0,0,1,2,2.0,0,0,0,1,2,1.0,0,0,0,1.0,62.0,2.0,-33.216739,-40.302510,-11.191126,-9.715245,22.341078
38833,0,YMS_000043_s1,0,0,1,1,0.0,0,0,0,1,1,0.0,0,0,0,1.0,70.0,1.0,-33.608572,-35.293481,-11.033493,-9.916257,24.680699


In [11]:
total = total[["IID",'PHENOTYPE_x',"chr4:958159:T:C_C","chr4:977478:T:G_G", "SEX", "AGE", "PC1","PC2","PC3","PC4","PC5"]]

In [12]:
total.columns = ["IID",'PHENOTYPE',"TMEM175_rs34311866_nalls","TMEM175_rs11731377","SEX", "AGE", "PC1","PC2","PC3","PC4","PC5"]
total

Unnamed: 0,IID,PHENOTYPE,TMEM175_rs34311866_nalls,TMEM175_rs11731377,SEX,AGE,PC1,PC2,PC3,PC4,PC5
0,APGS_000002_s1,2,0.0,0.0,1.0,,-29.483298,-38.473005,-9.398904,-10.842865,25.744648
1,APGS_000003_s1,2,0.0,0.0,1.0,,-33.068958,-34.288862,-11.116951,-8.056146,22.924043
2,APGS_000004_s1,2,1.0,0.0,2.0,,-28.204441,-30.969481,-15.883093,-7.004351,18.647007
3,APGS_000008_s1,2,1.0,1.0,1.0,,-33.303205,-35.527912,-10.818937,-12.897920,6.851688
4,APGS_000011_s1,2,1.0,0.0,1.0,,-35.698278,-34.940878,-11.376421,-5.590934,24.578940
...,...,...,...,...,...,...,...,...,...,...,...
38830,YMS_000034_s1,2,0.0,0.0,2.0,58.0,-34.720268,-33.255119,-10.532694,-6.846457,23.835050
38831,YMS_000037_s1,1,1.0,1.0,2.0,54.0,-33.437058,-36.858277,-11.974232,-9.557805,22.390045
38832,YMS_000039_s1,2,2.0,1.0,1.0,62.0,-33.216739,-40.302510,-11.191126,-9.715245,22.341078
38833,YMS_000043_s1,1,0.0,0.0,1.0,70.0,-33.608572,-35.293481,-11.033493,-9.916257,24.680699


In [13]:
total.to_csv(f'{WORK_DIR}/TMEM175_EUR/TMEM175_rs34311866_nalls_rs11731377.csv')

In [16]:
%%R
total <- read.csv("~/workspace/ws_files/TMEM175/TMEM175_EUR/TMEM175_rs34311866_nalls_rs11731377.csv")

In [17]:
%%R
head(total)

  X            IID PHENOTYPE TMEM175_rs34311866_nalls TMEM175_rs11731377 SEX
1 0 APGS_000002_s1         2                        0                  0   1
2 1 APGS_000003_s1         2                        0                  0   1
3 2 APGS_000004_s1         2                        1                  0   2
4 3 APGS_000008_s1         2                        1                  1   1
5 4 APGS_000011_s1         2                        1                  0   1
6 5 APGS_000016_s1         2                        2                  2  NA
  AGE       PC1       PC2        PC3        PC4       PC5
1  NA -29.48330 -38.47300  -9.398904 -10.842865 25.744648
2  NA -33.06896 -34.28886 -11.116951  -8.056146 22.924043
3  NA -28.20444 -30.96948 -15.883093  -7.004351 18.647007
4  NA -33.30320 -35.52791 -10.818937 -12.897920  6.851688
5  NA -35.69828 -34.94088 -11.376421  -5.590934 24.578940
6  NA -30.73116 -35.70957  -8.981066  -8.204220 21.164661


In [18]:
%%R

correlation.model <- lm(TMEM175_rs34311866_nalls ~ TMEM175_rs11731377, data = total)
summary(correlation.model)


Call:
lm(formula = TMEM175_rs34311866_nalls ~ TMEM175_rs11731377, data = total)

Residuals:
    Min      1Q  Median      3Q     Max 
-1.5365 -0.1908 -0.1908  0.1363  1.8092 

Coefficients:
                   Estimate Std. Error t value Pr(>|t|)    
(Intercept)        0.190852   0.003092   61.73   <2e-16 ***
TMEM175_rs11731377 0.672804   0.005106  131.78   <2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.4513 on 29531 degrees of freedom
  (878 observations deleted due to missingness)
Multiple R-squared:  0.3703,	Adjusted R-squared:  0.3703 
F-statistic: 1.737e+04 on 1 and 29531 DF,  p-value: < 2.2e-16



In [19]:
%%R
dep_corr<- glm(PHENOTYPE ~ TMEM175_rs34311866_nalls + TMEM175_rs11731377 + SEX + AGE + PC1 + PC2 + PC3 + PC4 + PC5, data=total)
summary(dep_corr)


Call:
glm(formula = PHENOTYPE ~ TMEM175_rs34311866_nalls + TMEM175_rs11731377 + 
    SEX + AGE + PC1 + PC2 + PC3 + PC4 + PC5, data = total)

Coefficients:
                           Estimate Std. Error t value Pr(>|t|)    
(Intercept)               1.4639329  0.0671373  21.805  < 2e-16 ***
TMEM175_rs34311866_nalls  0.0478139  0.0067679   7.065 1.66e-12 ***
TMEM175_rs11731377        0.0004801  0.0074602   0.064 0.948688    
SEX                      -0.1141855  0.0062491 -18.272  < 2e-16 ***
AGE                       0.0062403  0.0002636  23.671  < 2e-16 ***
PC1                      -0.0034063  0.0016915  -2.014 0.044049 *  
PC2                       0.0025310  0.0014578   1.736 0.082553 .  
PC3                      -0.0031779  0.0015307  -2.076 0.037903 *  
PC4                       0.0022741  0.0015782   1.441 0.149605    
PC5                      -0.0023100  0.0006439  -3.588 0.000335 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

(Dispersion parameter for ga

Person and conditional analysis with glm(rs6599391)

In [20]:
WORK_DIR = "~/workspace/ws_files/TMEM175/"

command = f"""
/home/jupyter/tools/plink \
--bfile {WORK_DIR}/TMEM175_EUR/EUR_TMEM175 \
--chr 4 \
--from-bp 985391 \
--to-bp 985391 \
--recode A \
--out {WORK_DIR}/TMEM175_EUR/TMEM175_rs6599391 \
--double-id
"""

!{command}

PLINK v1.90b6.9 64-bit (4 Mar 2019)            www.cog-genomics.org/plink/1.9/
(C) 2005-2019 Shaun Purcell, Christopher Chang   GNU General Public License v3
Logging to /home/jupyter/workspace/ws_files/TMEM175//TMEM175_EUR/TMEM175_rs6599391.log.
Options in effect:
  --bfile /home/jupyter/workspace/ws_files/TMEM175//TMEM175_EUR/EUR_TMEM175
  --chr 4
  --double-id
  --from-bp 985391
  --out /home/jupyter/workspace/ws_files/TMEM175//TMEM175_EUR/TMEM175_rs6599391
  --recode A
  --to-bp 985391

52223 MB RAM detected; reserving 26111 MB for main workspace.
1 out of 9504 variants loaded from .bim file.
38839 people (23296 males, 15543 females) loaded from .fam.
30412 phenotype values loaded from .fam.
Using 1 thread (no multithreaded calculations invoked).
Before main variant filters, 38839 founders and 0 nonfounders present.
Calculating allele frequencies... 101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767

In [21]:
rs6599391 = pd.read_csv(f'{WORK_DIR}/TMEM175_EUR/TMEM175_rs6599391.raw',sep=" ")

In [22]:
total = pd.merge(rs34311866, rs6599391, on="IID")
total

Unnamed: 0,FID_x,IID,PAT_x,MAT_x,SEX_x,PHENOTYPE_x,chr4:958159:T:C_C,FID_y,PAT_y,MAT_y,SEX_y,PHENOTYPE_y,chr4:985391:T:G_G
0,0,APGS_000002_s1,0,0,1,2,0.0,0,0,0,1,2,0.0
1,0,APGS_000003_s1,0,0,1,2,0.0,0,0,0,1,2,0.0
2,0,APGS_000004_s1,0,0,2,2,1.0,0,0,0,2,2,0.0
3,0,APGS_000008_s1,0,0,1,2,1.0,0,0,0,1,2,1.0
4,0,APGS_000011_s1,0,0,1,2,1.0,0,0,0,1,2,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
38834,0,YMS_000043_s1,0,0,1,1,0.0,0,0,0,1,1,0.0
38835,0,YMS_000044_s1,0,0,1,2,,0,0,0,1,2,0.0
38836,0,YMS_000048_s1,0,0,1,-9,0.0,0,0,0,1,-9,1.0
38837,0,YMS_000050_s1,0,0,1,-9,0.0,0,0,0,1,-9,0.0


In [23]:
total = pd.merge(total, cov, on="IID")
total

Unnamed: 0,FID_x,IID,PAT_x,MAT_x,SEX_x,PHENOTYPE_x,chr4:958159:T:C_C,FID_y,PAT_y,MAT_y,SEX_y,PHENOTYPE_y,chr4:985391:T:G_G,FID,FATID,MATID,SEX,AGE,PHENO,PC1,PC2,PC3,PC4,PC5
0,0,APGS_000002_s1,0,0,1,2,0.0,0,0,0,1,2,0.0,0,0,0,1.0,,2.0,-29.483298,-38.473005,-9.398904,-10.842865,25.744648
1,0,APGS_000003_s1,0,0,1,2,0.0,0,0,0,1,2,0.0,0,0,0,1.0,,2.0,-33.068958,-34.288862,-11.116951,-8.056146,22.924043
2,0,APGS_000004_s1,0,0,2,2,1.0,0,0,0,2,2,0.0,0,0,0,2.0,,2.0,-28.204441,-30.969481,-15.883093,-7.004351,18.647007
3,0,APGS_000008_s1,0,0,1,2,1.0,0,0,0,1,2,1.0,0,0,0,1.0,,2.0,-33.303205,-35.527912,-10.818937,-12.897920,6.851688
4,0,APGS_000011_s1,0,0,1,2,1.0,0,0,0,1,2,0.0,0,0,0,1.0,,2.0,-35.698278,-34.940878,-11.376421,-5.590934,24.578940
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
38833,0,YMS_000043_s1,0,0,1,1,0.0,0,0,0,1,1,0.0,0,0,0,1.0,70.0,1.0,-33.608572,-35.293481,-11.033493,-9.916257,24.680699
38834,0,YMS_000044_s1,0,0,1,2,,0,0,0,1,2,0.0,0,0,0,1.0,73.0,2.0,-32.287712,-35.705842,-11.819036,-6.511252,18.970359
38835,0,YMS_000048_s1,0,0,1,-9,0.0,0,0,0,1,-9,1.0,0,0,0,1.0,76.0,,-34.569818,-33.847077,-6.792226,-3.816187,21.473458
38836,0,YMS_000050_s1,0,0,1,-9,0.0,0,0,0,1,-9,0.0,0,0,0,1.0,67.0,,-33.550740,-38.934634,-13.550840,-11.161991,26.601555


In [24]:
total = total[(total['PHENOTYPE_x']==1) | (total['PHENOTYPE_x']==2)]
total

Unnamed: 0,FID_x,IID,PAT_x,MAT_x,SEX_x,PHENOTYPE_x,chr4:958159:T:C_C,FID_y,PAT_y,MAT_y,SEX_y,PHENOTYPE_y,chr4:985391:T:G_G,FID,FATID,MATID,SEX,AGE,PHENO,PC1,PC2,PC3,PC4,PC5
0,0,APGS_000002_s1,0,0,1,2,0.0,0,0,0,1,2,0.0,0,0,0,1.0,,2.0,-29.483298,-38.473005,-9.398904,-10.842865,25.744648
1,0,APGS_000003_s1,0,0,1,2,0.0,0,0,0,1,2,0.0,0,0,0,1.0,,2.0,-33.068958,-34.288862,-11.116951,-8.056146,22.924043
2,0,APGS_000004_s1,0,0,2,2,1.0,0,0,0,2,2,0.0,0,0,0,2.0,,2.0,-28.204441,-30.969481,-15.883093,-7.004351,18.647007
3,0,APGS_000008_s1,0,0,1,2,1.0,0,0,0,1,2,1.0,0,0,0,1.0,,2.0,-33.303205,-35.527912,-10.818937,-12.897920,6.851688
4,0,APGS_000011_s1,0,0,1,2,1.0,0,0,0,1,2,0.0,0,0,0,1.0,,2.0,-35.698278,-34.940878,-11.376421,-5.590934,24.578940
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
38830,0,YMS_000034_s1,0,0,2,2,0.0,0,0,0,2,2,0.0,0,0,0,2.0,58.0,2.0,-34.720268,-33.255119,-10.532694,-6.846457,23.835050
38831,0,YMS_000037_s1,0,0,2,1,1.0,0,0,0,2,1,1.0,0,0,0,2.0,54.0,1.0,-33.437058,-36.858277,-11.974232,-9.557805,22.390045
38832,0,YMS_000039_s1,0,0,1,2,2.0,0,0,0,1,2,1.0,0,0,0,1.0,62.0,2.0,-33.216739,-40.302510,-11.191126,-9.715245,22.341078
38833,0,YMS_000043_s1,0,0,1,1,0.0,0,0,0,1,1,0.0,0,0,0,1.0,70.0,1.0,-33.608572,-35.293481,-11.033493,-9.916257,24.680699


In [25]:
total = total[["IID",'PHENOTYPE_x',"chr4:958159:T:C_C","chr4:985391:T:G_G", "SEX", "AGE", "PC1","PC2","PC3","PC4","PC5"]]

In [26]:
total.columns = ["IID",'PHENOTYPE',"TMEM175_rs34311866_nalls","TMEM175_rs6599391","SEX", "AGE", "PC1","PC2","PC3","PC4","PC5"]
total

Unnamed: 0,IID,PHENOTYPE,TMEM175_rs34311866_nalls,TMEM175_rs6599391,SEX,AGE,PC1,PC2,PC3,PC4,PC5
0,APGS_000002_s1,2,0.0,0.0,1.0,,-29.483298,-38.473005,-9.398904,-10.842865,25.744648
1,APGS_000003_s1,2,0.0,0.0,1.0,,-33.068958,-34.288862,-11.116951,-8.056146,22.924043
2,APGS_000004_s1,2,1.0,0.0,2.0,,-28.204441,-30.969481,-15.883093,-7.004351,18.647007
3,APGS_000008_s1,2,1.0,1.0,1.0,,-33.303205,-35.527912,-10.818937,-12.897920,6.851688
4,APGS_000011_s1,2,1.0,0.0,1.0,,-35.698278,-34.940878,-11.376421,-5.590934,24.578940
...,...,...,...,...,...,...,...,...,...,...,...
38830,YMS_000034_s1,2,0.0,0.0,2.0,58.0,-34.720268,-33.255119,-10.532694,-6.846457,23.835050
38831,YMS_000037_s1,1,1.0,1.0,2.0,54.0,-33.437058,-36.858277,-11.974232,-9.557805,22.390045
38832,YMS_000039_s1,2,2.0,1.0,1.0,62.0,-33.216739,-40.302510,-11.191126,-9.715245,22.341078
38833,YMS_000043_s1,1,0.0,0.0,1.0,70.0,-33.608572,-35.293481,-11.033493,-9.916257,24.680699


In [27]:
total.to_csv(f'{WORK_DIR}/TMEM175_EUR/TMEM175_rs34311866_nalls_rs6599391.csv')

In [28]:
%%R
total <- read.csv("~/workspace/ws_files/TMEM175/TMEM175_EUR/TMEM175_rs34311866_nalls_rs6599391.csv")

In [29]:
%%R
head(total)

  X            IID PHENOTYPE TMEM175_rs34311866_nalls TMEM175_rs6599391 SEX AGE
1 0 APGS_000002_s1         2                        0                 0   1  NA
2 1 APGS_000003_s1         2                        0                 0   1  NA
3 2 APGS_000004_s1         2                        1                 0   2  NA
4 3 APGS_000008_s1         2                        1                 1   1  NA
5 4 APGS_000011_s1         2                        1                 0   1  NA
6 5 APGS_000016_s1         2                        2                 2  NA  NA
        PC1       PC2        PC3        PC4       PC5
1 -29.48330 -38.47300  -9.398904 -10.842865 25.744648
2 -33.06896 -34.28886 -11.116951  -8.056146 22.924043
3 -28.20444 -30.96948 -15.883093  -7.004351 18.647007
4 -33.30320 -35.52791 -10.818937 -12.897920  6.851688
5 -35.69828 -34.94088 -11.376421  -5.590934 24.578940
6 -30.73116 -35.70957  -8.981066  -8.204220 21.164661


In [30]:
%%R

correlation.model <- lm(TMEM175_rs34311866_nalls ~ TMEM175_rs6599391, data = total)
summary(correlation.model)


Call:
lm(formula = TMEM175_rs34311866_nalls ~ TMEM175_rs6599391, data = total)

Residuals:
    Min      1Q  Median      3Q     Max 
-1.5355 -0.1963 -0.1963  0.1341  1.8037 

Coefficients:
                  Estimate Std. Error t value Pr(>|t|)    
(Intercept)       0.196263   0.003114   63.02   <2e-16 ***
TMEM175_rs6599391 0.669607   0.005276  126.92   <2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.4534 on 28870 degrees of freedom
  (1539 observations deleted due to missingness)
Multiple R-squared:  0.3581,	Adjusted R-squared:  0.3581 
F-statistic: 1.611e+04 on 1 and 28870 DF,  p-value: < 2.2e-16



In [31]:
%%R
dep_corr<- glm(PHENOTYPE ~ TMEM175_rs34311866_nalls + TMEM175_rs6599391 + SEX + AGE + PC1 + PC2 + PC3 + PC4 + PC5, data=total)
summary(dep_corr)


Call:
glm(formula = PHENOTYPE ~ TMEM175_rs34311866_nalls + TMEM175_rs6599391 + 
    SEX + AGE + PC1 + PC2 + PC3 + PC4 + PC5, data = total)

Coefficients:
                           Estimate Std. Error t value Pr(>|t|)    
(Intercept)               1.4611815  0.0679926  21.490  < 2e-16 ***
TMEM175_rs34311866_nalls  0.0492999  0.0068278   7.220 5.37e-13 ***
TMEM175_rs6599391        -0.0003852  0.0076512  -0.050  0.95985    
SEX                      -0.1157631  0.0063212 -18.314  < 2e-16 ***
AGE                       0.0062624  0.0002664  23.510  < 2e-16 ***
PC1                      -0.0035487  0.0017098  -2.075  0.03796 *  
PC2                       0.0026485  0.0014743   1.796  0.07245 .  
PC3                      -0.0030284  0.0015488  -1.955  0.05055 .  
PC4                       0.0023599  0.0015956   1.479  0.13914    
PC5                      -0.0020974  0.0006556  -3.199  0.00138 ** 
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

(Dispersion parameter for gau

Person and conditional analysis with glm (rs3822023)

In [6]:
%load_ext rpy2.ipython

In [7]:
WORK_DIR = "~/workspace/ws_files/TMEM175/"

command = f"""
/home/jupyter/tools/plink \
--bfile {WORK_DIR}/TMEM175_AAC/AAC_TMEM175 \
--chr 4 \
--from-bp 958159 \
--to-bp 958159 \
--recode A \
--out {WORK_DIR}/TMEM175_AAC/TMEM175_rs34311866_nalls \
--double-id
"""

!{command}

PLINK v1.90b6.9 64-bit (4 Mar 2019)            www.cog-genomics.org/plink/1.9/
(C) 2005-2019 Shaun Purcell, Christopher Chang   GNU General Public License v3
Logging to /home/jupyter/workspace/ws_files/TMEM175//TMEM175_AAC/TMEM175_rs34311866_nalls.log.
Options in effect:
  --bfile /home/jupyter/workspace/ws_files/TMEM175//TMEM175_AAC/AAC_TMEM175
  --chr 4
  --double-id
  --from-bp 958159
  --out /home/jupyter/workspace/ws_files/TMEM175//TMEM175_AAC/TMEM175_rs34311866_nalls
  --recode A
  --to-bp 958159

52223 MB RAM detected; reserving 26111 MB for main workspace.
1 out of 3449 variants loaded from .bim file.
1111 people (455 males, 656 females) loaded from .fam.
1086 phenotype values loaded from .fam.
Using 1 thread (no multithreaded calculations invoked).
Before main variant filters, 1111 founders and 0 nonfounders present.
Calculating allele frequencies... 10111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273

In [8]:
WORK_DIR = "~/workspace/ws_files/TMEM175/"

command = f"""
/home/jupyter/tools/plink \
--bfile {WORK_DIR}/TMEM175_AAC/AAC_TMEM175 \
--chr 4 \
--from-bp 902698 \
--to-bp 902698 \
--recode A \
--out {WORK_DIR}/TMEM175_AAC/TMEM175_rs3822023 \
--double-id
"""

!{command}

PLINK v1.90b6.9 64-bit (4 Mar 2019)            www.cog-genomics.org/plink/1.9/
(C) 2005-2019 Shaun Purcell, Christopher Chang   GNU General Public License v3
Logging to /home/jupyter/workspace/ws_files/TMEM175//TMEM175_AAC/TMEM175_rs3822023.log.
Options in effect:
  --bfile /home/jupyter/workspace/ws_files/TMEM175//TMEM175_AAC/AAC_TMEM175
  --chr 4
  --double-id
  --from-bp 902698
  --out /home/jupyter/workspace/ws_files/TMEM175//TMEM175_AAC/TMEM175_rs3822023
  --recode A
  --to-bp 902698

52223 MB RAM detected; reserving 26111 MB for main workspace.
1 out of 3449 variants loaded from .bim file.
1111 people (455 males, 656 females) loaded from .fam.
1086 phenotype values loaded from .fam.
Using 1 thread (no multithreaded calculations invoked).
Before main variant filters, 1111 founders and 0 nonfounders present.
Calculating allele frequencies... 1011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980

In [9]:
rs34311866 = pd.read_csv(f'{WORK_DIR}/TMEM175_AAC/TMEM175_rs34311866_nalls.raw',sep=" ")
rs3822023 = pd.read_csv(f'{WORK_DIR}/TMEM175_AAC/TMEM175_rs3822023.raw',sep=" ")

In [10]:
total = pd.merge(rs34311866, rs3822023, on="IID")
total

Unnamed: 0,FID_x,IID,PAT_x,MAT_x,SEX_x,PHENOTYPE_x,chr4:958159:T:C_C,FID_y,PAT_y,MAT_y,SEX_y,PHENOTYPE_y,chr4:902698:T:C_C
0,0,BCM_000142_s1,0,0,1,2,0.0,0,0,0,1,2,1.0
1,0,BCM_000150_s1,0,0,2,2,0.0,0,0,0,2,2,0.0
2,0,BCM_000204_s1,0,0,2,2,0.0,0,0,0,2,2,0.0
3,0,BCM_000215_s1,0,0,1,2,0.0,0,0,0,1,2,0.0
4,0,BCM_000270_s1,0,0,2,1,0.0,0,0,0,2,1,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1106,0,UPENNU19_000409_s1,0,0,1,2,0.0,0,0,0,1,2,2.0
1107,0,UPENNU19_000432_s1,0,0,2,2,1.0,0,0,0,2,2,1.0
1108,0,VIPD_000151_s1,0,0,2,2,0.0,0,0,0,2,2,0.0
1109,0,VIPD_000679_s1,0,0,2,2,1.0,0,0,0,2,2,1.0


In [11]:
cov = pd.read_csv(f'{WORK_DIR}/TMEM175_AAC/AAC_covariate_file.txt', sep = '\t')
cov

Unnamed: 0,FID,IID,FATID,MATID,SEX,AGE,PHENO,PC1,PC2,PC3,PC4,PC5
0,0,APGS_000002_s1,0,0,,,,-29.483298,-38.473005,-9.398904,-10.842865,25.744648
1,0,APGS_000003_s1,0,0,,,,-33.068958,-34.288862,-11.116951,-8.056146,22.924043
2,0,APGS_000004_s1,0,0,,,,-28.204441,-30.969481,-15.883093,-7.004351,18.647007
3,0,APGS_000005_s1,0,0,,,,-35.039753,-39.565822,-12.522753,-9.339179,21.956533
4,0,APGS_000006_s1,0,0,,,,-30.279907,-34.988905,-10.664628,-7.407177,26.357548
...,...,...,...,...,...,...,...,...,...,...,...,...
58204,0,YMS_000049_s1,0,0,,,,-33.320502,-37.170809,-9.867466,-6.818637,18.344976
58205,0,YMS_000050_s1,0,0,,,,-33.550740,-38.934634,-13.550840,-11.161991,26.601555
58206,0,YMS_000051_s1,0,0,,,,-36.415950,-36.634710,-9.623981,-8.333470,19.165537
58207,0,YMS_000052_s1,0,0,1.0,53.0,1.0,55.200734,-12.367759,-2.246785,-0.681236,5.281365


In [12]:
total = pd.merge(total, cov, on="IID")
total

Unnamed: 0,FID_x,IID,PAT_x,MAT_x,SEX_x,PHENOTYPE_x,chr4:958159:T:C_C,FID_y,PAT_y,MAT_y,SEX_y,PHENOTYPE_y,chr4:902698:T:C_C,FID,FATID,MATID,SEX,AGE,PHENO,PC1,PC2,PC3,PC4,PC5
0,0,BCM_000142_s1,0,0,1,2,0.0,0,0,0,1,2,1.0,0,0,0,1.0,,2.0,42.344694,-18.080400,-4.379556,-4.231518,11.945426
1,0,BCM_000150_s1,0,0,2,2,0.0,0,0,0,2,2,0.0,0,0,0,2.0,,2.0,46.211997,-16.728937,-2.939694,-4.306565,5.530103
2,0,BCM_000204_s1,0,0,2,2,0.0,0,0,0,2,2,0.0,0,0,0,2.0,,2.0,53.796533,-13.345479,-2.312777,-4.141204,3.542342
3,0,BCM_000215_s1,0,0,1,2,0.0,0,0,0,1,2,0.0,0,0,0,1.0,,2.0,33.149467,-17.891235,-5.542113,-2.706507,6.964059
4,0,BCM_000270_s1,0,0,2,1,0.0,0,0,0,2,1,1.0,0,0,0,2.0,,1.0,44.397319,-15.104927,-4.648018,-3.126988,7.345741
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1106,0,UPENNU19_000409_s1,0,0,1,2,0.0,0,0,0,1,2,2.0,0,0,0,1.0,79.386111,2.0,59.974838,-11.604280,-4.482499,-2.758223,3.541477
1107,0,UPENNU19_000432_s1,0,0,2,2,1.0,0,0,0,2,2,1.0,0,0,0,2.0,77.661111,2.0,55.937250,-11.575462,-5.033753,-2.485275,6.130544
1108,0,VIPD_000151_s1,0,0,2,2,0.0,0,0,0,2,2,0.0,0,0,0,2.0,63.000000,2.0,45.339968,-18.329921,-5.496370,-0.623469,2.827065
1109,0,VIPD_000679_s1,0,0,2,2,1.0,0,0,0,2,2,1.0,0,0,0,2.0,60.000000,2.0,24.426378,-24.074607,-5.665107,-6.488002,12.017162


In [13]:
total = total[(total['PHENOTYPE_x']==1) | (total['PHENOTYPE_x']==2)]
total

Unnamed: 0,FID_x,IID,PAT_x,MAT_x,SEX_x,PHENOTYPE_x,chr4:958159:T:C_C,FID_y,PAT_y,MAT_y,SEX_y,PHENOTYPE_y,chr4:902698:T:C_C,FID,FATID,MATID,SEX,AGE,PHENO,PC1,PC2,PC3,PC4,PC5
0,0,BCM_000142_s1,0,0,1,2,0.0,0,0,0,1,2,1.0,0,0,0,1.0,,2.0,42.344694,-18.080400,-4.379556,-4.231518,11.945426
1,0,BCM_000150_s1,0,0,2,2,0.0,0,0,0,2,2,0.0,0,0,0,2.0,,2.0,46.211997,-16.728937,-2.939694,-4.306565,5.530103
2,0,BCM_000204_s1,0,0,2,2,0.0,0,0,0,2,2,0.0,0,0,0,2.0,,2.0,53.796533,-13.345479,-2.312777,-4.141204,3.542342
3,0,BCM_000215_s1,0,0,1,2,0.0,0,0,0,1,2,0.0,0,0,0,1.0,,2.0,33.149467,-17.891235,-5.542113,-2.706507,6.964059
4,0,BCM_000270_s1,0,0,2,1,0.0,0,0,0,2,1,1.0,0,0,0,2.0,,1.0,44.397319,-15.104927,-4.648018,-3.126988,7.345741
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1106,0,UPENNU19_000409_s1,0,0,1,2,0.0,0,0,0,1,2,2.0,0,0,0,1.0,79.386111,2.0,59.974838,-11.604280,-4.482499,-2.758223,3.541477
1107,0,UPENNU19_000432_s1,0,0,2,2,1.0,0,0,0,2,2,1.0,0,0,0,2.0,77.661111,2.0,55.937250,-11.575462,-5.033753,-2.485275,6.130544
1108,0,VIPD_000151_s1,0,0,2,2,0.0,0,0,0,2,2,0.0,0,0,0,2.0,63.000000,2.0,45.339968,-18.329921,-5.496370,-0.623469,2.827065
1109,0,VIPD_000679_s1,0,0,2,2,1.0,0,0,0,2,2,1.0,0,0,0,2.0,60.000000,2.0,24.426378,-24.074607,-5.665107,-6.488002,12.017162


In [14]:
total = total[["IID",'PHENOTYPE_x',"chr4:958159:T:C_C","chr4:902698:T:C_C", "SEX", "AGE", "PC1","PC2","PC3","PC4","PC5"]]

In [15]:
total.columns = ["IID",'PHENOTYPE',"TMEM175_rs34311866_nalls","TMEM175_rs3822023","SEX", "AGE", "PC1","PC2","PC3","PC4","PC5"]
total

Unnamed: 0,IID,PHENOTYPE,TMEM175_rs34311866_nalls,TMEM175_rs3822023,SEX,AGE,PC1,PC2,PC3,PC4,PC5
0,BCM_000142_s1,2,0.0,1.0,1.0,,42.344694,-18.080400,-4.379556,-4.231518,11.945426
1,BCM_000150_s1,2,0.0,0.0,2.0,,46.211997,-16.728937,-2.939694,-4.306565,5.530103
2,BCM_000204_s1,2,0.0,0.0,2.0,,53.796533,-13.345479,-2.312777,-4.141204,3.542342
3,BCM_000215_s1,2,0.0,0.0,1.0,,33.149467,-17.891235,-5.542113,-2.706507,6.964059
4,BCM_000270_s1,1,0.0,1.0,2.0,,44.397319,-15.104927,-4.648018,-3.126988,7.345741
...,...,...,...,...,...,...,...,...,...,...,...
1106,UPENNU19_000409_s1,2,0.0,2.0,1.0,79.386111,59.974838,-11.604280,-4.482499,-2.758223,3.541477
1107,UPENNU19_000432_s1,2,1.0,1.0,2.0,77.661111,55.937250,-11.575462,-5.033753,-2.485275,6.130544
1108,VIPD_000151_s1,2,0.0,0.0,2.0,63.000000,45.339968,-18.329921,-5.496370,-0.623469,2.827065
1109,VIPD_000679_s1,2,1.0,1.0,2.0,60.000000,24.426378,-24.074607,-5.665107,-6.488002,12.017162


In [16]:
total.to_csv(f'{WORK_DIR}/TMEM175_AAC/TMEM175_rs34311866_nalls_rs3822023.csv')

In [17]:
%%R
total <- read.csv("~/workspace/ws_files/TMEM175/TMEM175_AAC/TMEM175_rs34311866_nalls_rs3822023.csv")

In [18]:
%%R
head(total)

  X           IID PHENOTYPE TMEM175_rs34311866_nalls TMEM175_rs3822023 SEX AGE
1 0 BCM_000142_s1         2                        0                 1   1  NA
2 1 BCM_000150_s1         2                        0                 0   2  NA
3 2 BCM_000204_s1         2                        0                 0   2  NA
4 3 BCM_000215_s1         2                        0                 0   1  NA
5 4 BCM_000270_s1         1                        0                 1   2  NA
6 5 BCM_000277_s1         1                        1                 1   2  NA
       PC1       PC2       PC3       PC4        PC5
1 42.34469 -18.08040 -4.379556 -4.231518 11.9454258
2 46.21200 -16.72894 -2.939694 -4.306565  5.5301026
3 53.79653 -13.34548 -2.312777 -4.141204  3.5423418
4 33.14947 -17.89123 -5.542113 -2.706507  6.9640586
5 44.39732 -15.10493 -4.648018 -3.126988  7.3457406
6 63.11613 -11.49276 -2.482421 -1.453234  0.2000133


In [19]:
%%R

correlation.model <- lm(TMEM175_rs34311866_nalls ~ TMEM175_rs3822023, data = total)
summary(correlation.model)


Call:
lm(formula = TMEM175_rs34311866_nalls ~ TMEM175_rs3822023, data = total)

Residuals:
     Min       1Q   Median       3Q      Max 
-0.10548 -0.10548 -0.09122 -0.09122  1.90878 

Coefficients:
                  Estimate Std. Error t value Pr(>|t|)    
(Intercept)        0.10548    0.01440   7.325 4.72e-13 ***
TMEM175_rs3822023 -0.01427    0.01340  -1.064    0.287    
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.2979 on 1055 degrees of freedom
  (29 observations deleted due to missingness)
Multiple R-squared:  0.001073,	Adjusted R-squared:  0.0001258 
F-statistic: 1.133 on 1 and 1055 DF,  p-value: 0.2874



In [20]:
%%R
dep_corr<- glm(PHENOTYPE ~ TMEM175_rs34311866_nalls + TMEM175_rs3822023 + SEX + AGE + PC1 + PC2 + PC3 + PC4 + PC5, data=total)
summary(dep_corr)


Call:
glm(formula = PHENOTYPE ~ TMEM175_rs34311866_nalls + TMEM175_rs3822023 + 
    SEX + AGE + PC1 + PC2 + PC3 + PC4 + PC5, data = total)

Coefficients:
                          Estimate Std. Error t value Pr(>|t|)    
(Intercept)               2.363025   0.119358  19.798  < 2e-16 ***
TMEM175_rs34311866_nalls -0.025002   0.041030  -0.609 0.542434    
TMEM175_rs3822023         0.060082   0.018168   3.307 0.000981 ***
SEX                      -0.097427   0.025418  -3.833 0.000135 ***
AGE                       0.003558   0.001217   2.924 0.003544 ** 
PC1                      -0.016870   0.001332 -12.663  < 2e-16 ***
PC2                       0.014707   0.004384   3.355 0.000827 ***
PC3                      -0.009898   0.004942  -2.003 0.045501 *  
PC4                       0.005723   0.006161   0.929 0.353165    
PC5                      -0.044458   0.005513  -8.065 2.36e-15 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

(Dispersion parameter for gaussian famil

Person and conditional analysis with glm (rs188897299)

In [21]:
WORK_DIR = "~/workspace/ws_files/TMEM175/"

command = f"""
/home/jupyter/tools/plink \
--bfile {WORK_DIR}/TMEM175_CAH/CAH_TMEM175 \
--chr 4 \
--from-bp 896281 \
--to-bp 896281 \
--recode A \
--out {WORK_DIR}/TMEM175_CAH/TMEM175_rs188897299 \
--double-id
"""

!{command}

PLINK v1.90b6.9 64-bit (4 Mar 2019)            www.cog-genomics.org/plink/1.9/
(C) 2005-2019 Shaun Purcell, Christopher Chang   GNU General Public License v3
Logging to /home/jupyter/workspace/ws_files/TMEM175//TMEM175_CAH/TMEM175_rs188897299.log.
Options in effect:
  --bfile /home/jupyter/workspace/ws_files/TMEM175//TMEM175_CAH/CAH_TMEM175
  --chr 4
  --double-id
  --from-bp 896281
  --out /home/jupyter/workspace/ws_files/TMEM175//TMEM175_CAH/TMEM175_rs188897299
  --recode A
  --to-bp 896281

52223 MB RAM detected; reserving 26111 MB for main workspace.
1 out of 3090 variants loaded from .bim file.
851 people (446 males, 405 females) loaded from .fam.
827 phenotype values loaded from .fam.
Using 1 thread (no multithreaded calculations invoked).
Before main variant filters, 851 founders and 0 nonfounders present.
Calculating allele frequencies... 101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798

In [22]:
WORK_DIR = "~/workspace/ws_files/TMEM175/"

command = f"""
/home/jupyter/tools/plink \
--bfile {WORK_DIR}/TMEM175_CAH/CAH_TMEM175 \
--chr 4 \
--from-bp 958159 \
--to-bp 958159 \
--recode A \
--out {WORK_DIR}/TMEM175_CAH/TMEM175_rs34311866_nalls \
--double-id
"""

!{command}

PLINK v1.90b6.9 64-bit (4 Mar 2019)            www.cog-genomics.org/plink/1.9/
(C) 2005-2019 Shaun Purcell, Christopher Chang   GNU General Public License v3
Logging to /home/jupyter/workspace/ws_files/TMEM175//TMEM175_CAH/TMEM175_rs34311866_nalls.log.
Options in effect:
  --bfile /home/jupyter/workspace/ws_files/TMEM175//TMEM175_CAH/CAH_TMEM175
  --chr 4
  --double-id
  --from-bp 958159
  --out /home/jupyter/workspace/ws_files/TMEM175//TMEM175_CAH/TMEM175_rs34311866_nalls
  --recode A
  --to-bp 958159

52223 MB RAM detected; reserving 26111 MB for main workspace.
1 out of 3090 variants loaded from .bim file.
851 people (446 males, 405 females) loaded from .fam.
827 phenotype values loaded from .fam.
Using 1 thread (no multithreaded calculations invoked).
Before main variant filters, 851 founders and 0 nonfounders present.
Calculating allele frequencies... 10111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747

In [23]:
rs34311866 = pd.read_csv(f'{WORK_DIR}/TMEM175_CAH/TMEM175_rs34311866_nalls.raw',sep=" ")
rs188897299 = pd.read_csv(f'{WORK_DIR}/TMEM175_CAH/TMEM175_rs188897299.raw',sep=" ")

In [24]:
total = pd.merge(rs34311866, rs188897299, on="IID")
total

Unnamed: 0,FID_x,IID,PAT_x,MAT_x,SEX_x,PHENOTYPE_x,chr4:958159:T:C_C,FID_y,PAT_y,MAT_y,SEX_y,PHENOTYPE_y,chr4:896281:A:C_C
0,0,APGS_000114_s1,0,0,1,2,0.0,0,0,0,1,2,0.0
1,0,APGS_000299_s1,0,0,1,2,0.0,0,0,0,1,2,0.0
2,0,APGS_000386_s1,0,0,2,2,1.0,0,0,0,2,2,0.0
3,0,APGS_000416_s1,0,0,1,2,0.0,0,0,0,1,2,0.0
4,0,APGS_000684_s1,0,0,1,2,0.0,0,0,0,1,2,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
846,0,VIPD_000068_s1,0,0,2,2,1.0,0,0,0,2,2,0.0
847,0,VIPD_000375_s1,0,0,1,2,0.0,0,0,0,1,2,0.0
848,0,VIPD_000647_s1,0,0,2,1,,0,0,0,2,1,0.0
849,0,VIPD_000742_s1,0,0,1,2,1.0,0,0,0,1,2,0.0


In [25]:
cov = pd.read_csv(f'{WORK_DIR}/TMEM175_CAH/CAH_covariate_file.txt', sep = '\t')
cov

Unnamed: 0,FID,IID,FATID,MATID,SEX,AGE,PHENO,PC1,PC2,PC3,PC4,PC5
0,0,APGS_000002_s1,0,0,,,,-29.483298,-38.473005,-9.398904,-10.842865,25.744648
1,0,APGS_000003_s1,0,0,,,,-33.068958,-34.288862,-11.116951,-8.056146,22.924043
2,0,APGS_000004_s1,0,0,,,,-28.204441,-30.969481,-15.883093,-7.004351,18.647007
3,0,APGS_000005_s1,0,0,,,,-35.039753,-39.565822,-12.522753,-9.339179,21.956533
4,0,APGS_000006_s1,0,0,,,,-30.279907,-34.988905,-10.664628,-7.407177,26.357548
...,...,...,...,...,...,...,...,...,...,...,...,...
58204,0,YMS_000049_s1,0,0,,,,-33.320502,-37.170809,-9.867466,-6.818637,18.344976
58205,0,YMS_000050_s1,0,0,,,,-33.550740,-38.934634,-13.550840,-11.161991,26.601555
58206,0,YMS_000051_s1,0,0,,,,-36.415950,-36.634710,-9.623981,-8.333470,19.165537
58207,0,YMS_000052_s1,0,0,,,,55.200734,-12.367759,-2.246785,-0.681236,5.281365


In [26]:
total = pd.merge(total, cov, on="IID")
total

Unnamed: 0,FID_x,IID,PAT_x,MAT_x,SEX_x,PHENOTYPE_x,chr4:958159:T:C_C,FID_y,PAT_y,MAT_y,SEX_y,PHENOTYPE_y,chr4:896281:A:C_C,FID,FATID,MATID,SEX,AGE,PHENO,PC1,PC2,PC3,PC4,PC5
0,0,APGS_000114_s1,0,0,1,2,0.0,0,0,0,1,2,0.0,0,0,0,1.0,,2.0,-8.644799,-0.454641,5.388369,-2.623423,12.250261
1,0,APGS_000299_s1,0,0,1,2,0.0,0,0,0,1,2,0.0,0,0,0,1.0,,2.0,-27.117332,-6.534283,18.966258,4.719556,10.820025
2,0,APGS_000386_s1,0,0,2,2,1.0,0,0,0,2,2,0.0,0,0,0,2.0,,2.0,-27.918558,3.239078,-8.978111,-17.386542,16.116924
3,0,APGS_000416_s1,0,0,1,2,0.0,0,0,0,1,2,0.0,0,0,0,1.0,,2.0,-19.850066,-15.093607,11.557804,-2.183026,7.027059
4,0,APGS_000684_s1,0,0,1,2,0.0,0,0,0,1,2,0.0,0,0,0,1.0,,2.0,-26.649221,-7.140707,-4.581517,-7.305596,13.061927
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
846,0,VIPD_000068_s1,0,0,2,2,1.0,0,0,0,2,2,0.0,0,0,0,2.0,58.0,2.0,-25.086819,-15.832343,17.822988,4.304263,12.155667
847,0,VIPD_000375_s1,0,0,1,2,0.0,0,0,0,1,2,0.0,0,0,0,1.0,56.0,2.0,16.583566,-22.589907,-4.884012,-5.291012,11.423993
848,0,VIPD_000647_s1,0,0,2,1,,0,0,0,2,1,0.0,0,0,0,2.0,56.0,1.0,-2.504160,-11.905903,-7.795025,-8.134426,14.603985
849,0,VIPD_000742_s1,0,0,1,2,1.0,0,0,0,1,2,0.0,0,0,0,1.0,81.0,2.0,-27.020436,-7.366797,-10.300603,-15.562663,-7.277205


In [27]:
total = total[(total['PHENOTYPE_x']==1) | (total['PHENOTYPE_x']==2)]
total

Unnamed: 0,FID_x,IID,PAT_x,MAT_x,SEX_x,PHENOTYPE_x,chr4:958159:T:C_C,FID_y,PAT_y,MAT_y,SEX_y,PHENOTYPE_y,chr4:896281:A:C_C,FID,FATID,MATID,SEX,AGE,PHENO,PC1,PC2,PC3,PC4,PC5
0,0,APGS_000114_s1,0,0,1,2,0.0,0,0,0,1,2,0.0,0,0,0,1.0,,2.0,-8.644799,-0.454641,5.388369,-2.623423,12.250261
1,0,APGS_000299_s1,0,0,1,2,0.0,0,0,0,1,2,0.0,0,0,0,1.0,,2.0,-27.117332,-6.534283,18.966258,4.719556,10.820025
2,0,APGS_000386_s1,0,0,2,2,1.0,0,0,0,2,2,0.0,0,0,0,2.0,,2.0,-27.918558,3.239078,-8.978111,-17.386542,16.116924
3,0,APGS_000416_s1,0,0,1,2,0.0,0,0,0,1,2,0.0,0,0,0,1.0,,2.0,-19.850066,-15.093607,11.557804,-2.183026,7.027059
4,0,APGS_000684_s1,0,0,1,2,0.0,0,0,0,1,2,0.0,0,0,0,1.0,,2.0,-26.649221,-7.140707,-4.581517,-7.305596,13.061927
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
845,0,UPENNU19_000406_s1,0,0,2,2,0.0,0,0,0,2,2,0.0,0,0,0,2.0,67.405556,2.0,-10.061335,-19.094153,-22.136097,10.401768,4.482694
846,0,VIPD_000068_s1,0,0,2,2,1.0,0,0,0,2,2,0.0,0,0,0,2.0,58.000000,2.0,-25.086819,-15.832343,17.822988,4.304263,12.155667
847,0,VIPD_000375_s1,0,0,1,2,0.0,0,0,0,1,2,0.0,0,0,0,1.0,56.000000,2.0,16.583566,-22.589907,-4.884012,-5.291012,11.423993
848,0,VIPD_000647_s1,0,0,2,1,,0,0,0,2,1,0.0,0,0,0,2.0,56.000000,1.0,-2.504160,-11.905903,-7.795025,-8.134426,14.603985


In [28]:
total = total[["IID",'PHENOTYPE_x',"chr4:958159:T:C_C","chr4:896281:A:C_C", "SEX", "AGE", "PC1","PC2","PC3","PC4","PC5"]]

In [29]:
total.columns = ["IID",'PHENOTYPE',"TMEM175_rs34311866_nalls","TMEM175_rs188897299","SEX", "AGE", "PC1","PC2","PC3","PC4","PC5"]
total

Unnamed: 0,IID,PHENOTYPE,TMEM175_rs34311866_nalls,TMEM175_rs188897299,SEX,AGE,PC1,PC2,PC3,PC4,PC5
0,APGS_000114_s1,2,0.0,0.0,1.0,,-8.644799,-0.454641,5.388369,-2.623423,12.250261
1,APGS_000299_s1,2,0.0,0.0,1.0,,-27.117332,-6.534283,18.966258,4.719556,10.820025
2,APGS_000386_s1,2,1.0,0.0,2.0,,-27.918558,3.239078,-8.978111,-17.386542,16.116924
3,APGS_000416_s1,2,0.0,0.0,1.0,,-19.850066,-15.093607,11.557804,-2.183026,7.027059
4,APGS_000684_s1,2,0.0,0.0,1.0,,-26.649221,-7.140707,-4.581517,-7.305596,13.061927
...,...,...,...,...,...,...,...,...,...,...,...
845,UPENNU19_000406_s1,2,0.0,0.0,2.0,67.405556,-10.061335,-19.094153,-22.136097,10.401768,4.482694
846,VIPD_000068_s1,2,1.0,0.0,2.0,58.000000,-25.086819,-15.832343,17.822988,4.304263,12.155667
847,VIPD_000375_s1,2,0.0,0.0,1.0,56.000000,16.583566,-22.589907,-4.884012,-5.291012,11.423993
848,VIPD_000647_s1,1,,0.0,2.0,56.000000,-2.504160,-11.905903,-7.795025,-8.134426,14.603985


In [30]:
total.to_csv(f'{WORK_DIR}/TMEM175_CAH/TMEM175_rs34311866_nalls_rs188897299.csv')

In [31]:
%%R
total <- read.csv("~/workspace/ws_files/TMEM175/TMEM175_CAH/TMEM175_rs34311866_nalls_rs188897299.csv")

In [32]:
%%R
head(total)

  X            IID PHENOTYPE TMEM175_rs34311866_nalls TMEM175_rs188897299 SEX
1 0 APGS_000114_s1         2                        0                   0   1
2 1 APGS_000299_s1         2                        0                   0   1
3 2 APGS_000386_s1         2                        1                   0   2
4 3 APGS_000416_s1         2                        0                   0   1
5 4 APGS_000684_s1         2                        0                   0   1
6 5 APGS_000724_s1         2                        0                   0   1
  AGE        PC1         PC2       PC3        PC4       PC5
1  NA  -8.644799  -0.4546406  5.388369  -2.623423 12.250261
2  NA -27.117332  -6.5342828 18.966258   4.719556 10.820025
3  NA -27.918558   3.2390784 -8.978111 -17.386542 16.116924
4  NA -19.850066 -15.0936070 11.557804  -2.183026  7.027059
5  NA -26.649221  -7.1407070 -4.581517  -7.305596 13.061927
6  NA  -3.051137  -9.3939669  4.065017  -4.493076  6.053414


In [33]:
%%R

correlation.model <- lm(TMEM175_rs34311866_nalls ~ TMEM175_rs188897299, data = total)
summary(correlation.model)


Call:
lm(formula = TMEM175_rs34311866_nalls ~ TMEM175_rs188897299, 
    data = total)

Residuals:
    Min      1Q  Median      3Q     Max 
-0.3123 -0.3123 -0.3123  0.6877  1.6877 

Coefficients:
                    Estimate Std. Error t value Pr(>|t|)    
(Intercept)          0.31227    0.01789  17.452   <2e-16 ***
TMEM175_rs188897299 -0.18282    0.09582  -1.908   0.0568 .  
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.4994 on 801 degrees of freedom
  (24 observations deleted due to missingness)
Multiple R-squared:  0.004524,	Adjusted R-squared:  0.003281 
F-statistic:  3.64 on 1 and 801 DF,  p-value: 0.05675



In [34]:
%%R
dep_corr<- glm(PHENOTYPE ~ TMEM175_rs34311866_nalls + TMEM175_rs188897299 + SEX + AGE + PC1 + PC2 + PC3 + PC4 + PC5, data=total)
summary(dep_corr)


Call:
glm(formula = PHENOTYPE ~ TMEM175_rs34311866_nalls + TMEM175_rs188897299 + 
    SEX + AGE + PC1 + PC2 + PC3 + PC4 + PC5, data = total)

Coefficients:
                          Estimate Std. Error t value Pr(>|t|)    
(Intercept)               1.246928   0.082737  15.071  < 2e-16 ***
TMEM175_rs34311866_nalls -0.010578   0.034906  -0.303 0.761959    
TMEM175_rs188897299      -0.354698   0.095306  -3.722 0.000215 ***
SEX                      -0.105538   0.033602  -3.141 0.001760 ** 
AGE                       0.010337   0.001039   9.954  < 2e-16 ***
PC1                       0.001416   0.001143   1.239 0.215828    
PC2                      -0.001086   0.002031  -0.535 0.592971    
PC3                      -0.004465   0.002309  -1.934 0.053560 .  
PC4                       0.003527   0.003393   1.040 0.298937    
PC5                      -0.012965   0.003909  -3.317 0.000960 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

(Dispersion parameter for gaussian fam

Person and conditional analysis with glm (rs3733344)

In [36]:
WORK_DIR = "~/workspace/ws_files/TMEM175/"

command = f"""
/home/jupyter/tools/plink \
--bfile {WORK_DIR}/TMEM175_EAS/EAS_TMEM175 \
--chr 4 \
--from-bp 958159 \
--to-bp 958159 \
--recode A \
--out {WORK_DIR}/TMEM175_EAS/TMEM175_rs34311866_nalls \
--double-id
"""

!{command}

PLINK v1.90b6.9 64-bit (4 Mar 2019)            www.cog-genomics.org/plink/1.9/
(C) 2005-2019 Shaun Purcell, Christopher Chang   GNU General Public License v3
Logging to /home/jupyter/workspace/ws_files/TMEM175//TMEM175_EAS/TMEM175_rs34311866_nalls.log.
Options in effect:
  --bfile /home/jupyter/workspace/ws_files/TMEM175//TMEM175_EAS/EAS_TMEM175
  --chr 4
  --double-id
  --from-bp 958159
  --out /home/jupyter/workspace/ws_files/TMEM175//TMEM175_EAS/TMEM175_rs34311866_nalls
  --recode A
  --to-bp 958159

52223 MB RAM detected; reserving 26111 MB for main workspace.
1 out of 3171 variants loaded from .bim file.
5167 people (3278 males, 1889 females) loaded from .fam.
5123 phenotype values loaded from .fam.
Using 1 thread (no multithreaded calculations invoked).
Before main variant filters, 5167 founders and 0 nonfounders present.
Calculating allele frequencies... 101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172

In [37]:
WORK_DIR = "~/workspace/ws_files/TMEM175/"

command = f"""
/home/jupyter/tools/plink \
--bfile {WORK_DIR}/TMEM175_EAS/EAS_TMEM175 \
--chr 4 \
--from-bp 957252 \
--to-bp 957252 \
--recode A \
--out {WORK_DIR}/TMEM175_EAS/TMEM175_rs3733344 \
--double-id
"""

!{command}

PLINK v1.90b6.9 64-bit (4 Mar 2019)            www.cog-genomics.org/plink/1.9/
(C) 2005-2019 Shaun Purcell, Christopher Chang   GNU General Public License v3
Logging to /home/jupyter/workspace/ws_files/TMEM175//TMEM175_EAS/TMEM175_rs3733344.log.
Options in effect:
  --bfile /home/jupyter/workspace/ws_files/TMEM175//TMEM175_EAS/EAS_TMEM175
  --chr 4
  --double-id
  --from-bp 957252
  --out /home/jupyter/workspace/ws_files/TMEM175//TMEM175_EAS/TMEM175_rs3733344
  --recode A
  --to-bp 957252

52223 MB RAM detected; reserving 26111 MB for main workspace.
1 out of 3171 variants loaded from .bim file.
5167 people (3278 males, 1889 females) loaded from .fam.
5123 phenotype values loaded from .fam.
Using 1 thread (no multithreaded calculations invoked).
Before main variant filters, 5167 founders and 0 nonfounders present.
Calculating allele frequencies... 10111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879

In [38]:
rs34311866 = pd.read_csv(f'{WORK_DIR}/TMEM175_EAS/TMEM175_rs34311866_nalls.raw',sep=" ")
rs3733344 = pd.read_csv(f'{WORK_DIR}/TMEM175_EAS/TMEM175_rs3733344.raw',sep=" ")

In [39]:
total = pd.merge(rs34311866, rs3733344, on="IID")
total

Unnamed: 0,FID_x,IID,PAT_x,MAT_x,SEX_x,PHENOTYPE_x,chr4:958159:T:C_C,FID_y,PAT_y,MAT_y,SEX_y,PHENOTYPE_y,chr4:957252:T:G_G
0,0,APGS_000051_s1,0,0,1,2,0.0,0,0,0,1,2,0.0
1,0,APGS_000111_s1,0,0,2,2,,0,0,0,2,2,1.0
2,0,APGS_000113_s1,0,0,1,2,0.0,0,0,0,1,2,0.0
3,0,APGS_000135_s1,0,0,1,2,1.0,0,0,0,1,2,0.0
4,0,APGS_000186_s1,0,0,1,2,0.0,0,0,0,1,2,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
5162,0,VIPD_000429_s1,0,0,2,1,0.0,0,0,0,2,1,0.0
5163,0,VIPD_000459_s1,0,0,1,1,0.0,0,0,0,1,1,1.0
5164,0,VIPD_000488_s1,0,0,2,1,1.0,0,0,0,2,1,1.0
5165,0,VIPD_000518_s1,0,0,2,1,0.0,0,0,0,2,1,2.0


In [40]:
cov = pd.read_csv(f'{WORK_DIR}/TMEM175_EAS/EAS_covariate_file.txt', sep = '\t')
cov

Unnamed: 0,FID,IID,FATID,MATID,SEX,AGE,PHENO,PC1,PC2,PC3,PC4,PC5
0,0,APGS_000002_s1,0,0,,,,-29.483298,-38.473005,-9.398904,-10.842865,25.744648
1,0,APGS_000003_s1,0,0,,,,-33.068958,-34.288862,-11.116951,-8.056146,22.924043
2,0,APGS_000004_s1,0,0,,,,-28.204441,-30.969481,-15.883093,-7.004351,18.647007
3,0,APGS_000005_s1,0,0,,,,-35.039753,-39.565822,-12.522753,-9.339179,21.956533
4,0,APGS_000006_s1,0,0,,,,-30.279907,-34.988905,-10.664628,-7.407177,26.357548
...,...,...,...,...,...,...,...,...,...,...,...,...
58204,0,YMS_000049_s1,0,0,,,,-33.320502,-37.170809,-9.867466,-6.818637,18.344976
58205,0,YMS_000050_s1,0,0,,,,-33.550740,-38.934634,-13.550840,-11.161991,26.601555
58206,0,YMS_000051_s1,0,0,,,,-36.415950,-36.634710,-9.623981,-8.333470,19.165537
58207,0,YMS_000052_s1,0,0,,,,55.200734,-12.367759,-2.246785,-0.681236,5.281365


In [41]:
total = pd.merge(total, cov, on="IID")
total

Unnamed: 0,FID_x,IID,PAT_x,MAT_x,SEX_x,PHENOTYPE_x,chr4:958159:T:C_C,FID_y,PAT_y,MAT_y,SEX_y,PHENOTYPE_y,chr4:957252:T:G_G,FID,FATID,MATID,SEX,AGE,PHENO,PC1,PC2,PC3,PC4,PC5
0,0,APGS_000051_s1,0,0,1,2,0.0,0,0,0,1,2,0.0,0,0,0,1.0,,2.0,-11.099685,83.973722,-10.188699,-28.239733,1.512131
1,0,APGS_000111_s1,0,0,2,2,,0,0,0,2,2,1.0,0,0,0,2.0,,2.0,-11.020378,69.201961,2.908329,-15.959031,-1.925108
2,0,APGS_000113_s1,0,0,1,2,0.0,0,0,0,1,2,0.0,0,0,0,1.0,,2.0,-12.691932,85.440708,-10.451702,-23.738136,0.265317
3,0,APGS_000135_s1,0,0,1,2,1.0,0,0,0,1,2,0.0,0,0,0,1.0,,2.0,-11.713387,85.918556,-11.571290,-21.040961,0.389155
4,0,APGS_000186_s1,0,0,1,2,0.0,0,0,0,1,2,0.0,0,0,0,1.0,,2.0,-11.635789,82.221344,-10.439033,-23.815033,-4.063397
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5162,0,VIPD_000429_s1,0,0,2,1,0.0,0,0,0,2,1,0.0,0,0,0,2.0,63.0,1.0,-13.523491,87.708089,-12.864001,-21.945369,-1.873662
5163,0,VIPD_000459_s1,0,0,1,1,0.0,0,0,0,1,1,1.0,0,0,0,1.0,73.0,1.0,-14.882064,85.648896,-10.945596,-20.418271,0.562908
5164,0,VIPD_000488_s1,0,0,2,1,1.0,0,0,0,2,1,1.0,0,0,0,2.0,82.0,1.0,-10.597269,84.769771,-11.325140,-24.225874,-1.196641
5165,0,VIPD_000518_s1,0,0,2,1,0.0,0,0,0,2,1,2.0,0,0,0,2.0,55.0,1.0,-13.600388,84.885882,-8.698538,-26.835134,-0.078363


In [42]:
total = total[(total['PHENOTYPE_x']==1) | (total['PHENOTYPE_x']==2)]
total

Unnamed: 0,FID_x,IID,PAT_x,MAT_x,SEX_x,PHENOTYPE_x,chr4:958159:T:C_C,FID_y,PAT_y,MAT_y,SEX_y,PHENOTYPE_y,chr4:957252:T:G_G,FID,FATID,MATID,SEX,AGE,PHENO,PC1,PC2,PC3,PC4,PC5
0,0,APGS_000051_s1,0,0,1,2,0.0,0,0,0,1,2,0.0,0,0,0,1.0,,2.0,-11.099685,83.973722,-10.188699,-28.239733,1.512131
1,0,APGS_000111_s1,0,0,2,2,,0,0,0,2,2,1.0,0,0,0,2.0,,2.0,-11.020378,69.201961,2.908329,-15.959031,-1.925108
2,0,APGS_000113_s1,0,0,1,2,0.0,0,0,0,1,2,0.0,0,0,0,1.0,,2.0,-12.691932,85.440708,-10.451702,-23.738136,0.265317
3,0,APGS_000135_s1,0,0,1,2,1.0,0,0,0,1,2,0.0,0,0,0,1.0,,2.0,-11.713387,85.918556,-11.571290,-21.040961,0.389155
4,0,APGS_000186_s1,0,0,1,2,0.0,0,0,0,1,2,0.0,0,0,0,1.0,,2.0,-11.635789,82.221344,-10.439033,-23.815033,-4.063397
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5162,0,VIPD_000429_s1,0,0,2,1,0.0,0,0,0,2,1,0.0,0,0,0,2.0,63.0,1.0,-13.523491,87.708089,-12.864001,-21.945369,-1.873662
5163,0,VIPD_000459_s1,0,0,1,1,0.0,0,0,0,1,1,1.0,0,0,0,1.0,73.0,1.0,-14.882064,85.648896,-10.945596,-20.418271,0.562908
5164,0,VIPD_000488_s1,0,0,2,1,1.0,0,0,0,2,1,1.0,0,0,0,2.0,82.0,1.0,-10.597269,84.769771,-11.325140,-24.225874,-1.196641
5165,0,VIPD_000518_s1,0,0,2,1,0.0,0,0,0,2,1,2.0,0,0,0,2.0,55.0,1.0,-13.600388,84.885882,-8.698538,-26.835134,-0.078363


In [43]:
total = total[["IID",'PHENOTYPE_x',"chr4:958159:T:C_C","chr4:957252:T:G_G", "SEX", "AGE", "PC1","PC2","PC3","PC4","PC5"]]

In [44]:
total.columns = ["IID",'PHENOTYPE',"TMEM175_rs34311866_nalls","TMEM175_rs3733344","SEX", "AGE", "PC1","PC2","PC3","PC4","PC5"]
total

Unnamed: 0,IID,PHENOTYPE,TMEM175_rs34311866_nalls,TMEM175_rs3733344,SEX,AGE,PC1,PC2,PC3,PC4,PC5
0,APGS_000051_s1,2,0.0,0.0,1.0,,-11.099685,83.973722,-10.188699,-28.239733,1.512131
1,APGS_000111_s1,2,,1.0,2.0,,-11.020378,69.201961,2.908329,-15.959031,-1.925108
2,APGS_000113_s1,2,0.0,0.0,1.0,,-12.691932,85.440708,-10.451702,-23.738136,0.265317
3,APGS_000135_s1,2,1.0,0.0,1.0,,-11.713387,85.918556,-11.571290,-21.040961,0.389155
4,APGS_000186_s1,2,0.0,0.0,1.0,,-11.635789,82.221344,-10.439033,-23.815033,-4.063397
...,...,...,...,...,...,...,...,...,...,...,...
5162,VIPD_000429_s1,1,0.0,0.0,2.0,63.0,-13.523491,87.708089,-12.864001,-21.945369,-1.873662
5163,VIPD_000459_s1,1,0.0,1.0,1.0,73.0,-14.882064,85.648896,-10.945596,-20.418271,0.562908
5164,VIPD_000488_s1,1,1.0,1.0,2.0,82.0,-10.597269,84.769771,-11.325140,-24.225874,-1.196641
5165,VIPD_000518_s1,1,0.0,2.0,2.0,55.0,-13.600388,84.885882,-8.698538,-26.835134,-0.078363


In [45]:
total.to_csv(f'{WORK_DIR}/TMEM175_EAS/TMEM175_rs34311866_nalls_rs3733344.csv')

In [46]:
%%R
total <- read.csv("~/workspace/ws_files/TMEM175/TMEM175_EAS/TMEM175_rs34311866_nalls_rs3733344.csv")

In [47]:
%%R
head(total)

  X            IID PHENOTYPE TMEM175_rs34311866_nalls TMEM175_rs3733344 SEX AGE
1 0 APGS_000051_s1         2                        0                 0   1  NA
2 1 APGS_000111_s1         2                       NA                 1   2  NA
3 2 APGS_000113_s1         2                        0                 0   1  NA
4 3 APGS_000135_s1         2                        1                 0   1  NA
5 4 APGS_000186_s1         2                        0                 0   1  NA
6 5 APGS_000201_s1         2                        0                 1   1  NA
        PC1      PC2        PC3       PC4        PC5
1 -11.09969 83.97372 -10.188699 -28.23973  1.5121309
2 -11.02038 69.20196   2.908329 -15.95903 -1.9251078
3 -12.69193 85.44071 -10.451702 -23.73814  0.2653173
4 -11.71339 85.91856 -11.571290 -21.04096  0.3891547
5 -11.63579 82.22134 -10.439033 -23.81503 -4.0633972
6 -12.55287 83.85939 -10.935292 -23.49726 -1.3028277


In [48]:
%%R

correlation.model <- lm(TMEM175_rs34311866_nalls ~ TMEM175_rs3733344, data = total)
summary(correlation.model)


Call:
lm(formula = TMEM175_rs34311866_nalls ~ TMEM175_rs3733344, data = total)

Residuals:
    Min      1Q  Median      3Q     Max 
-0.4003 -0.2807 -0.2807  0.5997  1.8389 

Coefficients:
                  Estimate Std. Error t value Pr(>|t|)    
(Intercept)        0.40032    0.01225   32.67   <2e-16 ***
TMEM175_rs3733344 -0.11959    0.01058  -11.31   <2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.4965 on 4279 degrees of freedom
  (842 observations deleted due to missingness)
Multiple R-squared:  0.02901,	Adjusted R-squared:  0.02878 
F-statistic: 127.8 on 1 and 4279 DF,  p-value: < 2.2e-16



In [49]:
%%R
dep_corr<- glm(PHENOTYPE ~ TMEM175_rs34311866_nalls + TMEM175_rs3733344 + SEX + AGE + PC1 + PC2 + PC3 + PC4 + PC5, data=total)
summary(dep_corr)


Call:
glm(formula = PHENOTYPE ~ TMEM175_rs34311866_nalls + TMEM175_rs3733344 + 
    SEX + AGE + PC1 + PC2 + PC3 + PC4 + PC5, data = total)

Coefficients:
                           Estimate Std. Error t value Pr(>|t|)    
(Intercept)               0.6578537  0.2514478   2.616  0.00895 ** 
TMEM175_rs34311866_nalls  0.0263518  0.0196011   1.344  0.17896    
TMEM175_rs3733344        -0.0333941  0.0142200  -2.348  0.01895 *  
SEX                      -0.0780377  0.0193676  -4.029 5.79e-05 ***
AGE                       0.0074881  0.0009197   8.141 6.62e-16 ***
PC1                      -0.0052974  0.0058178  -0.911  0.36264    
PC2                      -0.0031903  0.0035305  -0.904  0.36628    
PC3                      -0.0136178  0.0033581  -4.055 5.19e-05 ***
PC4                      -0.0291088  0.0032354  -8.997  < 2e-16 ***
PC5                       0.0033633  0.0058578   0.574  0.56592    
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

(Dispersion parameter for gau

Person and conditional analysis with glm (rs11736451)

In [50]:
WORK_DIR = "~/workspace/ws_files/TMEM175/"

command = f"""
/home/jupyter/tools/plink \
--bfile {WORK_DIR}/TMEM175_MDE/MDE_TMEM175 \
--chr 4 \
--from-bp 958159 \
--to-bp 958159 \
--recode A \
--out {WORK_DIR}/TMEM175_MDE/TMEM175_rs34311866_nalls \
--double-id
"""

!{command}

PLINK v1.90b6.9 64-bit (4 Mar 2019)            www.cog-genomics.org/plink/1.9/
(C) 2005-2019 Shaun Purcell, Christopher Chang   GNU General Public License v3
Logging to /home/jupyter/workspace/ws_files/TMEM175//TMEM175_MDE/TMEM175_rs34311866_nalls.log.
Options in effect:
  --bfile /home/jupyter/workspace/ws_files/TMEM175//TMEM175_MDE/MDE_TMEM175
  --chr 4
  --double-id
  --from-bp 958159
  --out /home/jupyter/workspace/ws_files/TMEM175//TMEM175_MDE/TMEM175_rs34311866_nalls
  --recode A
  --to-bp 958159

52223 MB RAM detected; reserving 26111 MB for main workspace.
1 out of 1945 variants loaded from .bim file.
581 people (369 males, 212 females) loaded from .fam.
536 phenotype values loaded from .fam.
Using 1 thread (no multithreaded calculations invoked).
Before main variant filters, 581 founders and 0 nonfounders present.
Calculating allele frequencies... 10111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747

In [51]:
WORK_DIR = "~/workspace/ws_files/TMEM175/"

command = f"""
/home/jupyter/tools/plink \
--bfile {WORK_DIR}/TMEM175_MDE/MDE_TMEM175 \
--chr 4 \
--from-bp 983234 \
--to-bp 983234 \
--recode A \
--out {WORK_DIR}/TMEM175_MDE/TMEM175_rs11736451 \
--double-id
"""

!{command}

PLINK v1.90b6.9 64-bit (4 Mar 2019)            www.cog-genomics.org/plink/1.9/
(C) 2005-2019 Shaun Purcell, Christopher Chang   GNU General Public License v3
Logging to /home/jupyter/workspace/ws_files/TMEM175//TMEM175_MDE/TMEM175_rs11736451.log.
Options in effect:
  --bfile /home/jupyter/workspace/ws_files/TMEM175//TMEM175_MDE/MDE_TMEM175
  --chr 4
  --double-id
  --from-bp 983234
  --out /home/jupyter/workspace/ws_files/TMEM175//TMEM175_MDE/TMEM175_rs11736451
  --recode A
  --to-bp 983234

52223 MB RAM detected; reserving 26111 MB for main workspace.
1 out of 1945 variants loaded from .bim file.
581 people (369 males, 212 females) loaded from .fam.
536 phenotype values loaded from .fam.
Using 1 thread (no multithreaded calculations invoked).
Before main variant filters, 581 founders and 0 nonfounders present.
Calculating allele frequencies... 10111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808

In [52]:
rs34311866 = pd.read_csv(f'{WORK_DIR}/TMEM175_MDE/TMEM175_rs34311866_nalls.raw',sep=" ")
rs11736451 = pd.read_csv(f'{WORK_DIR}/TMEM175_MDE/TMEM175_rs11736451.raw',sep=" ")

In [53]:
total = pd.merge(rs34311866, rs11736451, on="IID")
total

Unnamed: 0,FID_x,IID,PAT_x,MAT_x,SEX_x,PHENOTYPE_x,chr4:958159:T:C_C,FID_y,PAT_y,MAT_y,SEX_y,PHENOTYPE_y,chr4:983234:A:G_G
0,0,APGS_000259_s1,0,0,1,2,0.0,0,0,0,1,2,0.0
1,0,APGS_000450_s1,0,0,1,2,0.0,0,0,0,1,2,1.0
2,0,APGS_000533_s1,0,0,1,2,0.0,0,0,0,1,2,0.0
3,0,APGS_001032_s1,0,0,1,2,0.0,0,0,0,1,2,0.0
4,0,APGS_001052_s1,0,0,1,2,0.0,0,0,0,1,2,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
576,0,UPENNU19_000213_s1,0,0,1,2,1.0,0,0,0,1,2,2.0
577,0,VIPD_000392_s1,0,0,1,2,1.0,0,0,0,1,2,1.0
578,0,VIPD_000513_s1,0,0,2,2,0.0,0,0,0,2,2,1.0
579,0,VIPD_000686_s1,0,0,1,2,1.0,0,0,0,1,2,1.0


In [54]:
cov = pd.read_csv(f'{WORK_DIR}/TMEM175_MDE/MDE_covariate_file.txt', sep = '\t')
cov

Unnamed: 0,FID,IID,FATID,MATID,SEX,AGE,PHENO,PC1,PC2,PC3,PC4,PC5
0,0,APGS_000002_s1,0,0,,,,-29.483298,-38.473005,-9.398904,-10.842865,25.744648
1,0,APGS_000003_s1,0,0,,,,-33.068958,-34.288862,-11.116951,-8.056146,22.924043
2,0,APGS_000004_s1,0,0,,,,-28.204441,-30.969481,-15.883093,-7.004351,18.647007
3,0,APGS_000005_s1,0,0,,,,-35.039753,-39.565822,-12.522753,-9.339179,21.956533
4,0,APGS_000006_s1,0,0,,,,-30.279907,-34.988905,-10.664628,-7.407177,26.357548
...,...,...,...,...,...,...,...,...,...,...,...,...
58204,0,YMS_000049_s1,0,0,,,,-33.320502,-37.170809,-9.867466,-6.818637,18.344976
58205,0,YMS_000050_s1,0,0,,,,-33.550740,-38.934634,-13.550840,-11.161991,26.601555
58206,0,YMS_000051_s1,0,0,,,,-36.415950,-36.634710,-9.623981,-8.333470,19.165537
58207,0,YMS_000052_s1,0,0,,,,55.200734,-12.367759,-2.246785,-0.681236,5.281365


In [55]:
total = pd.merge(total, cov, on="IID")
total

Unnamed: 0,FID_x,IID,PAT_x,MAT_x,SEX_x,PHENOTYPE_x,chr4:958159:T:C_C,FID_y,PAT_y,MAT_y,SEX_y,PHENOTYPE_y,chr4:983234:A:G_G,FID,FATID,MATID,SEX,AGE,PHENO,PC1,PC2,PC3,PC4,PC5
0,0,APGS_000259_s1,0,0,1,2,0.0,0,0,0,1,2,0.0,0,0,0,1.0,,2.0,-27.473365,-35.756649,-2.366968,-14.380825,-18.192957
1,0,APGS_000450_s1,0,0,1,2,0.0,0,0,0,1,2,1.0,0,0,0,1.0,,2.0,-27.280911,-26.394449,3.902227,-9.422375,-7.879637
2,0,APGS_000533_s1,0,0,1,2,0.0,0,0,0,1,2,0.0,0,0,0,1.0,,2.0,-26.885886,-36.017059,-1.258755,-16.204448,-17.018512
3,0,APGS_001032_s1,0,0,1,2,0.0,0,0,0,1,2,0.0,0,0,0,1.0,,2.0,-25.519682,-34.281074,3.505687,-7.390488,-14.634728
4,0,APGS_001052_s1,0,0,1,2,0.0,0,0,0,1,2,0.0,0,0,0,1.0,,2.0,-23.508154,-35.614394,-2.816420,-15.496602,-15.969493
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
576,0,UPENNU19_000213_s1,0,0,1,2,1.0,0,0,0,1,2,2.0,0,0,0,1.0,60.952778,2.0,-31.049844,-37.984529,-4.238697,-12.146182,-13.399832
577,0,VIPD_000392_s1,0,0,1,2,1.0,0,0,0,1,2,1.0,0,0,0,1.0,82.000000,2.0,-19.256276,-35.006982,-0.734883,-14.500160,-17.138191
578,0,VIPD_000513_s1,0,0,2,2,0.0,0,0,0,2,2,1.0,0,0,0,2.0,70.000000,2.0,-29.819688,-33.986530,-0.535363,-12.587826,-20.197365
579,0,VIPD_000686_s1,0,0,1,2,1.0,0,0,0,1,2,1.0,0,0,0,1.0,,2.0,-21.248868,-30.072230,-2.408483,-14.194412,-11.072102


In [56]:
total = total[(total['PHENOTYPE_x']==1) | (total['PHENOTYPE_x']==2)]
total

Unnamed: 0,FID_x,IID,PAT_x,MAT_x,SEX_x,PHENOTYPE_x,chr4:958159:T:C_C,FID_y,PAT_y,MAT_y,SEX_y,PHENOTYPE_y,chr4:983234:A:G_G,FID,FATID,MATID,SEX,AGE,PHENO,PC1,PC2,PC3,PC4,PC5
0,0,APGS_000259_s1,0,0,1,2,0.0,0,0,0,1,2,0.0,0,0,0,1.0,,2.0,-27.473365,-35.756649,-2.366968,-14.380825,-18.192957
1,0,APGS_000450_s1,0,0,1,2,0.0,0,0,0,1,2,1.0,0,0,0,1.0,,2.0,-27.280911,-26.394449,3.902227,-9.422375,-7.879637
2,0,APGS_000533_s1,0,0,1,2,0.0,0,0,0,1,2,0.0,0,0,0,1.0,,2.0,-26.885886,-36.017059,-1.258755,-16.204448,-17.018512
3,0,APGS_001032_s1,0,0,1,2,0.0,0,0,0,1,2,0.0,0,0,0,1.0,,2.0,-25.519682,-34.281074,3.505687,-7.390488,-14.634728
4,0,APGS_001052_s1,0,0,1,2,0.0,0,0,0,1,2,0.0,0,0,0,1.0,,2.0,-23.508154,-35.614394,-2.816420,-15.496602,-15.969493
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
576,0,UPENNU19_000213_s1,0,0,1,2,1.0,0,0,0,1,2,2.0,0,0,0,1.0,60.952778,2.0,-31.049844,-37.984529,-4.238697,-12.146182,-13.399832
577,0,VIPD_000392_s1,0,0,1,2,1.0,0,0,0,1,2,1.0,0,0,0,1.0,82.000000,2.0,-19.256276,-35.006982,-0.734883,-14.500160,-17.138191
578,0,VIPD_000513_s1,0,0,2,2,0.0,0,0,0,2,2,1.0,0,0,0,2.0,70.000000,2.0,-29.819688,-33.986530,-0.535363,-12.587826,-20.197365
579,0,VIPD_000686_s1,0,0,1,2,1.0,0,0,0,1,2,1.0,0,0,0,1.0,,2.0,-21.248868,-30.072230,-2.408483,-14.194412,-11.072102


In [57]:
total = total[["IID",'PHENOTYPE_x',"chr4:958159:T:C_C","chr4:983234:A:G_G", "SEX", "AGE", "PC1","PC2","PC3","PC4","PC5"]]

In [58]:
total.columns = ["IID",'PHENOTYPE',"TMEM175_rs34311866_nalls","TMEM175_rs11736451","SEX", "AGE", "PC1","PC2","PC3","PC4","PC5"]
total

Unnamed: 0,IID,PHENOTYPE,TMEM175_rs34311866_nalls,TMEM175_rs11736451,SEX,AGE,PC1,PC2,PC3,PC4,PC5
0,APGS_000259_s1,2,0.0,0.0,1.0,,-27.473365,-35.756649,-2.366968,-14.380825,-18.192957
1,APGS_000450_s1,2,0.0,1.0,1.0,,-27.280911,-26.394449,3.902227,-9.422375,-7.879637
2,APGS_000533_s1,2,0.0,0.0,1.0,,-26.885886,-36.017059,-1.258755,-16.204448,-17.018512
3,APGS_001032_s1,2,0.0,0.0,1.0,,-25.519682,-34.281074,3.505687,-7.390488,-14.634728
4,APGS_001052_s1,2,0.0,0.0,1.0,,-23.508154,-35.614394,-2.816420,-15.496602,-15.969493
...,...,...,...,...,...,...,...,...,...,...,...
576,UPENNU19_000213_s1,2,1.0,2.0,1.0,60.952778,-31.049844,-37.984529,-4.238697,-12.146182,-13.399832
577,VIPD_000392_s1,2,1.0,1.0,1.0,82.000000,-19.256276,-35.006982,-0.734883,-14.500160,-17.138191
578,VIPD_000513_s1,2,0.0,1.0,2.0,70.000000,-29.819688,-33.986530,-0.535363,-12.587826,-20.197365
579,VIPD_000686_s1,2,1.0,1.0,1.0,,-21.248868,-30.072230,-2.408483,-14.194412,-11.072102


In [59]:
total.to_csv(f'{WORK_DIR}/TMEM175_MDE/TMEM175_rs34311866_nalls_rs11736451.csv')

In [60]:
%%R
total <- read.csv("~/workspace/ws_files/TMEM175/TMEM175_MDE/TMEM175_rs34311866_nalls_rs11736451.csv")

In [61]:
%%R
head(total)

  X            IID PHENOTYPE TMEM175_rs34311866_nalls TMEM175_rs11736451 SEX
1 0 APGS_000259_s1         2                        0                  0   1
2 1 APGS_000450_s1         2                        0                  1   1
3 2 APGS_000533_s1         2                        0                  0   1
4 3 APGS_001032_s1         2                        0                  0   1
5 4 APGS_001052_s1         2                        0                  0   1
6 5 APGS_001085_s1         2                        0                  0   2
  AGE       PC1       PC2       PC3        PC4        PC5
1  NA -27.47337 -35.75665 -2.366968 -14.380825 -18.192957
2  NA -27.28091 -26.39445  3.902227  -9.422375  -7.879637
3  NA -26.88589 -36.01706 -1.258755 -16.204448 -17.018512
4  NA -25.51968 -34.28107  3.505687  -7.390488 -14.634728
5  NA -23.50815 -35.61439 -2.816420 -15.496602 -15.969493
6  NA -27.14900 -34.28756 -2.630555 -15.052248 -16.566321


In [62]:
%%R

correlation.model <- lm(TMEM175_rs34311866_nalls ~ TMEM175_rs11736451, data = total)
summary(correlation.model)


Call:
lm(formula = TMEM175_rs34311866_nalls ~ TMEM175_rs11736451, data = total)

Residuals:
     Min       1Q   Median       3Q      Max 
-1.64573 -0.21563 -0.21563  0.06932  1.78437 

Coefficients:
                   Estimate Std. Error t value Pr(>|t|)    
(Intercept)         0.21563    0.02574   8.376 5.77e-16 ***
TMEM175_rs11736451  0.71505    0.04180  17.107  < 2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.487 on 491 degrees of freedom
  (43 observations deleted due to missingness)
Multiple R-squared:  0.3734,	Adjusted R-squared:  0.3722 
F-statistic: 292.6 on 1 and 491 DF,  p-value: < 2.2e-16



In [63]:
%%R
dep_corr<- glm(PHENOTYPE ~ TMEM175_rs34311866_nalls + TMEM175_rs11736451 + SEX + AGE + PC1 + PC2 + PC3 + PC4 + PC5, data=total)
summary(dep_corr)


Call:
glm(formula = PHENOTYPE ~ TMEM175_rs34311866_nalls + TMEM175_rs11736451 + 
    SEX + AGE + PC1 + PC2 + PC3 + PC4 + PC5, data = total)

Coefficients:
                          Estimate Std. Error t value Pr(>|t|)    
(Intercept)               1.186923   0.214160   5.542 6.05e-08 ***
TMEM175_rs34311866_nalls  0.047466   0.050130   0.947   0.3444    
TMEM175_rs11736451        0.104750   0.058520   1.790   0.0744 .  
SEX                      -0.087899   0.050259  -1.749   0.0812 .  
AGE                       0.013538   0.002143   6.318 8.42e-10 ***
PC1                       0.009936   0.004588   2.166   0.0311 *  
PC2                       0.010594   0.007843   1.351   0.1777    
PC3                      -0.013670   0.009357  -1.461   0.1449    
PC4                       0.003441   0.012337   0.279   0.7805    
PC5                      -0.015837   0.009017  -1.756   0.0799 .  
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

(Dispersion parameter for gaussian fami

Person and conditional analysis with glm (rs41286661)

In [64]:
WORK_DIR = "~/workspace/ws_files/TMEM175/"

command = f"""
/home/jupyter/tools/plink \
--bfile {WORK_DIR}/TMEM175_SAS/SAS_TMEM175 \
--chr 4 \
--from-bp 958159 \
--to-bp 958159 \
--recode A \
--out {WORK_DIR}/TMEM175_SAS/TMEM175_rs34311866_nalls \
--double-id
"""

!{command}

PLINK v1.90b6.9 64-bit (4 Mar 2019)            www.cog-genomics.org/plink/1.9/
(C) 2005-2019 Shaun Purcell, Christopher Chang   GNU General Public License v3
Logging to /home/jupyter/workspace/ws_files/TMEM175//TMEM175_SAS/TMEM175_rs34311866_nalls.log.
Options in effect:
  --bfile /home/jupyter/workspace/ws_files/TMEM175//TMEM175_SAS/SAS_TMEM175
  --chr 4
  --double-id
  --from-bp 958159
  --out /home/jupyter/workspace/ws_files/TMEM175//TMEM175_SAS/TMEM175_rs34311866_nalls
  --recode A
  --to-bp 958159

52223 MB RAM detected; reserving 26111 MB for main workspace.
1 out of 1728 variants loaded from .bim file.
635 people (408 males, 227 females) loaded from .fam.
609 phenotype values loaded from .fam.
Using 1 thread (no multithreaded calculations invoked).
Before main variant filters, 635 founders and 0 nonfounders present.
Calculating allele frequencies... 10111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747

In [65]:
WORK_DIR = "~/workspace/ws_files/TMEM175/"

command = f"""
/home/jupyter/tools/plink \
--bfile {WORK_DIR}/TMEM175_SAS/SAS_TMEM175 \
--chr 4 \
--from-bp 963496 \
--to-bp 963496 \
--recode A \
--out {WORK_DIR}/TMEM175_SAS/TMEM175_rs41286661 \
--double-id
"""

!{command}

PLINK v1.90b6.9 64-bit (4 Mar 2019)            www.cog-genomics.org/plink/1.9/
(C) 2005-2019 Shaun Purcell, Christopher Chang   GNU General Public License v3
Logging to /home/jupyter/workspace/ws_files/TMEM175//TMEM175_SAS/TMEM175_rs41286661.log.
Options in effect:
  --bfile /home/jupyter/workspace/ws_files/TMEM175//TMEM175_SAS/SAS_TMEM175
  --chr 4
  --double-id
  --from-bp 963496
  --out /home/jupyter/workspace/ws_files/TMEM175//TMEM175_SAS/TMEM175_rs41286661
  --recode A
  --to-bp 963496

52223 MB RAM detected; reserving 26111 MB for main workspace.
1 out of 1728 variants loaded from .bim file.
635 people (408 males, 227 females) loaded from .fam.
609 phenotype values loaded from .fam.
Using 1 thread (no multithreaded calculations invoked).
Before main variant filters, 635 founders and 0 nonfounders present.
Calculating allele frequencies... 10111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808

In [119]:
rs34311866 = pd.read_csv(f'{WORK_DIR}/TMEM175_SAS/TMEM175_rs34311866_nalls.raw',sep=" ")
rs41286661 = pd.read_csv(f'{WORK_DIR}/TMEM175_SAS/TMEM175_rs41286661.raw',sep=" ")

In [120]:
total = pd.merge(rs34311866, rs41286661, on="IID")
total

Unnamed: 0,FID_x,IID,PAT_x,MAT_x,SEX_x,PHENOTYPE_x,chr4:958159:T:C_C,FID_y,PAT_y,MAT_y,SEX_y,PHENOTYPE_y,chr4:963496:C:T_T
0,0,APGS_000062_s1,0,0,1,2,1.0,0,0,0,1,2,0.0
1,0,APGS_000119_s1,0,0,1,2,0.0,0,0,0,1,2,0.0
2,0,APGS_000331_s1,0,0,1,2,1.0,0,0,0,1,2,0.0
3,0,APGS_000378_s1,0,0,2,2,0.0,0,0,0,2,2,2.0
4,0,APGS_000454_s1,0,0,2,2,0.0,0,0,0,2,2,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
630,0,VIPD_000468_s1,0,0,1,2,0.0,0,0,0,1,2,1.0
631,0,VIPD_000528_s1,0,0,2,1,2.0,0,0,0,2,1,1.0
632,0,VIPD_000621_s1,0,0,2,2,0.0,0,0,0,2,2,0.0
633,0,VIPD_000685_s1,0,0,1,2,0.0,0,0,0,1,2,0.0


In [121]:
cov = pd.read_csv(f'{WORK_DIR}/TMEM175_SAS/SAS_covariate_file.txt', sep = '\t')
cov

Unnamed: 0,FID,IID,FATID,MATID,SEX,AGE,PHENO,PC1,PC2,PC3,PC4,PC5
0,0,APGS_000002_s1,0,0,,,,-29.483298,-38.473005,-9.398904,-10.842865,25.744648
1,0,APGS_000003_s1,0,0,,,,-33.068958,-34.288862,-11.116951,-8.056146,22.924043
2,0,APGS_000004_s1,0,0,,,,-28.204441,-30.969481,-15.883093,-7.004351,18.647007
3,0,APGS_000005_s1,0,0,,,,-35.039753,-39.565822,-12.522753,-9.339179,21.956533
4,0,APGS_000006_s1,0,0,,,,-30.279907,-34.988905,-10.664628,-7.407177,26.357548
...,...,...,...,...,...,...,...,...,...,...,...,...
58204,0,YMS_000049_s1,0,0,,,,-33.320502,-37.170809,-9.867466,-6.818637,18.344976
58205,0,YMS_000050_s1,0,0,,,,-33.550740,-38.934634,-13.550840,-11.161991,26.601555
58206,0,YMS_000051_s1,0,0,,,,-36.415950,-36.634710,-9.623981,-8.333470,19.165537
58207,0,YMS_000052_s1,0,0,,,,55.200734,-12.367759,-2.246785,-0.681236,5.281365


In [122]:
total = pd.merge(total, cov, on="IID")
total

Unnamed: 0,FID_x,IID,PAT_x,MAT_x,SEX_x,PHENOTYPE_x,chr4:958159:T:C_C,FID_y,PAT_y,MAT_y,SEX_y,PHENOTYPE_y,chr4:963496:C:T_T,FID,FATID,MATID,SEX,AGE,PHENO,PC1,PC2,PC3,PC4,PC5
0,0,APGS_000062_s1,0,0,1,2,1.0,0,0,0,1,2,0.0,0,0,0,1.0,,2.0,-18.643078,10.448041,56.455919,19.558080,2.613308
1,0,APGS_000119_s1,0,0,1,2,0.0,0,0,0,1,2,0.0,0,0,0,1.0,,2.0,-17.346120,11.654072,54.213536,17.490254,-0.899534
2,0,APGS_000331_s1,0,0,1,2,1.0,0,0,0,1,2,0.0,0,0,0,1.0,,2.0,-17.542875,9.571579,54.761073,20.988884,-3.120079
3,0,APGS_000378_s1,0,0,2,2,0.0,0,0,0,2,2,2.0,0,0,0,2.0,,2.0,-18.133737,10.699043,54.508907,21.545525,-0.074081
4,0,APGS_000454_s1,0,0,2,2,0.0,0,0,0,2,2,1.0,0,0,0,2.0,,2.0,-18.943754,11.428258,55.862024,17.373778,2.199425
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
630,0,VIPD_000468_s1,0,0,1,2,0.0,0,0,0,1,2,1.0,0,0,0,1.0,77.0,2.0,-23.470497,-4.351335,34.866203,13.610903,-2.024323
631,0,VIPD_000528_s1,0,0,2,1,2.0,0,0,0,2,1,1.0,0,0,0,2.0,56.0,1.0,-19.677553,7.093584,45.937983,17.255940,0.348782
632,0,VIPD_000621_s1,0,0,2,2,0.0,0,0,0,2,2,0.0,0,0,0,2.0,60.0,2.0,-19.140984,8.864642,56.528287,21.219827,1.010630
633,0,VIPD_000685_s1,0,0,1,2,0.0,0,0,0,1,2,0.0,0,0,0,1.0,,2.0,-20.743509,-5.905185,37.703391,11.985115,-1.618144


In [123]:
total = total[(total['PHENOTYPE_x']==1) | (total['PHENOTYPE_x']==2)]
total

Unnamed: 0,FID_x,IID,PAT_x,MAT_x,SEX_x,PHENOTYPE_x,chr4:958159:T:C_C,FID_y,PAT_y,MAT_y,SEX_y,PHENOTYPE_y,chr4:963496:C:T_T,FID,FATID,MATID,SEX,AGE,PHENO,PC1,PC2,PC3,PC4,PC5
0,0,APGS_000062_s1,0,0,1,2,1.0,0,0,0,1,2,0.0,0,0,0,1.0,,2.0,-18.643078,10.448041,56.455919,19.558080,2.613308
1,0,APGS_000119_s1,0,0,1,2,0.0,0,0,0,1,2,0.0,0,0,0,1.0,,2.0,-17.346120,11.654072,54.213536,17.490254,-0.899534
2,0,APGS_000331_s1,0,0,1,2,1.0,0,0,0,1,2,0.0,0,0,0,1.0,,2.0,-17.542875,9.571579,54.761073,20.988884,-3.120079
3,0,APGS_000378_s1,0,0,2,2,0.0,0,0,0,2,2,2.0,0,0,0,2.0,,2.0,-18.133737,10.699043,54.508907,21.545525,-0.074081
4,0,APGS_000454_s1,0,0,2,2,0.0,0,0,0,2,2,1.0,0,0,0,2.0,,2.0,-18.943754,11.428258,55.862024,17.373778,2.199425
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
630,0,VIPD_000468_s1,0,0,1,2,0.0,0,0,0,1,2,1.0,0,0,0,1.0,77.0,2.0,-23.470497,-4.351335,34.866203,13.610903,-2.024323
631,0,VIPD_000528_s1,0,0,2,1,2.0,0,0,0,2,1,1.0,0,0,0,2.0,56.0,1.0,-19.677553,7.093584,45.937983,17.255940,0.348782
632,0,VIPD_000621_s1,0,0,2,2,0.0,0,0,0,2,2,0.0,0,0,0,2.0,60.0,2.0,-19.140984,8.864642,56.528287,21.219827,1.010630
633,0,VIPD_000685_s1,0,0,1,2,0.0,0,0,0,1,2,0.0,0,0,0,1.0,,2.0,-20.743509,-5.905185,37.703391,11.985115,-1.618144


In [124]:
total = total[["IID",'PHENOTYPE_x',"chr4:958159:T:C_C","chr4:963496:C:T_T", "SEX", "AGE", "PC1","PC2","PC3","PC4","PC5"]]

In [125]:
total.columns = ["IID",'PHENOTYPE',"TMEM175_rs34311866_nalls","TMEM175_rs41286661","SEX", "AGE", "PC1","PC2","PC3","PC4","PC5"]
total

Unnamed: 0,IID,PHENOTYPE,TMEM175_rs34311866_nalls,TMEM175_rs41286661,SEX,AGE,PC1,PC2,PC3,PC4,PC5
0,APGS_000062_s1,2,1.0,0.0,1.0,,-18.643078,10.448041,56.455919,19.558080,2.613308
1,APGS_000119_s1,2,0.0,0.0,1.0,,-17.346120,11.654072,54.213536,17.490254,-0.899534
2,APGS_000331_s1,2,1.0,0.0,1.0,,-17.542875,9.571579,54.761073,20.988884,-3.120079
3,APGS_000378_s1,2,0.0,2.0,2.0,,-18.133737,10.699043,54.508907,21.545525,-0.074081
4,APGS_000454_s1,2,0.0,1.0,2.0,,-18.943754,11.428258,55.862024,17.373778,2.199425
...,...,...,...,...,...,...,...,...,...,...,...
630,VIPD_000468_s1,2,0.0,1.0,1.0,77.0,-23.470497,-4.351335,34.866203,13.610903,-2.024323
631,VIPD_000528_s1,1,2.0,1.0,2.0,56.0,-19.677553,7.093584,45.937983,17.255940,0.348782
632,VIPD_000621_s1,2,0.0,0.0,2.0,60.0,-19.140984,8.864642,56.528287,21.219827,1.010630
633,VIPD_000685_s1,2,0.0,0.0,1.0,,-20.743509,-5.905185,37.703391,11.985115,-1.618144


In [126]:
total.to_csv(f'{WORK_DIR}/TMEM175_SAS/TMEM175_rs34311866_nalls_rs41286661.csv')

In [127]:
%%R
total <- read.csv("~/workspace/ws_files/TMEM175/TMEM175_SAS/TMEM175_rs34311866_nalls_rs41286661.csv")

In [128]:
%%R
head(total)

  X            IID PHENOTYPE TMEM175_rs34311866_nalls TMEM175_rs41286661 SEX
1 0 APGS_000062_s1         2                        1                  0   1
2 1 APGS_000119_s1         2                        0                  0   1
3 2 APGS_000331_s1         2                        1                  0   1
4 3 APGS_000378_s1         2                        0                  2   2
5 4 APGS_000454_s1         2                        0                  1   2
6 5 APGS_000502_s1         2                        1                  0   2
  AGE       PC1       PC2      PC3      PC4         PC5
1  NA -18.64308 10.448041 56.45592 19.55808  2.61330835
2  NA -17.34612 11.654072 54.21354 17.49025 -0.89953411
3  NA -17.54287  9.571579 54.76107 20.98888 -3.12007878
4  NA -18.13374 10.699043 54.50891 21.54553 -0.07408062
5  NA -18.94375 11.428258 55.86202 17.37378  2.19942509
6  NA -21.84607 -6.697138 32.28942 14.85375  4.39039445


In [129]:
%%R

correlation.model <- lm(TMEM175_rs34311866_nalls ~ TMEM175_rs41286661, data = total)
summary(correlation.model)


Call:
lm(formula = TMEM175_rs34311866_nalls ~ TMEM175_rs41286661, data = total)

Residuals:
    Min      1Q  Median      3Q     Max 
-0.8047 -0.6437  0.2758  0.3563  1.3563 

Coefficients:
                   Estimate Std. Error t value Pr(>|t|)    
(Intercept)         0.64372    0.03587  17.947   <2e-16 ***
TMEM175_rs41286661  0.08051    0.05049   1.594    0.111    
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.6771 on 547 degrees of freedom
  (60 observations deleted due to missingness)
Multiple R-squared:  0.004626,	Adjusted R-squared:  0.002806 
F-statistic: 2.542 on 1 and 547 DF,  p-value: 0.1114



In [130]:
%%R
dep_corr<- glm(PHENOTYPE ~ TMEM175_rs34311866_nalls + TMEM175_rs41286661 + SEX + AGE + PC1 + PC2 + PC3 + PC4 + PC5, data=total)
summary(dep_corr)


Call:
glm(formula = PHENOTYPE ~ TMEM175_rs34311866_nalls + TMEM175_rs41286661 + 
    SEX + AGE + PC1 + PC2 + PC3 + PC4 + PC5, data = total)

Coefficients:
                          Estimate Std. Error t value Pr(>|t|)    
(Intercept)               1.509685   0.330146   4.573 7.46e-06 ***
TMEM175_rs34311866_nalls  0.110888   0.040011   2.771  0.00599 ** 
TMEM175_rs41286661        0.078018   0.045723   1.706  0.08915 .  
SEX                      -0.118111   0.055894  -2.113  0.03555 *  
AGE                       0.008347   0.001917   4.354 1.93e-05 ***
PC1                      -0.010355   0.011495  -0.901  0.36851    
PC2                      -0.005929   0.004349  -1.363  0.17393    
PC3                      -0.017942   0.007805  -2.299  0.02231 *  
PC4                       0.018871   0.013792   1.368  0.17241    
PC5                       0.015171   0.011597   1.308  0.19196    
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

(Dispersion parameter for gaussian fami

Person and conditional analysis with glm (rs6599388)

In [78]:
WORK_DIR = "~/workspace/ws_files/TMEM175/"

command = f"""
/home/jupyter/tools/plink \
--bfile {WORK_DIR}/TMEM175_AJ/AJ_TMEM175 \
--chr 4 \
--from-bp 958159 \
--to-bp 958159 \
--recode A \
--out {WORK_DIR}/TMEM175_AJ/TMEM175_rs34311866_nalls \
--double-id
"""

!{command}

PLINK v1.90b6.9 64-bit (4 Mar 2019)            www.cog-genomics.org/plink/1.9/
(C) 2005-2019 Shaun Purcell, Christopher Chang   GNU General Public License v3
Logging to /home/jupyter/workspace/ws_files/TMEM175//TMEM175_AJ/TMEM175_rs34311866_nalls.log.
Options in effect:
  --bfile /home/jupyter/workspace/ws_files/TMEM175//TMEM175_AJ/AJ_TMEM175
  --chr 4
  --double-id
  --from-bp 958159
  --out /home/jupyter/workspace/ws_files/TMEM175//TMEM175_AJ/TMEM175_rs34311866_nalls
  --recode A
  --to-bp 958159

52223 MB RAM detected; reserving 26111 MB for main workspace.
1 out of 1285 variants loaded from .bim file.
2655 people (1648 males, 1007 females) loaded from .fam.
1703 phenotype values loaded from .fam.
Using 1 thread (no multithreaded calculations invoked).
Before main variant filters, 2655 founders and 0 nonfounders present.
Calculating allele frequencies... 1011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374

In [79]:
WORK_DIR = "~/workspace/ws_files/TMEM175/"

command = f"""
/home/jupyter/tools/plink \
--bfile {WORK_DIR}/TMEM175_AJ/AJ_TMEM175 \
--chr 4 \
--from-bp 945299 \
--to-bp 945299 \
--recode A \
--out {WORK_DIR}/TMEM175_AJ/TMEM175_rs6599388 \
--double-id
"""

!{command}

PLINK v1.90b6.9 64-bit (4 Mar 2019)            www.cog-genomics.org/plink/1.9/
(C) 2005-2019 Shaun Purcell, Christopher Chang   GNU General Public License v3
Logging to /home/jupyter/workspace/ws_files/TMEM175//TMEM175_AJ/TMEM175_rs6599388.log.
Options in effect:
  --bfile /home/jupyter/workspace/ws_files/TMEM175//TMEM175_AJ/AJ_TMEM175
  --chr 4
  --double-id
  --from-bp 945299
  --out /home/jupyter/workspace/ws_files/TMEM175//TMEM175_AJ/TMEM175_rs6599388
  --recode A
  --to-bp 945299

52223 MB RAM detected; reserving 26111 MB for main workspace.
1 out of 1285 variants loaded from .bim file.
2655 people (1648 males, 1007 females) loaded from .fam.
1703 phenotype values loaded from .fam.
Using 1 thread (no multithreaded calculations invoked).
Before main variant filters, 2655 founders and 0 nonfounders present.
Calculating allele frequencies... 101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081

In [80]:
rs34311866 = pd.read_csv(f'{WORK_DIR}/TMEM175_AJ/TMEM175_rs34311866_nalls.raw',sep=" ")
rs6599388 = pd.read_csv(f'{WORK_DIR}/TMEM175_AJ/TMEM175_rs6599388.raw',sep=" ")

In [81]:
total = pd.merge(rs34311866, rs6599388, on="IID")
total

Unnamed: 0,FID_x,IID,PAT_x,MAT_x,SEX_x,PHENOTYPE_x,chr4:958159:T:C_C,FID_y,PAT_y,MAT_y,SEX_y,PHENOTYPE_y,chr4:945299:C:T_C
0,0,APGS_000021_s1,0,0,1,2,2.0,0,0,0,1,2,0.0
1,0,APGS_000475_s1,0,0,1,2,0.0,0,0,0,1,2,2.0
2,0,APGS_000486_s1,0,0,1,2,0.0,0,0,0,1,2,1.0
3,0,APGS_000646_s1,0,0,2,2,1.0,0,0,0,2,2,0.0
4,0,APGS_000808_s1,0,0,1,2,0.0,0,0,0,1,2,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2650,0,VIPD_000746_s1,0,0,1,2,1.0,0,0,0,1,2,1.0
2651,0,VIPD_000748_s1,0,0,1,2,1.0,0,0,0,1,2,1.0
2652,0,YMS_000030_s1,0,0,1,2,1.0,0,0,0,1,2,0.0
2653,0,YMS_000031_s1,0,0,1,1,1.0,0,0,0,1,1,1.0


In [82]:
cov = pd.read_csv(f'{WORK_DIR}/TMEM175_AJ/AJ_covariate_file.txt', sep = '\t')
cov

Unnamed: 0,FID,IID,FATID,MATID,SEX,AGE,PHENO,PC1,PC2,PC3,PC4,PC5
0,0,APGS_000002_s1,0,0,,,,-29.483298,-38.473005,-9.398904,-10.842865,25.744648
1,0,APGS_000003_s1,0,0,,,,-33.068958,-34.288862,-11.116951,-8.056146,22.924043
2,0,APGS_000004_s1,0,0,,,,-28.204441,-30.969481,-15.883093,-7.004351,18.647007
3,0,APGS_000005_s1,0,0,,,,-35.039753,-39.565822,-12.522753,-9.339179,21.956533
4,0,APGS_000006_s1,0,0,,,,-30.279907,-34.988905,-10.664628,-7.407177,26.357548
...,...,...,...,...,...,...,...,...,...,...,...,...
58204,0,YMS_000049_s1,0,0,,,,-33.320502,-37.170809,-9.867466,-6.818637,18.344976
58205,0,YMS_000050_s1,0,0,,,,-33.550740,-38.934634,-13.550840,-11.161991,26.601555
58206,0,YMS_000051_s1,0,0,,,,-36.415950,-36.634710,-9.623981,-8.333470,19.165537
58207,0,YMS_000052_s1,0,0,,,,55.200734,-12.367759,-2.246785,-0.681236,5.281365


In [83]:
total = pd.merge(total, cov, on="IID")
total

Unnamed: 0,FID_x,IID,PAT_x,MAT_x,SEX_x,PHENOTYPE_x,chr4:958159:T:C_C,FID_y,PAT_y,MAT_y,SEX_y,PHENOTYPE_y,chr4:945299:C:T_C,FID,FATID,MATID,SEX,AGE,PHENO,PC1,PC2,PC3,PC4,PC5
0,0,APGS_000021_s1,0,0,1,2,2.0,0,0,0,1,2,0.0,0,0,0,1.0,,2.0,-28.028682,-35.735363,-6.580637,-17.501411,-23.026102
1,0,APGS_000475_s1,0,0,1,2,0.0,0,0,0,1,2,2.0,0,0,0,1.0,,2.0,-28.154728,-36.708128,-5.730325,-20.750956,-21.729629
2,0,APGS_000486_s1,0,0,1,2,0.0,0,0,0,1,2,1.0,0,0,0,1.0,,2.0,-29.810620,-37.561248,-9.373941,-16.773748,-27.482223
3,0,APGS_000646_s1,0,0,2,2,1.0,0,0,0,2,2,0.0,0,0,0,2.0,,2.0,-31.967732,-34.309571,-8.603258,-17.143810,-21.027547
4,0,APGS_000808_s1,0,0,1,2,0.0,0,0,0,1,2,1.0,0,0,0,1.0,,2.0,-29.929420,-33.911640,-9.610071,-18.087631,-24.559511
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2650,0,VIPD_000746_s1,0,0,1,2,1.0,0,0,0,1,2,1.0,0,0,0,1.0,74.0,2.0,-31.455498,-35.058387,-9.777408,-18.399794,-27.254806
2651,0,VIPD_000748_s1,0,0,1,2,1.0,0,0,0,1,2,1.0,0,0,0,1.0,73.0,2.0,-31.319358,-39.960596,-10.932457,-17.389794,-1.963005
2652,0,YMS_000030_s1,0,0,1,2,1.0,0,0,0,1,2,0.0,0,0,0,1.0,72.0,2.0,-29.635489,-34.956373,-6.466043,-14.761441,-20.758634
2653,0,YMS_000031_s1,0,0,1,1,1.0,0,0,0,1,1,1.0,0,0,0,1.0,69.0,1.0,-32.862131,-35.306282,-9.166195,-15.693077,-21.088431


In [84]:
total = total[(total['PHENOTYPE_x']==1) | (total['PHENOTYPE_x']==2)]
total

Unnamed: 0,FID_x,IID,PAT_x,MAT_x,SEX_x,PHENOTYPE_x,chr4:958159:T:C_C,FID_y,PAT_y,MAT_y,SEX_y,PHENOTYPE_y,chr4:945299:C:T_C,FID,FATID,MATID,SEX,AGE,PHENO,PC1,PC2,PC3,PC4,PC5
0,0,APGS_000021_s1,0,0,1,2,2.0,0,0,0,1,2,0.0,0,0,0,1.0,,2.0,-28.028682,-35.735363,-6.580637,-17.501411,-23.026102
1,0,APGS_000475_s1,0,0,1,2,0.0,0,0,0,1,2,2.0,0,0,0,1.0,,2.0,-28.154728,-36.708128,-5.730325,-20.750956,-21.729629
2,0,APGS_000486_s1,0,0,1,2,0.0,0,0,0,1,2,1.0,0,0,0,1.0,,2.0,-29.810620,-37.561248,-9.373941,-16.773748,-27.482223
3,0,APGS_000646_s1,0,0,2,2,1.0,0,0,0,2,2,0.0,0,0,0,2.0,,2.0,-31.967732,-34.309571,-8.603258,-17.143810,-21.027547
4,0,APGS_000808_s1,0,0,1,2,0.0,0,0,0,1,2,1.0,0,0,0,1.0,,2.0,-29.929420,-33.911640,-9.610071,-18.087631,-24.559511
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2650,0,VIPD_000746_s1,0,0,1,2,1.0,0,0,0,1,2,1.0,0,0,0,1.0,74.0,2.0,-31.455498,-35.058387,-9.777408,-18.399794,-27.254806
2651,0,VIPD_000748_s1,0,0,1,2,1.0,0,0,0,1,2,1.0,0,0,0,1.0,73.0,2.0,-31.319358,-39.960596,-10.932457,-17.389794,-1.963005
2652,0,YMS_000030_s1,0,0,1,2,1.0,0,0,0,1,2,0.0,0,0,0,1.0,72.0,2.0,-29.635489,-34.956373,-6.466043,-14.761441,-20.758634
2653,0,YMS_000031_s1,0,0,1,1,1.0,0,0,0,1,1,1.0,0,0,0,1.0,69.0,1.0,-32.862131,-35.306282,-9.166195,-15.693077,-21.088431


In [85]:
total = total[["IID",'PHENOTYPE_x',"chr4:958159:T:C_C","chr4:945299:C:T_C", "SEX", "AGE", "PC1","PC2","PC3","PC4","PC5"]]

In [86]:
total.columns = ["IID",'PHENOTYPE',"TMEM175_rs34311866_nalls","TMEM175_rs6599388","SEX", "AGE", "PC1","PC2","PC3","PC4","PC5"]
total

Unnamed: 0,IID,PHENOTYPE,TMEM175_rs34311866_nalls,TMEM175_rs6599388,SEX,AGE,PC1,PC2,PC3,PC4,PC5
0,APGS_000021_s1,2,2.0,0.0,1.0,,-28.028682,-35.735363,-6.580637,-17.501411,-23.026102
1,APGS_000475_s1,2,0.0,2.0,1.0,,-28.154728,-36.708128,-5.730325,-20.750956,-21.729629
2,APGS_000486_s1,2,0.0,1.0,1.0,,-29.810620,-37.561248,-9.373941,-16.773748,-27.482223
3,APGS_000646_s1,2,1.0,0.0,2.0,,-31.967732,-34.309571,-8.603258,-17.143810,-21.027547
4,APGS_000808_s1,2,0.0,1.0,1.0,,-29.929420,-33.911640,-9.610071,-18.087631,-24.559511
...,...,...,...,...,...,...,...,...,...,...,...
2650,VIPD_000746_s1,2,1.0,1.0,1.0,74.0,-31.455498,-35.058387,-9.777408,-18.399794,-27.254806
2651,VIPD_000748_s1,2,1.0,1.0,1.0,73.0,-31.319358,-39.960596,-10.932457,-17.389794,-1.963005
2652,YMS_000030_s1,2,1.0,0.0,1.0,72.0,-29.635489,-34.956373,-6.466043,-14.761441,-20.758634
2653,YMS_000031_s1,1,1.0,1.0,1.0,69.0,-32.862131,-35.306282,-9.166195,-15.693077,-21.088431


In [87]:
total.to_csv(f'{WORK_DIR}/TMEM175_AJ/TMEM175_rs34311866_nalls_rs6599388.csv')

In [88]:
%%R
total <- read.csv("~/workspace/ws_files/TMEM175/TMEM175_AJ/TMEM175_rs34311866_nalls_rs6599388.csv")

In [89]:
%%R
head(total)

  X            IID PHENOTYPE TMEM175_rs34311866_nalls TMEM175_rs6599388 SEX AGE
1 0 APGS_000021_s1         2                        2                 0   1  NA
2 1 APGS_000475_s1         2                        0                 2   1  NA
3 2 APGS_000486_s1         2                        0                 1   1  NA
4 3 APGS_000646_s1         2                        1                 0   2  NA
5 4 APGS_000808_s1         2                        0                 1   1  NA
6 5 APGS_001059_s1         2                        2                 0   1  NA
        PC1       PC2       PC3       PC4       PC5
1 -28.02868 -35.73536 -6.580637 -17.50141 -23.02610
2 -28.15473 -36.70813 -5.730325 -20.75096 -21.72963
3 -29.81062 -37.56125 -9.373941 -16.77375 -27.48222
4 -31.96773 -34.30957 -8.603258 -17.14381 -21.02755
5 -29.92942 -33.91164 -9.610071 -18.08763 -24.55951
6 -28.19315 -35.13333 -4.791519 -16.36048 -22.63633


In [90]:
%%R

correlation.model <- lm(TMEM175_rs34311866_nalls ~ TMEM175_rs6599388, data = total)
summary(correlation.model)


Call:
lm(formula = TMEM175_rs34311866_nalls ~ TMEM175_rs6599388, data = total)

Residuals:
    Min      1Q  Median      3Q     Max 
-1.2440 -0.2440 -0.0387  0.3586  1.3586 

Coefficients:
                  Estimate Std. Error t value Pr(>|t|)    
(Intercept)        1.24402    0.02051   60.67   <2e-16 ***
TMEM175_rs6599388 -0.60266    0.01739  -34.66   <2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.5062 on 1694 degrees of freedom
  (7 observations deleted due to missingness)
Multiple R-squared:  0.4149,	Adjusted R-squared:  0.4146 
F-statistic:  1201 on 1 and 1694 DF,  p-value: < 2.2e-16



In [91]:
%%R
dep_corr<- glm(PHENOTYPE ~ TMEM175_rs34311866_nalls + TMEM175_rs6599388 + SEX + AGE + PC1 + PC2 + PC3 + PC4 + PC5, data=total)
summary(dep_corr)


Call:
glm(formula = PHENOTYPE ~ TMEM175_rs34311866_nalls + TMEM175_rs6599388 + 
    SEX + AGE + PC1 + PC2 + PC3 + PC4 + PC5, data = total)

Coefficients:
                           Estimate Std. Error t value Pr(>|t|)    
(Intercept)               2.1758085  0.2664038   8.167 6.99e-16 ***
TMEM175_rs34311866_nalls -0.0155229  0.0190741  -0.814  0.41588    
TMEM175_rs6599388        -0.0716701  0.0180552  -3.970 7.57e-05 ***
SEX                      -0.1095720  0.0203843  -5.375 8.95e-08 ***
AGE                       0.0030017  0.0009700   3.094  0.00201 ** 
PC1                       0.0044928  0.0061006   0.736  0.46157    
PC2                       0.0073501  0.0052384   1.403  0.16080    
PC3                      -0.0004471  0.0052311  -0.085  0.93191    
PC4                      -0.0069490  0.0053371  -1.302  0.19313    
PC5                       0.0018644  0.0025604   0.728  0.46664    
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

(Dispersion parameter for gau

Person and conditional analysis with glm (rs74391911)

In [92]:
WORK_DIR = "~/workspace/ws_files/TMEM175/"

command = f"""
/home/jupyter/tools/plink \
--bfile {WORK_DIR}/TMEM175_AJ/AJ_TMEM175 \
--chr 4 \
--from-bp 956095 \
--to-bp 956095 \
--recode A \
--out {WORK_DIR}/TMEM175_AJ/TMEM175_rs74391911 \
--double-id
"""

!{command}

PLINK v1.90b6.9 64-bit (4 Mar 2019)            www.cog-genomics.org/plink/1.9/
(C) 2005-2019 Shaun Purcell, Christopher Chang   GNU General Public License v3
Logging to /home/jupyter/workspace/ws_files/TMEM175//TMEM175_AJ/TMEM175_rs74391911.log.
Options in effect:
  --bfile /home/jupyter/workspace/ws_files/TMEM175//TMEM175_AJ/AJ_TMEM175
  --chr 4
  --double-id
  --from-bp 956095
  --out /home/jupyter/workspace/ws_files/TMEM175//TMEM175_AJ/TMEM175_rs74391911
  --recode A
  --to-bp 956095

52223 MB RAM detected; reserving 26111 MB for main workspace.
1 out of 1285 variants loaded from .bim file.
2655 people (1648 males, 1007 females) loaded from .fam.
1703 phenotype values loaded from .fam.
Using 1 thread (no multithreaded calculations invoked).
Before main variant filters, 2655 founders and 0 nonfounders present.
Calculating allele frequencies... 1011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980

In [93]:
rs34311866 = pd.read_csv(f'{WORK_DIR}/TMEM175_AJ/TMEM175_rs34311866_nalls.raw',sep=" ")
rs74391911 = pd.read_csv(f'{WORK_DIR}/TMEM175_AJ/TMEM175_rs74391911.raw',sep=" ")

In [94]:
total = pd.merge(rs34311866, rs74391911, on="IID")
total

Unnamed: 0,FID_x,IID,PAT_x,MAT_x,SEX_x,PHENOTYPE_x,chr4:958159:T:C_C,FID_y,PAT_y,MAT_y,SEX_y,PHENOTYPE_y,chr4:956095:C:G_G
0,0,APGS_000021_s1,0,0,1,2,2.0,0,0,0,1,2,0.0
1,0,APGS_000475_s1,0,0,1,2,0.0,0,0,0,1,2,0.0
2,0,APGS_000486_s1,0,0,1,2,0.0,0,0,0,1,2,0.0
3,0,APGS_000646_s1,0,0,2,2,1.0,0,0,0,2,2,0.0
4,0,APGS_000808_s1,0,0,1,2,0.0,0,0,0,1,2,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2650,0,VIPD_000746_s1,0,0,1,2,1.0,0,0,0,1,2,0.0
2651,0,VIPD_000748_s1,0,0,1,2,1.0,0,0,0,1,2,0.0
2652,0,YMS_000030_s1,0,0,1,2,1.0,0,0,0,1,2,0.0
2653,0,YMS_000031_s1,0,0,1,1,1.0,0,0,0,1,1,0.0


In [95]:
cov = pd.read_csv(f'{WORK_DIR}/TMEM175_AJ/AJ_covariate_file.txt', sep = '\t')
cov

Unnamed: 0,FID,IID,FATID,MATID,SEX,AGE,PHENO,PC1,PC2,PC3,PC4,PC5
0,0,APGS_000002_s1,0,0,,,,-29.483298,-38.473005,-9.398904,-10.842865,25.744648
1,0,APGS_000003_s1,0,0,,,,-33.068958,-34.288862,-11.116951,-8.056146,22.924043
2,0,APGS_000004_s1,0,0,,,,-28.204441,-30.969481,-15.883093,-7.004351,18.647007
3,0,APGS_000005_s1,0,0,,,,-35.039753,-39.565822,-12.522753,-9.339179,21.956533
4,0,APGS_000006_s1,0,0,,,,-30.279907,-34.988905,-10.664628,-7.407177,26.357548
...,...,...,...,...,...,...,...,...,...,...,...,...
58204,0,YMS_000049_s1,0,0,,,,-33.320502,-37.170809,-9.867466,-6.818637,18.344976
58205,0,YMS_000050_s1,0,0,,,,-33.550740,-38.934634,-13.550840,-11.161991,26.601555
58206,0,YMS_000051_s1,0,0,,,,-36.415950,-36.634710,-9.623981,-8.333470,19.165537
58207,0,YMS_000052_s1,0,0,,,,55.200734,-12.367759,-2.246785,-0.681236,5.281365


In [96]:
total = pd.merge(total, cov, on="IID")
total

Unnamed: 0,FID_x,IID,PAT_x,MAT_x,SEX_x,PHENOTYPE_x,chr4:958159:T:C_C,FID_y,PAT_y,MAT_y,SEX_y,PHENOTYPE_y,chr4:956095:C:G_G,FID,FATID,MATID,SEX,AGE,PHENO,PC1,PC2,PC3,PC4,PC5
0,0,APGS_000021_s1,0,0,1,2,2.0,0,0,0,1,2,0.0,0,0,0,1.0,,2.0,-28.028682,-35.735363,-6.580637,-17.501411,-23.026102
1,0,APGS_000475_s1,0,0,1,2,0.0,0,0,0,1,2,0.0,0,0,0,1.0,,2.0,-28.154728,-36.708128,-5.730325,-20.750956,-21.729629
2,0,APGS_000486_s1,0,0,1,2,0.0,0,0,0,1,2,0.0,0,0,0,1.0,,2.0,-29.810620,-37.561248,-9.373941,-16.773748,-27.482223
3,0,APGS_000646_s1,0,0,2,2,1.0,0,0,0,2,2,0.0,0,0,0,2.0,,2.0,-31.967732,-34.309571,-8.603258,-17.143810,-21.027547
4,0,APGS_000808_s1,0,0,1,2,0.0,0,0,0,1,2,0.0,0,0,0,1.0,,2.0,-29.929420,-33.911640,-9.610071,-18.087631,-24.559511
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2650,0,VIPD_000746_s1,0,0,1,2,1.0,0,0,0,1,2,0.0,0,0,0,1.0,74.0,2.0,-31.455498,-35.058387,-9.777408,-18.399794,-27.254806
2651,0,VIPD_000748_s1,0,0,1,2,1.0,0,0,0,1,2,0.0,0,0,0,1.0,73.0,2.0,-31.319358,-39.960596,-10.932457,-17.389794,-1.963005
2652,0,YMS_000030_s1,0,0,1,2,1.0,0,0,0,1,2,0.0,0,0,0,1.0,72.0,2.0,-29.635489,-34.956373,-6.466043,-14.761441,-20.758634
2653,0,YMS_000031_s1,0,0,1,1,1.0,0,0,0,1,1,0.0,0,0,0,1.0,69.0,1.0,-32.862131,-35.306282,-9.166195,-15.693077,-21.088431


In [97]:
total = total[(total['PHENOTYPE_x']==1) | (total['PHENOTYPE_x']==2)]
total

Unnamed: 0,FID_x,IID,PAT_x,MAT_x,SEX_x,PHENOTYPE_x,chr4:958159:T:C_C,FID_y,PAT_y,MAT_y,SEX_y,PHENOTYPE_y,chr4:956095:C:G_G,FID,FATID,MATID,SEX,AGE,PHENO,PC1,PC2,PC3,PC4,PC5
0,0,APGS_000021_s1,0,0,1,2,2.0,0,0,0,1,2,0.0,0,0,0,1.0,,2.0,-28.028682,-35.735363,-6.580637,-17.501411,-23.026102
1,0,APGS_000475_s1,0,0,1,2,0.0,0,0,0,1,2,0.0,0,0,0,1.0,,2.0,-28.154728,-36.708128,-5.730325,-20.750956,-21.729629
2,0,APGS_000486_s1,0,0,1,2,0.0,0,0,0,1,2,0.0,0,0,0,1.0,,2.0,-29.810620,-37.561248,-9.373941,-16.773748,-27.482223
3,0,APGS_000646_s1,0,0,2,2,1.0,0,0,0,2,2,0.0,0,0,0,2.0,,2.0,-31.967732,-34.309571,-8.603258,-17.143810,-21.027547
4,0,APGS_000808_s1,0,0,1,2,0.0,0,0,0,1,2,0.0,0,0,0,1.0,,2.0,-29.929420,-33.911640,-9.610071,-18.087631,-24.559511
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2650,0,VIPD_000746_s1,0,0,1,2,1.0,0,0,0,1,2,0.0,0,0,0,1.0,74.0,2.0,-31.455498,-35.058387,-9.777408,-18.399794,-27.254806
2651,0,VIPD_000748_s1,0,0,1,2,1.0,0,0,0,1,2,0.0,0,0,0,1.0,73.0,2.0,-31.319358,-39.960596,-10.932457,-17.389794,-1.963005
2652,0,YMS_000030_s1,0,0,1,2,1.0,0,0,0,1,2,0.0,0,0,0,1.0,72.0,2.0,-29.635489,-34.956373,-6.466043,-14.761441,-20.758634
2653,0,YMS_000031_s1,0,0,1,1,1.0,0,0,0,1,1,0.0,0,0,0,1.0,69.0,1.0,-32.862131,-35.306282,-9.166195,-15.693077,-21.088431


In [98]:
total = total[["IID",'PHENOTYPE_x',"chr4:958159:T:C_C","chr4:956095:C:G_G", "SEX", "AGE", "PC1","PC2","PC3","PC4","PC5"]]

In [99]:
total.columns = ["IID",'PHENOTYPE',"TMEM175_rs34311866_nalls","TMEM175_rs74391911","SEX", "AGE", "PC1","PC2","PC3","PC4","PC5"]
total

Unnamed: 0,IID,PHENOTYPE,TMEM175_rs34311866_nalls,TMEM175_rs74391911,SEX,AGE,PC1,PC2,PC3,PC4,PC5
0,APGS_000021_s1,2,2.0,0.0,1.0,,-28.028682,-35.735363,-6.580637,-17.501411,-23.026102
1,APGS_000475_s1,2,0.0,0.0,1.0,,-28.154728,-36.708128,-5.730325,-20.750956,-21.729629
2,APGS_000486_s1,2,0.0,0.0,1.0,,-29.810620,-37.561248,-9.373941,-16.773748,-27.482223
3,APGS_000646_s1,2,1.0,0.0,2.0,,-31.967732,-34.309571,-8.603258,-17.143810,-21.027547
4,APGS_000808_s1,2,0.0,0.0,1.0,,-29.929420,-33.911640,-9.610071,-18.087631,-24.559511
...,...,...,...,...,...,...,...,...,...,...,...
2650,VIPD_000746_s1,2,1.0,0.0,1.0,74.0,-31.455498,-35.058387,-9.777408,-18.399794,-27.254806
2651,VIPD_000748_s1,2,1.0,0.0,1.0,73.0,-31.319358,-39.960596,-10.932457,-17.389794,-1.963005
2652,YMS_000030_s1,2,1.0,0.0,1.0,72.0,-29.635489,-34.956373,-6.466043,-14.761441,-20.758634
2653,YMS_000031_s1,1,1.0,0.0,1.0,69.0,-32.862131,-35.306282,-9.166195,-15.693077,-21.088431


In [100]:
total.to_csv(f'{WORK_DIR}/TMEM175_AJ/TMEM175_rs34311866_nalls_rs74391911.csv')

In [102]:
%%R
total <- read.csv("~/workspace/ws_files/TMEM175/TMEM175_AJ/TMEM175_rs34311866_nalls_rs74391911.csv")

In [103]:
%%R
head(total)

  X            IID PHENOTYPE TMEM175_rs34311866_nalls TMEM175_rs74391911 SEX
1 0 APGS_000021_s1         2                        2                  0   1
2 1 APGS_000475_s1         2                        0                  0   1
3 2 APGS_000486_s1         2                        0                  0   1
4 3 APGS_000646_s1         2                        1                  0   2
5 4 APGS_000808_s1         2                        0                  0   1
6 5 APGS_001059_s1         2                        2                  0   1
  AGE       PC1       PC2       PC3       PC4       PC5
1  NA -28.02868 -35.73536 -6.580637 -17.50141 -23.02610
2  NA -28.15473 -36.70813 -5.730325 -20.75096 -21.72963
3  NA -29.81062 -37.56125 -9.373941 -16.77375 -27.48222
4  NA -31.96773 -34.30957 -8.603258 -17.14381 -21.02755
5  NA -29.92942 -33.91164 -9.610071 -18.08763 -24.55951
6  NA -28.19315 -35.13333 -4.791519 -16.36048 -22.63633


In [104]:
%%R

correlation.model <- lm(TMEM175_rs34311866_nalls ~ TMEM175_rs74391911, data = total)
summary(correlation.model)


Call:
lm(formula = TMEM175_rs34311866_nalls ~ TMEM175_rs74391911, data = total)

Residuals:
    Min      1Q  Median      3Q     Max 
-0.6832 -0.6832  0.3168  0.3168  1.3168 

Coefficients:
                   Estimate Std. Error t value Pr(>|t|)    
(Intercept)         0.68316    0.01621  42.142  < 2e-16 ***
TMEM175_rs74391911 -0.35884    0.10969  -3.271  0.00109 ** 
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.6599 on 1692 degrees of freedom
  (9 observations deleted due to missingness)
Multiple R-squared:  0.006285,	Adjusted R-squared:  0.005698 
F-statistic:  10.7 on 1 and 1692 DF,  p-value: 0.001092



In [105]:
%%R
dep_corr<- glm(PHENOTYPE ~ TMEM175_rs34311866_nalls + TMEM175_rs74391911 + SEX + AGE + PC1 + PC2 + PC3 + PC4 + PC5, data=total)
summary(dep_corr)


Call:
glm(formula = PHENOTYPE ~ TMEM175_rs34311866_nalls + TMEM175_rs74391911 + 
    SEX + AGE + PC1 + PC2 + PC3 + PC4 + PC5, data = total)

Coefficients:
                           Estimate Std. Error t value Pr(>|t|)    
(Intercept)               2.0268287  0.2643574   7.667 3.28e-14 ***
TMEM175_rs34311866_nalls  0.0290173  0.0145414   1.995  0.04618 *  
TMEM175_rs74391911       -0.3015576  0.0687406  -4.387 1.24e-05 ***
SEX                      -0.1092647  0.0203588  -5.367 9.37e-08 ***
AGE                       0.0030782  0.0009690   3.177  0.00152 ** 
PC1                       0.0026219  0.0060937   0.430  0.66707    
PC2                       0.0076134  0.0052298   1.456  0.14568    
PC3                       0.0006995  0.0052268   0.134  0.89357    
PC4                      -0.0068997  0.0053302  -1.294  0.19573    
PC5                       0.0011696  0.0025393   0.461  0.64515    
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

(Dispersion parameter for ga

Person and conditional analysis with glm (rs6856425)

In [106]:
WORK_DIR = "~/workspace/ws_files/TMEM175/"

command = f"""
/home/jupyter/tools/plink \
--bfile {WORK_DIR}/TMEM175_AAC/AAC_TMEM175 \
--chr 4 \
--from-bp 983130 \
--to-bp 983130 \
--recode A \
--out {WORK_DIR}/TMEM175_AAC/TMEM175_rs6856425 \
--double-id
"""

!{command}

PLINK v1.90b6.9 64-bit (4 Mar 2019)            www.cog-genomics.org/plink/1.9/
(C) 2005-2019 Shaun Purcell, Christopher Chang   GNU General Public License v3
Logging to /home/jupyter/workspace/ws_files/TMEM175//TMEM175_AAC/TMEM175_rs6856425.log.
Options in effect:
  --bfile /home/jupyter/workspace/ws_files/TMEM175//TMEM175_AAC/AAC_TMEM175
  --chr 4
  --double-id
  --from-bp 983130
  --out /home/jupyter/workspace/ws_files/TMEM175//TMEM175_AAC/TMEM175_rs6856425
  --recode A
  --to-bp 983130

52223 MB RAM detected; reserving 26111 MB for main workspace.
1 out of 3449 variants loaded from .bim file.
1111 people (455 males, 656 females) loaded from .fam.
1086 phenotype values loaded from .fam.
Using 1 thread (no multithreaded calculations invoked).
Before main variant filters, 1111 founders and 0 nonfounders present.
Calculating allele frequencies... 1011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980

In [107]:
rs34311866 = pd.read_csv(f'{WORK_DIR}/TMEM175_AAC/TMEM175_rs34311866_nalls.raw',sep=" ")
rs6856425 = pd.read_csv(f'{WORK_DIR}/TMEM175_AAC/TMEM175_rs6856425.raw',sep=" ")

In [108]:
total = pd.merge(rs34311866, rs6856425, on="IID")
total

Unnamed: 0,FID_x,IID,PAT_x,MAT_x,SEX_x,PHENOTYPE_x,chr4:958159:T:C_C,FID_y,PAT_y,MAT_y,SEX_y,PHENOTYPE_y,chr4:983130:T:C_C
0,0,BCM_000142_s1,0,0,1,2,0.0,0,0,0,1,2,0.0
1,0,BCM_000150_s1,0,0,2,2,0.0,0,0,0,2,2,0.0
2,0,BCM_000204_s1,0,0,2,2,0.0,0,0,0,2,2,0.0
3,0,BCM_000215_s1,0,0,1,2,0.0,0,0,0,1,2,0.0
4,0,BCM_000270_s1,0,0,2,1,0.0,0,0,0,2,1,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1106,0,UPENNU19_000409_s1,0,0,1,2,0.0,0,0,0,1,2,0.0
1107,0,UPENNU19_000432_s1,0,0,2,2,1.0,0,0,0,2,2,0.0
1108,0,VIPD_000151_s1,0,0,2,2,0.0,0,0,0,2,2,1.0
1109,0,VIPD_000679_s1,0,0,2,2,1.0,0,0,0,2,2,0.0


In [109]:
cov = pd.read_csv(f'{WORK_DIR}/TMEM175_AAC/AAC_covariate_file.txt', sep = '\t')
cov

Unnamed: 0,FID,IID,FATID,MATID,SEX,AGE,PHENO,PC1,PC2,PC3,PC4,PC5
0,0,APGS_000002_s1,0,0,,,,-29.483298,-38.473005,-9.398904,-10.842865,25.744648
1,0,APGS_000003_s1,0,0,,,,-33.068958,-34.288862,-11.116951,-8.056146,22.924043
2,0,APGS_000004_s1,0,0,,,,-28.204441,-30.969481,-15.883093,-7.004351,18.647007
3,0,APGS_000005_s1,0,0,,,,-35.039753,-39.565822,-12.522753,-9.339179,21.956533
4,0,APGS_000006_s1,0,0,,,,-30.279907,-34.988905,-10.664628,-7.407177,26.357548
...,...,...,...,...,...,...,...,...,...,...,...,...
58204,0,YMS_000049_s1,0,0,,,,-33.320502,-37.170809,-9.867466,-6.818637,18.344976
58205,0,YMS_000050_s1,0,0,,,,-33.550740,-38.934634,-13.550840,-11.161991,26.601555
58206,0,YMS_000051_s1,0,0,,,,-36.415950,-36.634710,-9.623981,-8.333470,19.165537
58207,0,YMS_000052_s1,0,0,1.0,53.0,1.0,55.200734,-12.367759,-2.246785,-0.681236,5.281365


In [110]:
total = pd.merge(total, cov, on="IID")
total

Unnamed: 0,FID_x,IID,PAT_x,MAT_x,SEX_x,PHENOTYPE_x,chr4:958159:T:C_C,FID_y,PAT_y,MAT_y,SEX_y,PHENOTYPE_y,chr4:983130:T:C_C,FID,FATID,MATID,SEX,AGE,PHENO,PC1,PC2,PC3,PC4,PC5
0,0,BCM_000142_s1,0,0,1,2,0.0,0,0,0,1,2,0.0,0,0,0,1.0,,2.0,42.344694,-18.080400,-4.379556,-4.231518,11.945426
1,0,BCM_000150_s1,0,0,2,2,0.0,0,0,0,2,2,0.0,0,0,0,2.0,,2.0,46.211997,-16.728937,-2.939694,-4.306565,5.530103
2,0,BCM_000204_s1,0,0,2,2,0.0,0,0,0,2,2,0.0,0,0,0,2.0,,2.0,53.796533,-13.345479,-2.312777,-4.141204,3.542342
3,0,BCM_000215_s1,0,0,1,2,0.0,0,0,0,1,2,0.0,0,0,0,1.0,,2.0,33.149467,-17.891235,-5.542113,-2.706507,6.964059
4,0,BCM_000270_s1,0,0,2,1,0.0,0,0,0,2,1,0.0,0,0,0,2.0,,1.0,44.397319,-15.104927,-4.648018,-3.126988,7.345741
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1106,0,UPENNU19_000409_s1,0,0,1,2,0.0,0,0,0,1,2,0.0,0,0,0,1.0,79.386111,2.0,59.974838,-11.604280,-4.482499,-2.758223,3.541477
1107,0,UPENNU19_000432_s1,0,0,2,2,1.0,0,0,0,2,2,0.0,0,0,0,2.0,77.661111,2.0,55.937250,-11.575462,-5.033753,-2.485275,6.130544
1108,0,VIPD_000151_s1,0,0,2,2,0.0,0,0,0,2,2,1.0,0,0,0,2.0,63.000000,2.0,45.339968,-18.329921,-5.496370,-0.623469,2.827065
1109,0,VIPD_000679_s1,0,0,2,2,1.0,0,0,0,2,2,0.0,0,0,0,2.0,60.000000,2.0,24.426378,-24.074607,-5.665107,-6.488002,12.017162


In [111]:
total = total[(total['PHENOTYPE_x']==1) | (total['PHENOTYPE_x']==2)]
total

Unnamed: 0,FID_x,IID,PAT_x,MAT_x,SEX_x,PHENOTYPE_x,chr4:958159:T:C_C,FID_y,PAT_y,MAT_y,SEX_y,PHENOTYPE_y,chr4:983130:T:C_C,FID,FATID,MATID,SEX,AGE,PHENO,PC1,PC2,PC3,PC4,PC5
0,0,BCM_000142_s1,0,0,1,2,0.0,0,0,0,1,2,0.0,0,0,0,1.0,,2.0,42.344694,-18.080400,-4.379556,-4.231518,11.945426
1,0,BCM_000150_s1,0,0,2,2,0.0,0,0,0,2,2,0.0,0,0,0,2.0,,2.0,46.211997,-16.728937,-2.939694,-4.306565,5.530103
2,0,BCM_000204_s1,0,0,2,2,0.0,0,0,0,2,2,0.0,0,0,0,2.0,,2.0,53.796533,-13.345479,-2.312777,-4.141204,3.542342
3,0,BCM_000215_s1,0,0,1,2,0.0,0,0,0,1,2,0.0,0,0,0,1.0,,2.0,33.149467,-17.891235,-5.542113,-2.706507,6.964059
4,0,BCM_000270_s1,0,0,2,1,0.0,0,0,0,2,1,0.0,0,0,0,2.0,,1.0,44.397319,-15.104927,-4.648018,-3.126988,7.345741
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1106,0,UPENNU19_000409_s1,0,0,1,2,0.0,0,0,0,1,2,0.0,0,0,0,1.0,79.386111,2.0,59.974838,-11.604280,-4.482499,-2.758223,3.541477
1107,0,UPENNU19_000432_s1,0,0,2,2,1.0,0,0,0,2,2,0.0,0,0,0,2.0,77.661111,2.0,55.937250,-11.575462,-5.033753,-2.485275,6.130544
1108,0,VIPD_000151_s1,0,0,2,2,0.0,0,0,0,2,2,1.0,0,0,0,2.0,63.000000,2.0,45.339968,-18.329921,-5.496370,-0.623469,2.827065
1109,0,VIPD_000679_s1,0,0,2,2,1.0,0,0,0,2,2,0.0,0,0,0,2.0,60.000000,2.0,24.426378,-24.074607,-5.665107,-6.488002,12.017162


In [112]:
total = total[["IID",'PHENOTYPE_x',"chr4:958159:T:C_C","chr4:983130:T:C_C", "SEX", "AGE", "PC1","PC2","PC3","PC4","PC5"]]

In [113]:
total.columns = ["IID",'PHENOTYPE',"TMEM175_rs34311866_nalls","TMEM175_rs6856425","SEX", "AGE", "PC1","PC2","PC3","PC4","PC5"]
total

Unnamed: 0,IID,PHENOTYPE,TMEM175_rs34311866_nalls,TMEM175_rs6856425,SEX,AGE,PC1,PC2,PC3,PC4,PC5
0,BCM_000142_s1,2,0.0,0.0,1.0,,42.344694,-18.080400,-4.379556,-4.231518,11.945426
1,BCM_000150_s1,2,0.0,0.0,2.0,,46.211997,-16.728937,-2.939694,-4.306565,5.530103
2,BCM_000204_s1,2,0.0,0.0,2.0,,53.796533,-13.345479,-2.312777,-4.141204,3.542342
3,BCM_000215_s1,2,0.0,0.0,1.0,,33.149467,-17.891235,-5.542113,-2.706507,6.964059
4,BCM_000270_s1,1,0.0,0.0,2.0,,44.397319,-15.104927,-4.648018,-3.126988,7.345741
...,...,...,...,...,...,...,...,...,...,...,...
1106,UPENNU19_000409_s1,2,0.0,0.0,1.0,79.386111,59.974838,-11.604280,-4.482499,-2.758223,3.541477
1107,UPENNU19_000432_s1,2,1.0,0.0,2.0,77.661111,55.937250,-11.575462,-5.033753,-2.485275,6.130544
1108,VIPD_000151_s1,2,0.0,1.0,2.0,63.000000,45.339968,-18.329921,-5.496370,-0.623469,2.827065
1109,VIPD_000679_s1,2,1.0,0.0,2.0,60.000000,24.426378,-24.074607,-5.665107,-6.488002,12.017162


In [114]:
total.to_csv(f'{WORK_DIR}/TMEM175_AAC/TMEM175_rs34311866_nalls_rs6856425.csv')

In [115]:
%%R
total <- read.csv("~/workspace/ws_files/TMEM175/TMEM175_AAC/TMEM175_rs34311866_nalls_rs6856425.csv")

In [116]:
%%R
head(total)

  X           IID PHENOTYPE TMEM175_rs34311866_nalls TMEM175_rs6856425 SEX AGE
1 0 BCM_000142_s1         2                        0                 0   1  NA
2 1 BCM_000150_s1         2                        0                 0   2  NA
3 2 BCM_000204_s1         2                        0                 0   2  NA
4 3 BCM_000215_s1         2                        0                 0   1  NA
5 4 BCM_000270_s1         1                        0                 0   2  NA
6 5 BCM_000277_s1         1                        1                 0   2  NA
       PC1       PC2       PC3       PC4        PC5
1 42.34469 -18.08040 -4.379556 -4.231518 11.9454258
2 46.21200 -16.72894 -2.939694 -4.306565  5.5301026
3 53.79653 -13.34548 -2.312777 -4.141204  3.5423418
4 33.14947 -17.89123 -5.542113 -2.706507  6.9640586
5 44.39732 -15.10493 -4.648018 -3.126988  7.3457406
6 63.11613 -11.49276 -2.482421 -1.453234  0.2000133


In [117]:
%%R

correlation.model <- lm(TMEM175_rs34311866_nalls ~ TMEM175_rs6856425, data = total)
summary(correlation.model)


Call:
lm(formula = TMEM175_rs34311866_nalls ~ TMEM175_rs6856425, data = total)

Residuals:
     Min       1Q   Median       3Q      Max 
-0.11449 -0.11449 -0.11449 -0.04595  1.88551 

Coefficients:
                  Estimate Std. Error t value Pr(>|t|)    
(Intercept)        0.11449    0.01059  10.812  < 2e-16 ***
TMEM175_rs6856425 -0.06854    0.01842  -3.721 0.000209 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.2975 on 1054 degrees of freedom
  (30 observations deleted due to missingness)
Multiple R-squared:  0.01297,	Adjusted R-squared:  0.01203 
F-statistic: 13.85 on 1 and 1054 DF,  p-value: 0.0002089



In [118]:
%%R
dep_corr<- glm(PHENOTYPE ~ TMEM175_rs34311866_nalls + TMEM175_rs6856425 + SEX + AGE + PC1 + PC2 + PC3 + PC4 + PC5, data=total)
summary(dep_corr)


Call:
glm(formula = PHENOTYPE ~ TMEM175_rs34311866_nalls + TMEM175_rs6856425 + 
    SEX + AGE + PC1 + PC2 + PC3 + PC4 + PC5, data = total)

Coefficients:
                          Estimate Std. Error t value Pr(>|t|)    
(Intercept)               2.410913   0.116470  20.700  < 2e-16 ***
TMEM175_rs34311866_nalls -0.017839   0.041021  -0.435 0.663766    
TMEM175_rs6856425         0.073564   0.025318   2.906 0.003756 ** 
SEX                      -0.101059   0.025386  -3.981 7.42e-05 ***
AGE                       0.003901   0.001211   3.220 0.001327 ** 
PC1                      -0.017289   0.001316 -13.133  < 2e-16 ***
PC2                       0.015487   0.004307   3.595 0.000342 ***
PC3                      -0.009166   0.004893  -1.873 0.061341 .  
PC4                       0.006983   0.006148   1.136 0.256380    
PC5                      -0.044488   0.005483  -8.113 1.63e-15 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

(Dispersion parameter for gaussian famil

GCTA conditional analysis

In [6]:
#Read in summary statistics
sumstats = pd.read_csv(f'{WORK_DIR}/TMEM175_EUR/EUR.all_adj.csv')
sumstats

Unnamed: 0.1,Unnamed: 0,#CHROM,POS,ID,REF,ALT,PROVISIONAL_REF?,A1,OMITTED,A1_FREQ,FIRTH?,TEST,OBS_CT,OR,LOG(OR)_SE,L95,U95,Z_STAT,P,ERRCODE
0,0,4,882530,chr4:882530:C:T,C,T,Y,T,C,0.164467,N,ADD,20837,1.034410,0.030398,0.974580,1.097910,1.112890,0.265757,.
1,8,4,882611,chr4:882611:T:C,T,C,Y,C,T,0.051185,N,ADD,20885,0.931946,0.050102,0.844780,1.028110,-1.406740,0.159504,.
2,16,4,883018,chr4:883018:G:A,G,A,Y,A,G,0.050599,N,ADD,20880,0.928749,0.050369,0.841441,1.025120,-1.467500,0.142240,.
3,24,4,883133,chr4:883133:G:A,G,A,Y,A,G,0.012734,N,ADD,20811,0.809012,0.096557,0.669523,0.977562,-2.194990,0.028164,.
4,32,4,883483,chr4:883483:G:A,G,A,Y,A,G,0.035357,N,ADD,20901,0.883241,0.058911,0.786926,0.991344,-2.107530,0.035071,.
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
392,3136,4,1007954,chr4:1007954:T:C,T,C,Y,T,C,0.279743,N,ADD,18971,0.913461,0.025919,0.868217,0.961063,-3.492280,0.000479,.
393,3144,4,1008009,chr4:1008009:C:T,C,T,Y,C,T,0.444961,N,ADD,18814,0.949197,0.023666,0.906174,0.994262,-2.203110,0.027587,.
394,3152,4,1008209,chr4:1008209:C:T,C,T,Y,T,C,0.036468,N,ADD,20717,1.002060,0.059756,0.891315,1.126570,0.034499,0.972480,.
395,3160,4,1008337,chr4:1008337:C:T,C,T,Y,C,T,0.279831,N,ADD,18965,0.913397,0.025912,0.868167,0.960983,-3.495880,0.000473,.


In [8]:
#Format summary statistics for GCTA-COJO
#First get the log odds ratio - this is required for COJO
#1) For a case-control study, the effect size should be log(odds ratio) with its corresponding standard error.
sumstats_formatted = sumstats.copy()
sumstats_formatted['b'] = np.log(sumstats_formatted['OR'])

In [9]:
sumstats_formatted

Unnamed: 0.1,Unnamed: 0,#CHROM,POS,ID,REF,ALT,PROVISIONAL_REF?,A1,OMITTED,A1_FREQ,FIRTH?,TEST,OBS_CT,OR,LOG(OR)_SE,L95,U95,Z_STAT,P,ERRCODE,b
0,0,4,882530,chr4:882530:C:T,C,T,Y,T,C,0.164467,N,ADD,20837,1.034410,0.030398,0.974580,1.097910,1.112890,0.265757,.,0.033831
1,8,4,882611,chr4:882611:T:C,T,C,Y,C,T,0.051185,N,ADD,20885,0.931946,0.050102,0.844780,1.028110,-1.406740,0.159504,.,-0.070480
2,16,4,883018,chr4:883018:G:A,G,A,Y,A,G,0.050599,N,ADD,20880,0.928749,0.050369,0.841441,1.025120,-1.467500,0.142240,.,-0.073917
3,24,4,883133,chr4:883133:G:A,G,A,Y,A,G,0.012734,N,ADD,20811,0.809012,0.096557,0.669523,0.977562,-2.194990,0.028164,.,-0.211942
4,32,4,883483,chr4:883483:G:A,G,A,Y,A,G,0.035357,N,ADD,20901,0.883241,0.058911,0.786926,0.991344,-2.107530,0.035071,.,-0.124157
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
392,3136,4,1007954,chr4:1007954:T:C,T,C,Y,T,C,0.279743,N,ADD,18971,0.913461,0.025919,0.868217,0.961063,-3.492280,0.000479,.,-0.090515
393,3144,4,1008009,chr4:1008009:C:T,C,T,Y,C,T,0.444961,N,ADD,18814,0.949197,0.023666,0.906174,0.994262,-2.203110,0.027587,.,-0.052139
394,3152,4,1008209,chr4:1008209:C:T,C,T,Y,T,C,0.036468,N,ADD,20717,1.002060,0.059756,0.891315,1.126570,0.034499,0.972480,.,0.002058
395,3160,4,1008337,chr4:1008337:C:T,C,T,Y,C,T,0.279831,N,ADD,18965,0.913397,0.025912,0.868167,0.960983,-3.495880,0.000473,.,-0.090585


In [10]:
#Now select just the necessary columns for COJO
sumstats_export = sumstats_formatted[['ID', 'A1', 'OMITTED', 'A1_FREQ', 'b', 'LOG(OR)_SE', 'P', 'OBS_CT']].copy()

In [11]:
sumstats_export

Unnamed: 0,ID,A1,OMITTED,A1_FREQ,b,LOG(OR)_SE,P,OBS_CT
0,chr4:882530:C:T,T,C,0.164467,0.033831,0.030398,0.265757,20837
1,chr4:882611:T:C,C,T,0.051185,-0.070480,0.050102,0.159504,20885
2,chr4:883018:G:A,A,G,0.050599,-0.073917,0.050369,0.142240,20880
3,chr4:883133:G:A,A,G,0.012734,-0.211942,0.096557,0.028164,20811
4,chr4:883483:G:A,A,G,0.035357,-0.124157,0.058911,0.035071,20901
...,...,...,...,...,...,...,...,...
392,chr4:1007954:T:C,T,C,0.279743,-0.090515,0.025919,0.000479,18971
393,chr4:1008009:C:T,C,T,0.444961,-0.052139,0.023666,0.027587,18814
394,chr4:1008209:C:T,T,C,0.036468,0.002058,0.059756,0.972480,20717
395,chr4:1008337:C:T,C,T,0.279831,-0.090585,0.025912,0.000473,18965


In [12]:
#Rename columns following COJO format
sumstats_export = sumstats_export.rename(columns = {'ID':'SNP', 'OMITTED':'A2', 'A1_FREQ':'freq', 'LOG(OR)_SE':'se', 'P':'p', 'OBS_CT':'N'})

In [13]:
sumstats_export

Unnamed: 0,SNP,A1,A2,freq,b,se,p,N
0,chr4:882530:C:T,T,C,0.164467,0.033831,0.030398,0.265757,20837
1,chr4:882611:T:C,C,T,0.051185,-0.070480,0.050102,0.159504,20885
2,chr4:883018:G:A,A,G,0.050599,-0.073917,0.050369,0.142240,20880
3,chr4:883133:G:A,A,G,0.012734,-0.211942,0.096557,0.028164,20811
4,chr4:883483:G:A,A,G,0.035357,-0.124157,0.058911,0.035071,20901
...,...,...,...,...,...,...,...,...
392,chr4:1007954:T:C,T,C,0.279743,-0.090515,0.025919,0.000479,18971
393,chr4:1008009:C:T,C,T,0.444961,-0.052139,0.023666,0.027587,18814
394,chr4:1008209:C:T,T,C,0.036468,0.002058,0.059756,0.972480,20717
395,chr4:1008337:C:T,C,T,0.279831,-0.090585,0.025912,0.000473,18965


In [14]:
#Export
sumstats_export.to_csv(f'{WORK_DIR}/TMEM175_EUR/EUR.all_adj.sumstats.ma', sep = '\t', index=False)

In [15]:
! wget https://yanglab.westlake.edu.cn/software/gcta/bin/gcta-1.94.1-linux-kernel-3-x86_64.zip

--2024-08-07 11:14:11--  https://yanglab.westlake.edu.cn/software/gcta/bin/gcta-1.94.1-linux-kernel-3-x86_64.zip
Resolving yanglab.westlake.edu.cn (yanglab.westlake.edu.cn)... 124.160.108.195, 42.247.30.189, 2001:250:6413:1002:250:56ff:10:195
Connecting to yanglab.westlake.edu.cn (yanglab.westlake.edu.cn)|124.160.108.195|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 14333133 (14M) [application/zip]
Saving to: ‘gcta-1.94.1-linux-kernel-3-x86_64.zip’


2024-08-07 11:14:32 (824 KB/s) - ‘gcta-1.94.1-linux-kernel-3-x86_64.zip’ saved [14333133/14333133]



In [16]:
! unzip gcta-1.94.1-linux-kernel-3-x86_64.zip

Archive:  gcta-1.94.1-linux-kernel-3-x86_64.zip
   creating: gcta-1.94.1-linux-kernel-3-x86_64/
  inflating: __MACOSX/._gcta-1.94.1-linux-kernel-3-x86_64  
  inflating: gcta-1.94.1-linux-kernel-3-x86_64/gcta64  
  inflating: __MACOSX/gcta-1.94.1-linux-kernel-3-x86_64/._gcta64  
  inflating: gcta-1.94.1-linux-kernel-3-x86_64/.DS_Store  
  inflating: __MACOSX/gcta-1.94.1-linux-kernel-3-x86_64/._.DS_Store  
  inflating: gcta-1.94.1-linux-kernel-3-x86_64/test.bim  
  inflating: __MACOSX/gcta-1.94.1-linux-kernel-3-x86_64/._test.bim  
  inflating: gcta-1.94.1-linux-kernel-3-x86_64/MIT_License.txt  
  inflating: __MACOSX/gcta-1.94.1-linux-kernel-3-x86_64/._MIT_License.txt  
  inflating: gcta-1.94.1-linux-kernel-3-x86_64/test.fam  
  inflating: __MACOSX/gcta-1.94.1-linux-kernel-3-x86_64/._test.fam  
  inflating: gcta-1.94.1-linux-kernel-3-x86_64/README.txt  
  inflating: __MACOSX/gcta-1.94.1-linux-kernel-3-x86_64/._README.txt  
  inflating: gcta-1.94.1-linux-kernel-3-x86_64/test.bed  
  inflat

In [17]:
! mv ~/workspace/ws_files/gcta-1.94.1-linux-kernel-3-x86_64 /home/jupyter/tools/

In [18]:
! ls /home/jupyter/tools/

LICENSE				   plink			    prettify
annovar				   plink2			    rvtests
annovar.latest.tar.gz		   plink2_linux_x86_64_latest.zip   toy.map
gcta-1.94.1-linux-kernel-3-x86_64  plink_linux_x86_64_20190304.zip  toy.ped


In [20]:
! /home/jupyter/tools/gcta-1.94.1-linux-kernel-3-x86_64/gcta64 --version

/bin/bash: line 1: /home/jupyter/tools/gcta-1.94.1-linux-kernel-3-x86_64/gcta64: Permission denied


In [21]:
! chmod u+x /home/jupyter/tools/gcta-1.94.1-linux-kernel-3-x86_64/gcta64

In [24]:
! /home/jupyter/tools/gcta-1.94.1-linux-kernel-3-x86_64/gcta64 --version

[0;32m[0m*******************************************************************
[0;32m[0m* Genome-wide Complex Trait Analysis (GCTA)
[0;32m[0m* version v1.94.1 Linux
[0;32m[0m* Built at Nov 15 2022 21:14:25, by GCC 8.5
[0;32m[0m* (C) 2010-present, Yang Lab, Westlake University
[0;32m[0m* Please report bugs to Jian Yang <jian.yang@westlake.edu.cn>
[0;32m[0m*******************************************************************
[0;32mAnalysis started [0mat 11:22:48 UTC on Wed Aug 07 2024.
[0;32m[0mHostname: jupyterlabvertexai20240708
[0;32m[0m
[0;31mError: [0mthe --out option is missing.
[0m[0mAn error occurs, please check the options or data


In [25]:
#Select multiple associated SNPs based on significance p-value 5e-08
#Can change the p-value for significance if needed
#bfile is referring to the full dataset in plink binary format, e.g. GP2 - whatever you used to run the GWAS
! /home/jupyter/tools/gcta-1.94.1-linux-kernel-3-x86_64/gcta64 --bfile {WORK_DIR}/TMEM175_EUR/EUR_TMEM175 \
--maf 0.01 \
--cojo-file {WORK_DIR}/TMEM175_EUR/EUR.all_adj.sumstats.ma \
--cojo-p 5e-8 \
--cojo-slct \
--out {WORK_DIR}/TMEM175_EUR/EUR.all_adj.COJO

[0;32m[0m*******************************************************************
[0;32m[0m* Genome-wide Complex Trait Analysis (GCTA)
[0;32m[0m* version v1.94.1 Linux
[0;32m[0m* Built at Nov 15 2022 21:14:25, by GCC 8.5
[0;32m[0m* (C) 2010-present, Yang Lab, Westlake University
[0;32m[0m* Please report bugs to Jian Yang <jian.yang@westlake.edu.cn>
[0;32m[0m*******************************************************************
[0;32mAnalysis started [0mat 11:25:36 UTC on Wed Aug 07 2024.
[0;32m[0mHostname: jupyterlabvertexai20240708
[0;32m[0m
Accepted options:
--bfile /home/jupyter/workspace/ws_files/TMEM175//TMEM175_EUR/EUR_TMEM175
--maf 0.01
--cojo-file /home/jupyter/workspace/ws_files/TMEM175//TMEM175_EUR/EUR.all_adj.sumstats.ma
--cojo-p 5e-08
--cojo-slct
--out /home/jupyter/workspace/ws_files/TMEM175//TMEM175_EUR/EUR.all_adj.COJO


Reading PLINK FAM file from [/home/jupyter/workspace/ws_files/TMEM175//TMEM175_EUR/EUR_TMEM175.fam].
38839 individuals to be included from [

In [28]:
! head {WORK_DIR}/TMEM175_EUR/EUR.all_adj.COJO.cma.cojo

Chr	SNP	bp	refA	freq	b	se	p	n	freq_geno	bC	bC_se	pC
4	chr4:882530:C:T	882530	T	0.164467	0.0338312	0.0303975	0.265725	20489.9	0.164537	-0.0279995	0.0303977	0.356994
4	chr4:882611:T:C	882611	C	0.0511851	-0.0704804	0.0501022	0.159507	21340.5	0.0504798	-0.0330724	0.0501033	0.5092
4	chr4:883018:G:A	883018	A	0.0505987	-0.0739168	0.0503694	0.142242	21346	0.0499974	-0.0364219	0.0503708	0.469634
4	chr4:883133:G:A	883133	A	0.0127337	-0.211942	0.0965569	0.0281642	22193.8	0.0132536	-0.170603	0.0965652	0.0772761
4	chr4:883483:G:A	883483	A	0.0353572	-0.124157	0.0589113	0.0350719	21976.2	0.0366916	-0.0786346	0.0589159	0.181977
4	chr4:883624:C:T	883624	T	0.181816	0.0947191	0.0295052	0.00132619	20080.8	0.179007	-0.0154726	0.029512	0.600084
4	chr4:883837:G:A	883837	A	0.400635	-0.031951	0.022935	0.163586	20597	0.404776	-0.028599	0.0229355	0.212423
4	chr4:883921:T:C	883921	C	0.448883	-0.0438296	0.02274	0.0539267	20335.1	0.452722	-0.0401386	0.0227415	0.0775649
4	chr4:884131:C:T	884131	T	0.0117174	0.0247414

In [29]:
! head {WORK_DIR}/TMEM175_EUR/EUR.all_adj.COJO.jma.cojo

Chr	SNP	bp	refA	freq	b	se	p	n	freq_geno	bJ	bJ_se	pJ	LD_r
4	chr4:958159:T:C	958159	C	0.203811	0.258912	0.0290424	4.87943e-19	18930.3	0.198229	0.258912	0.0291025	5.76097e-19	0


In [30]:
! head {WORK_DIR}/TMEM175_EUR/EUR.all_adj.COJO.ldr.cojo

SNP	chr4:958159:T:C	
chr4:958159:T:C	1	


In [31]:
! head {WORK_DIR}/TMEM175_EUR/EUR.all_adj.COJO.log

*******************************************************************
* Genome-wide Complex Trait Analysis (GCTA)
* version v1.94.1 Linux
* Built at Nov 15 2022 21:14:25, by GCC 8.5
* (C) 2010-present, Yang Lab, Westlake University
* Please report bugs to Jian Yang <jian.yang@westlake.edu.cn>
*******************************************************************
Analysis started at 11:25:36 UTC on Wed Aug 07 2024.
Hostname: jupyterlabvertexai20240708



In [5]:
#Select multiple associated SNPs based on significance p-value 5e-08
#Can change the p-value for significance if needed
#bfile is referring to the full dataset in plink binary format, e.g. GP2 - whatever you used to run the GWAS
! /home/jupyter/tools/gcta-1.94.1-linux-kernel-3-x86_64/gcta64 --bfile {WORK_DIR}/TMEM175_EUR/EUR_TMEM175 \
--maf 0.01 \
--cojo-file {WORK_DIR}/TMEM175_EUR/EUR.all_adj.sumstats.ma \
--cojo-p 4.59e-4 \
--cojo-slct \
--out {WORK_DIR}/TMEM175_EUR/EUR.all_adj.LDprune.COJO

[0;32m[0m*******************************************************************
[0;32m[0m* Genome-wide Complex Trait Analysis (GCTA)
[0;32m[0m* version v1.94.1 Linux
[0;32m[0m* Built at Nov 15 2022 21:14:25, by GCC 8.5
[0;32m[0m* (C) 2010-present, Yang Lab, Westlake University
[0;32m[0m* Please report bugs to Jian Yang <jian.yang@westlake.edu.cn>
[0;32m[0m*******************************************************************
[0;32mAnalysis started [0mat 12:48:26 UTC on Tue Aug 20 2024.
[0;32m[0mHostname: jupyterlabvertexai20240708
[0;32m[0m
Accepted options:
--bfile /home/jupyter/workspace/ws_files/TMEM175//TMEM175_EUR/EUR_TMEM175
--maf 0.01
--cojo-file /home/jupyter/workspace/ws_files/TMEM175//TMEM175_EUR/EUR.all_adj.sumstats.ma
--cojo-p 0.000459
--cojo-slct
--out /home/jupyter/workspace/ws_files/TMEM175//TMEM175_EUR/EUR.all_adj.LDprune.COJO


Reading PLINK FAM file from [/home/jupyter/workspace/ws_files/TMEM175//TMEM175_EUR/EUR_TMEM175.fam].
38839 individuals to be incl

In [32]:
#Or you can run COJO and select just the top 10 independent SNPs (can be below GWAS significance p-value)
#Select top 10 SNPs
! /home/jupyter/tools/gcta-1.94.1-linux-kernel-3-x86_64/gcta64 --bfile {WORK_DIR}/TMEM175_EUR/EUR_TMEM175 \
--maf 0.01 \
--cojo-file {WORK_DIR}/TMEM175_EUR/EUR.all_adj.sumstats.ma \
--cojo-top-SNPs 10 \
--out {WORK_DIR}/TMEM175_EUR/EUR.all_adj.top10

[0;32m[0m*******************************************************************
[0;32m[0m* Genome-wide Complex Trait Analysis (GCTA)
[0;32m[0m* version v1.94.1 Linux
[0;32m[0m* Built at Nov 15 2022 21:14:25, by GCC 8.5
[0;32m[0m* (C) 2010-present, Yang Lab, Westlake University
[0;32m[0m* Please report bugs to Jian Yang <jian.yang@westlake.edu.cn>
[0;32m[0m*******************************************************************
[0;32mAnalysis started [0mat 11:33:10 UTC on Wed Aug 07 2024.
[0;32m[0mHostname: jupyterlabvertexai20240708
[0;32m[0m
Accepted options:
--bfile /home/jupyter/workspace/ws_files/TMEM175//TMEM175_EUR/EUR_TMEM175
--maf 0.01
--cojo-file /home/jupyter/workspace/ws_files/TMEM175//TMEM175_EUR/EUR.all_adj.sumstats.ma
--cojo-top-SNPs 10
--out /home/jupyter/workspace/ws_files/TMEM175//TMEM175_EUR/EUR.all_adj.top10


Reading PLINK FAM file from [/home/jupyter/workspace/ws_files/TMEM175//TMEM175_EUR/EUR_TMEM175.fam].
38839 individuals to be included from [/home/j

In [33]:
! head {WORK_DIR}/TMEM175_EUR/EUR.all_adj.top10.cma.cojo

Chr	SNP	bp	refA	freq	b	se	p	n	freq_geno	bC	bC_se	pC
4	chr4:882530:C:T	882530	T	0.164467	0.0338312	0.0303975	0.265725	20489.9	0.164537	-0.277937	0.105335	0.00832474
4	chr4:882611:T:C	882611	C	0.0511851	-0.0704804	0.0501022	0.159507	21340.5	0.0504798	-0.122516	0.0814926	0.132737
4	chr4:883018:G:A	883018	A	0.0505987	-0.0739168	0.0503694	0.142242	21346	0.0499974	-0.114817	0.0418044	0.00602305
4	chr4:883483:G:A	883483	A	0.0353572	-0.124157	0.0589113	0.0350719	21976.2	0.0366916	0.177534	0.0999408	0.075669
4	chr4:883624:C:T	883624	T	0.181816	0.0947191	0.0295052	0.00132619	20080.8	0.179007	-0.0724453	0.0497149	0.145057
4	chr4:883837:G:A	883837	A	0.400635	-0.031951	0.022935	0.163586	20597	0.404776	0.282062	0.0325328	4.31799e-18
4	chr4:883921:T:C	883921	C	0.448883	-0.0438296	0.02274	0.0539267	20335.1	0.452722	0.110611	0.0509097	0.0298032
4	chr4:884131:C:T	884131	T	0.0117174	0.0247414	0.104187	0.812291	20698.7	0.0122093	-0.136355	0.0806028	0.0907057
4	chr4:884260:C:T	884260	T	0.335987	0.0328447	0

In [34]:
! head {WORK_DIR}/TMEM175_EUR/EUR.all_adj.top10.jma.cojo

Chr	SNP	bp	refA	freq	b	se	p	n	freq_geno	bJ	bJ_se	pJ	LD_r
4	chr4:883133:G:A	883133	A	0.0127337	-0.211942	0.0965569	0.0281642	22193.8	0.0132536	-0.277937	0.105335	0.00832474	-0.0174097
4	chr4:905058:T:C	905058	C	0.0220098	0.103396	0.0776335	0.182912	20053.8	0.0211291	-0.122516	0.0814926	0.132737	-0.0460043
4	chr4:931588:C:T	931588	T	0.098972	-0.14224	0.0364747	9.63155e-05	21915.3	0.0994334	-0.114817	0.0418044	0.00602305	0.372451
4	chr4:933370:G:A	933370	A	0.0153177	0.0255118	0.0925403	0.782792	20143.3	0.0149901	0.177534	0.0999408	0.075669	-0.0298242
4	chr4:939809:A:G	939809	G	0.0536428	-0.111124	0.048938	0.0231648	21395.3	0.0540681	-0.0724453	0.0497149	0.145057	-0.117727
4	chr4:958159:T:C	958159	C	0.203811	0.258912	0.0290424	4.87943e-19	18930.3	0.198229	0.282062	0.0325328	4.31799e-18	-0.121267
4	chr4:983473:C:T	983473	T	0.0623297	0.02634	0.0460307	0.567168	21010.5	0.063255	0.110611	0.0509097	0.0298032	-0.0387902
4	chr4:988557:C:T	988557	T	0.022547	0.0935541	0.076603	0.221978	20117.6	0.02

use p threshould 4.59e-4

In [12]:
ancestries = {'AAC','AFR','AJ','AMR','CAS','EAS','EUR','FIN','MDE','SAS','CAH'}

for ancestry in ancestries:
    
    WORK_DIR = f'~/workspace/ws_files/TMEM175/TMEM175_{ancestry}'
    
    #Select multiple associated SNPs based on LD pruning p-value 0.00238
    #Can change the p-value for significance if needed
    #bfile is referring to the full dataset in plink binary format, e.g. GP2 - whatever you used to run the GWAS
    ! /home/jupyter/tools/gcta-1.94.1-linux-kernel-3-x86_64/gcta64 \
    --bfile {WORK_DIR}/{ancestry}_TMEM175 \
    --maf 0.01 \
    --cojo-file {WORK_DIR}/{ancestry}.all_adj.sumstats.ma \
    --cojo-p 4.59e-4 \
    --cojo-slct \
    --out {WORK_DIR}/{ancestry}.all_adj.ldprune.COJO

[0;32m[0m*******************************************************************
[0;32m[0m* Genome-wide Complex Trait Analysis (GCTA)
[0;32m[0m* version v1.94.1 Linux
[0;32m[0m* Built at Nov 15 2022 21:14:25, by GCC 8.5
[0;32m[0m* (C) 2010-present, Yang Lab, Westlake University
[0;32m[0m* Please report bugs to Jian Yang <jian.yang@westlake.edu.cn>
[0;32m[0m*******************************************************************
[0;32mAnalysis started [0mat 13:32:01 UTC on Tue Aug 20 2024.
[0;32m[0mHostname: jupyterlabvertexai20240708
[0;32m[0m
Accepted options:
--bfile /home/jupyter/workspace/ws_files/TMEM175/TMEM175_AMR/AMR_TMEM175
--maf 0.01
--cojo-file /home/jupyter/workspace/ws_files/TMEM175/TMEM175_AMR/AMR.all_adj.sumstats.ma
--cojo-p 0.000459
--cojo-slct
--out /home/jupyter/workspace/ws_files/TMEM175/TMEM175_AMR/AMR.all_adj.ldprune.COJO


Reading PLINK FAM file from [/home/jupyter/workspace/ws_files/TMEM175/TMEM175_AMR/AMR_TMEM175.fam].
646 individuals to be included f

In [9]:
WORK_DIR = "~/workspace/ws_files/TMEM175/"

In [10]:
# prepare file for COJO
ancestries = {'AAC','AFR','AJ','AMR','CAS','EAS','EUR','FIN','MDE','SAS','CAH'}

for ancestry in ancestries:
    #Read in summary statistics
    sumstats = pd.read_csv(f'{WORK_DIR}/TMEM175_{ancestry}/{ancestry}.all_adj.csv')
    
    #Format summary statistics for GCTA-COJO
    #First get the log odds ratio - this is required for COJO
    #1) For a case-control study, the effect size should be log(odds ratio) with its corresponding standard error.
    sumstats_formatted = sumstats.copy()
    sumstats_formatted['b'] = np.log(sumstats_formatted['OR'])
    
    #Now select just the necessary columns for COJO
    sumstats_export = sumstats_formatted[['ID', 'A1', 'OMITTED', 'A1_FREQ', 'b', 'LOG(OR)_SE', 'P', 'OBS_CT']].copy()
    
    #Rename columns following COJO format
    sumstats_export = sumstats_export.rename(columns = {'ID':'SNP', 'OMITTED':'A2', 'A1_FREQ':'freq', 'LOG(OR)_SE':'se', 'P':'p', 'OBS_CT':'N'})
    
    #Export
    sumstats_export.to_csv(f'{WORK_DIR}/TMEM175_{ancestry}/{ancestry}.all_adj.sumstats.ma', sep = '\t', index=False)

In [11]:
ancestries = {'AAC','AFR','AJ','AMR','CAS','EAS','EUR','FIN','MDE','SAS','CAH'}

for ancestry in ancestries:
    
    WORK_DIR = f'~/workspace/ws_files/TMEM175/TMEM175_{ancestry}'
    
    #Select multiple associated SNPs based on significance p-value 5e-08
    #Can change the p-value for significance if needed
    #bfile is referring to the full dataset in plink binary format, e.g. GP2 - whatever you used to run the GWAS
    ! /home/jupyter/tools/gcta-1.94.1-linux-kernel-3-x86_64/gcta64 \
    --bfile {WORK_DIR}/{ancestry}_TMEM175 \
    --maf 0.01 \
    --cojo-file {WORK_DIR}/{ancestry}.all_adj.sumstats.ma \
    --cojo-top-SNPs 10 \
    --out {WORK_DIR}/{ancestry}.all_adj.top10

[0;32m[0m*******************************************************************
[0;32m[0m* Genome-wide Complex Trait Analysis (GCTA)
[0;32m[0m* version v1.94.1 Linux
[0;32m[0m* Built at Nov 15 2022 21:14:25, by GCC 8.5
[0;32m[0m* (C) 2010-present, Yang Lab, Westlake University
[0;32m[0m* Please report bugs to Jian Yang <jian.yang@westlake.edu.cn>
[0;32m[0m*******************************************************************
[0;32mAnalysis started [0mat 13:04:39 UTC on Tue Aug 20 2024.
[0;32m[0mHostname: jupyterlabvertexai20240708
[0;32m[0m
Accepted options:
--bfile /home/jupyter/workspace/ws_files/TMEM175/TMEM175_AMR/AMR_TMEM175
--maf 0.01
--cojo-file /home/jupyter/workspace/ws_files/TMEM175/TMEM175_AMR/AMR.all_adj.sumstats.ma
--cojo-top-SNPs 10
--out /home/jupyter/workspace/ws_files/TMEM175/TMEM175_AMR/AMR.all_adj.top10


Reading PLINK FAM file from [/home/jupyter/workspace/ws_files/TMEM175/TMEM175_AMR/AMR_TMEM175.fam].
646 individuals to be included from [/home/jupyter

change format of association file

In [13]:
ancestries = {'AAC','AFR','AJ','AMR','CAS','EAS','EUR','FIN','MDE','SAS','CAH'}

for ancestry in ancestries:
    WORK_DIR = "~/workspace/ws_files/TMEM175/"
    print(f'WORKING ON: {ancestry}')
    
    df = pd.read_csv(f'{WORK_DIR}/TMEM175_{ancestry}/{ancestry}.all_adj.csv')
    
    #First just select relevant columns: chromosome, bp position and p-value
    export_ldassoc = df[['#CHROM', 'POS', 'P']].copy()
    
    #Rename the #CHROM column to remove the hashtag as I think this might be confusing LDassoc
    export_ldassoc = export_ldassoc.rename(columns={'#CHROM': 'CHROM'}) 
    
    #Then export as a tab-separated, not comma-separated file

    export_ldassoc.to_csv(f'{WORK_DIR}/TMEM175_{ancestry}/{ancestry}.all_adj.formatted.tab', sep = '\t', index=False)

WORKING ON: AMR
WORKING ON: CAS
WORKING ON: MDE
WORKING ON: AJ
WORKING ON: AFR
WORKING ON: EAS
WORKING ON: FIN
WORKING ON: AAC
WORKING ON: CAH
WORKING ON: SAS
WORKING ON: EUR


validate GLM output file in EAS ancestry again

In [4]:
WORK_DIR = f'~/workspace/ws_files/TMEM175/TMEM175_EAS'

! /home/jupyter/tools/plink2 \
--pfile {REL7_PATH}/imputed_genotypes/EAS/chr4_EAS_release7_vwb \
--chr 4 \
--from-bp 882387 \
--to-bp 1008656 \
--make-bed \
--out {WORK_DIR}/EAS_TMEM175

PLINK v2.00a6LM 64-bit Intel (4 Jul 2024)      www.cog-genomics.org/plink/2.0/
(C) 2005-2024 Shaun Purcell, Christopher Chang   GNU General Public License v3
Logging to /home/jupyter/workspace/ws_files/TMEM175/TMEM175_EAS/EAS_TMEM175.log.
Options in effect:
  --chr 4
  --from-bp 882387
  --make-bed
  --out /home/jupyter/workspace/ws_files/TMEM175/TMEM175_EAS/EAS_TMEM175
  --pfile /home/jupyter/workspace/gp2_tier2_eu_release7_30042024/imputed_genotypes/EAS/chr4_EAS_release7_vwb
  --to-bp 1008656

Start time: Thu Aug 29 12:41:04 2024
52223 MiB RAM detected, ~50548 available; reserving 26111 MiB for main
workspace.
Using up to 8 compute threads.
5167 samples (1889 females, 3278 males; 5167 founders) loaded from
/home/jupyter/workspace/gp2_tier2_eu_release7_30042024/imputed_genotypes/EAS/chr4_EAS_release7_vwb.psam.
3235052 variants loaded from
/home/jupyter/workspace/gp2_tier2_eu_release7_30042024/imputed_genotypes/EAS/chr4_EAS_release7_vwb.pvar.
1 binary phenotype loaded (2662 cases, 2461

In [5]:
# ASSOC analysis

WORK_DIR = f'~/workspace/ws_files/TMEM175/TMEM175_EAS'
    
! /home/jupyter/tools/plink \
--bfile {WORK_DIR}/EAS_TMEM175 \
--keep {WORK_DIR}/EAS.samplestoKeep \
--assoc \
--allow-no-sex \
--ci 0.95 \
--maf 0.01 \
--out {WORK_DIR}/EAS_TMEM175.all

PLINK v1.90b6.9 64-bit (4 Mar 2019)            www.cog-genomics.org/plink/1.9/
(C) 2005-2019 Shaun Purcell, Christopher Chang   GNU General Public License v3
Logging to /home/jupyter/workspace/ws_files/TMEM175/TMEM175_EAS/EAS_TMEM175.all.log.
Options in effect:
  --allow-no-sex
  --assoc
  --bfile /home/jupyter/workspace/ws_files/TMEM175/TMEM175_EAS/EAS_TMEM175
  --ci 0.95
  --keep /home/jupyter/workspace/ws_files/TMEM175/TMEM175_EAS/EAS.samplestoKeep
  --maf 0.01
  --out /home/jupyter/workspace/ws_files/TMEM175/TMEM175_EAS/EAS_TMEM175.all

52223 MB RAM detected; reserving 26111 MB for main workspace.
3171 variants loaded from .bim file.
5167 people (3278 males, 1889 females) loaded from .fam.
5123 phenotype values loaded from .fam.
--keep: 5167 people remaining.
Using 1 thread (no multithreaded calculations invoked).
Before main variant filters, 5167 founders and 0 nonfounders present.
Calculating allele frequencies... 101112131415161718192021222324252627282930313233343536373839404142

In [6]:
#Look at assoc results
WORK_DIR = f'~/workspace/ws_files/TMEM175/TMEM175_EAS'
freq = pd.read_csv(f'{WORK_DIR}/EAS_TMEM175.all.assoc', sep='\s+')
freq

Unnamed: 0,CHR,SNP,BP,A1,F_A,F_U,A2,CHISQ,P,OR,SE,L95,U95
0,4,chr4:882530:C:T,882530,T,0.29880,0.28300,C,3.0680,0.07984,1.0800,0.04382,0.9909,1.1770
1,4,chr4:882611:T:C,882611,C,0.06649,0.07835,T,5.3640,0.02056,0.8379,0.07645,0.7213,0.9733
2,4,chr4:883018:G:A,883018,A,0.06649,0.07814,G,5.1880,0.02275,0.8403,0.07649,0.7233,0.9762
3,4,chr4:883483:G:A,883483,A,0.07547,0.06904,G,1.5690,0.21040,1.1010,0.07663,0.9472,1.2790
4,4,chr4:883624:C:T,883624,T,0.40600,0.41950,C,1.8970,0.16840,0.9456,0.04061,0.8733,1.0240
...,...,...,...,...,...,...,...,...,...,...,...,...,...
372,4,chr4:1007954:T:C,1007954,T,0.42810,0.42160,C,0.4384,0.50790,1.0270,0.04030,0.9490,1.1110
373,4,chr4:1008009:C:T,1008009,T,0.37580,0.37980,C,0.1604,0.68880,0.9831,0.04266,0.9042,1.0690
374,4,chr4:1008064:G:C,1008064,C,0.02647,0.02219,G,1.9000,0.16810,1.1980,0.13130,0.9263,1.5500
375,4,chr4:1008209:C:T,1008209,T,0.10880,0.11340,C,0.5450,0.46040,0.9539,0.06397,0.8415,1.0810


In [7]:
#Save FREQ to csv
freq.to_csv(f'{WORK_DIR}/EAS.all_nonadj.csv')

In [8]:
# GLM analysis

WORK_DIR = f'~/workspace/ws_files/TMEM175/TMEM175_EAS'

! /home/jupyter/tools/plink2 \
--bfile {WORK_DIR}/EAS_TMEM175 \
--keep {WORK_DIR}/EAS.samplestoKeep \
--allow-no-sex \
--maf 0.01 \
--ci 0.95 \
--glm \
--covar {WORK_DIR}/EAS_covariate_file.txt \
--covar-name SEX,AGE,PC1,PC2,PC3,PC4,PC5 \
--covar-variance-standardize \
--neg9-pheno-really-missing \
--out {WORK_DIR}/EAS_TMEM175.all_adj

PLINK v2.00a6LM 64-bit Intel (4 Jul 2024)      www.cog-genomics.org/plink/2.0/
(C) 2005-2024 Shaun Purcell, Christopher Chang   GNU General Public License v3
Logging to /home/jupyter/workspace/ws_files/TMEM175/TMEM175_EAS/EAS_TMEM175.all_adj.log.
Options in effect:
  --allow-no-sex
  --bfile /home/jupyter/workspace/ws_files/TMEM175/TMEM175_EAS/EAS_TMEM175
  --ci 0.95
  --covar /home/jupyter/workspace/ws_files/TMEM175/TMEM175_EAS/EAS_covariate_file.txt
  --covar-name SEX,AGE,PC1,PC2,PC3,PC4,PC5
  --covar-variance-standardize
  --glm
  --keep /home/jupyter/workspace/ws_files/TMEM175/TMEM175_EAS/EAS.samplestoKeep
  --maf 0.01
  --neg9-pheno-really-missing
  --out /home/jupyter/workspace/ws_files/TMEM175/TMEM175_EAS/EAS_TMEM175.all_adj

Start time: Thu Aug 29 12:42:49 2024
Note: --allow-no-sex no longer has any effect.  (Missing-sex samples are
automatically excluded from association analysis when sex is a covariate, and
treated normally otherwise.)
52223 MiB RAM detected, ~50486 available

In [9]:
# read output file of glm

## Read in plink glm results
WORK_DIR = f'~/workspace/ws_files/TMEM175/TMEM175_EAS'
assoc = pd.read_csv(f'{WORK_DIR}/EAS_TMEM175.all_adj.PHENO1.glm.logistic.hybrid', delim_whitespace=True)
assoc



Unnamed: 0,#CHROM,POS,ID,REF,ALT,PROVISIONAL_REF?,A1,OMITTED,A1_FREQ,FIRTH?,TEST,OBS_CT,OR,LOG(OR)_SE,L95,U95,Z_STAT,P,ERRCODE
0,4,882530,chr4:882530:C:T,C,T,Y,T,C,0.278626,N,ADD,2489,1.079170,0.071422,0.938201,1.241320,1.066800,2.860620e-01,.
1,4,882530,chr4:882530:C:T,C,T,Y,T,C,0.278626,N,SEX,2489,0.804336,0.044091,0.737746,0.876936,-4.938340,7.878890e-07,.
2,4,882530,chr4:882530:C:T,C,T,Y,T,C,0.278626,N,AGE,2489,1.510690,0.047695,1.375870,1.658720,8.650160,5.142550e-18,.
3,4,882530,chr4:882530:C:T,C,T,Y,T,C,0.278626,N,PC1,2489,0.978250,0.044623,0.896327,1.067660,-0.492802,6.221530e-01,.
4,4,882530,chr4:882530:C:T,C,T,Y,T,C,0.278626,N,PC2,2489,0.997227,0.116869,0.793077,1.253930,-0.023759,9.810450e-01,.
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3011,4,1008337,chr4:1008337:C:T,C,T,Y,C,T,0.419561,N,PC1,2505,0.974019,0.044508,0.892651,1.062800,-0.591460,5.542120e-01,.
3012,4,1008337,chr4:1008337:C:T,C,T,Y,C,T,0.419561,N,PC2,2505,0.980706,0.117453,0.779045,1.234570,-0.165874,8.682560e-01,.
3013,4,1008337,chr4:1008337:C:T,C,T,Y,C,T,0.419561,N,PC3,2505,0.679168,0.102293,0.555782,0.829945,-3.782130,1.554910e-04,.
3014,4,1008337,chr4:1008337:C:T,C,T,Y,C,T,0.419561,N,PC4,2505,0.562607,0.066139,0.494206,0.640475,-8.696510,3.422460e-18,.


In [10]:
##Filter for additive test only - this is the variant results
assoc_add = assoc[assoc['TEST']=="ADD"]

In [11]:
##Save assoc_add to csv
assoc_add.to_csv(f'{WORK_DIR}/EAS.all_adj.csv')