# PRS Prep for UKB

In [None]:
# Imports here.
import numpy as np
import pandas as pd
import os

import warnings
# Install a blanket filter: every warning category is ignored from here on.
# NOTE(review): this also hides deprecation warnings from pandas -- confirm that is intended.
warnings.filterwarnings("ignore")

In [None]:
# Work from the Biowulf directory that contains the UKB genetic data.
# Relative PLINK output prefixes used later (e.g. --out activity_chr{i}) are
# resolved against this directory.
# Prerequisite: the `plink` executable must be callable from the shell.
os.chdir('/path/to/UKB/data/bfile')

## Activity scores -- overall activity and sedentary traits included
Table from:
Doherty, A., Smith-Byrne, K., Ferreira, T., Holmes, M.V., Holmes, C., Pulit, S.L., and Lindgren, C.M. (2018). GWAS identifies 14 loci for device-measured physical activity and sleep duration. Nat. Commun. 9, 5257

In [None]:
# Read the study's results table and drop the 'Sleep duration' rows, which is
# meant to leave only the "overall activity" and "sedentary" traits.
df = pd.read_csv('/path/to/activity.csv').loc[lambda table: table['Trait'] != 'Sleep duration']
df.head()

In [None]:
# Load the activity score file for a quick visual check. The file is expected
# to exist already and to hold the columns "SNP", 'Allele (effect)', 'Beta';
# this cell only reads it, it does not create it.
activity_file = pd.read_csv('/path/to/activity_file.txt', delimiter=' ')
activity_file.head()

In [None]:
# Distinct values of the 'Ch' column, i.e. the chromosomes present in the
# filtered activity table (a set, so the order is arbitrary).
chrom_list = set(df['Ch'])
chrom_list

In [None]:
# Print the contents of activity_snp_list.txt.
# The SNP list has to be created and saved under that name beforehand;
# this cell only displays the file.
!cat /path/to/activity_snp_list.txt

In [None]:
#Extract SNPs for activity
activity_chroms = ['3', '5', '7', '10', '17', '18']
for i in activity_chroms:
    !plink --bfile /path/to/UKBIOBANK/FILTER_IMPUTED_DATA/bfile/chr{i}.UKBB.EU.filtered --extract /data/levineks/Virus/activity_snp_list.txt --make-bed --out activity_chr{i}

In [None]:
# Print one bfile prefix per activity chromosome, one per line.
# Save the printed lines as activity_merge.txt (the list of files to merge).
activity_chroms = '3 5 7 10 17 18'.split()
print(*(f'/path/to/UKBIOBANK/FILTER_IMPUTED_DATA/bfile/activity_chr{i}' for i in activity_chroms), sep='\n')

In [None]:
#Merge the individual chromosome files
!plink --merge-list /data/levineks/Virus/activity_merge.txt --make-bed --out activity_merged

In [None]:
# Run PLINK --score on the merged activity fileset.
# `header` makes PLINK ignore the first line of the score file. No column
# numbers are passed, so PLINK uses its default layout (per the PLINK docs:
# variant ID, allele, score in columns 1-3).
# Output goes under the --out prefix; the next cell reads the resulting
# activity_prs_test.profile file.
!plink --bfile /path/to/UKBIOBANK/FILTER_IMPUTED_DATA/bfile/activity_merged --score /path/to/activity_file.txt header --out /path/to/activity_prs_test

In [None]:
# Load the .profile file written by the PLINK --score step and preview it.
# The file is whitespace-delimited, hence the regex separator. It is written
# as a raw string so that \s reaches the regex engine verbatim: in a plain
# literal '\s' is an invalid escape sequence that newer Python versions warn about.
activity = pd.read_csv('/path/to/activity_prs_test.profile', sep=r'\s+')
activity.head()

## Diet scores -- protein included
Table from: Meddens, S.F.W., de Vlaming, R., Bowers, P., Burik, C.A.P., Linnér, R.K., Lee, C., Okbay, A., Turley, P., Rietveld, C.A., Fontana, M.A., et al. (2021). Genomic analysis of diet composition finds novel loci and associations with health and lifestyle. Mol. Psychiatry 26, 2056–2069

In [None]:
# Read the study's results table and keep only the rows whose
# 'Top hit in locus for' value is 'Protein'.
df = pd.read_csv('/path/to/diet.csv').loc[lambda table: table['Top hit in locus for'] == 'Protein']
df.head()

In [None]:
# Load the diet score file for inspection. It is expected to exist already
# and to hold the columns 'SNPID', 'Effect allele', 'Beta'.
# Reminder: the effect alleles in that file must be UPPER CASE.
diet_file = pd.read_csv('/path/to/diet_file.txt', delimiter=' ')
diet_file

In [None]:
# Distinct values of the 'CHR' column, i.e. the chromosomes present in the
# filtered diet table (a set, so the order is arbitrary).
chrom_list = set(df['CHR'])
chrom_list

In [None]:
# Print the contents of diet_snp_list.txt.
# The SNP list has to be created and saved under that name beforehand;
# this cell only displays the file.
# NOTE(review): the extract cell reads /path/to/diet_snp_list.txt (no Virus/
# component) -- confirm both paths refer to the same file.
!cat /path/to/Virus/diet_snp_list.txt

In [None]:
#Extract SNPs for diet
diet_chroms = ['2', '3', '4', '8', '16', '19']
for i in diet_chroms:
    !plink --bfile /path/to/UKBIOBANK/FILTER_IMPUTED_DATA/bfile/chr{i}.UKBB.EU.filtered --extract /path/to/diet_snp_list.txt --make-bed --out diet_chr{i}

In [None]:
# chr19 is omitted here: its extract step failed with
# "Error: No variants remaining after --extract".
# Print one bfile prefix per remaining diet chromosome, one per line.
# Save the printed lines as diet_merge.txt (the list of files to merge).
diet_chroms = '2 3 4 8 16'.split()
print(*(f'/path/to/UKBIOBANK/FILTER_IMPUTED_DATA/bfile/diet_chr{i}' for i in diet_chroms), sep='\n')

In [None]:
# Merge the individual per-chromosome diet filesets listed in diet_merge.txt
# into one bfile, written under the prefix diet_merged relative to the
# current working directory.
!plink --merge-list /path/to/diet_merge.txt --make-bed --out diet_merged

In [None]:
# Run PLINK --score on the merged diet fileset.
# `header` makes PLINK ignore the first line of the score file. No column
# numbers are passed, so PLINK uses its default layout (per the PLINK docs:
# variant ID, allele, score in columns 1-3).
# Output goes under the --out prefix; the next cell reads the resulting
# diet_prs_test.profile file.
!plink --bfile /path/to/UKBIOBANK/FILTER_IMPUTED_DATA/bfile/diet_merged --score /path/to/diet_file.txt header --out /path/to/diet_prs_test

In [None]:
# Load the .profile file written by the PLINK --score step and preview it.
# The file is whitespace-delimited, hence the regex separator. It is written
# as a raw string so that \s reaches the regex engine verbatim: in a plain
# literal '\s' is an invalid escape sequence that newer Python versions warn about.
diet = pd.read_csv('/path/to/diet_prs_test.profile', sep=r'\s+')
diet.head()