# LRRK2 p.A419V - Haplotype analysis 

- Project: Multiancestry LRRK2 p.A419V analysis
- Version: Python/3.10.12
- Created: 05-MAY-2025
- Last Update: 12-JUNE-2025

# Description

**1. Haplotype block size comparison**

**2. Haplotype association study**

**3. R2 calculation**

# Getting started

## Load python libraries

In [None]:
# Import necessary packages
import os
import pandas as pd
import numpy as np
from io import StringIO
from firecloud import api as fapi
from IPython.core.display import display, HTML
import urllib.parse
from google.cloud import bigquery
import sys as sys

# Define function
# Utility routine for printing a shell command before executing it
def shell_do(command: str) -> None:
    """Echo a shell command to stderr, then run it.

    The command is run through IPython's ``!`` shell escape, so this helper
    only works inside an IPython/Jupyter session. Nothing is returned.

    Args:
        command: The full shell command line to execute.
    """
    print(f'Executing: {command}', file=sys.stderr)
    # `$command` is expanded by IPython to the value of the Python variable
    !$command
    
def shell_return(command: str) -> str:
    """Echo a shell command to stderr, run it, and return what it printed.

    The command is run through IPython's ``!`` shell escape, so this helper
    only works inside an IPython/Jupyter session.

    Args:
        command: The full shell command line to execute.

    Returns:
        The captured output lines joined with newline characters.
    """
    print(f'Executing: {command}', file=sys.stderr)
    # Assigning from `!` captures the command output as a list of lines
    output = !$command
    return '\n'.join(output)

## Install R and its packages

In [None]:
%%bash
mkdir -p /home/jupyter/A419V_release9/R_packages

In [None]:
# Install and import rpy2
!pip install rpy2
import rpy2.rinterface

In [None]:
%load_ext rpy2.ipython

In [None]:
%%R

BILLING_PROJECT_ID  <- Sys.getenv('GOOGLE_PROJECT')
WORKSPACE_NAMESPACE <- Sys.getenv('WORKSPACE_NAMESPACE')
WORKSPACE_NAME      <- Sys.getenv('WORKSPACE_NAME')

In [None]:
%%R
# Private library directory that this notebook installs its R packages into
pack <- "/home/jupyter/A419V_release9/R_packages"

# Install the required packages one at a time into that library
for (pkg_name in c("dplyr", "data.table", "arsenal", "haplo.stats")) {
    install.packages(pkg_name, lib = pack)
}

In [None]:
%%R
# Private library directory the packages are loaded from
pack <- "/home/jupyter/A419V_release9/R_packages"

# Attach each package quietly; character.only = TRUE lets library() take the name from a variable
for (pkg_name in c("dplyr", "data.table", "arsenal", "haplo.stats")) {
    suppressPackageStartupMessages(
        library(pkg_name, lib.loc = pack, character.only = TRUE)
    )
}

# File preparation

In [None]:
%%bash
WORK_DIR=/home/jupyter/A419V_release9
# Stop here if the working directory is missing, so plink never runs
# (and writes output) relative to some other directory.
cd "$WORK_DIR" || exit 1

ancestry_labels=('AAC' 'AFR' 'AJ' 'AMR' 'CAH' 'CAS' 'EAS' 'EUR' 'FIN' 'MDE' 'SAS')

for label in "${ancestry_labels[@]}"
do

    # Extracting LRRK2: keep chr12:40196744-40369285 from each ancestry's
    # fileset, write it as a new binary fileset, and also produce the
    # missingness reports requested by --missing.
    /home/jupyter/plink1.9 \
    --bfile "${label}/${label}_release9_remove_related_updated" \
    --chr 12 \
    --from-bp 40196744 \
    --to-bp 40369285 \
    --missing \
    --make-bed \
    --out "${label}/${label}_release9_remove_related_lrrk2"

done

In [None]:
ancestry_labels=['AAC', 'AFR', 'AJ', 'AMR', 'CAH', 'CAS', 'EAS', 'EUR', 'FIN', 'MDE', 'SAS']

# Names given to the six .bim columns, in file order
bim_columns = ["CHR", "RSID", "POS", "BP", "A1", "A2"]

for anc in ancestry_labels:

    bim_path = f"/home/jupyter/A419V_release9/{anc}/{anc}_release9_remove_related_lrrk2.bim"

    bim = pd.read_csv(bim_path, sep = "\t", names = bim_columns)

    # Chromosome and base-pair position must be strings before concatenation
    bim = bim.astype({"CHR": str, "BP": str})

    # Replace every variant ID with CHR_BP_A2_A1
    bim["RSID"] = bim["CHR"] + "_" + bim["BP"] + "_" + bim["A2"] + "_" + bim["A1"]

    # Overwrite the original .bim in place (no header, no index column)
    bim.to_csv(bim_path, sep = "\t", index = False, header = False)

In [None]:
%%bash
WORK_DIR=/home/jupyter/A419V_release9
# Stop here if the working directory is missing, so plink never runs
# (and writes output) relative to some other directory.
cd "$WORK_DIR" || exit 1

# Remove duplicate
ancestry_labels=('AAC' 'AFR' 'AJ' 'AMR' 'CAH' 'CAS' 'EAS' 'EUR' 'FIN' 'MDE' 'SAS')

for label in "${ancestry_labels[@]}"
do

    # Variants sharing an ID are collapsed to the first occurrence
    # (--rm-dup force-first); the result is written as a *_nodup fileset.
    /home/jupyter/plink2 \
    --bfile "${label}/${label}_release9_remove_related_lrrk2" \
    --rm-dup force-first \
    --make-bed \
    --out "${label}/${label}_release9_remove_related_lrrk2_nodup"

done

# Haplotype block size comparison

In [None]:
%%bash
WORK_DIR=/home/jupyter/A419V_release9
# Stop here if the working directory is missing, so plink never runs
# (and writes output) relative to some other directory.
cd "$WORK_DIR" || exit 1

ancestry_labels=('AJ' 'CAH' 'CAS' 'EAS' 'EUR')

for label in "${ancestry_labels[@]}"
do

    # Estimate haplotype blocks. The --out prefix already ends in ".blocks",
    # so the outputs are named *.blocks.blocks and *.blocks.blocks.det,
    # which is what the later cells read.
    /home/jupyter/plink1.9 \
    --bfile "${label}/${label}_release9_remove_related_lrrk2_nodup" \
    --blocks \
    --blocks-min-maf 0.0001 \
    --out "${label}/${label}_release9_remove_related_lrrk2.blocks"

done

In [None]:
%%R

# Create a table containing the location of A419V
a419v_loci <- data.table(
    SNP = "A419V" ,
    CHR = 12 ,   
    BP  = 40252984)

In [None]:
%%R

# Collect, per ancestry, the haplotype block(s) that span p.A419V.
#
# The per-ancestry tables are gathered in a list and bound once at the end.
# Seeding an empty data.table with `ALLhap$col <- NA` and rbind-ing onto it
# can leave a placeholder all-NA row in the final table, so no seed is used.

# Extract info from the loci table into variables
thisSnp <- a419v_loci$SNP[1]
thisChr <- a419v_loci$CHR[1]
thisBp  <- a419v_loci$BP[1]

ancestry_labels <- c('AJ', 'CAH', 'CAS', 'EAS', 'EUR')

block_hits <- list()

for (anc in ancestry_labels){

    # Block table written by `plink --blocks` for this ancestry
    hap     <- fread(paste0("/home/jupyter/A419V_release9/", anc, "/", anc, "_release9_remove_related_lrrk2.blocks.blocks.det"), header = TRUE)

    # Keep only blocks whose [BP1, BP2] interval contains the variant position
    hap_sub <- subset(hap, CHR == thisChr & BP1 <= thisBp & BP2 >= thisBp)

    # Ancestries with no spanning block contribute nothing
    if (nrow(hap_sub) > 0) {

        block_hits[[anc]] <- data.table(
            ancestry = anc,
            KB       = hap_sub$KB,
            NSNPS    = hap_sub$NSNPS,
            SNPS     = hap_sub$SNPS
        )

    }
}

# One row per spanning block; an empty table if no ancestry has one
ALLhap <- rbindlist(block_hits)

ALLhap

# Haplotype Association study

## Haplotype frequency comparison

In [None]:
%%bash
WORK_DIR=/home/jupyter/A419V_release9
# Stop here if the working directory is missing, so plink never runs
# (and writes output) relative to some other directory.
cd "$WORK_DIR" || exit 1

ancestry_labels=('AJ' 'CAH' 'CAS' 'EAS' 'EUR')

for label in "${ancestry_labels[@]}"
do

    # Convert the de-duplicated binary fileset to text .ped/.map files,
    # which the R cells below read to build the genotype matrix.
    /home/jupyter/plink1.9 \
    --bfile "${label}/${label}_release9_remove_related_lrrk2_nodup" \
    --recode \
    --out "${label}/${label}_release9_remove_related_lrrk2_nodup"

done

In [None]:
%%R

# Build a labelled genotype matrix per ancestry from the PLINK .ped/.map pair.
#
# A .ped file has 6 leading columns (FID, IID, PAT, MAT, SEX, PHENO) followed
# by two allele columns per variant, in the row order of the .map file.
# The allele columns are therefore named <SNP>_1, <SNP>_2 by interleaving the
# .map IDs directly. Stacking all "_1" and "_2" labels and sorting by position
# would order them a_1, b_1, a_2, b_2 whenever two variants share a position,
# which mislabels the PED columns.

ancestry_labels <- c('AJ', 'CAH', 'CAS', 'EAS', 'EUR')

for (anc in ancestry_labels){

    anc_prefix <- paste0("/home/jupyter/A419V_release9/", anc, "/", anc)

    # Read PED file
    PED <- fread(paste0(anc_prefix, "_release9_remove_related_lrrk2_nodup.ped"))

    # Read the matching MAP file once: one row per variant, in PED column order
    LRRK2_map <- read.table(paste0(anc_prefix, "_release9_remove_related_lrrk2_nodup.map"), header = FALSE)
    colnames(LRRK2_map) <- c("CHR", "SNP", "CM", "POS")

    # Two labels per variant, interleaved: SNP1_1, SNP1_2, SNP2_1, SNP2_2, ...
    LRRK2_alleles <- paste0(rep(LRRK2_map$SNP, each = 2), c("_1", "_2"))
    write.table(LRRK2_alleles, file = paste0(anc_prefix, "_LRRK2_alleles.txt"), quote = F, sep = "\t", row.names = F, col.names = F)

    # Fail loudly if the PED width does not match the MAP, instead of mislabelling
    stopifnot(ncol(PED) == 6 + length(LRRK2_alleles))

    # Add allele names to the PED file
    colnames(PED) <- c("FID", "IID", "PAT","MAT", "SEX", "PHENO", LRRK2_alleles)
    write.table(as.data.frame(PED), file = paste0(anc_prefix, "_geno_matrix_LRRK2.txt"), quote = F, row.names = FALSE, sep = '\t')

}

### EAS

In [None]:
%%bash
WORK_DIR="/home/jupyter/A419V_release9/"
cd $WORK_DIR

label="EAS"

grep 40252984 ${label}/${label}_release9_remove_related_lrrk2.blocks.blocks | sed 's/ /|/g' | sed -r 's/^.{2}//'

In [None]:
%%R

anc <- "EAS"

# Labelled genotype matrix; check.names = FALSE keeps the allele column names as written
Geno_matrix_LRRK2 <- read.delim(paste0("/home/jupyter/A419V_release9/", anc, "/", anc, "_geno_matrix_LRRK2.txt"), check.names = FALSE)

#Colnames:
LRRK2_alleles <- read.table(paste0("/home/jupyter/A419V_release9/", anc, "/", anc, "_LRRK2_alleles.txt"), quote="\"", comment.char="")

# Variants making up the haplotype of interest
# (presumably the block reported by the preceding grep on the .blocks file - confirm)
H1_snps <- c("12_40238792_G_A", "12_40239262_G_A", "12_40239512_A_G",
             "12_40240543_G_A", "12_40249666_G_A", "12_40251369_G_A",
             "12_40251495_G_A", "12_40251828_G_A", "12_40252984_G_A")

# Match exactly "<variant>_1" / "<variant>_2". An unanchored pattern would also
# select longer IDs that merely start with one of these (e.g. 12_40238792_G_AT_1).
H1_pattern <- paste0("^(", paste(H1_snps, collapse = "|"), ")_[12]$")
H1_a       <- LRRK2_alleles[grepl(H1_pattern, LRRK2_alleles$V1), ]

# Sample columns plus the two allele columns of every selected variant
H1_LRRK2   <- Geno_matrix_LRRK2[,c("FID", "IID", "PAT", "MAT", "SEX", "PHENO", H1_a)]

In [None]:
%%R
ncol(H1_LRRK2)

In [None]:
%%R
anc <- "EAS"

# PLINK phenotype coding is 1 = control, 2 = case; any other value (e.g. -9 or 0)
# means "missing". Those samples are left out of the analysis inputs. Computing
# y.bin from PHENO - 1 alone would turn them into 0, i.e. treat them as controls.
has_pheno <- H1_LRRK2$PHENO %in% c(1, 2)
if (!all(has_pheno)) {
    message(sum(!has_pheno), " sample(s) without a case/control phenotype excluded")
}

# Columns 7-24 are the 18 allele columns (9 variants x 2 alleles) after the 6 sample columns
allele_cols <- 7:24

#Extract only genotype data:
geno          <- data.frame(H1_LRRK2[has_pheno, allele_cols], check.names = FALSE)

#Set variables for running the association analyses in haplo.stats
#Label the SNPs: "12_40252984_G_A"

# Get the column name of the haplotype: strip the trailing "_1"/"_2" and de-duplicate
label <- colnames(H1_LRRK2)[allele_cols] %>% gsub("_.{1}$", "", .) %>% unique()

#Set binary pheno (0=control, 1=patient):
H1_LRRK2$PHENO_01 <- H1_LRRK2$PHENO-1
y.bin <- 1*(H1_LRRK2$PHENO_01[has_pheno] == 1)

In [None]:
%%R

#Non-adjusted:
# Case/control haplotype analysis on y.bin and geno with no covariates;
# haplo.freq.min = 0.01 is passed through haplo.glm.control.
H1 <- haplo.cc(y=y.bin, geno=geno, locus.label= label, control = haplo.glm.control(haplo.freq.min = 0.01))
# Print the fitted object (nlines and digits are handed to its print method)
print(H1, nlines=10, digits=2)

#Sort the output on p-value:
# cc.df is the results table stored on the haplo.cc object; rows are ordered by its `p-val` column
H1_cc.df <- H1$cc.df
H1_cc.df_sort <- H1_cc.df[order(H1_cc.df$`p-val`),]

In [None]:
%%R

library(knitr)

H1_cc.df_sort %>% kable()

#### Visualise the block

In [None]:
# Variant IDs of the EAS haplotype, written one per line (no trailing newline)
# for use as a plink --extract list
eas_hap_variants = [
    "12_40238792_G_A",
    "12_40239262_G_A",
    "12_40239512_A_G",
    "12_40240543_G_A",
    "12_40249666_G_A",
    "12_40251369_G_A",
    "12_40251495_G_A",
    "12_40251828_G_A",
    "12_40252984_G_A",
]

with open("/home/jupyter/A419V_release9/EAS/EAS_hap_extract.txt", "w") as f:
    f.write("\n".join(eas_hap_variants))

In [None]:
%%bash
WORK_DIR=/home/jupyter/A419V_release9
cd $WORK_DIR

label="EAS"

/home/jupyter/plink1.9 \
--bfile ${label}/${label}_release9_remove_related_lrrk2_nodup \
--extract ${label}/${label}_hap_extract.txt \
--snps-only just-acgt \
--recode HV \
--out ${label}/${label}_release9_remove_related_lrrk2_nodup

In [None]:
%%bash
WORK_DIR=/home/jupyter/A419V_release9
cd $WORK_DIR


ls EAS/EAS_release9_remove_related_lrrk2_nodup.chr*

In [None]:
WORK_DIR="/home/jupyter/A419V_release9"

# BILLING_PROJECT_ID is only assigned inside the R session in this notebook
# (from the GOOGLE_PROJECT environment variable), and WORKSPACE_BUCKET is not
# assigned in any cell shown, so the f-string below would raise NameError.
# Define them here unless an earlier Python cell already did.
if "BILLING_PROJECT_ID" not in globals():
    BILLING_PROJECT_ID = os.environ["GOOGLE_PROJECT"]
if "WORKSPACE_BUCKET" not in globals():
    # NOTE(review): assumes the runtime exports WORKSPACE_BUCKET - confirm
    WORKSPACE_BUCKET = os.environ["WORKSPACE_BUCKET"]

# Copy the EAS chr-12 outputs of `plink --recode HV` to the workspace bucket
shell_do(f'gsutil -u {BILLING_PROJECT_ID} -m cp {WORK_DIR}/EAS/EAS_release9_remove_related_lrrk2_nodup.chr-12.* {WORKSPACE_BUCKET}/A419V_analysis/release9/haplotype/EAS/')

### EUR

In [None]:
%%bash
WORK_DIR="/home/jupyter/A419V_release9/"
cd $WORK_DIR

label="EUR"

cat ${label}/${label}_release9_remove_related_lrrk2.blocks.blocks

In [None]:
%%bash
WORK_DIR="/home/jupyter/A419V_release9/"
cd $WORK_DIR

label="EUR"

grep 40252984 ${label}/${label}_release9_remove_related_lrrk2.blocks.blocks | sed 's/ /|/g' | sed -r 's/^.{2}//'

No block contains p.A419V; checking for missingness.

In [None]:
%%bash
WORK_DIR=/home/jupyter/A419V_release9
cd $WORK_DIR

ancestry_labels=('EUR')

for label in "${ancestry_labels[@]}"
do

    /home/jupyter/plink1.9 \
    --bfile ${label}/${label}_release9_remove_related_lrrk2_nodup \
    --missing \
    --out ${label}/${label}_release9_remove_related_lrrk2.blocks_test

done

In [None]:
%%bash
WORK_DIR=/home/jupyter/A419V_release9
cd $WORK_DIR

head -1 EUR/EUR_release9_remove_related_lrrk2.blocks_test.lmiss
grep 40252984 EUR/EUR_release9_remove_related_lrrk2.blocks_test.lmiss

### AJ

In [None]:
%%bash
WORK_DIR="/home/jupyter/A419V_release9/"
cd $WORK_DIR

label="AJ"

grep 40252984 ${label}/${label}_release9_remove_related_lrrk2.blocks.blocks | sed 's/ /|/g' | sed -r 's/^.{2}//'

In [None]:
%%bash
WORK_DIR="/home/jupyter/A419V_release9/"
cd $WORK_DIR

label="AJ"

cat ${label}/${label}_release9_remove_related_lrrk2.blocks.blocks

No block contains p.A419V.

### CAH

In [None]:
%%bash
WORK_DIR="/home/jupyter/A419V_release9/"
cd $WORK_DIR

label="CAH"

grep 40252984 ${label}/${label}_release9_remove_related_lrrk2.blocks.blocks | sed 's/ /|/g' | sed -r 's/^.{2}//'

In [None]:
%%bash
WORK_DIR="/home/jupyter/A419V_release9/"
cd $WORK_DIR

label="CAH"

cat ${label}/${label}_release9_remove_related_lrrk2.blocks.blocks

### CAS

In [None]:
%%bash
WORK_DIR="/home/jupyter/A419V_release9/"
cd $WORK_DIR

label="CAS"

grep 40252984 ${label}/${label}_release9_remove_related_lrrk2.blocks.blocks | sed 's/ /|/g' | sed -r 's/^.{2}//'

# Calculate R2

## Imputed coding variants and GWAS nominated variants

1. Within LRRK2
- p.R1628P (40320043)
- rs76904798 (40220632) (Nalls 2019)
- rs17443414 (40250950), rs7132187 (40351006) (Multi-ancestry)

2. Within the LRRK2 locus on chr12 (outside the gene region extracted above)
- rs1994090 (40034759) (Japanese GWAS)
- rs17443099 (40179612), rs28370650 (40006146) (Hampton)
- rs190807041 (40379882) (Multi-ancestry)
- rs1384236 (40064582), rs7315459 (40071984) (Foo et al 2017)

In [None]:
%%bash
WORK_DIR=/home/jupyter/A419V_release9
# Stop here if the working directory is missing, so no list is written elsewhere
cd "$WORK_DIR" || exit 1

ancestry_labels=('AJ' 'CAH' 'CAS' 'EAS' 'EUR')

# Base-pair positions of the variants of interest (see the list above)
within_lrrk2_pos="40320043 40220632 40250950 40351006"
within_chr12_pos="40034759 40179612 40006146 40379882 40064582 40071984"

# Print the ID (3rd column) of every .pvar row whose POS (2nd column) is exactly
# one of the requested positions. A plain grep would also match the digits as a
# substring of another position, of an ID, or of any other column.
ids_at_positions() {
    awk -v positions="$1" '
        BEGIN { n = split(positions, p, " "); for (i = 1; i <= n; i++) wanted[p[i]] }
        ($2 in wanted) { print $3 }
    ' "$2"
}

for label in "${ancestry_labels[@]}"
do

    ids_at_positions "$within_lrrk2_pos" "${label}/chr12_${label}_release9.pvar" > "${label}/${label}_within_lrrk2_snps.txt"
    ids_at_positions "$within_chr12_pos" "${label}/chr12_${label}_release9.pvar" > "${label}/${label}_within_chr12_snps.txt"

done

In [None]:
%%bash
WORK_DIR=/home/jupyter/A419V_release9
cd $WORK_DIR

ancestry_labels=('AJ' 'CAH' 'CAS' 'EAS' 'EUR')

for label in "${ancestry_labels[@]}"
do

    cat ${label}/${label}_within_lrrk2_snps.txt ${label}/${label}_within_chr12_snps.txt  > ${label}/${label}_all_snps.txt
    
    # Add A419V to the list as well
    echo "chr12:40252984:C:T" >> ${label}/${label}_all_snps.txt

done

In [None]:
%%bash
WORK_DIR=/home/jupyter/A419V_release9
cd $WORK_DIR

ancestry_labels=('AJ' 'CAH' 'CAS' 'EAS' 'EUR')

for label in "${ancestry_labels[@]}"
do

    /home/jupyter/plink2 \
    --pfile ${label}/chr12_${label}_release9 \
    --extract ${label}/${label}_all_snps.txt \
    --remove ${label}/${label}_related_ids.samples \
    --make-bed \
    --out ${label}/chr12_${label}_release9_remove_related_extracted
    
done

In [None]:
%%bash
WORK_DIR=/home/jupyter/A419V_release9
cd $WORK_DIR

ancestry_labels=('AJ' 'CAH' 'CAS' 'EAS' 'EUR')

for label in "${ancestry_labels[@]}"
do
    
    wc -l ${label}/chr12_${label}_release9_remove_related_extracted.bim
    
done

In [None]:
%%bash
WORK_DIR=/home/jupyter/A419V_release9
cd $WORK_DIR

ancestry_labels=('AJ' 'CAH' 'CAS' 'EAS' 'EUR')

for label in "${ancestry_labels[@]}"
do

    /home/jupyter/plink1.9 \
    --bfile ${label}/chr12_${label}_release9_remove_related_extracted \
    --ld-snp "chr12:40252984:C:T" \
    --r2 \
    --ld-window 99999 \
    --ld-window-kb 10000000 \
    --ld-window-r2 0 \
    --out ${label}/chr12_${label}_release9_remove_related_extracted_r2
    
done

In [None]:
WORK_DIR="/home/jupyter/A419V_release9"

labels=['AJ', 'CAH', 'CAS', 'EAS', 'EUR']

# Build one table with a row per SNP_B and an R2_<ancestry> column per ancestry.
# The first ancestry's table is used as the starting point. Seeding the merge
# with a dummy frame whose SNP_B is "" would leave a spurious empty-ID row in
# the outer-merged result.
df = None

for label in labels:

    # plink .ld files are whitespace-aligned; sep=r"\s+" replaces the deprecated delim_whitespace=True
    ld = pd.read_csv(f"{WORK_DIR}/{label}/chr12_{label}_release9_remove_related_extracted_r2.ld", sep = r"\s+")

    # rename() returns a new frame, so no in-place edit is made on a slice of `ld`
    ld_red = ld[["SNP_B", "R2"]].rename(columns = {"R2":f"R2_{label}"})

    # Outer merge keeps variants that are present in only some ancestries
    df = ld_red if df is None else pd.merge(df, ld_red, on = "SNP_B", how = "outer")

df

## Other coding variants

In [None]:
%%bash
WORK_DIR=/home/jupyter/A419V_release9
cd $WORK_DIR

ancestry_labels=('AJ' 'CAH' 'CAS' 'EAS' 'EUR')

for label in "${ancestry_labels[@]}"
do

    wc -l ${label}/${label}_release9_remove_related_lrrk2_nodup_exon.bim
    
done

In [None]:
%%bash
WORK_DIR=/home/jupyter/A419V_release9
cd $WORK_DIR

ancestry_labels=('AJ' 'CAH' 'CAS' 'EAS' 'EUR')

for label in "${ancestry_labels[@]}"
do

    /home/jupyter/plink1.9 \
    --bfile ${label}/${label}_release9_remove_related_lrrk2_nodup_exon \
    --ld-snp "12_40252984_G_A" \
    --r2 \
    --ld-window 99999 \
    --ld-window-kb 10000000 \
    --ld-window-r2 0 \
    --out ${label}/${label}_a419v_r2
    
done

In [None]:
%%bash
WORK_DIR=/home/jupyter/A419V_release9
cd $WORK_DIR

ancestry_labels=('AJ' 'CAH' 'CAS' 'EAS' 'EUR')

for label in "${ancestry_labels[@]}"
do

    wc -l ${label}/${label}_a419v_r2.ld
    
done

In [None]:
# Remove the row limit when a DataFrame is displayed, so the merged R2 table below is shown in full
pd.set_option("Display.max_rows", None)

In [None]:
labels=['AJ', 'CAH', 'CAS', 'EAS', 'EUR']

# Build one table with a row per SNP_B and an R2_<ancestry> column per ancestry.
# The first ancestry's table is used as the starting point. Seeding the merge
# with a dummy frame whose SNP_B is "" would leave a spurious empty-ID row in
# the outer-merged result.
df = None

for label in labels:

    # plink .ld files are whitespace-aligned; sep=r"\s+" replaces the deprecated delim_whitespace=True
    ld = pd.read_csv(f"/home/jupyter/A419V_release9/{label}/{label}_a419v_r2.ld", sep = r"\s+")

    # rename() returns a new frame, so no in-place edit is made on a slice of `ld`
    ld_red = ld[["SNP_B", "R2"]].rename(columns = {"R2":f"R2_{label}"})

    # Outer merge keeps variants that are present in only some ancestries
    df = ld_red if df is None else pd.merge(df, ld_red, on = "SNP_B", how = "outer")

df