# ADSP

* **Project:** ADRD-SORL1-Biobanks
* **Version:** Python/3.10
* **Last Updated:** 20-Aug-2025

## Notebook Overview
Haplotype analysis

# Query ADSP to perform haplotype analysis

## Variables used 

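- `${ANCESTRY}` = EUR, AFR, AMR, AAC, AJ, MDE, SAS, CAS, EAS, FIN, CAH (file names in this notebook use the lower-case code, e.g. `eur`)
- `${COHORT}` = Cases, Controls (file names in this notebook use `case` and `control`)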

### Select SORL1 region ±100 kb boundaries

In [None]:
import pandas as pd

In [None]:
%%bash
# Cut the chr11 window 121352314-121733763 out of the WGS pfile and store it
# as a PLINK 1 binary fileset for the Haploview workflow.
module load plink

# Stop with a clear message instead of letting plink2 search for "//chr11...".
: "${WORK_DIR:?WORK_DIR must name the directory that holds the chr11 pfile}"

out_dir="Haploview_New_Biallelic"
# The output directory has to exist before plink2 can write its files there.
mkdir -p "$out_dir"

plink2 --pfile "/${WORK_DIR}/chr11.compact_filtered.r4.wgs.biallelic" \
  --chr 11 \
  --from-bp 121352314 \
  --to-bp 121733763 \
  --make-bed \
  --out "${out_dir}/Hap_SORL1_boundris"

### Keep biallelic variants and separate the data by ancestry

In [8]:
%%bash
# Print the variant ID (column 2) of every .bim record whose two alleles are
# each exactly one of A, C, G or T; indels and symbolic alleles are dropped.
# Works for tab- and space-delimited .bim files alike.
# Arguments: $1 - path to a PLINK .bim file
# Outputs:   one variant ID per line on stdout
list_snv_ids() {
  awk '$5 ~ /^[ACGT]$/ && $6 ~ /^[ACGT]$/ { print $2 }' "$1"
}

list_snv_ids Haploview_New_Biallelic/Hap_SORL1_boundris.bim \
  > Haploview_New_Biallelic/biallelic_snps.txt


In [None]:
%%bash
# Keep only the variants listed in biallelic_snps.txt and write the result as
# a new binary fileset.
module load plink

hv_dir="Haploview_New_Biallelic"
extract_args=(
  --bfile "${hv_dir}/Hap_SORL1_boundris"
  --extract "${hv_dir}/biallelic_snps.txt"
  --make-bed
  --out "${hv_dir}/Hap_SORL1_biallelic_only"
)

plink2 "${extract_args[@]}"


In [None]:
%%bash
# Split the biallelic SORL1 fileset into one fileset per ancestry, using the
# sample list adsp_<ancestry>_keep.txt for each group.
module load plink/1.9

source_fileset="Haploview_New_Biallelic/Hap_SORL1_biallelic_only"
target_dir="Haploview_New_Biallelic"

for label in eur afr amr eas sas aac aj cah cas mde fin; do
    echo "Processing ancestry: $label"

    plink --bfile "$source_fileset" \
          --keep "adsp_${label}_keep.txt" \
          --make-bed \
          --out "${target_dir}/Hap_SORL1_biallelic_adsp_${label}"
done

### Remove related individuals and perform quality control 

In [None]:
%%bash
# Remove related individuals from every per-ancestry fileset.
# An ancestry whose removal list is missing is skipped with a warning on
# stderr; no *_unrelated fileset is written for it.
module load plink/1.9

ancestries=(eur afr amr eas sas aac aj cah cas mde fin)

# NOTE(review): the "Creating Haplotype Blocks" workflow in this notebook reads
# the same lists from /${WORK_DIR}/plink_files_genotools_update_2024 - confirm
# which location is correct.
remove_base="/${WORK_DIR}"

# Print the path of the relatedness removal list for one ancestry.
# Arguments: $1 - directory that holds the lists
#            $2 - ancestry code in any letter case
# The list file names carry the upper-case ancestry code (bash 4+ expansion).
related_list_path() {
    printf '%s/REMOVE.FILTERED.merged_biallelic_%s.related\n' "$1" "${2^^}"
}

for anc in "${ancestries[@]}"; do
    input_prefix="Haploview_New_Biallelic/Hap_SORL1_biallelic_adsp_${anc}"
    remove_file="$(related_list_path "$remove_base" "$anc")"
    output_prefix="Haploview_New_Biallelic/Hap_SORL1_biallelic_adsp_${anc}_unrelated"

    echo "Processing ${anc}..."

    if [[ -f "$remove_file" ]]; then
        plink --bfile "$input_prefix" \
              --remove "$remove_file" \
              --make-bed \
              --out "$output_prefix"
    else
        # Diagnostics go to stderr so they are not mixed into the progress output.
        echo "Warning: Remove file not found for ${anc}: ${remove_file}" >&2
    fi
done

In [None]:
%%bash
# Variant-level QC for every unrelated per-ancestry fileset: PLINK is run with
# --maf 0.05 and --geno 0.05 and the result is written as *_unrelated_filtered.
module load plink/1.9

stem="Haploview_New_Biallelic/Hap_SORL1_biallelic_adsp"
qc_flags=(--maf 0.05 --geno 0.05)

for pop in eur afr amr eas sas aac aj cah cas mde fin; do
    echo "Processing ${pop} with MAF and geno filters..."

    plink --bfile "${stem}_${pop}_unrelated" \
          "${qc_flags[@]}" \
          --make-bed \
          --out "${stem}_${pop}_unrelated_filtered"
done

### Convert the data to VCF format to prepare for phasing

In [None]:
%%bash
# Export each QC-filtered per-ancestry fileset to VCF so it can be phased.
# The "vcf-iid" modifier uses the within-family ID alone as the VCF sample ID.
# NOTE(review): family IDs are therefore not carried into the VCF; presumably
# FID == IID in this dataset - confirm before phenotypes are matched by FID/IID.
module load plink/1.9

workdir="Haploview_New_Biallelic"

for pop in eur afr amr eas sas aac aj cah cas mde fin; do
    echo "Converting ${pop} PLINK to VCF (with IID sample IDs)..."

    plink --bfile "${workdir}/Hap_SORL1_biallelic_adsp_${pop}_unrelated_filtered" \
          --recode vcf-iid \
          --out "${workdir}/${pop}_biallelic_SORL1"
done

### Phasing data

In [None]:
# Download the Beagle jar (build 28Jun21.220) and save it as beagle.jar in the
# current working directory; the phasing cells call it by that name.
! wget https://faculty.washington.edu/browning/beagle/beagle.28Jun21.220.jar -O beagle.jar

In [None]:
# NOTE(review): IPython runs each `!` command in its own subshell, so a module
# loaded here is presumably not visible to the later %%bash cells - confirm, and
# if so load the module inside the cell that actually calls java.
! module load java

In [None]:
%%bash
# Phase the per-ancestry VCFs with Beagle.
# If ANCESTRY is set (any letter case, e.g. EUR or eur) only that ancestry is
# phased; otherwise every ancestry handled by the other cells is phased.
# Input : Haploview_New_Biallelic/<anc>_biallelic_SORL1.vcf
# Output: prefix Haploview_New_Biallelic/<anc>_biallelic_SORL1_phased
#         (the following cell reads <prefix>.vcf.gz)

# Each %%bash cell is a separate shell process, so Java is loaded here.
module load java

if [[ -n "${ANCESTRY:-}" ]]; then
    # The VCFs are named with the lower-case ancestry code.
    ancestries=("${ANCESTRY,,}")
else
    ancestries=(eur afr amr eas sas aac aj cah cas mde fin)
fi

for anc in "${ancestries[@]}"; do
    echo "Phasing ${anc}..."

    java -Xmx8g -jar beagle.jar \
      gt="Haploview_New_Biallelic/${anc}_biallelic_SORL1.vcf" \
      out="Haploview_New_Biallelic/${anc}_biallelic_SORL1_phased" \
      nthreads=8
done

### Convert data to PLINK format

In [None]:
%%bash
# Turn each phased, gzipped VCF back into a PLINK binary fileset.
# --double-id makes PLINK use the VCF sample ID as both FID and IID.
module load plink/1.9

phase_dir="Haploview_New_Biallelic"
populations=(eur afr amr eas sas aac aj cah cas mde fin)

for pop in "${populations[@]}"; do
    stem="${phase_dir}/${pop}_biallelic_SORL1_phased"

    echo "Converting phased VCF of ${pop} to PLINK..."

    plink --vcf "${stem}.vcf.gz" \
          --double-id \
          --make-bed \
          --out "$stem"
done

### Add phenotype data

In [None]:
%%bash

# Build haplo_phenotypes.txt: a tab-delimited FID / IID / PHENO table in which
# cases are coded 2 and controls are coded 1.

# Print "FID<TAB>IID<TAB>code" for every sample row of a sample list.
# Arguments: $1 - whitespace-delimited sample list whose first line is a header
#            $2 - phenotype code to assign to every sample in the list
pheno_rows() {
  awk -v code="$2" 'BEGIN { OFS = "\t" } NR > 1 { print $1, $2, code }' "$1"
}

# Header and rows now share one delimiter (the rows used to be space-separated).
{
  printf 'FID\tIID\tPHENO\n'
  pheno_rows qc_case_plink.txt 2
  pheno_rows qc_control_plink.txt 1
} > haplo_phenotypes.txt

In [None]:
%%bash
# Print how many samples in a phenotype table carry a given phenotype code.
# Only the first line is treated as the header, so sample IDs that happen to
# contain the text "FID" are still counted.
# Arguments: $1 - phenotype code (1 = control, 2 = case)
#            $2 - phenotype table (default: haplo_phenotypes.txt)
count_pheno() {
  awk -v code="$1" 'NR > 1 && $3 == code { n++ } END { print n + 0 }' \
    "${2:-haplo_phenotypes.txt}"
}

count_pheno 1   # controls
count_pheno 2   # cases

In [None]:
%%bash
# Attach the case/control phenotype from haplo_phenotypes.txt to every phased
# per-ancestry fileset.
module load plink/1.9

pheno_table="haplo_phenotypes.txt"

for pop in eur afr amr eas sas aac aj cah cas mde fin; do
    phased="Haploview_New_Biallelic/${pop}_biallelic_SORL1_phased"

    echo "Adding phenotype to ${pop}..."

    # The header line is stripped on the fly before PLINK reads the table.
    plink --bfile "$phased" \
          --pheno <(tail -n +2 "$pheno_table") \
          --allow-no-sex \
          --make-bed \
          --out "${phased}_with_pheno"
done


### Create necessary files (.ped and .info) for Haploview input

In [None]:
%%bash
# Write the Haploview input files (.ped and .info) for every ancestry from the
# phased filesets that carry the phenotype.
module load plink/1.9

hv_dir="Haploview_New_Biallelic"
groups=(eur afr amr eas sas aac aj cah cas mde fin)

for grp in "${groups[@]}"; do
    echo "Generating Haploview input files for ${grp}..."

    plink --bfile "${hv_dir}/${grp}_biallelic_SORL1_phased_with_pheno" \
          --recodeHV \
          --allow-no-sex \
          --out "${hv_dir}/${grp}_biallelic_SORL1_haploview"
done

### Run the analysis in a GUI environment in Haploview

In [None]:
# Download Haploview.jar into the current working directory; the analysis
# itself is run interactively in the Haploview GUI.
! wget https://www.broadinstitute.org/ftp/pub/mpg/haploview/Haploview.jar

### Clean the results

In [19]:
import pandas as pd
import os

ancestries = ['eur', 'afr', 'amr', 'eas', 'sas', 'aac', 'aj', 'cah', 'cas', 'mde', 'fin']
output_dir = "Haploview_New_Biallelic"
os.makedirs(output_dir, exist_ok=True)

# Columns written to <anc>_cleaned.tsv, in this order.
CLEAN_COLUMNS = ['Block', 'Haplotype', 'Freq', 'Chi_Square', 'P_Value', 'CaseFreq', 'ControlFreq']


def read_haploview_results(file_path):
    """Parse a tab-separated Haploview results export.

    The first line is the header; a leading "Block" column name is dropped.
    A line starting with "Block " sets the block label for the rows after it.
    Any other line is kept only if it has as many tab-separated fields as the
    header.

    Returns:
        (header_cols, rows) where every row is [block_label] + fields.
    """
    rows = []
    current_block = None

    with open(file_path, 'r') as f:
        header_cols = f.readline().strip().split('\t')
        if header_cols[0].lower() == "block":
            header_cols = header_cols[1:]

        for line in f:
            line = line.strip()
            if line.startswith("Block "):
                current_block = line
                continue
            fields = line.split('\t')
            if len(fields) == len(header_cols):
                rows.append([current_block] + fields)

    return header_cols, rows


def clean_haploview_frame(header_cols, rows, source="input"):
    """Build the cleaned table from parsed Haploview rows.

    Column names are normalised (spaces -> '_', '.' and ',' removed), the
    numeric columns are converted, and 'CaseControl_Frequencies' ("case, control")
    is split into CaseFreq / ControlFreq. The result always has exactly
    CLEAN_COLUMNS; a column the input lacks is reported and left empty instead
    of raising, so one odd file cannot abort the whole ancestry loop.

    Args:
        header_cols: header names as returned by read_haploview_results.
        rows: parsed rows as returned by read_haploview_results (may be empty).
        source: label used in the warning message.

    Raises:
        ValueError: if a frequency cell holds more than two comma-separated
            values or a value that is not a number.
    """
    names = ['Block'] + [
        col.strip().replace(' ', '_').replace('.', '').replace(',', '')
        for col in header_cols
    ]
    df = pd.DataFrame(rows, columns=names)

    for col in ('Freq', 'Chi_Square', 'P_Value'):
        if col in df.columns:
            df[col] = pd.to_numeric(df[col], errors='coerce')

    freq_col = 'CaseControl_Frequencies'
    if freq_col in df.columns:
        parts = df[freq_col].str.strip().str.split(',', expand=True)
        if parts.shape[1] > 2:
            raise ValueError(f"{source}: more than two values in {freq_col}")
        # With no rows the split yields zero columns; reindex guarantees both.
        parts = parts.reindex(columns=[0, 1])
        df['CaseFreq'] = parts[0].astype(float)
        df['ControlFreq'] = parts[1].astype(float)

    missing = [col for col in CLEAN_COLUMNS if col not in df.columns]
    if missing:
        print(f"Warning: {source} has no column(s) {missing}; they are written empty")

    # Selecting CLEAN_COLUMNS also discards the raw ratio/frequency columns.
    return df.reindex(columns=CLEAN_COLUMNS)


for anc in ancestries:
    file_path = f"Haploview_New_Biallelic/{anc}_results1"

    if not os.path.exists(file_path):
        # Same message as the later cells, so skipped ancestries are visible.
        print(f"File not found: {file_path}")
        continue

    header_cols, rows = read_haploview_results(file_path)
    df = clean_haploview_frame(header_cols, rows, source=file_path)

    output_path = os.path.join(output_dir, f"{anc}_cleaned.tsv")
    df.to_csv(output_path, sep='\t', index=False)


### Separate significant results

In [1]:
import pandas as pd
import os

ancestries = ['eur', 'afr', 'amr', 'eas', 'sas', 'aac', 'aj', 'cah', 'cas', 'mde', 'fin']
input_dir = "Haploview_New_Biallelic"


def add_direction(df):
    """Return a copy of df with a 'Direction' column.

    'Direction' is '↑ in cases' where CaseFreq > ControlFreq, '↑ in controls'
    where CaseFreq < ControlFreq, and 'no difference' otherwise (including rows
    where either frequency is missing). The comparison is vectorised, so a
    table with no rows is handled as well.
    """
    out = df.copy()
    direction = pd.Series('no difference', index=out.index, dtype=object)
    direction[out['CaseFreq'] > out['ControlFreq']] = '↑ in cases'
    direction[out['CaseFreq'] < out['ControlFreq']] = '↑ in controls'
    out['Direction'] = direction
    return out


def filter_significant(df, alpha=0.05):
    """Return the rows with P_Value strictly below alpha, re-indexed from 0."""
    return df[df['P_Value'] < alpha].reset_index(drop=True)


for anc in ancestries:
    input_file = os.path.join(input_dir, f"{anc}_cleaned.tsv")

    if not os.path.exists(input_file):
        print(f"File not found: {input_file}")
        continue

    df = add_direction(pd.read_csv(input_file, sep='\t'))
    sig_df = filter_significant(df)

    output_file = os.path.join(input_dir, f"{anc}_significant.tsv")
    sig_df.to_csv(output_file, sep='\t', index=False)


### Visualize data

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
import warnings
from matplotlib.backends.backend_pdf import PdfPages

# For every ancestry with a <anc>_significant.tsv file:
#   1. add FoldChange = CaseFreq / ControlFreq and log2FC = log2(FoldChange),
#   2. save the table sorted by FoldChange (descending),
#   3. draw a one-column heatmap of log2FC per haplotype, saved as a PNG and
#      appended as one page to a combined PDF.
# An infinite log2FC (a zero frequency on one side) cannot be colour-mapped, so
# those cells are masked and painted solid red (+inf) or blue (-inf).

warnings.filterwarnings("ignore", category=FutureWarning)

# NOTE(review): 'fin' is processed by the earlier cells but is not listed here -
# confirm that leaving it out of the plots is intentional.
ancestries = ['eur', 'afr', 'amr', 'aac', 'aj', 'cas', 'eas', 'sas', 'mde', 'cah']
input_dir = "Haploview_New_Biallelic"
os.makedirs(input_dir, exist_ok=True)

pdf_output = os.path.join(input_dir, "all_log2fc_heatmaps.pdf")

with PdfPages(pdf_output) as pdf:
    for anc in ancestries:
        input_file = os.path.join(input_dir, f"{anc}_significant.tsv")

        if not os.path.exists(input_file):
            print(f"File not found: {input_file}")
            continue

        # Read haplotypes as text: numeric allele codes would otherwise be
        # parsed as integers and break the label-length computation below.
        df = pd.read_csv(input_file, sep='\t', dtype={'Haplotype': str})

        if df.empty:
            print(f"No significant results for {anc.upper()}")
            continue

        df['FoldChange'] = df['CaseFreq'] / df['ControlFreq']
        with np.errstate(divide='ignore', invalid='ignore'):
            df['log2FC'] = np.log2(df['FoldChange'])

        df = df.sort_values(by='FoldChange', ascending=False).reset_index(drop=True)

        output_table = os.path.join(input_dir, f"{anc}_significant_with_foldchange.tsv")
        df.to_csv(output_table, sep='\t', index=False)
        print(f"Saved TSV: {output_table}")

        heatmap_df = df.set_index('Haplotype')[['log2FC']].copy()
        finite_vals = heatmap_df.replace([np.inf, -np.inf], np.nan)

        # Skip only when there is neither a finite nor an infinite value to show.
        if finite_vals.dropna().empty and not any(np.isinf(heatmap_df['log2FC'])):
            print(f"Skipped heatmap for {anc.upper()} — nothing to plot")
            continue

        mask = heatmap_df['log2FC'].isin([np.inf, -np.inf]).values.reshape(-1, 1)

        # Widen the figure for long haplotype labels, lengthen it per row.
        max_hap_len = max(len(str(h)) for h in heatmap_df.index)
        fig_width = 10 if max_hap_len < 40 else 12 if max_hap_len < 80 else 14
        fig_height = max(3.5, len(heatmap_df) * 0.45)

        fig, ax = plt.subplots(figsize=(fig_width, fig_height))

        sns.heatmap(
            finite_vals,
            annot=True,
            fmt=".2f",
            cmap='coolwarm',
            center=0,
            cbar_kws={'label': 'log2(CaseFreq / ControlFreq)', 'shrink': 0.6},
            mask=mask,
            annot_kws={"color": "black"},
            ax=ax
        )

        # Paint the masked infinite cells by hand and label them.
        for i, (hap, val) in enumerate(heatmap_df['log2FC'].items()):
            if val == np.inf or val == -np.inf:
                color = 'red' if val == np.inf else 'blue'
                ax.add_patch(plt.Rectangle((0, i), 1, 1, fill=True, color=color))
                ax.text(0.5, i + 0.5, '∞', va='center', ha='center', color='black', fontsize=10, fontweight='bold')

        ax.set_yticklabels(ax.get_yticklabels(), rotation=0, fontsize=6)
        ax.set_xticklabels(ax.get_xticklabels(), fontsize=6)
        ax.set_title(f'Log2 Fold Change (Case vs Control): {anc.upper()}', fontsize=10)
        ax.set_ylabel('Haplotype', fontsize=8)
        ax.set_xlabel("")

        colorbar = ax.collections[0].colorbar
        colorbar.ax.tick_params(labelsize=6)
        colorbar.set_label('log2(CaseFreq / ControlFreq)', fontsize=7)

        plt.subplots_adjust(left=0.42, right=0.92, top=0.88, bottom=0.08)

        heatmap_image = os.path.join(input_dir, f"{anc}_log2fc_heatmap.png")
        fig.savefig(heatmap_image, dpi=300, bbox_inches='tight')
        print(f"Saved PNG: {heatmap_image}")

        pdf.savefig(fig, bbox_inches='tight')
        # Close this figure explicitly so figures do not pile up across ancestries.
        plt.close(fig)
        print(f"Added to PDF: {anc.upper()}")



In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
import warnings
from matplotlib.backends.backend_pdf import PdfPages

# For every ancestry with a <anc>_significant.tsv file: keep the haplotypes with
# log2(CaseFreq / ControlFreq) > 0, save them sorted by log2FC (descending), and
# draw a one-column heatmap (one PNG per ancestry plus one page in a combined
# PDF). A +inf log2FC (ControlFreq of zero) is masked and painted dark red.

warnings.filterwarnings("ignore", category=FutureWarning)

input_dir = "Haploview_New_Biallelic"
os.makedirs(input_dir, exist_ok=True)

# NOTE(review): 'fin' is processed by the earlier cells but is not listed here -
# confirm that leaving it out of the plots is intentional.
ordered_ancestries = ['eur', 'afr', 'amr', 'aac', 'aj', 'cas', 'eas', 'sas', 'mde', 'cah']

pdf_output = os.path.join(input_dir, "all_case_enriched_heatmaps.pdf")

with PdfPages(pdf_output) as pdf:
    for anc in ordered_ancestries:
        input_file = os.path.join(input_dir, f"{anc}_significant.tsv")

        if not os.path.exists(input_file):
            print(f"File not found: {input_file}")
            continue

        # Read haplotypes as text: numeric allele codes would otherwise be
        # parsed as integers and break the label-length computation below.
        df = pd.read_csv(input_file, sep='\t', dtype={'Haplotype': str})

        if df.empty:
            print(f"No significant results for {anc.upper()}")
            continue

        df['FoldChange'] = df['CaseFreq'] / df['ControlFreq']
        with np.errstate(divide='ignore', invalid='ignore'):
            df['log2FC'] = np.log2(df['FoldChange'])

        # log2FC > 0 means the haplotype is more frequent in cases.
        df_case_enriched = df[df['log2FC'] > 0].copy()

        if df_case_enriched.empty:
            print(f"No case-enriched haplotypes for {anc.upper()}")
            continue

        df_case_enriched = df_case_enriched.sort_values(by='log2FC', ascending=False).reset_index(drop=True)

        tsv_file = os.path.join(input_dir, f"{anc}_case_enriched.tsv")
        df_case_enriched.to_csv(tsv_file, sep='\t', index=False)
        print(f"Saved TSV: {tsv_file}")

        heatmap_df = df_case_enriched.set_index('Haplotype')[['log2FC']].copy()
        finite_vals = heatmap_df.replace([np.inf], np.nan)
        mask = heatmap_df['log2FC'].isin([np.inf]).values.reshape(-1, 1)

        # Widen the figure for long haplotype labels, lengthen it per row.
        max_hap_len = max(len(str(h)) for h in heatmap_df.index)
        fig_width = 10 if max_hap_len < 40 else 12 if max_hap_len < 80 else 14
        fig_height = max(2, len(heatmap_df) * 0.5)

        plt.figure(figsize=(fig_width, fig_height))
        ax = sns.heatmap(
            finite_vals,
            annot=True,
            fmt=".2f",
            cmap='Reds',
            cbar_kws={'label': 'log2(CaseFreq / ControlFreq)'},
            mask=mask,
            annot_kws={"color": "black"}
        )

        # Paint the masked +inf cells by hand and label them.
        for i, (hap, val) in enumerate(heatmap_df['log2FC'].items()):
            if val == np.inf:
                plt.gca().add_patch(plt.Rectangle((0, i), 1, 1, fill=True, color='darkred'))
                plt.text(0.5, i + 0.5, '+inf', va='center', ha='center', color='black')

        plt.yticks(rotation=0, fontsize=7)
        plt.title(f'Case-Enriched Haplotypes: {anc.upper()}')
        plt.ylabel('Haplotype')

        # Short heatmaps get smaller fonts on the colour-bar label and x ticks.
        if len(heatmap_df) < 4:
            ax.figure.axes[-1].yaxis.label.set_size(8)
            plt.xlabel('', fontsize=8)
            plt.xticks(fontsize=8)
        else:
            plt.xlabel('')
            plt.xticks(fontsize=10)

        plt.subplots_adjust(left=0.45, right=0.95, top=0.85, bottom=0.10)

        png_file = os.path.join(input_dir, f"{anc}_case_enriched_heatmap.png")
        plt.savefig(png_file)
        print(f"Saved PNG: {png_file}")

        pdf.savefig()
        plt.close()
        print(f"Added to PDF: {anc.upper()}")


# Creating Haplotype Blocks

In [None]:
import pandas as pd

### Select SORL1 region ±100 kb boundaries

In [None]:
%%bash
# Cut the chr11 window 121352314-121733763 out of the WGS pfile and store it
# as a PLINK 1 binary fileset for the haplotype-block workflow.
module load plink

# Stop with a clear message instead of letting plink2 search for "//chr11...".
: "${WORK_DIR:?WORK_DIR must name the directory that holds the chr11 pfile}"

out_dir="Blocks"
# The output directory has to exist before plink2 can write its files there.
mkdir -p "$out_dir"

plink2 --pfile "/${WORK_DIR}/chr11.compact_filtered.r4.wgs.biallelic" \
  --chr 11 \
  --from-bp 121352314 \
  --to-bp 121733763 \
  --make-bed \
  --out "${out_dir}/Hap_SORL1_boundris"

### Separate the data by ancestry

In [None]:
%%bash
# Split the SORL1 region fileset into one fileset per ancestry, using the
# sample list adsp_<ancestry>_keep.txt for each group.
module load plink/1.9

region_fileset="Blocks/Hap_SORL1_boundris"
blocks_dir="Blocks"

for label in eur afr amr eas sas aac aj cah cas mde fin; do
    echo "Processing ancestry: $label"

    plink --bfile "$region_fileset" \
          --keep "adsp_${label}_keep.txt" \
          --make-bed \
          --out "${blocks_dir}/Hap_SORL1_boundris_adsp_${label}"
done

### Remove related individuals and perform quality control 

In [None]:
%%bash
# Remove related individuals from every per-ancestry fileset.
# An ancestry whose removal list is missing is skipped with a warning on
# stderr; no *_unrelated fileset is written for it.
module load plink/1.9

ancestries=(eur afr amr eas sas aac aj cah cas mde fin)

# NOTE(review): the Haploview workflow earlier in this notebook reads the same
# lists directly from /${WORK_DIR} - confirm which location is correct.
remove_base="/${WORK_DIR}/plink_files_genotools_update_2024"

# Print the path of the relatedness removal list for one ancestry.
# Arguments: $1 - directory that holds the lists
#            $2 - ancestry code in any letter case
# The list file names carry the upper-case ancestry code (bash 4+ expansion).
related_list_path() {
    printf '%s/REMOVE.FILTERED.merged_biallelic_%s.related\n' "$1" "${2^^}"
}

for anc in "${ancestries[@]}"; do
    input_prefix="Blocks/Hap_SORL1_boundris_adsp_${anc}"
    remove_file="$(related_list_path "$remove_base" "$anc")"
    output_prefix="Blocks/Hap_SORL1_boundris_adsp_${anc}_unrelated"

    echo "Processing ${anc}..."

    if [[ -f "$remove_file" ]]; then
        plink --bfile "$input_prefix" \
              --remove "$remove_file" \
              --make-bed \
              --out "$output_prefix"
    else
        # Diagnostics go to stderr so they are not mixed into the progress output.
        echo "Warning: Remove file not found for ${anc}: ${remove_file}" >&2
    fi
done

In [None]:
%%bash
# Variant-level QC for every unrelated per-ancestry fileset: PLINK is run with
# --maf 0.05 and --geno 0.05 and the result is written as *_unrelated_filtered.
module load plink/1.9

stem="Blocks/Hap_SORL1_boundris_adsp"
qc_flags=(--maf 0.05 --geno 0.05)

for pop in eur afr amr eas sas aac aj cah cas mde fin; do
    echo "Processing ${pop} with MAF and geno filters..."

    plink --bfile "${stem}_${pop}_unrelated" \
          "${qc_flags[@]}" \
          --make-bed \
          --out "${stem}_${pop}_unrelated_filtered"
done

### Separate the data by cases and controls

In [None]:
%%bash
# Extract the cases (samples listed in qc_case_plink.txt) from every QC-filtered
# per-ancestry fileset. Each result goes into its own directory, named like the
# fileset it contains.
module load plink/1.9

case_list="qc_case_plink.txt"

for pop in eur afr amr eas sas aac aj cah cas mde fin; do
    filtered="Blocks/Hap_SORL1_boundris_adsp_${pop}_unrelated_filtered"
    case_dir="${filtered}_case"

    echo "Processing ancestry: $pop"

    mkdir -p "$case_dir"

    plink --bfile "$filtered" \
          --keep "$case_list" \
          --make-bed \
          --out "${case_dir}/Hap_SORL1_boundris_adsp_${pop}_unrelated_filtered_case"
done


In [None]:
%%bash
# Extract the controls (samples listed in qc_control_plink.txt) from every
# QC-filtered per-ancestry fileset. Each result goes into its own directory,
# named like the fileset it contains.
module load plink/1.9

control_list="qc_control_plink.txt"

for pop in eur afr amr eas sas aac aj cah cas mde fin; do
    filtered="Blocks/Hap_SORL1_boundris_adsp_${pop}_unrelated_filtered"
    control_dir="${filtered}_control"

    echo "Processing ancestry: $pop"

    mkdir -p "$control_dir"

    plink --bfile "$filtered" \
          --keep "$control_list" \
          --make-bed \
          --out "${control_dir}/Hap_SORL1_boundris_adsp_${pop}_unrelated_filtered_control"
done


### Convert the data to VCF format to prepare for phasing

In [None]:
%%bash
# Export the per-ancestry control and case filesets to VCF for phasing.
# The "vcf-iid" modifier uses the within-family ID alone as the VCF sample ID.
module load plink/1.9

ancestries=(eur afr amr eas sas aac aj cah cas mde fin)

# All controls are converted first, then all cases.
for cohort in control case; do
    for anc in "${ancestries[@]}"; do
        # Directory and fileset share the same name.
        stem="Hap_SORL1_boundris_adsp_${anc}_unrelated_filtered_${cohort}"

        echo "Converting ${anc} ${cohort^^} PLINK to VCF (with IID sample IDs)..."

        plink --bfile "Blocks/${stem}/${stem}" \
              --recode vcf-iid \
              --out "Blocks/${anc}_${cohort}"
    done
done


### Phasing data

In [None]:
# NOTE(review): IPython runs each `!` command in its own subshell, so a module
# loaded here is presumably not visible to the later %%bash cells - confirm, and
# if so load the module inside the cell that actually calls java.
! module load java

In [None]:
%%bash
# Phase the per-ancestry case and control VCFs with Beagle.
# ANCESTRY and COHORT are optional filters:
#   ANCESTRY - ancestry code in any letter case (EUR or eur); default: all
#   COHORT   - Cases / Controls / case / control; default: both
# Input : Blocks/<anc>_<case|control>.vcf
# Output: prefix Blocks/<anc>_<case|control>_phased
#         (the following cell reads <prefix>.vcf.gz)

# Each %%bash cell is a separate shell process, so Java is loaded here.
module load java

if [[ -n "${ANCESTRY:-}" ]]; then
    # The VCFs are named with the lower-case ancestry code.
    ancestries=("${ANCESTRY,,}")
else
    ancestries=(eur afr amr eas sas aac aj cah cas mde fin)
fi

if [[ -n "${COHORT:-}" ]]; then
    # File names use the singular lower-case tag: "Cases" -> "case".
    cohort_tag="${COHORT,,}"
    cohorts=("${cohort_tag%s}")
else
    cohorts=(control case)
fi

for cohort in "${cohorts[@]}"; do
    for anc in "${ancestries[@]}"; do
        echo "Phasing ${anc} ${cohort}..."

        java -Xmx8g -jar beagle.jar \
          gt="Blocks/${anc}_${cohort}.vcf" \
          out="Blocks/${anc}_${cohort}_phased" \
          nthreads=8
    done
done

### Convert data to PLINK format

In [None]:
%%bash
# Turn the phased, gzipped control and case VCFs back into PLINK binary
# filesets. --double-id makes PLINK use the VCF sample ID as both FID and IID.
module load plink/1.9

ancestries=(eur afr amr eas sas aac aj cah cas mde fin)

# All controls are converted first, then all cases.
for cohort in control case; do
    for anc in "${ancestries[@]}"; do
        phased="Blocks/${anc}_${cohort}_phased"

        echo "Converting phased VCF of ${anc} (${cohort}s) to PLINK..."

        plink --vcf "${phased}.vcf.gz" \
              --double-id \
              --make-bed \
              --out "$phased"
    done
done


### Generation of Haplotype Blocks 

In [None]:
%%bash
# Estimate haplotype blocks in the phased case fileset of every ancestry.
# "no-pheno-req" is passed because these filesets come from VCF and carry no
# phenotype column of their own.
module load plink/1.9

for pop in eur afr amr eas sas aac aj cah cas mde fin; do
    phased_cases="Blocks/${pop}_case_phased"

    echo "Processing ancestry: $pop"

    plink --bfile "$phased_cases" \
          --blocks no-pheno-req \
          --out "${phased_cases}_blocks"
done

In [None]:
%%bash
# Estimate haplotype blocks in the phased control fileset of every ancestry.
# "no-pheno-req" is passed because these filesets come from VCF and carry no
# phenotype column of their own.
module load plink/1.9

for pop in eur afr amr eas sas aac aj cah cas mde fin; do
    phased_controls="Blocks/${pop}_control_phased"

    echo "Processing ancestry: $pop"

    plink --bfile "$phased_controls" \
          --blocks no-pheno-req \
          --out "${phased_controls}_blocks"
done