In [83]:
source("/frazer01/projects/CEGS/analysis/hla_type_1kgp/script/functions_hla_type.R")


# Get MHC VCF file

In [None]:
# Subset `vcf_chr6` to the region `coord` and to the samples listed in
# `ids_file`, writing a bgzip-compressed VCF to `vcf_snps`.
command <- paste(
    "bcftools", "view",
    "-r", coord,
    "-S", ids_file,
    "-O", "z",
    "-o", vcf_snps,
    vcf_chr6
)
system(command)

# Index the new VCF with tabix (-t).
command <- paste("bcftools", "index", "-t", vcf_snps)
system(command)


# Annotate all genotypes

In [None]:
# Read the HLA typing result of one sample and turn it into per-allele genotypes.
#
# Arguments:
#   id     : sample identifier; used as a row index into `id2hla`.
#   id2hla : data frame with an "infile" column giving the path of a tab-separated
#            file (with header) that provides the columns `gene`, `type1` and `type2`.
#
# Returns one row per (id, gene, type) with:
#   gn : number of copies of that type carried by the sample (1 or 2)
#   gt : "0/1" when the type is seen once, "1/1" when both alleles carry it
#
# Alleles without a call (empty string or NA) are replaced by the placeholder
# "<gene>*00:00:00:00".
read_hla_types = function(id, id2hla)
{
    #message(id2hla[ id, "infile"], appendLF = FALSE)
    indata    = fread(id2hla[ id, "infile"], sep = "\t", header = TRUE, data.table = FALSE)

    # Force character columns: an entirely empty type column is read by fread as
    # logical NA, and factors would not accept the placeholder level below.
    indata    = rbind(data.frame(gene = indata$gene, type = as.character(indata$type1), stringsAsFactors = FALSE),
                      data.frame(gene = indata$gene, type = as.character(indata$type2), stringsAsFactors = FALSE))

    # NA must be tested explicitly: `NA == ""` is NA, which is not a valid
    # subscript for assignment.
    untyped               = is.na(indata$type) | indata$type == ""
    indata$type[untyped]  = paste(indata$gene[untyped], "00:00:00:00", sep = "*")

    indata$id = id
    indata    = suppressMessages(indata %>%
                                 group_by(id, gene, type) %>%
                                 summarize(gn = length(type),
                                           gt = if(length(type) == 1){"0/1"}else{"1/1"}) %>%
                                 ungroup())

    return(indata)
}

#id  = "HG00187"
#a = read_hla_types(id, id2hla)

# Read every sample listed in id2hla$id, stack the per-sample tables into one
# data.frame and save it as a tab-separated file.
gts <- as.data.frame(
    rbindlist(lapply(id2hla$id, read_hla_types, id2hla = id2hla)),
    stringsAsFactors = FALSE
)

fwrite(gts, "pipeline/hla_typing/hla_types.txt", sep = "\t", col.names = TRUE, row.names = FALSE)

In [43]:
# Reload the per-sample HLA genotype table (tab-separated, with header) as a plain data.frame.
gts = fread("pipeline/hla_typing/hla_types.txt", sep = "\t", header = TRUE, data.table = FALSE)

In [44]:
# Per-sample VCF field: genotype and copy number joined as "GT:GN".
gts[["vcf"]] <- paste(gts[["gt"]], gts[["gn"]], sep = ":")

# Wide table with one row per HLA type and one column per sample; combinations
# that are absent from `gts` are filled with "0/0:0".
vcf_data <- add_rownames(
    reshape2::dcast(
        data          = gts,
        formula       = type ~ id,
        value.var     = "vcf",
        fun.aggregate = unique,
        fill          = "0/0:0"
    )
)


In [45]:
# Frequency of each HLA type: summed copy number divided by the number of rows
# in id2hla and by 2. Rows are then named by HLA type.
type2af <- as.data.frame(suppressMessages(
    gts %>%
        group_by(gene, type) %>%
        summarize(af = sum(gn) / nrow(id2hla) /2)
))
rownames(type2af) <- type2af$type

In [46]:
# Inspect the structure of the per-type frequency table.
str(type2af)

'data.frame':	2650 obs. of  3 variables:
 $ gene: chr  "A" "A" "A" "A" ...
 $ type: chr  "A*00:00:00:00" "A*01:01:01:01" "A*01:01:01:02N" "A*01:01:01:03" ...
 $ af  : num  0.000157 0.002828 0.003928 0.001571 0.000471 ...


In [None]:
#vcf_gt = add_rownames(reshape2::dcast(type ~ id, data = gts, value.var = "gt", fun.aggregate = unique, fill = "0/0"))[1:10, 1:10]
#vcf_gn = add_rownames(reshape2::dcast(type ~ id, data = gts, value.var = "gn", fun.aggregate = unique, fill = 0    ))[1:10, 1:10]


In [None]:
#vcf_data = paste(as.matrix(vcf_gt), as.matrix(vcf_gn), sep = ":")

# Write VCF

In [58]:
# Build the header lines of the HLA-type VCF.
#
# Arguments:
#   ids : character vector of sample names, appended after the nine fixed VCF columns.
#
# Returns a character vector: the "##" meta-information lines followed by the
# tab-separated "#CHROM ..." column line.
write_vcf_header <- function(ids) {
    meta_lines <- c(
        "fileformat=VCFv4.2",
        'FILTER=<ID=PASS,Description="All filters passed">',
        'INFO=<ID=END,Number=1,Type=Integer,Description="End position of the HLA type">',
        'INFO=<ID=COORD,Number=1,Type=String,Description="HLA type coordinates">',
        'INFO=<ID=AF,Number=1,Type=Float,Description="HLa type frequency">',
        'INFO=<ID=GENE,Number=1,Type=String,Description="HLA gene">',
        'FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">',
        'FORMAT=<ID=GN,Number=1,Type=String,Description="Genotype (numeric): 0 = homozygous reference; 1 = heterozygous; 2 = homozygous alternative">',
        'contig=<ID=6,length=170805979>',
        "reference=hg19"
    )
    column_names <- c("#CHROM", "POS", "ID", "REF", "ALT", "QUAL", "FILTER", "INFO", "FORMAT", ids)

    c(paste0("##", meta_lines), paste(column_names, collapse = "\t"))
}

# Build the record lines (one per HLA type) of the HLA-type VCF.
#
# Arguments:
#   ids      : sample names; selects and orders the genotype columns.
#   vcf_data : data frame of "GT:GN" strings with HLA types as row names and
#              samples as columns.
#   geneinfo : data frame with (at least) the columns `gene`, `start` and `end`.
#   type2af  : data frame with the columns `gene`, `type` and `af`.
#
# Returns a data frame with the nine fixed VCF columns followed by one column per
# sample, sorted by position and then by decreasing frequency.
#
# Every HLA type is encoded as a placeholder variant (REF = "AA", ALT = "TT")
# located at the start of its gene. Types sharing a position are spread over
# consecutive positions (start, start + 1, ...) so that each record has its own POS.
write_vcf_body = function(ids, vcf_data, geneinfo, type2af)
{
    out           = merge(type2af, geneinfo[,c("gene", "start", "end")])
    rownames(out) = out$type
    out$chrom     = 6
    out$pos       = out$start
    out$ref       = "AA"
    out$alt       = "TT"
    out$id        = sub("\\*", ".", out$type)
    out$qual      = 1
    out$filter    = "PASS"
    # Each INFO key is written once: the original string repeated END, and
    # duplicate INFO keys are not valid VCF.
    out$info      = paste(paste("END" , out$end , sep = "="),
                          paste("AF"  , out$af  , sep = "="),
                          paste("GENE", out$gene, sep = "="),
                          sep = ";"
                         )
    out$format    = "GT:GN"
    out           = out[ order(out$pos, -out$af), ]
    dups          = unique(out[ duplicated(out$pos), "pos"])

    for(pos in dups)
    {
        shared              = which(out$pos == pos)
        out[ shared, "pos"] = pos - 1 + seq_along(shared)
    }

    # drop = FALSE keeps a data frame (and the sample name) when `ids` has a single element
    genotypes     = vcf_data[ rownames(out), ids, drop = FALSE]
    out           = cbind(out[,c("chrom", "pos", "id", "ref", "alt", "qual", "filter", "info", "format")], genotypes)
    return(out)
}

# Assemble the HLA-type VCF and write it as plain text: header lines first,
# then the records appended to the same file.
vcf_header       = write_vcf_header(ids)
vcf_body         = write_vcf_body  (ids, vcf_data, geneinfo, type2af)

# Anchor the pattern so that only a trailing ".gz" is stripped; the unanchored
# form removes the first ".gz" found anywhere in the path.
vcf_uncompressed = sub("\\.gz$", "", vcf_hla_types)

writeLines(vcf_header, vcf_uncompressed, sep = "\n")
fwrite    (vcf_body  , vcf_uncompressed, sep = "\t", col.names = FALSE, row.names = FALSE, append = TRUE)


In [59]:
# Compress the plain-text HLA VCF with bgzip (-O z) and index it with tabix (-t).
# The exit status of each command is checked so that the uncompressed
# intermediate is only deleted after both steps succeeded.
command = paste("bcftools", "view", "-O", "z", "-o", vcf_hla_types, vcf_uncompressed)

if(system(command) != 0){stop("bcftools view failed; keeping ", vcf_uncompressed)}

command = paste("bcftools", "index", "-t", vcf_hla_types)

if(system(command) != 0){stop("bcftools index failed; keeping ", vcf_uncompressed)}

file.remove(vcf_uncompressed)

## print list of HLA types

In [None]:
# List the variant IDs stored in a VCF.
#
# Arguments:
#   vcf : path of the VCF passed to `bcftools query`.
#
# Returns the first column of the `bcftools query -f '%ID\n'` output,
# i.e. one ID per record.
get_hla_types_list <- function(vcf) {
    query_cmd <- paste("bcftools", "query", "-f", "'%ID\\n'", vcf)
    ids_table <- fread(cmd = query_cmd, sep = "\t", header = FALSE, data.table = FALSE)

    return(ids_table[, 1])
}

# Collect the IDs of the HLA-type VCF and store them one per line.
hla_types <- get_hla_types_list(vcf_hla_types)

writeLines(text = hla_types, con = hla_types_list_file, sep = "\n")

# Combine VCFs

In [202]:
# Echo the path of the SNP VCF.
vcf_snps

In [60]:
# Absolute path of the VCF that will hold SNPs and HLA types together.
vcf_combined <- file.path(getwd(), "pipeline/hla_typing/vcf", "combined.vcf.gz")


In [61]:
# Concatenate the SNP VCF and the HLA-type VCF (-a: allow overlapping records)
# into one bgzip-compressed file.
command <- paste(
    "bcftools", "concat", "-a",
    "-O", "z",
    "-o", vcf_combined,
    vcf_snps, vcf_hla_types
)
system(command)

# Tabix-index the combined VCF.
command <- paste("bcftools", "index", "-t", vcf_combined)
system(command)


# Remove indels

In [63]:
# Keep only SNPs plus the records with REF = "AA" and ALT = "TT" (the
# placeholder alleles used for the HLA-type records), dropping other variants.
vcf_combined_noindel <- file.path(getwd(), "pipeline/hla_typing/vcf", "combined_noindel.vcf.gz")

command <- paste(
    "bcftools", "view",
    "-i" , "'TYPE=\"snp\" | (REF=\"AA\" & ALT=\"TT\")'",
    "-Oz",
    "-o" , vcf_combined_noindel,
    vcf_combined
)
system(command)

# Tabix-index the filtered VCF.
command <- paste("bcftools", "index", "-t", vcf_combined_noindel)
system(command)


# phase all variants

In [64]:
# Phase the region `coord` with shapeit4 using 16 threads and the chr6 genetic map.
# NOTE(review): the input is `vcf_combined`, not the indel-filtered
# `vcf_combined_noindel` created in the previous step -- confirm this is intended.
file_map   <- file.path(getwd(), "input/phase/chr6.b37.gmap.gz")
log_phased <- file.path(getwd(), "pipeline/hla_typing/vcf/phased.log")
vcf_phased <- file.path(getwd(), "pipeline/hla_typing/vcf/phased.vcf.gz")

command <- paste(
    "shapeit4",
    "--input" , vcf_combined,
    "--thread", 16,
    "--map"   , file_map,
    "--region", coord,
    "--log"   , log_phased,
    "--output", vcf_phased,
    ""
)
system(command)

# Tabix-index the phased VCF.
command <- paste("bcftools", "index", "-t", vcf_phased)
system(command)


# impute HLA types on 1KGP (validation)
## 1- all IDs together

In [81]:
# Output folder of the 1KGP validation and its "processing" sub-folder
# (no warning if they already exist).
outfolder <- file.path(getwd(), "pipeline/beagle/1kgp_validation")

dir.create(outfolder, showWarnings = FALSE)
dir.create(file.path(outfolder, "processing"), showWarnings = FALSE)


In [68]:
# Run the imputation helper with the phased VCF and the SNP VCF, then extract/convert
# the HLA calls from its output. Both helpers are defined outside this notebook section
# (presumably in the sourced functions file -- verify there which argument is the
# reference panel and which is the target).
imputed_vcf = command_run_beagle         (vcf_phased, vcf_snps   , outfolder)
hla_out     = command_extract_hla_convert(imputed_vcf, outfolder)


## 2- 10-fold cross-validation

In [161]:
# Assign every sample to one of 10 bins: bin labels are laid out in
# consecutive, equally sized runs and then shuffled with a fixed seed.
set.seed(1)
id2cv <- data.frame(
    id  = ids,
    bin = sample(ceiling((1:length(ids) / length(ids) * 10)))
)
bins  <- sort(unique(id2cv$bin))

fwrite(id2cv, file.path(outfolder, "id2cv.txt"), sep = "\t", col.names = TRUE, row.names = FALSE)



In [162]:
# Write the shell wrapper for the cross-validation imputation and submit it as
# an SGE array job with one task per bin.
#
# Arguments:
#   bins : vector of bins; only its length is used (array tasks 1..length(bins)).
#   qsub : when TRUE the qsub command is executed, otherwise it is only printed.
run_hla_typing_qsub <- function(bins, qsub = FALSE) {
    job_script <- file.path(getwd(), "pipeline/beagle/1kgp_validation/run_imputation_1kgp.sh")
    r_script   <- file.path(getwd(), "script", "run_imputation_1kgp.R")

    # Each array task forwards its SGE task id to the R script.
    script_lines <- c(
        "#!/usr/bin/sh",
        "source /frazer01/home/matteo/.bashrc",
        paste("Rscript", r_script, "--taskid", "$SGE_TASK_ID")
    )
    writeLines(text = script_lines, con = job_script, sep = "\n\n")

    qsub_command <- paste(
        "qsub",
        "-t" , paste0(1, "-", length(bins), ":1"),
        "-tc", 50,
        "-l" , "h_vmem=16G",
        "-pe", "smp", 4,
        "-o" , file.path(getwd(), "pipeline/beagle/1kgp_validation/logs.out"),
        "-e" , file.path(getwd(), "pipeline/beagle/1kgp_validation/logs.err"),
        job_script
    )

    message(qsub_command)
    if (qsub == TRUE) {
        system(qsub_command)
    }
}

# Write the job script and submit the array job (qsub = TRUE).
run_hla_typing_qsub(bins, TRUE)


qsub -t 1-10:1 -tc 50 -l h_vmem=16G -pe smp 4 -o /frazer01/projects/CEGS/analysis/hla_type_1kgp/pipeline/beagle/1kgp_validation/logs.out -e /frazer01/projects/CEGS/analysis/hla_type_1kgp/pipeline/beagle/1kgp_validation/logs.err /frazer01/projects/CEGS/analysis/hla_type_1kgp/pipeline/beagle/1kgp_validation/run_imputation_1kgp.sh



## 3- only using variants in the UKBB

In [163]:
# Output folder for the run restricted to UKBB variants.
outfolder_ukbb <- file.path(getwd(), "pipeline/beagle/1kgp_validation/ukbb_variants")

dir.create(outfolder_ukbb, showWarnings = FALSE)


In [174]:
# Candidate variant IDs: third column of the UKBB .pvar file plus the HLA type
# IDs, de-duplicated and sorted.
var_ids <- sort(unique(c(
    fread(file.path(getwd(), "input/ukbb/snps.pvar"), sep = "\t", header = TRUE, data.table = FALSE)[,3],
    readLines(hla_types_list_file)
)))

# Keep only IDs that start with "rs" or "6:".
# NOTE(review): the HLA type IDs added above presumably start with the gene
# name and would therefore be removed by this filter -- confirm this is intended.
var_ids      <- var_ids[grepl("^rs", var_ids) | grepl("^6:", var_ids)]
var_ids_file <- file.path(outfolder_ukbb, "var_ids_ukbb.txt")

writeLines(var_ids, var_ids_file, sep = "\n")

In [175]:
# Output prefix handed to plink2 (--out).
prefix_ukbb_vars <- file.path(outfolder_ukbb, "ukbb_vars")

In [176]:
# Extract the variants listed in `var_ids_file` from the phased VCF and export
# them as a bgzip-compressed VCF.
command <- paste(
    "plink2_64",
    "--vcf"    , vcf_phased,
    "--extract", var_ids_file,
    "--memory" , 64000,
    "--threads", 8,
    "--export" , "vcf", "bgz",
    "vcf-dosage=DS-force",
    "--out"    , prefix_ukbb_vars
)
system(command)

# plink2 appends ".vcf.gz" to the prefix; index that file with tabix.
vcf_ukbb_vars <- paste(prefix_ukbb_vars, "vcf.gz", sep = ".")
command       <- paste("bcftools", "index", "-t", vcf_ukbb_vars)
system(command)



In [201]:
# Echo the path of the UKBB-variant VCF.
vcf_ukbb_vars

In [177]:
# Same two-step imputation as for the full variant set, here with the UKBB-variant VCF
# in place of the phased VCF and results written to outfolder_ukbb.
# Both helpers are defined outside this notebook section.
imputed_vcf_ukbb = command_run_beagle         (vcf_ukbb_vars, vcf_snps, outfolder_ukbb)
hla_out_ukbb     = command_extract_hla_convert(imputed_vcf_ukbb, outfolder_ukbb)


## 4- 10-fold cross-validation on variants in the UKBB

In [178]:
# Output folder (and "processing" sub-folder) for the cross-validation on UKBB variants.
outfolder_ukbb_vars <- file.path(getwd(), "pipeline/beagle/1kgp_validation/1kgp_ukbb_vars")

dir.create(outfolder_ukbb_vars, showWarnings = FALSE)
dir.create(file.path(outfolder_ukbb_vars, "processing"), showWarnings = FALSE)


In [179]:
# Same seeded assignment of samples to 10 bins as in the first cross-validation.
# NOTE(review): the table is written to `outfolder`, not `outfolder_ukbb_vars`;
# with the same seed and ids the content is identical -- confirm which location
# the downstream script reads.
set.seed(1)
id2cv <- data.frame(
    id  = ids,
    bin = sample(ceiling((1:length(ids) / length(ids) * 10)))
)
bins  <- sort(unique(id2cv$bin))

fwrite(id2cv, file.path(outfolder, "id2cv.txt"), sep = "\t", col.names = TRUE, row.names = FALSE)



In [180]:
# Write the shell wrapper for the cross-validation on UKBB variants and submit
# it as an SGE array job with one task per bin.
#
# Arguments:
#   bins : vector of bins; only its length is used (array tasks 1..length(bins)).
#   qsub : when TRUE the qsub command is executed, otherwise it is only printed.
run_hla_typing_qsub <- function(bins, qsub = FALSE) {
    job_script <- file.path(getwd(), "pipeline/beagle/1kgp_validation/run_imputation_1kgp_ukbb_vars.sh")
    r_script   <- file.path(getwd(), "script", "run_imputation_1kgp_ukbb_vars.R")

    # Each array task forwards its SGE task id to the R script.
    script_lines <- c(
        "#!/usr/bin/sh",
        "source /frazer01/home/matteo/.bashrc",
        paste("Rscript", r_script, "--taskid", "$SGE_TASK_ID")
    )
    writeLines(text = script_lines, con = job_script, sep = "\n\n")

    qsub_command <- paste(
        "qsub",
        "-t" , paste0(1, "-", length(bins), ":1"),
        "-tc", 50,
        "-l" , "h_vmem=16G",
        "-pe", "smp", 4,
        "-o" , file.path(getwd(), "pipeline/beagle/1kgp_validation/1kgp_ukbb_vars/logs.out"),
        "-e" , file.path(getwd(), "pipeline/beagle/1kgp_validation/1kgp_ukbb_vars/logs.err"),
        job_script
    )

    message(qsub_command)
    if (qsub == TRUE) {
        system(qsub_command)
    }
}

# Write the job script and submit the array job (qsub = TRUE).
run_hla_typing_qsub(bins, TRUE)


qsub -t 1-10:1 -tc 50 -l h_vmem=16G -pe smp 4 -o /frazer01/projects/CEGS/analysis/hla_type_1kgp/pipeline/beagle/1kgp_validation/1kgp_ukbb_vars/logs.out -e /frazer01/projects/CEGS/analysis/hla_type_1kgp/pipeline/beagle/1kgp_validation/1kgp_ukbb_vars/logs.err /frazer01/projects/CEGS/analysis/hla_type_1kgp/pipeline/beagle/1kgp_validation/run_imputation_1kgp_ukbb_vars.sh



## 5- 10-fold cross-validation on variants in the UKBB removing random variants

In [185]:
# Output folder for the random-subset runs, with sub-folders for per-run
# processing and for the variant lists.
outfolder_random <- file.path(getwd(), "pipeline/beagle/1kgp_validation/random")

dir.create(outfolder_random, showWarnings = FALSE)
dir.create(file.path(outfolder_random, "processing"), showWarnings = FALSE)
dir.create(file.path(outfolder_random, "var_lists"), showWarnings = FALSE)


In [186]:
#set.seed(1)
#id2cv     = data.frame(id = ids, bin = ceiling((1:length(ids) / length(ids) * 10)))
#id2cv$bin = sample(id2cv$bin)
#bins      = sort(unique(id2cv$bin))
#
#fwrite(id2cv, paste(outfolder_random, "id2cv.txt", sep = "/"), sep = "\t", col.names = TRUE, row.names = FALSE)
#
#

In [189]:
# One row per (bin, random level, permutation): random takes 10, 20, ..., 100
# and perm runs from 1 to 100.
cv2perm     <- expand.grid(bin = bins, random = (1:10) * 10, perm = 1:100)

# Distinct (random, perm) pairs, independent of the bin.
random2perm <- unique(cv2perm[, c("random", "perm")])

# Per-run processing folder and the VCF of the matching (random, perm) variant list.
run_label       <- NULL
cv2perm$folder  <- paste(outfolder_random, "processing", paste(paste0("bin", cv2perm$bin), cv2perm$random, cv2perm$perm, sep = "."),                 sep = "/")
cv2perm$vcf     <- paste(outfolder_random, "var_lists" , paste("vars"                    , cv2perm$random, cv2perm$perm, sep = "."), "vars.vcf.gz",  sep = "/")
rm(run_label)

fwrite(cv2perm, file.path(outfolder_random, "cv2perm.txt"), sep = "\t", col.names = TRUE, row.names = FALSE)

In [172]:

# Try the variant-list helper on the first (random, perm) pair only;
# create_random_var_lists is defined outside this notebook section.
ii = 1
create_random_var_lists(random2perm[ii, "random"], random2perm[ii, "perm"], var_ids)

# Disabled: the same call applied to every row of random2perm.
#invisible(lapply(1:nrow(random2perm), function(ii)
#{
#    create_random_var_lists(random2perm[ii, "random"], random2perm[ii, "perm"], var_ids)
#}))


In [190]:
# Write the shell wrapper that prepares the random variant lists and submit it
# as an SGE array job with one task per row of `random2perm`.
# Paths are built from the global `outfolder_random`.
#
# Arguments:
#   random2perm : data frame; only its number of rows is used (array tasks 1..nrow).
#   qsub        : when TRUE the qsub command is executed, otherwise it is only printed.
run_prepare_random_vars_qsub <- function(random2perm, qsub = FALSE) {
    job_script <- file.path(outfolder_random, "prepare_random_vars.sh")
    r_script   <- file.path(getwd(), "script", "prepare_random_vars.R")

    # Each array task forwards its SGE task id to the R script.
    script_lines <- c(
        "#!/usr/bin/sh",
        "source /frazer01/home/matteo/.bashrc",
        paste("Rscript", r_script, "--taskid", "$SGE_TASK_ID")
    )
    writeLines(text = script_lines, con = job_script, sep = "\n\n")

    qsub_command <- paste(
        "qsub",
        "-t" , paste0(1, "-", nrow(random2perm), ":1"),
        "-tc", 50,
        "-l" , "h_vmem=16G",
        "-pe", "smp", 4,
        "-o" , file.path(outfolder_random, "prepare_random_vars.out"),
        "-e" , file.path(outfolder_random, "prepare_random_vars.err"),
        job_script
    )

    message(qsub_command)
    if (qsub == TRUE) {
        system(qsub_command)
    }
}

# Write the job script and print the qsub command without submitting it (qsub = FALSE).
run_prepare_random_vars_qsub(random2perm, FALSE)


qsub -t 1-1000:1 -tc 50 -l h_vmem=16G -pe smp 4 -o /frazer01/projects/CEGS/analysis/hla_type_1kgp/pipeline/beagle/1kgp_validation/random/prepare_random_vars.out -e /frazer01/projects/CEGS/analysis/hla_type_1kgp/pipeline/beagle/1kgp_validation/random/prepare_random_vars.err /frazer01/projects/CEGS/analysis/hla_type_1kgp/pipeline/beagle/1kgp_validation/random/prepare_random_vars.sh



In [192]:
# Write the shell wrapper for the random-subset imputations and submit it as an
# SGE array job with one task per row of `cv2perm`.
# Log paths are built from the global `outfolder_random`.
#
# Arguments:
#   cv2perm : data frame; only its number of rows is used (array tasks 1..nrow).
#   qsub    : when TRUE the qsub command is executed, otherwise it is only printed.
run_hla_typing_qsub <- function(cv2perm, qsub = FALSE) {
    job_script <- file.path(getwd(), "pipeline/beagle/1kgp_validation/run_imputation_random.sh")
    r_script   <- file.path(getwd(), "script", "run_imputation_random.R")

    # Each array task forwards its SGE task id to the R script.
    script_lines <- c(
        "#!/usr/bin/sh",
        "source /frazer01/home/matteo/.bashrc",
        paste("Rscript", r_script, "--taskid", "$SGE_TASK_ID")
    )
    writeLines(text = script_lines, con = job_script, sep = "\n\n")

    qsub_command <- paste(
        "qsub",
        "-t" , paste0(1, "-", nrow(cv2perm), ":1"),
        "-tc", 500,
        "-l" , "h_vmem=16G",
        "-pe", "smp", 4,
        "-o" , file.path(outfolder_random, "imputation.out"),
        "-e" , file.path(outfolder_random, "imputation.err"),
        job_script
    )

    message(qsub_command)
    if (qsub == TRUE) {
        system(qsub_command)
    }
}

# Write the job script and print the qsub command without submitting it (qsub = FALSE).
run_hla_typing_qsub(cv2perm, FALSE)


qsub -t 1-10000:1 -tc 500 -l h_vmem=16G -pe smp 4 -o /frazer01/projects/CEGS/analysis/hla_type_1kgp/pipeline/beagle/1kgp_validation/random/imputation.out -e /frazer01/projects/CEGS/analysis/hla_type_1kgp/pipeline/beagle/1kgp_validation/random/imputation.err /frazer01/projects/CEGS/analysis/hla_type_1kgp/pipeline/beagle/1kgp_validation/run_imputation_random.sh



# SCRATCH

In [195]:
# Scratch: row index of cv2perm used for interactive testing.
ii = 1000

In [200]:
# Run one cross-validation imputation for a random variant subset.
#
# Arguments:
#   ii      : row index into `cv2perm`.
#   cv2perm : data frame with the columns `bin`, `random`, `perm` and `vcf`
#             (path of the VCF used as genotype input for this run).
#   id2cv   : data frame with the columns `id` and `bin`.
#
# Samples outside the selected bin are taken from the global `vcf_phased`
# (ref.vcf.gz); samples in the bin are taken from the run's `vcf` (gts.vcf.gz).
# Both files, and the imputation output, go to
# <wd>/pipeline/beagle/1kgp_validation/random/processing/bin<bin>.<random>.<perm>.
# Returns (invisibly) the value of command_extract_hla_convert().
impute_1kgp_random = function(ii, cv2perm, id2cv)
{
    bin       = cv2perm[ii, "bin"     ]
    random    = cv2perm[ii, "random"  ]
    perm      = cv2perm[ii, "perm"    ]
    message(paste(ii, bin, random, perm), appendLF = FALSE)
    vcf_gts   = cv2perm[ii, "vcf"     ]
    ids_in    = id2cv[ id2cv$bin != bin, "id"]
    ids_imp   = id2cv[ id2cv$bin == bin, "id"]

    # The output folder is rebuilt from bin/random/perm; the `folder` column of
    # cv2perm was previously read into the same variable and immediately
    # overwritten, so that dead assignment has been removed.
    outfolder = paste(getwd(), "pipeline/beagle/1kgp_validation/random/processing", paste(paste0("bin", bin), random, perm, sep = "."), sep = "/")
    vcf_in    = paste(outfolder, "ref.vcf.gz", sep = "/")
    vcf_imp   = paste(outfolder, "gts.vcf.gz", sep = "/")

    dir.create(outfolder, showWarnings = FALSE)

    filter_vcf_by_id(ids_in , vcf_phased, vcf_in )
    filter_vcf_by_id(ids_imp, vcf_gts   , vcf_imp)

    imputed_vcf = command_run_beagle         (vcf_in, vcf_imp, outfolder)
    hla_out     = command_extract_hla_convert(imputed_vcf, outfolder)
}



# Scratch: run the imputation for rows 9991 to 10000 of cv2perm.
lapply(9991:10000, function(ii){impute_1kgp_random(ii, cv2perm, id2cv)})


9991 1 100 100
9992 2 100 100
9993 3 100 100
9994 4 100 100
9995 5 100 100
9996 6 100 100
9997 7 100 100
9998 8 100 100
9999 9 100 100
10000 10 100 100


In [199]:
# Scratch: show rows 9991 to 10000 of cv2perm.
cv2perm[ 9991:10000,]

Unnamed: 0_level_0,bin,random,perm,folder,vcf
Unnamed: 0_level_1,<dbl>,<dbl>,<int>,<chr>,<chr>
9991,1,100,100,/frazer01/projects/CEGS/analysis/hla_type_1kgp/pipeline/beagle/1kgp_validation/random/processing/bin1.100.100,/frazer01/projects/CEGS/analysis/hla_type_1kgp/pipeline/beagle/1kgp_validation/random/var_lists/vars.100.100/vars.vcf.gz
9992,2,100,100,/frazer01/projects/CEGS/analysis/hla_type_1kgp/pipeline/beagle/1kgp_validation/random/processing/bin2.100.100,/frazer01/projects/CEGS/analysis/hla_type_1kgp/pipeline/beagle/1kgp_validation/random/var_lists/vars.100.100/vars.vcf.gz
9993,3,100,100,/frazer01/projects/CEGS/analysis/hla_type_1kgp/pipeline/beagle/1kgp_validation/random/processing/bin3.100.100,/frazer01/projects/CEGS/analysis/hla_type_1kgp/pipeline/beagle/1kgp_validation/random/var_lists/vars.100.100/vars.vcf.gz
9994,4,100,100,/frazer01/projects/CEGS/analysis/hla_type_1kgp/pipeline/beagle/1kgp_validation/random/processing/bin4.100.100,/frazer01/projects/CEGS/analysis/hla_type_1kgp/pipeline/beagle/1kgp_validation/random/var_lists/vars.100.100/vars.vcf.gz
9995,5,100,100,/frazer01/projects/CEGS/analysis/hla_type_1kgp/pipeline/beagle/1kgp_validation/random/processing/bin5.100.100,/frazer01/projects/CEGS/analysis/hla_type_1kgp/pipeline/beagle/1kgp_validation/random/var_lists/vars.100.100/vars.vcf.gz
9996,6,100,100,/frazer01/projects/CEGS/analysis/hla_type_1kgp/pipeline/beagle/1kgp_validation/random/processing/bin6.100.100,/frazer01/projects/CEGS/analysis/hla_type_1kgp/pipeline/beagle/1kgp_validation/random/var_lists/vars.100.100/vars.vcf.gz
9997,7,100,100,/frazer01/projects/CEGS/analysis/hla_type_1kgp/pipeline/beagle/1kgp_validation/random/processing/bin7.100.100,/frazer01/projects/CEGS/analysis/hla_type_1kgp/pipeline/beagle/1kgp_validation/random/var_lists/vars.100.100/vars.vcf.gz
9998,8,100,100,/frazer01/projects/CEGS/analysis/hla_type_1kgp/pipeline/beagle/1kgp_validation/random/processing/bin8.100.100,/frazer01/projects/CEGS/analysis/hla_type_1kgp/pipeline/beagle/1kgp_validation/random/var_lists/vars.100.100/vars.vcf.gz
9999,9,100,100,/frazer01/projects/CEGS/analysis/hla_type_1kgp/pipeline/beagle/1kgp_validation/random/processing/bin9.100.100,/frazer01/projects/CEGS/analysis/hla_type_1kgp/pipeline/beagle/1kgp_validation/random/var_lists/vars.100.100/vars.vcf.gz
10000,10,100,100,/frazer01/projects/CEGS/analysis/hla_type_1kgp/pipeline/beagle/1kgp_validation/random/processing/bin10.100.100,/frazer01/projects/CEGS/analysis/hla_type_1kgp/pipeline/beagle/1kgp_validation/random/var_lists/vars.100.100/vars.vcf.gz
