In [7]:
from NIPDToolkit.xlinked_analysis import XLinkedAnalyser
from NIPDToolkit.autosomal_analysis import AutosomalAnalyser
import yaml
import pandas as pd
import csv

In [8]:
# --- Input locations ---
# YAML file describing the families; read by parse_config below.
config = '../development_local.yaml'
# Directory holding one variant CSV per family.
family_csv_dir = '../output/family_csvs/'

# --- Output location and run identifier (both appear in output file names) ---
results_dir = '../output/sprt/'
worksheet_name = '190201_NB551415_0009_AHF7C7BGX7'

# --- Genes analysed by each analyser ---
x_genes = ['EDA', 'F8', 'F9']
autosomal_genes = ['CFTR', 'TSC1', 'TSC2']

# --- Thresholds handed unchanged to both analysers ---
min_genotype_depth = 20
min_distance_between_snps = 200
min_snps_per_block = 25
min_fetal_fraction = 0.0

In [9]:
def parse_config(config_location):
    """
    Load the YAML config file and return its parsed content.

    config_location: path of the YAML file to read.

    Returns whatever yaml.safe_load produces for the file.
    A yaml.YAMLError is printed and then re-raised to the caller.
    """

    with open(config_location, 'r') as config_file:

        try:
            parsed_config = yaml.safe_load(config_file)
        except yaml.YAMLError as yaml_error:
            # Show the parser message in the cell output, then let it propagate.
            print(yaml_error)
            raise

    return parsed_config

In [10]:
# Parse the YAML config once; the cells below read the family definitions from it.
config_dict = parse_config(config)

In [11]:
# Family identifiers are the keys of the 'families' mapping in the config.
families = config_dict['families'].keys()

In [12]:
# Column labels for every haplotype-block table written below. The 'chomosome'
# spelling is kept as-is because it ends up in the CSV headers.
# NOTE(review): looks like a typo for 'chromosome' - confirm nothing downstream
# reads that header (and how the analysers shape their block data) before renaming.
_HAPLOTYPE_BLOCK_COLUMNS = ['block_id', 'start', 'end', 'n_snps', 'haplotype', 'chomosome', 'gene']


def _write_analysis_tables(output_prefix, tables):
    """
    Write the raw tables and the haplotype block tables of one analysis run.

    output_prefix: path prefix shared by every output file of this family/gene.
    tables: list of (label, raw_df, haplotype_blocks) tuples, where label is the
        file-name suffix that tells the tables apart (e.g. 'x_linked_fwd').
    """
    for label, raw_df, _ in tables:
        raw_df.to_csv(f'{output_prefix}_raw_{label}.csv', index=False)

    for label, _, haplotype_blocks in tables:
        blocks_df = pd.DataFrame(haplotype_blocks, columns=_HAPLOTYPE_BLOCK_COLUMNS)
        blocks_df.to_csv(f'{output_prefix}_haplotype_blocks_{label}.csv', index=False)


def _write_info_csv(path, rows):
    """
    Write the [label, value] rows describing one analysis run to a CSV file.
    """
    # The csv module asks for newline='' on files it writes to; without it an
    # extra '\r' is emitted on platforms that use '\r\n' line endings.
    with open(path, 'w', newline='') as csvfile:
        csv.writer(csvfile, delimiter=',').writerows(rows)


# For every family in the config: run the X-linked analysis when mother, plasma
# and proband samples are present, and the autosomal analysis when the father is
# present as well. Each family/gene combination gets its own set of CSV files.
for family in families:

    members = config_dict['families'][family]['members']

    mother = members['mother']
    father = members['father']
    plasma = members['plasma']
    proband = members['proband']

    # Both analysers read the same per-family CSV, so the path is built once.
    # It uses worksheet_name so inputs and outputs always refer to the same run.
    family_csv = f'{family_csv_dir}/{worksheet_name}_all_chr_qfiltered_anno_selected_{family}.csv'

    # Rows common to the X-linked and autosomal info files.
    sample_rows = [
        ['mother:', mother],
        ['father:', father],
        ['plasma:', plasma],
        ['proband:', proband],
    ]

    has_mother_plasma_proband = (
        mother is not None and plasma is not None and proband is not None
    )

    if has_mother_plasma_proband:

        print(f'Can perform X Linked SPRT for family {family}')

        for gene in x_genes:

            my_x_linked_analyser = XLinkedAnalyser(
                family_csv=family_csv,
                maternal_sample=mother,
                proband_sample=proband,
                plasma_sample=plasma,
                gene=gene,
                min_genotype_depth=min_genotype_depth,
                min_distance_between_snps=min_distance_between_snps,
                min_snps_per_block=min_snps_per_block,
                min_fetal_fraction=min_fetal_fraction)

            my_x_linked_analyser.run_analysis()

            output_prefix = f'{results_dir}/{worksheet_name}_{family}_{gene}'

            _write_analysis_tables(output_prefix, [
                ('x_linked_fwd',
                 my_x_linked_analyser.xlinked_df_fwd,
                 my_x_linked_analyser.xlinked_df_fwd_haplotype_blocks),
                ('x_linked_rev',
                 my_x_linked_analyser.xlinked_df_rev,
                 my_x_linked_analyser.xlinked_df_rev_haplotype_blocks),
            ])

            _write_info_csv(f'{output_prefix}_x_linked_info.csv', sample_rows + [
                ['gene:', gene],
                ['min_genotype_depth:', my_x_linked_analyser.min_genotype_depth],
                ['min_distance_between_snps:', my_x_linked_analyser.min_distance_between_snps],
                ['min_snps_per_block:', my_x_linked_analyser.min_snps_per_block],
                ['min_fetal_fraction:', my_x_linked_analyser.min_fetal_fraction],
                ['fetal_fraction:', my_x_linked_analyser.fetal_fraction],
                ['initial_variant_count:', my_x_linked_analyser.initial_variant_count],
                ['after_depth_filter_variant_count:', my_x_linked_analyser.after_depth_filter_variant_count],
                ['after_gene_filter_variant_count:', my_x_linked_analyser.after_gene_filter_variant_count],
                ['after_distance_filter_variant_count:', my_x_linked_analyser.after_distance_filter_variant_count],
                ['informative_snp_count:', my_x_linked_analyser.informative_snp_count],
                ['mean_snp_depth:', my_x_linked_analyser.mean_snp_depth],
            ])

    if has_mother_plasma_proband and father is not None:

        print('Can do Autosomal Analysis')

        for gene in autosomal_genes:

            my_autosomal_analyser = AutosomalAnalyser(
                family_csv=family_csv,
                maternal_sample=mother,
                paternal_sample=father,
                proband_sample=proband,
                plasma_sample=plasma,
                gene=gene,
                min_genotype_depth=min_genotype_depth,
                min_distance_between_snps=min_distance_between_snps,
                min_snps_per_block=min_snps_per_block,
                min_fetal_fraction=min_fetal_fraction)

            my_autosomal_analyser.run_analysis()

            output_prefix = f'{results_dir}/{worksheet_name}_{family}_{gene}'

            _write_analysis_tables(output_prefix, [
                ('type4a_fwd',
                 my_autosomal_analyser.type4a_df_fwd,
                 my_autosomal_analyser.type4a_df_fwd_haplotype_blocks),
                ('type4a_rev',
                 my_autosomal_analyser.type4a_df_rev,
                 my_autosomal_analyser.type4a_df_rev_haplotype_blocks),
                ('type4b_fwd',
                 my_autosomal_analyser.type4b_df_fwd,
                 my_autosomal_analyser.type4b_df_fwd_haplotype_blocks),
                ('type4b_rev',
                 my_autosomal_analyser.type4b_df_rev,
                 my_autosomal_analyser.type4b_df_rev_haplotype_blocks),
                ('type3_fwd',
                 my_autosomal_analyser.type3_df_fwd,
                 my_autosomal_analyser.type3_df_fwd_haplotype_blocks),
                ('type3_rev',
                 my_autosomal_analyser.type3_df_rev,
                 my_autosomal_analyser.type3_df_rev_haplotype_blocks),
            ])

            _write_info_csv(f'{output_prefix}_autosomal_info.csv', sample_rows + [
                ['gene:', gene],
                ['min_genotype_depth:', my_autosomal_analyser.min_genotype_depth],
                ['min_distance_between_snps:', my_autosomal_analyser.min_distance_between_snps],
                ['min_snps_per_block:', my_autosomal_analyser.min_snps_per_block],
                ['min_fetal_fraction:', my_autosomal_analyser.min_fetal_fraction],
                ['min_ks_p_value:', my_autosomal_analyser.min_ks_p_value],
                ['fetal_fraction:', my_autosomal_analyser.fetal_fraction],
                ['initial_variant_count:', my_autosomal_analyser.initial_variant_count],
                ['after_depth_filter_variant_count:', my_autosomal_analyser.after_depth_filter_variant_count],
                ['after_distance_filter_variant_count:', my_autosomal_analyser.after_distance_filter_variant_count],
                ['after_gene_filter_variant_count:', my_autosomal_analyser.after_gene_filter_variant_count],
                ['mean_snp_depth:', my_autosomal_analyser.mean_snp_depth],
            ])

Can perform X Linked SPRT for family FAM001
Can do Autosomal Analysis
Can perform X Linked SPRT for family FAM002
Can do Autosomal Analysis
Can perform X Linked SPRT for family FAM003
Can do Autosomal Analysis
Can perform X Linked SPRT for family FAM004
Can do Autosomal Analysis
Can perform X Linked SPRT for family FAM005
Can do Autosomal Analysis
Can perform X Linked SPRT for family FAM006
Can do Autosomal Analysis
