In [5]:
import disease_edgotype_classes
import database_utils

# Open a database connection through the project helper and create the cursor
# that the statements further down execute their SQL on.
connect = database_utils.get_connection()
cursor = connect.cursor()

In [6]:
# Rebuild the paper-relevant edgotyping table: empty it, then insert, for every
# causal gene (CG) that passes the filters below, one row per wild-type PPI and
# one row per interaction of each allele linked to a selected disease.
#
# NOTE(review): no commit is issued in this cell - confirm that the connection
# autocommits or that a commit follows elsewhere.


def _tissue_values(gene, tissue_idx):
    """Return ``(expression, TiP)`` of ``gene`` at ``tissue_idx``.

    Returns ``(None, None)`` when the gene carries no expression values.
    ``len(...) > 0`` is used instead of truthiness so that the check also
    works if the values are stored in an array-like container.
    """
    if len(gene.tissue_expr_values) > 0:
        return gene.tissue_expr_values[tissue_idx], gene.tissue_TiP_values[tissue_idx]
    return None, None


db_table = 'hi2018_paper.edgotyping'
query = """delete from {}""".format(db_table)
cursor.execute(query)

# The table name is interpolated here; the doubled %%s survive as the DB-API
# placeholders that are bound per row by cursor.execute(query, data_in).
query = """insert into %s 
        (causal_gene_id,CG_symbol,causal_orf_id,causal_mut_id,interactor_gene_id,
        interactor_symbol,interactor_orf_id,assay_id,GS_WT,
        GS_allele,GS_diff,perturbed,final_score,mutation,pathogenecity,omim_ids,
        TiP_tissue,CG_tis_expr,CG_TiP_expr,interactor_tis_expr,interactor_TiP_expr) 
        values (%%s,%%s,%%s,%%s,%%s,%%s,%%s,%%s,%%s,%%s,%%s,%%s,%%s,%%s,%%s,%%s,%%s,%%s,%%s,%%s,%%s)""" % (db_table)

std_batches = ['tipAs1','tipAs2','tipAs6']
data = disease_edgotype_classes.get_huri_rel_cegsb01_data(std_batches)

for gene_id, gene_obj in data.genes.items():

    # The gene needs at least one annotated disease and at least one wild-type PPI.
    if not (len(gene_obj.diseases) > 0 and len(gene_obj.wt_PPIs) > 0):
        continue

    # Diseases of the CG that are also annotated on at least one of its alleles.
    # The set operations are kept in this exact form because the iteration
    # order of the resulting set determines the order of the joined omim_ids.
    tis_diseases = set()
    CG_diseases = set(gene_obj.diseases.keys())
    for allele_obj in gene_obj.disease_alleles.values():
        allele_diseases = set(allele_obj.diseases.keys())
        tis_diseases = tis_diseases.union(allele_diseases.intersection(CG_diseases))
    if len(tis_diseases) == 0:
        continue

    # All tissues of the selected diseases.
    sel_tissues = set()
    for tis_disease in tis_diseases:
        sel_tissues = sel_tissues.union(data.diseases[tis_disease].tissues)

    # Find the tissue with the highest partner TiP value among wild-type PPIs
    # with GS_wt > 1, where the CG has expression > 5 and TiP < 2 and the
    # partner has expression > 5 and TiP >= 2.  Ties keep the first hit.
    max_TiP = 0
    for tup, GS_wt in gene_obj.wt_PPIs.items():
        partner_obj = data.genes[tup[0]]
        if not GS_wt > 1:
            continue
        for sel_tissue in sel_tissues:
            if len(gene_obj.tissue_expr_values) == 0:
                continue
            # Look the tissue position up once instead of once per comparison.
            tis_idx = data.tissues.index(sel_tissue)
            if (gene_obj.tissue_expr_values[tis_idx] > 5 and
                    gene_obj.tissue_TiP_values[tis_idx] < 2 and
                    len(partner_obj.tissue_expr_values) > 0 and
                    partner_obj.tissue_expr_values[tis_idx] > 5 and
                    partner_obj.tissue_TiP_values[tis_idx] >= 2 and
                    partner_obj.tissue_TiP_values[tis_idx] > max_TiP):
                max_TiP = partner_obj.tissue_TiP_values[tis_idx]
                TiP_tissue = sel_tissue

    # NOTE(review): the scan above accepts a partner TiP of exactly 2, but rows
    # are only written when the maximum is strictly greater than 2 - confirm
    # which threshold is intended.
    if not max_TiP > 2:
        continue

    # max_TiP > 2 implies TiP_tissue was set for this gene and that the CG has
    # expression values, so these lookups are safe to do once per gene.
    tip_idx = data.tissues.index(TiP_tissue)
    CG_expr_value = gene_obj.tissue_expr_values[tip_idx]
    CG_TiP_value = gene_obj.tissue_TiP_values[tip_idx]

    omim_ids = '|'.join([str(omim) for omim in tis_diseases])

    # Wild-type rows: causal_mut_id 0, final_score '1', pathogenecity 'WT',
    # and no allele-specific values.
    for tup, GS_wt in gene_obj.wt_PPIs.items():
        partner_id = tup[0]
        assay_id = tup[1]
        partner_obj = data.genes[partner_id]
        partner_expr_value, partner_TiP_value = _tissue_values(partner_obj, tip_idx)

        data_in = (gene_id, gene_obj.symbol, gene_obj.orf_id, 0, partner_id,
                   partner_obj.symbol, partner_obj.orf_id, assay_id, GS_wt, None,
                   None, None, '1', None, 'WT', omim_ids, TiP_tissue,
                   CG_expr_value, CG_TiP_value,
                   partner_expr_value, partner_TiP_value)
        print(data_in)
        cursor.execute(query, data_in)

    # Allele rows: every allele annotated with at least one selected disease.
    # No filter on allele_type is applied here (an earlier pathogenic /
    # 'risk factor' filter was disabled), so alleles of any type are written.
    for allele_id, allele_obj in gene_obj.disease_alleles.items():
        allele_omims = set(allele_obj.diseases.keys()).intersection(tis_diseases)
        if len(allele_omims) == 0:
            continue
        # Depends only on the allele, so build it once rather than per interaction.
        allele_omim_ids = '|'.join([str(omim) for omim in allele_omims])

        for int_id, int_obj in allele_obj.interactions.items():
            partner_obj = data.genes[int_obj.partner_id]
            partner_expr_value, partner_TiP_value = _tissue_values(partner_obj, tip_idx)

            # int_id[2] is written to the assay_id column.
            data_in = (gene_id, gene_obj.symbol, gene_obj.orf_id, allele_id,
                       int_obj.partner_id, partner_obj.symbol, partner_obj.orf_id,
                       int_id[2], int_obj.GS_wt, int_obj.GS_mut,
                       int_obj.GS_decrease, int_obj.perturbed, int_obj.score,
                       allele_obj.mutation, allele_obj.allele_type,
                       allele_omim_ids, TiP_tissue,
                       CG_expr_value, CG_TiP_value,
                       partner_expr_value, partner_TiP_value)
            cursor.execute(query, data_in)
        

Number of diseases loaded: 1105
Number of genes loaded: 364
Number of alleles loaded: 50
Number of interactions loaded: 704
('ENSG00000136143', 'SUCLA2', 1598, 0, 'ENSG00000100934', 'SEC23A', 100008515, 1, 3, None, None, None, '1', None, 'WT', '612073', 'brain-0', 11.4304017378, 0.576913607938, 10.6696848887, -0.514762514305)
('ENSG00000136143', 'SUCLA2', 1598, 0, 'ENSG00000170540', 'ARL6IP1', 2374, 1, 4, None, None, None, '1', None, 'WT', '612073', 'brain-0', 11.4304017378, 0.576913607938, 14.090027809, 2.17236715373)
('ENSG00000136143', 'SUCLA2', 1598, 0, 'ENSG00000185247', 'MAGEA11', 4361, 1, 2, None, None, None, '1', None, 'WT', '612073', 'brain-0', 11.4304017378, 0.576913607938, None, None)
('ENSG00000136143', 'SUCLA2', 1598, 0, 'ENSG00000177674', 'AGTRAP', 7845, 1, 4, None, None, None, '1', None, 'WT', '612073', 'brain-0', 11.4304017378, 0.576913607938, 9.16467479758, -1.04578115215)
('ENSG00000136143', 'SUCLA2', 1598, 0, 'ENSG00000147676', 'MAL2', 581, 1, 2, None, None, None, '1

('ENSG00000108433', 'GOSR2', 3086, 0, 'ENSG00000147155', 'EBP', 2416, 6, 4, None, None, None, '1', None, 'WT', '614018', 'brain-1', 6.00751738404, -0.743531013531, None, None)
('ENSG00000170876', 'TMEM43', 3438, 0, 'ENSG00000155324', 'GRAMD2B', 361, 1, 4, None, None, None, '1', None, 'WT', '604400|614302', 'muscle_skeletal', 12.0957184973, -0.339628694833, 8.35552594485, -1.49710981689)
('ENSG00000170876', 'TMEM43', 3438, 0, 'ENSG00000155313', 'USP25', 14495, 2, 3, None, None, None, '1', None, 'WT', '604400|614302', 'muscle_skeletal', 12.0957184973, -0.339628694833, 12.7690206707, 2.32168786915)
('ENSG00000170876', 'TMEM43', 3438, 0, 'ENSG00000145354', 'CISD2', 9684, 6, 1, None, None, None, '1', None, 'WT', '604400|614302', 'muscle_skeletal', 12.0957184973, -0.339628694833, 8.51323136001, -0.880236677376)
('ENSG00000170876', 'TMEM43', 3438, 0, 'ENSG00000102230', 'PCYT1B', 71081, 6, 2, None, None, None, '1', None, 'WT', '604400|614302', 'muscle_skeletal', 12.0957184973, -0.339628694833,

('ENSG00000099797', 'TECR', 5155, 0, 'ENSG00000165996', 'HACD1', 6415, 1, 1, None, None, None, '1', None, 'WT', '614020', 'brain-1', 12.22352526, 0.869723127211, 7.95709448297, -0.0274365810504)
('ENSG00000099797', 'TECR', 5155, 0, 'ENSG00000130545', 'CRB3', 13427, 6, 3, None, None, None, '1', None, 'WT', '614020', 'brain-1', 12.22352526, 0.869723127211, 2.12464856918, -0.0734397658361)
('ENSG00000099797', 'TECR', 5155, 0, 'ENSG00000169446', 'MMGT1', 9500, 6, 2, None, None, None, '1', None, 'WT', '614020', 'brain-1', 12.22352526, 0.869723127211, None, None)
('ENSG00000099797', 'TECR', 5155, 0, 'ENSG00000182093', 'WRB', 637, 6, 4, None, None, None, '1', None, 'WT', '614020', 'brain-1', 12.22352526, 0.869723127211, 11.1148041299, 1.12123421475)
('ENSG00000099797', 'TECR', 5155, 0, 'ENSG00000148175', 'STOM', 3031, 6, 4, None, None, None, '1', None, 'WT', '614020', 'brain-1', 12.22352526, 0.869723127211, 7.92345893042, -1.66429437249)
('ENSG00000099797', 'TECR', 5155, 0, 'ENSG00000145283',

('ENSG00000183230', 'CTNNA3', 71258, 0, 'ENSG00000140481', 'CCDC33', 8359, 1, 3, None, None, None, '1', None, 'WT', '615616', 'heart_left_ventricle', 11.3305880514, 0.864219606581, 2.26038390838, 0.337069256304)
('ENSG00000183230', 'CTNNA3', 71258, 0, 'ENSG00000179833', 'SERTAD2', 100069829, 1, 3, None, None, None, '1', None, 'WT', '615616', 'heart_left_ventricle', 11.3305880514, 0.864219606581, 10.2158274931, -0.349125210206)
('ENSG00000183230', 'CTNNA3', 71258, 0, 'ENSG00000215271', 'HOMEZ', 54692, 1, 4, None, None, None, '1', None, 'WT', '615616', 'heart_left_ventricle', 11.3305880514, 0.864219606581, 5.018578605, -0.396118786572)
('ENSG00000183230', 'CTNNA3', 71258, 0, 'ENSG00000162924', 'REL', 53942, 1, 4, None, None, None, '1', None, 'WT', '615616', 'heart_left_ventricle', 11.3305880514, 0.864219606581, 8.66587116482, -0.873798722625)
('ENSG00000183230', 'CTNNA3', 71258, 0, 'ENSG00000166886', 'NAB2', 12387, 1, 3, None, None, None, '1', None, 'WT', '615616', 'heart_left_ventricle'