#### In this example, we integrate the two datasets listed in <font color='red'>Data_file1</font> and <font color='red'>Data_file2</font> by merging them on the identifier given in <font color='red'>Common_index</font>. For example, we integrate the LUAD mutation data with the drug response data from the GDSC dataset, and statistically assess whether a mutation in a given gene is associated with sensitivity or resistance to a given drug.

In [None]:
import Docket_integration

import matplotlib.pyplot as plt
import pandas as pd
import scipy
import numpy as np
import json

#### Parameters

In [None]:
# Settings describing the two input tables and how they are joined.
input_data1 = {
    # Paths of the two CSV files to integrate.
    "Data_file1": "../Data/LUAD_GDSC_MUT.csv",
    "Data_file2": "../Data/LUAD_GDSC_Drug.csv",
    # Identifier column used for merging, one entry per data file.
    "Common_index": ["COSMIC_identifier", "COSMIC_identifier"],
    # Features for Data_file1.
    "Features_1": "HGNC_gene_symbol",
    # Features for Data_file2.
    "Features_2": ["Drug_identifier", "IC50"],
    # Labels associated with Data_file1 and Data_file2 respectively.
    "Label_1": "MUT",
    "Label_2": "Drug",
}


In [None]:
# Settings for the mutation / drug-response analysis and its outputs.
input_data2 = {
    # Select the gene mutations for analysis.
    "Genelist": ["PIK3CA_MUT", "TP53_MUT", "KRAS_MUT", "EGFR_MUT"],
    # If no drug is selected, all drugs will be used!
    "Druglist": [],
    # The output file for drugs which show sensitivity to the gene mutations.
    "Output_SEN_File1": "../Output/LUAD_Sensitivity.csv",
    # The output file for drugs which show resistance to the gene mutations.
    "Output_RES_File2": "../Output/LUAD_Resistance.csv",
    # CSV file with the drug annotation table.
    "Drug_annotation": "../Data/GDSC_Drug_anno.csv",
}

In [None]:
# Build the merged table from the two files configured in input_data1
# (presumably joined on the "Common_index" columns -- confirm in
# Docket_integration.merge_data).
Merged_mat = Docket_integration.merge_data(input_data1)
# Run the mutation vs. drug-response analysis on the merged table. The cells
# below read the columns 'F1', 'F2', 'SE', '-logP' and 'FDR' from `result`.
result = Docket_integration.Integration_mutation_drugResponse(Merged_mat, input_data1, input_data2)

In [None]:
# Load the drug annotation table and key it by drug identifier so the lookups
# below are index lookups instead of repeated full-table scans.
Drug_annotation = pd.read_csv(input_data2['Drug_annotation'])
Drug_annotation.index = Drug_annotation['Drug_identifier']

# The text before the first '_' in each 'F2' value is parsed as the integer
# drug identifier.
temp = list(result['F2'].values)
temp_new_id = [i.split('_')[0] for i in temp]
drug_ids = [int(i) for i in temp_new_id]

# If an identifier occurs more than once in the annotation table, keep its
# first row only; this matches taking the first match per identifier.
annotation_lookup = Drug_annotation.loc[
    ~Drug_annotation.index.duplicated(keep='first')
]

# One label-based lookup for all rows, in the same order as `result`.
# `.loc` with a list of labels raises a KeyError naming any identifier that is
# absent from the annotation table.
annotation_columns = ['Drug_Name', 'Drug_Putative_Target', 'Drug_Targeted_process_or_pathway']
matched_annotation = annotation_lookup.loc[drug_ids, annotation_columns]

Drug_Name_list = matched_annotation['Drug_Name'].tolist()
Drug_Putative_Target_list = matched_annotation['Drug_Putative_Target'].tolist()
pathway_list = matched_annotation['Drug_Targeted_process_or_pathway'].tolist()

# Plain lists are assigned positionally, row by row, regardless of the index
# of `result`.
result['Drug'] = Drug_Name_list
result['Target'] = Drug_Putative_Target_list
result['Pathway'] = pathway_list

import plotly.express as px

# Scatter plot of the 'SE' column (x) against the '-logP' column (y), coloured
# by pathway, with the feature names and drug name shown on hover.
fig = px.scatter(
    result,
    x="SE",
    y="-logP",
    hover_data=['F1', 'F2', 'Drug'],
    color="Pathway",
)
fig.show()

In [None]:
# Show only the rows of `result` whose 'FDR' value is below 0.05.
result.loc[result['FDR'] < 0.05]