In [None]:
from downloading_BiGG_models import *
import json

In [None]:
# BiGG identifiers of all models handled by this notebook; each one is passed
# as `bigg_ID` to the download step and used as the name of its output folder.
model_IDs = [
    "e_coli_core", "iAB_RBC_283", "iAF1260", "iAF1260b", "iAF692", "iAF987",
    "iAM_Pb448", "iAM_Pc455", "iAM_Pf480", "iAM_Pk459", "iAM_Pv461", "iAPECO1_1312",
    "iAT_PLT_636", "iB21_1397", "iBWG_1329", "ic_1306", "iCHOv1", "iCHOv1_DG44",
    "iCN718", "iCN900", "iE2348C_1286", "iEC042_1314", "iEC1344_C", "iEC1349_Crooks",
    "iEC1356_Bl21DE3", "iEC1364_W", "iEC1368_DH5a", "iEC1372_W3110", "iEC55989_1330", "iECABU_c1320",
    "iECB_1328", "iECBD_1354", "iECD_1391", "iECDH10B_1368", "iEcDH1_1363", "iECDH1ME8569_1439",
    "iEcE24377_1341", "iECED1_1282", "iECH74115_1262", "iEcHS_1320", "iECIAI1_1343", "iECIAI39_1322",
    "iECNA114_1301", "iECO103_1326", "iECO111_1330", "iECO26_1355", "iECOK1_1307", "iEcolC_1368",
    "iECP_1309", "iECs_1301", "iECS88_1305", "iECSE_1348", "iECSF_1327", "iEcSMS35_1347",
    "iECSP_1301", "iECUMN_1333", "iECW_1372", "iEK1008", "iEKO11_1354", "iETEC_1333",
    "iG2583_1286", "iHN637", "iIS312", "iIS312_Amastigote", "iIS312_Epimastigote", "iIS312_Trypomastigote",
    "iIT341", "iJB785", "iJN1463", "iJN678", "iJN746", "iJO1366",
    "iJR904", "iLB1027_lipid", "iLF82_1304", "iLJ478", "iML1515", "iMM1415",
    "iMM904", "iND750", "iNF517", "iNJ661", "iNRG857_1313", "iPC815",
    "iRC1080", "iS_1188", "iSB619", "iSbBS512_1146", "iSBO_1134", "iSDY_1059",
    "iSF_1195", "iSFV_1184", "iSFxv_1172", "iSSON_1240", "iSynCJ816", "iUMN146_1321",
    "iUMNK88_1353", "iUTI89_1310", "iWFL_1372", "iY75_1357", "iYL1228", "iYO844",
    "iYS1720", "iYS854", "iZ_1308", "RECON1", "Recon3D", "STM_v1_0",
]

In [None]:
#model_IDs = ["e_coli_core", "iAF1260", "iAF1260b", "iAF692", "iAF987"]

#### Create a reaction DataFrame for each BiGG model and a text file with all genes in that model:

In [None]:
# Download every BiGG model once and cache its metabolites, reactions and gene
# list under <datasets_dir>/BiGG_GSM/<model_ID>/.
for model_ID in model_IDs:
    print(model_ID)

    model_dir = join(datasets_dir, "BiGG_GSM", model_ID)
    reactions_pickle = join(model_dir, "df_reactions.pkl")

    # df_reactions.pkl is the last file written below, so its presence marks a
    # finished download. Testing for it (rather than for the directory alone)
    # means a run that was interrupted half-way is repeated instead of being
    # silently skipped on every later execution.
    if os.path.exists(reactions_pickle):
        continue
    os.makedirs(model_dir, exist_ok=True)

    model_metabolites, model_reactions, df_reactions = download_model_information(bigg_ID = model_ID)
    model_metabolites_list = [met["id"] for met in model_metabolites]
    model_reactions_list = [reaction["id"] for reaction in model_reactions]

    # Store the full metabolite records as JSON and the bare ID list as .npy;
    # both files are read back when the KM DataFrame is built.
    with open(join(model_dir, 'model_metabolites.txt'), 'w') as outfile:
        json.dump(model_metabolites, outfile)
    np.save(join(model_dir, 'model_metabolites_list.npy'), model_metabolites_list)

    df_reactions = process_reactions_DataFrame(df_reactions, model_reactions, model_reactions_list)
    print("There are %s enzymatic reactions" % len(df_reactions))
    print("Reactions without gene reaction rules: %s" %len(df_reactions.loc[df_reactions["gene_reaction_rule"] == ""]))

    create_txt_file_with_all_genes(df_reactions, model_ID)

    # Written last on purpose: acts as the completion marker checked above.
    df_reactions.to_pickle(reactions_pickle)

#### Use the created files as the input for the UniProt ID mapping service

In [None]:
# Rebuild the model list from what is actually on disk. os.listdir() returns
# entries in arbitrary order and also lists stray files (e.g. .DS_Store), so
# keep only directories and sort them to get a stable, reproducible order.
bigg_gsm_dir = join(datasets_dir, "BiGG_GSM")
model_IDs = sorted(entry for entry in os.listdir(bigg_gsm_dir)
                   if os.path.isdir(join(bigg_gsm_dir, entry)))

Load and map the results:

In [None]:
# For each model: load the cached reaction table, pass it through
# add_Uniprot_IDs, report how many reactions still have no Uniprot ID, and
# write the updated table back to the same pickle.
for model_ID in model_IDs:
    print(model_ID)

    reactions_path = join(datasets_dir, "BiGG_GSM", model_ID, "df_reactions.pkl")
    df_reactions = add_Uniprot_IDs(pd.read_pickle(reactions_path), model_ID)

    n_unmapped = df_reactions["Uniprot ID"].isnull().sum()
    n_reactions = len(df_reactions)
    print("For %s out of %s reactions we don't have a Uniprot ID, yet." % (n_unmapped, n_reactions))

    # Get links for the QuickGO mapping of the Uniprot IDs.
    get_Quick_GO_links_for_enzyme_complexes(df_reactions, model_ID)

    df_reactions.to_pickle(reactions_path)
    print(" ")

#### Downloading binding-site information from https://www.ebi.ac.uk/QuickGO via the UniProt IDs:

In [None]:
# For each model: add Uniprot IDs for enzyme complexes using the loaded GO
# table, then build the KM DataFrame, enrich it with compound identifiers and
# store it as a per-model checkpoint.
for model_ID in model_IDs:
    print(model_ID)

    model_dir = join(datasets_dir, "BiGG_GSM", model_ID)
    reactions_path = join(model_dir, "df_reactions.pkl")
    checkpoint_path = join(model_dir, "df_KM_checkpoint_" + model_ID + ".pkl")

    # Resume by checkpoint instead of a hard-coded slice such as model_IDs[45:]:
    # a fixed offset silently drops the first models on a fresh run and depends
    # on the order of model_IDs. Delete a checkpoint file to force that model
    # to be recomputed.
    if os.path.exists(checkpoint_path):
        print("KM checkpoint already exists - skipping.")
        print(" ")
        continue

    df_reactions = pd.read_pickle(reactions_path)

    # presumably the GO table is the QuickGO export for this model - verify
    # against load_GO_DataFrames.
    GO_df = load_GO_DataFrames(model_ID)
    GO_UIDs = list(set(GO_df["GENE PRODUCT ID"]))
    df_reactions = add_Uniprot_ID_for_enzyme_complexes(df_reactions, GO_UIDs)

    print("For %s out of %s reactions we don't have a Uniprot ID." %
          (len(df_reactions.loc[pd.isnull(df_reactions["Uniprot ID"])]), len(df_reactions)))
    df_reactions.to_pickle(reactions_path)

    # Reload the metabolite records and ID list saved by the download step.
    with open(join(model_dir, 'model_metabolites.txt')) as json_file:
        model_metabolites = json.load(json_file)
    model_metabolites_list = list(np.load(join(model_dir, 'model_metabolites_list.npy')))

    df_KM = create_KM_DataFrame(df_reactions)
    df_KM = get_kegg_and_bigg_compound_ids(df_KM,  model_metabolites, model_metabolites_list)
    df_KM = add_substrate_name(df_KM)
    df_KM = find_KEGG_ID_by_synonym(df_KM)
    df_KM = download_SMILES_or_KEGG_from_MetaNetX(df_KM)
    df_KM = remove_small_mets(df_KM)
    df_KM.to_pickle(checkpoint_path)

    # Build each null mask once and combine them with `&`; chaining
    # .loc[mask] calls with masks taken from the unfiltered frame only works
    # through implicit index alignment.
    missing_kegg = pd.isnull(df_KM["KEGG ID"])
    missing_smiles = pd.isnull(df_KM["SMILES"])
    missing_uniprot = pd.isnull(df_KM["Uniprot ID"])

    print("There are %s KM data points" % len(df_KM))
    print("Data points without KEGG ID and SMILES: %s"
          % int((missing_kegg & missing_smiles).sum()))
    print("Data points without Uniprot ID: %s"
          % int(missing_uniprot.sum()))
    print("Data points without KEGG ID, SMILES and Uniprot ID: %s"
          % int((missing_kegg & missing_smiles & missing_uniprot).sum()))

    print(" ")