Modify this line to briefly discribe the functionality of output_descriptors_and_fingerprints.ipynb<br/><br/>Copyright (C) 2017  Martin Engqvist Lab<br/>This program is free software: you can redistribute it and/or modify<br/>it under the terms of the GNU General Public License as published by<br/>the Free Software Foundation, either version 3 of the License, or<br/>(at your option) any later version.<br/>This program is distributed in the hope that it will be useful,<br/>but WITHOUT ANY WARRANTY; without even the implied warranty of<br/>MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the<br/>GNU General Public License for more details.<br/>You should have received a copy of the GNU General Public License<br/>along with this program.  If not, see <http://www.gnu.org/licenses/>.

In [3]:
import os
from dotenv import load_dotenv, find_dotenv
from os.path import join, dirname, basename, exists, isdir

### Load environmental variables from the project root directory ###
# find .env automagically by walking up directories until it's found
dotenv_path = find_dotenv()

# load up the entries as environment variables
load_dotenv(dotenv_path)

# now you can get the variables using their names

# Check whether a network drive has been specified
DATABASE = os.environ.get("NETWORK_URL")
if DATABASE == 'None':
    pass
else:
    pass
    #mount network drive here

# set up directory paths
CURRENT_DIR = os.getcwd()
PROJ = dirname(dotenv_path) # project root directory

DATA = join(PROJ, 'data') #data directory
RAW_EXTERNAL = join(DATA, 'raw_external') # external data raw directory
RAW_INTERNAL = join(DATA, 'raw_internal') # internal data raw directory
INTERMEDIATE = join(DATA, 'intermediate') # intermediate data directory
FINAL = join(DATA, 'final') # final data directory

RESULTS = join(PROJ, 'results') # output directory
FIGURES = join(RESULTS, 'figures') # figure output directory
PICTURES = join(RESULTS, 'pictures') # picture output directory


# make folders specific for certain data
folder_name = ''
if folder_name != '':
    #make folders if they don't exist
    if not exists(join(RAW_EXTERNAL, folder_name)):
        os.makedirs(join(RAW_EXTERNAL, folder_name))

    if not exists(join(INTERMEDIATE, folder_name)):
        os.makedirs(join(INTERMEDIATE, folder_name))

    if not exists(join(FINAL, folder_name)):
        os.makedirs(join(FINAL, folder_name))

print('Standard variables loaded, you are good to go!')

Standard variables loaded, you are good to go!


In [2]:
from rdkit import Chem
from rdkit.Chem import Descriptors

subswithsmiles = {'Massa ringar' : 'COc1cccc2cc(C(=O)NCCCCN3CCN(c4cccc5nccnc54)CC3)oc21','L-lactate' : "C([C@@H](O)C)(=O)[O-]", 'Glucose' : 'O=C[C@H](O)[C@@H](O)[C@H](O)[C@H](O)CO', 'Glutamate' : "N[C@@H](CCC(=O)[O-])C(=O)[O-]"}


def molecular_weight(substrate_dict):
    """Takes as input a dictionary of substrate names and their SMILES, converts to mol-format and returns a dictionary of substrate names and 
    and their respective molecular weight."""
    molweight = {}
    for key in substrate_dict:
        mol = Chem.MolFromSmiles(substrate_dict[key])
        molweight[key] = Descriptors.ExactMolWt(mol)
    return molweight

def polar_surface_area(substrate_dict):
    """Takes as input a dictionary of substrate names and their SMILES and returns a dictionary of substrate names and 
       their respective polar surface area."""
    TPSA = {}
    for key in substrate_dict:
        mol = Chem.MolFromSmiles(substrate_dict[key])
        TPSA[key] = Descriptors.TPSA(mol)
    return TPSA

# Kanske att föredra att dela upp den här i två funktioner?
def Hbond_donors_acceptors(substrate_dict):
    """Takes as input a dictionary of substrate names and their SMILES and returns a list of dictionaries of substrate names and
       their respective number of hydrogen bond acceptors and donors. Acceptors on index 0, donors on index 1."""
    acceptors = {}
    donors = {}
    for key in substrate_dict:
        mol = Chem.MolFromSmiles(substrate_dict[key])
        acceptors[key] = Descriptors.NumHAcceptors(mol)
        donors[key] = Descriptors.NumHDonors(mol)
    return acceptors, donors

def OHNH_count(substrate_dict):
    """Takes as input a dictionary of substrate names and their SMILES, converts to mol-format, calculates the number of NH and OH groups, and outputs
       a dictionary containing the substrate names and their respective counts."""
    OHNH = {}
    for key in substrate_dict:
        mol = Chem.MolFromSmiles(substrate_dict[key])
        OHNH[key] = Descriptors.NHOHCount(mol)
    return OHNH

def ON_count(substrate_dict):
    """Takes as input a dictionary of substrate names and their SMILES, converts to mol-format, calculates the number of O and N atoms, and outputs
        a dictionary containing the substrate names and their respective counts. """
    ON = {}
    for key in substrate_dict:
        mol = Chem.MolFromSmiles(substrate_dict[key])
        ON[key] = Descriptors.NOCount(mol)
    return ON

def HeteroAtomCount(substrate_dict):
    """Takes as input a dictionary of substrate names and their SMILES, converts to mol-format, calculates the number of 
        hetero atoms and returns a dictionary containing the substrate names and their respective hetero counts."""
    HA = {}
    for key in substrate_dict:
        mol = Chem.MolFromSmiles(substrate_dict[key])
        HA[key] = Descriptors.NumHeteroatoms(mol)
    return HA

def RotaBondCount(substrate_dict):
    """Takes as input a dictionary of substrate names and their SMILES, converts to mol-format, calculates the number of 
        rotatable bonds and returns a dictionary containing the substrate names and their respective rotatable bond counts."""
    RB = {}
    for key in substrate_dict:
        mol = Chem.MolFromSmiles(substrate_dict[key])
        RB[key] = Descriptors.NumRotatableBonds(mol)
    return RB


def AromRingCount(substrate_dict):
    """Takes as input a dictionary of substrate names and their SMILES, converts to mol-format, calculates the number of 
        aromatic rings and returns a dictionary containing the substrate names and their respective aromatic ring counts"""
    RING = {}
    for key in substrate_dict:
        mol = Chem.MolFromSmiles(substrate_dict[key])
        RING[key] = Descriptors.NumAromaticRings(mol)
    return RING

def AliphRingCount(substrate_dict):
    """Takes as input a dictionary of substrate names and their SMILES, converts to mol-format, calculates the number of 
        aliphatic rings and returns a dictionary containing the substrate names and their respective aliphatic ring counts"""
    RING2 = {}
    for key in substrate_dict:
        mol = Chem.MolFromSmiles(substrate_dict[key])
        RING2[key] = Descriptors.NumAliphaticRings(mol)
    return RING2

import json
filepath = join(RAW_EXTERNAL,"BRENDA_data_2019_1","2019-04-02_substrate_cache.json")
with open(filepath, 'r') as f:
        data = json.loads(f.read())





NameError: name 'join' is not defined

In [218]:
import json
filepath = join(RAW_EXTERNAL,"BRENDA_data_2019_1","2019-04-02_substrate_cache.json")
with open(filepath, 'r') as f:
        data = json.loads(f.read())
Nones = []
Smiles = {}
for key in data:
    if data[key] == None:
        Nones.append(key)
    else:
        Smiles[key] = data[key]
len(data)

1700

In [164]:
print Nones

SyntaxError: Missing parentheses in call to 'print'. Did you mean print(Nones)? (<ipython-input-164-c77748d67e0c>, line 1)