In [None]:
import pandas as pd
from pubchemprops.pubchemprops import get_cid_by_name, get_first_layer_props, get_second_layer_props
import urllib.error
import urllib.parse
from pka_lookup import pka_lookup_pubchem
import pubchempy as pcp
import re
import json

#Finds the pKa using the code of Khoi Van.
def find_pka(inchikey_string):
    """Look up the pKa recorded for a compound.

    Args:
        inchikey_string: InChIKey of the compound; forwarded to
            ``pka_lookup_pubchem`` together with the ``"inchikey"``
            identifier type.

    Returns:
        The value stored under the ``'pKa'`` key of the lookup result, or
        ``None`` when the lookup returns ``None`` or has no ``'pKa'`` key.
    """
    lookup_result = pka_lookup_pubchem(inchikey_string, "inchikey")
    if lookup_result is None or 'pKa' not in lookup_result:
        return None
    return lookup_result['pKa']

def find_boiling_point(name):
    """Return the mean boiling point (°C) found on PubChem for ``name``.

    Each 'Boiling Point' entry returned by ``get_second_layer_props`` is
    scanned for its first "<number> °C" and its first "<number> °F".
    Fahrenheit readings are converted to Celsius (rounded to 2 decimals) and
    every reading found contributes equally to the mean.

    Args:
        name: Compound identifier, passed as ``str`` to
            ``get_second_layer_props``.

    Returns:
        The mean of all readings rounded to 2 decimals, or ``None`` when no
        boiling-point reading could be extracted.
    """
    text_dict = get_second_layer_props(str(name), ['Boiling Point', 'Vapor Pressure'])

    # The optional sign applies to integers as well as decimals ("-10 °C").
    # It is only accepted when it does not directly follow a digit or a dot,
    # so the dash of a range such as "100-101 °C" is not read as a minus sign.
    number = r'((?:(?<![\d.])[-+])?(?:\d*\.\d+|\d+))'
    pattern_celsius = re.compile(number + r'\s*°C')
    pattern_fahrenheit = re.compile(number + r'\s*°F')

    boiling_point_values = []
    # assumes text_dict is a dict keyed by section heading and that a compound
    # without that section simply lacks the key — TODO confirm against pubchemprops
    for item in text_dict.get('Boiling Point', []):
        # Only entries carrying a textual value can be parsed.
        if 'Value' not in item or 'StringWithMarkup' not in item['Value']:
            continue
        string_value = item['Value']['StringWithMarkup'][0]['String']

        match_celsius = pattern_celsius.search(string_value)
        if match_celsius:
            boiling_point_values.append(float(match_celsius.group(1)))

        # Fahrenheit readings are converted to Celsius before being added.
        match_fahrenheit = pattern_fahrenheit.search(string_value)
        if match_fahrenheit:
            fahrenheit_temp = float(match_fahrenheit.group(1))
            boiling_point_values.append(round((fahrenheit_temp - 32) * (5 / 9), 2))

    # Without any reading there is nothing to average (avoids dividing by zero).
    if not boiling_point_values:
        return None
    return round(sum(boiling_point_values) / len(boiling_point_values), 2)

"""
This code takes as input a list of compound written like: acetone, water. The code allows spaces, wrong names and unknown pubchem names.
Then it iterates through each of them to find if they exist on pubchem, and if they do,
then 'CID', 'MolecularFormula', 'MolecularWeight', 'InChI', 'InChIKey', 'IUPACName', 'XLogP', 'pKa',  and 'BoilingPoint' is added into a list and then a data frame.
The code takes time as find_pka(inchikey_string) and find_boiling_point(name) request URL to find the string on the Pubchem page, then extract it using regex. 
The boiling point is a mean of all the values (references) found.
"""

def get_df_properties():
    """Prompt for compound names and return their PubChem properties.

    The user enters a comma-separated list of names. Each non-blank name is
    URL-encoded and looked up with ``get_first_layer_props``; ``find_pka``
    and ``find_boiling_point`` then add the 'pKa' and 'BoilingPoint' values.
    Compounds whose lookup raises ``urllib.error.HTTPError`` are reported on
    stdout and skipped.

    Returns:
        A ``pandas.DataFrame`` with one row per compound found and the
        columns 'CID', 'MolecularFormula', 'MolecularWeight', 'InChI',
        'InChIKey', 'IUPACName', 'XLogP', 'pKa' and 'BoilingPoint'.
        'pKa' and 'BoilingPoint' are ``None`` when no value is available.
    """
    raw_names = input("Enter the name of the compound like this: water, ethanol, methanol =")
    # Blank entries (e.g. from a trailing comma) would only trigger a useless request.
    compound_list = [name.strip() for name in raw_names.split(',') if name.strip()]

    requested_props = ['MolecularFormula', 'MolecularWeight', 'InChI', 'InChIKey', 'IUPACName', 'XLogP']
    base_columns = ['CID'] + requested_props
    # Every row gets the same keys, so no column depends on which compound
    # happens to come first.
    columns = base_columns + ['pKa', 'BoilingPoint']

    compound_properties = []
    for compound_name in compound_list:
        compound_name_encoded = urllib.parse.quote(compound_name)
        try:
            first_data = get_first_layer_props(compound_name_encoded, requested_props)
            compound_info = {prop: first_data.get(prop) for prop in base_columns}

            # Always store a pKa entry (None when unknown) to keep rows uniform.
            inchikey = first_data.get('InChIKey')
            compound_info['pKa'] = find_pka(inchikey) if inchikey else None

            try:
                compound_info['BoilingPoint'] = find_boiling_point(compound_name_encoded)
            except (KeyError, ZeroDivisionError):
                # find_boiling_point indexes the 'Boiling Point' section and
                # divides by the number of readings; either can fail when
                # PubChem lists no usable boiling point for the compound.
                compound_info['BoilingPoint'] = None
        except urllib.error.HTTPError as e:
            if e.code == 404:
                print(f'{compound_name} not found on PubChem')
            else:
                print(f'An error occurred: {e}')
            continue

        # Only rows with every property filled in reach the result.
        compound_properties.append(compound_info)

    return pd.DataFrame(compound_properties, columns=columns)

In [None]:
import pandas as pd
from pubchemprops.pubchemprops import get_cid_by_name, get_first_layer_props, get_second_layer_props
import urllib.error

from pka_lookup import pka_lookup_pubchem
import pubchempy as pcp
import re
import json

#Finds the pKa using the code of Khoi Van.
def find_pka(inchikey_string):
    """Return the pKa reported by ``pka_lookup_pubchem`` for an InChIKey.

    Args:
        inchikey_string: InChIKey to look up (queried with the
            ``"inchikey"`` identifier type).

    Returns:
        The ``'pKa'`` entry of the lookup result, or ``None`` if the lookup
        returned ``None`` or a result without a ``'pKa'`` key.
    """
    result = pka_lookup_pubchem(inchikey_string, "inchikey")
    has_pka = result is not None and 'pKa' in result
    return result['pKa'] if has_pka else None

def find_boiling_point(name):
    """Return the mean boiling point (°C) found on PubChem for ``name``.

    Each 'Boiling Point' entry returned by ``get_second_layer_props`` is
    scanned for its first "<number> °C" and its first "<number> °F".
    Fahrenheit readings are converted to Celsius (rounded to 2 decimals) and
    every reading found contributes equally to the mean.

    Args:
        name: Compound identifier, passed as ``str`` to
            ``get_second_layer_props``.

    Returns:
        The mean of all readings rounded to 2 decimals, or ``None`` when no
        boiling-point reading could be extracted.
    """
    text_dict = get_second_layer_props(str(name), ['Boiling Point', 'Vapor Pressure'])

    # The optional sign applies to integers as well as decimals ("-10 °C").
    # It is only accepted when it does not directly follow a digit or a dot,
    # so the dash of a range such as "100-101 °C" is not read as a minus sign.
    number = r'((?:(?<![\d.])[-+])?(?:\d*\.\d+|\d+))'
    pattern_celsius = re.compile(number + r'\s*°C')
    pattern_fahrenheit = re.compile(number + r'\s*°F')

    boiling_point_values = []
    # assumes text_dict is a dict keyed by section heading and that a compound
    # without that section simply lacks the key — TODO confirm against pubchemprops
    for item in text_dict.get('Boiling Point', []):
        # Only entries carrying a textual value can be parsed.
        if 'Value' not in item or 'StringWithMarkup' not in item['Value']:
            continue
        string_value = item['Value']['StringWithMarkup'][0]['String']

        match_celsius = pattern_celsius.search(string_value)
        if match_celsius:
            boiling_point_values.append(float(match_celsius.group(1)))

        # Fahrenheit readings are converted to Celsius before being added.
        match_fahrenheit = pattern_fahrenheit.search(string_value)
        if match_fahrenheit:
            fahrenheit_temp = float(match_fahrenheit.group(1))
            boiling_point_values.append(round((fahrenheit_temp - 32) * (5 / 9), 2))

    # Without any reading there is nothing to average (avoids dividing by zero).
    if not boiling_point_values:
        return None
    return round(sum(boiling_point_values) / len(boiling_point_values), 2)

"""
This code takes as input a list of compound written like: acetone, water. Then it iterates through each of them to find if they exist on pubchem, and if they do their first properties 
(not all which we need) is added into a list then a dataframe.
"""

# quote() is used below to URL-encode names; this cell only imports urllib.error.
import urllib.parse

compound_name = input("Enter the name of the compound like this: water, ethanol, methanol =")
# Blank entries (e.g. from a trailing comma) would only trigger a useless request.
compound_list = [name.strip() for name in compound_name.split(',') if name.strip()]

requested_props = ['MolecularFormula', 'MolecularWeight', 'InChI', 'InChIKey', 'IUPACName', 'XLogP']

compound_properties = []
for compound_name in compound_list:
    # Encode the name so that names containing spaces form a valid request URL.
    compound_name_encoded = urllib.parse.quote(compound_name)
    try:
        first_data = get_first_layer_props(compound_name_encoded, requested_props)
        compound_info = {prop: first_data.get(prop) for prop in ['CID'] + requested_props}

        # Always store a pKa entry (None when unknown) so every row has the
        # same keys and the column survives the reindex below.
        inchikey = first_data.get('InChIKey')
        compound_info['pKa'] = find_pka(inchikey) if inchikey else None

        try:
            compound_info['BoilingPoint'] = find_boiling_point(compound_name_encoded)
        except (KeyError, ZeroDivisionError):
            # find_boiling_point indexes the 'Boiling Point' section and
            # divides by the number of readings; either can fail when PubChem
            # lists no usable boiling point for the compound.
            compound_info['BoilingPoint'] = None

        # Only rows with every property filled in reach the result.
        compound_properties.append(compound_info)

    except urllib.error.HTTPError as e:
        if e.code == 404:
            # Report the name being processed; `compound` is not defined at this scope.
            print(f'{compound_name} not found on PubChem')
        else:
            print(f'An error occurred: {e}')

valid_properties = [prop for prop in compound_properties if isinstance(prop, dict)]
df = pd.DataFrame(valid_properties)
# Use the property names from the first dictionary as column headers.
if valid_properties:
    df = df.reindex(columns=valid_properties[0].keys())

print(df)

In [None]:
import os
import pandas as pd
import pubchempy as pcp

# Placeholder file location, joined with the platform's path separator.
path = os.path.join(*['your', 'directory', 'file.txt'])

"""
The Pubchempy python package here is used to have easy access to PubChem library. 
We can retrieve the information on PubChem for molecules which the user demands the chromatography separation.
If the name of the compound matches with a PubChem page name, the properrties are retrieved and placed in the dictionnary properties.
"""

def get_compound_info(compound_name):
    """Fetch a fixed set of properties for the first PubChem match of a name.

    Args:
        compound_name: Name searched with ``pcp.get_compounds(..., 'name')``.

    Returns:
        A dict keyed by 'IUPACName', 'MolecularFormula', 'MolecularWeight',
        'IsomericSMILES', 'XLogP', 'BoilingPt', 'Solubility',
        'SaturatedVaporPressure', 'pKa', 'ExactMass' and 'Fingerprint3D',
        or ``None`` (after printing a message) when the search returns no
        compound. Properties that cannot be read are ``None``.
    """
    matches = pcp.get_compounds(compound_name, 'name')
    if not matches:
        print(f"Compound '{compound_name}' not found on PubChem.")
        return None

    compound = matches[0]
    # NOTE(review): PubChemPy's Compound exposes its data through snake_case
    # attributes, and to_dict() looks its `properties` up as attribute names,
    # so the PUG-REST style names used as keys here must be mapped explicitly
    # — confirm against the installed PubChemPy version.
    # No matching Compound attribute is known for the entries mapped to None;
    # their keys are kept as placeholders so the result schema is unchanged.
    attribute_by_property = {
        'IUPACName': 'iupac_name',
        'MolecularFormula': 'molecular_formula',
        'MolecularWeight': 'molecular_weight',
        'IsomericSMILES': 'isomeric_smiles',
        'XLogP': 'xlogp',
        'BoilingPt': None,
        'Solubility': None,
        'SaturatedVaporPressure': None,
        'pKa': None,
        'ExactMass': 'exact_mass',
        'Fingerprint3D': None,
    }
    # getattr with a default keeps the lookup from raising if an attribute is
    # missing in the installed PubChemPy version.
    return {
        key: getattr(compound, attribute, None) if attribute else None
        for key, attribute in attribute_by_property.items()
    }

"""THe df_info function gets an input of the user and uses the get_compound_info to write it in a df if possible"""

def df_info():
    """Prompt for compound names and print a DataFrame of their properties.

    The user enters a comma-separated list of names; each non-blank name is
    looked up with ``get_compound_info``. Names for which no information is
    returned are reported on stdout and left out of the frame.

    Returns:
        The ``pandas.DataFrame`` that was printed, one row per compound for
        which information was retrieved.
    """
    raw_names = input("Enter the name of the compound like this (water, ethanol, methanol): ")
    # Use what the user typed; blank entries (e.g. a trailing comma) are dropped.
    compound_names = [name.strip() for name in raw_names.split(',') if name.strip()]

    # Rows are collected first and the frame is built once:
    # DataFrame.append was removed in pandas 2.0.
    rows = []
    for compound_name in compound_names:
        compound_info = get_compound_info(compound_name)
        if compound_info:
            rows.append(compound_info)
        else:
            print(f"Could not retrieve information for the compound '{compound_name}'.")

    df = pd.DataFrame(rows)
    print(df)
    return df


# Boiling-temperature threshold; presumably in °C — TODO confirm the unit.
# NOTE(review): main() declares a parameter with the same name, which shadows
# this module-level value inside main().
Boiling_temp_threshold = 300


def main(boiling_temp, solubility, pKa, p_star, Boiling_temp_threshold, solubility_threshold, pKa_threshold, p_star_threshold):
    """Build a per-compound table of threshold flags (work in progress).

    NOTE(review): this function reads a module-level DataFrame ``df`` that is
    not created in this part of the file; it must provide the columns
    'Compound', 'BoilingTemp_C' and 'BoilingTempThreshold_C' — confirm where
    it is meant to come from.

    Args:
        boiling_temp: Value compared against ``Boiling_temp_threshold``.
        solubility: Not used yet (its check is still to be implemented).
        pKa: Not used yet.
        p_star: Not used yet.
        Boiling_temp_threshold: The boiling-temperature flag column is only
            added when ``boiling_temp`` is greater than or equal to this value.
        solubility_threshold: Not used yet.
        pKa_threshold: Not used yet.
        p_star_threshold: Not used yet.

    Returns:
        A DataFrame indexed like ``df`` with the columns 'Compound' and
        'Teb_threshold' (left empty), plus 'BoilingTempGreaterThanThreshold'
        when the boiling-temperature condition holds and ``df`` has rows.
    """
    # Column names must be passed via `columns=`; positional strings are
    # rejected by the DataFrame constructor.
    flags = pd.DataFrame(columns=["Compound", "Teb_threshold"], index=df.index)
    flags['Compound'] = df['Compound']

    # The comparison is column-wise, so it is done once rather than once per
    # row; as before, nothing is added when `df` has no rows.
    if len(df) > 0 and boiling_temp >= Boiling_temp_threshold:
        flags['BoilingTempGreaterThanThreshold'] = df['BoilingTemp_C'] > df['BoilingTempThreshold_C']

    # TODO: the solubility, pKa and p_star checks were empty placeholders and
    # still need to be implemented.
    return flags


# Script entry point.
# NOTE(review): main() is declared with eight required positional parameters,
# so this zero-argument call raises TypeError when it is reached — supply the
# measured values and thresholds here.
if __name__ == "__main__":
    main()

In [None]:
import pandas as pd
from pubchemprops.pubchemprops import get_cid_by_name, get_first_layer_props, get_second_layer_props
from pka_lookup import pka_lookup_pubchem
import urllib.error

"""
This code takes as input a list of compound written like: acetone, water. Then it iterates through each of them to find if they exist on pubchem, and if they do their first properties 
(not all which we need) is added into a list then a dataframe.
"""

# quote() is used below to URL-encode names; this cell only imports urllib.error.
import urllib.parse

compound_name = input("Enter the name of the compound like this (water, ethanol, methanol): ")
# Blank entries (e.g. from a trailing comma) would only trigger a useless request.
compound_list = [name.strip() for name in compound_name.split(',') if name.strip()]

compound_properties = []
for compound_name in compound_list:
    try:
        # Encode the name so that names containing spaces form a valid request URL.
        first_data = get_first_layer_props(
            urllib.parse.quote(compound_name),
            ['MolecularFormula', 'MolecularWeight', 'InChI', 'InChIKey', 'IUPACName', 'XLogP'],
        )
    except urllib.error.HTTPError as e:
        if e.code == 404:
            # Report the name being processed; `compound` is not defined at this scope.
            print(f'{compound_name} not found on PubChem')
        else:
            print(f'An error occurred: {e}')
    else:
        compound_properties.append(first_data)

df = pd.DataFrame(compound_properties)
# Use the property names from the first dictionary as column headers.
if compound_properties:
    df = df.reindex(columns=compound_properties[0].keys())

print(df)