# In [None]:
import pandas as pd
import tkinter as tk
from tkinter import ttk
from tkinter import messagebox
import urllib.error
import urllib.parse
from pka_lookup.src import pka_lookup_pubchem
import re

# Names of the molecules entered so far; add_molecule() appends to this list.
mixture = []

# Main application window.
root = tk.Tk()
root.title("Determination of Chromatography Type")

# Function to add a molecule to the mixture
def add_molecule():
    """Add the name typed in ``mixture_entry`` to the mixture.

    The text is stripped of surrounding whitespace once, so the value stored
    in the module-level ``mixture`` list is the same value shown in
    ``mixture_listbox``. Blank input is ignored.
    """
    element = mixture_entry.get().strip()
    if not element:
        # Nothing meaningful was typed; do not store an empty compound name.
        return
    mixture.append(element)
    mixture_listbox.insert(tk.END, element)

# Function to retrieve compound properties from PubChem
def get_compound_properties(compound_name):
    """Collect the properties of one compound into a dictionary.

    The stripped, URL-quoted name is passed to ``get_first_layer_props`` and
    ``find_boiling_point``; the InChIKey from the first lookup is passed to
    ``find_pka``.

    Args:
        compound_name: Name of the compound as entered by the user.

    Returns:
        A dict with the keys ``Name``, ``MolecularFormula``,
        ``MolecularWeight``, ``InChI``, ``InChIKey``, ``IUPACName``,
        ``XLogP``, ``pKa`` and ``BoilingPoint``, or ``None`` when the lookup
        failed (an error dialog is shown in that case).
    """
    compound_name_encoded = urllib.parse.quote(compound_name.strip())
    try:
        first_data = get_first_layer_props(
            compound_name_encoded,
            ['MolecularFormula', 'MolecularWeight', 'InChI', 'InChIKey', 'IUPACName', 'XLogP'],
        )
        compound_info = {
            'Name': compound_name,
            'MolecularFormula': first_data.get('MolecularFormula'),
            'MolecularWeight': first_data.get('MolecularWeight'),
            'InChI': first_data.get('InChI'),
            'InChIKey': first_data.get('InChIKey'),
            'IUPACName': first_data.get('IUPACName'),
            'XLogP': first_data.get('XLogP'),
            'pKa': find_pka(first_data.get('InChIKey')),
            'BoilingPoint': find_boiling_point(compound_name_encoded),
        }
    except urllib.error.HTTPError as e:
        # HTTPError is a subclass of URLError, so it must be handled first.
        if e.code == 404:
            messagebox.showerror("Error", f"{compound_name} not found on PubChem")
        else:
            messagebox.showerror("Error", f"An error occurred: {e}")
        return None
    except urllib.error.URLError as e:
        # Connection-level failures (no network, DNS error, ...) carry no HTTP
        # status code; report them instead of letting them escape the callback.
        messagebox.showerror("Error", f"An error occurred: {e}")
        return None
    return compound_info

# Function to get compound properties for all entered molecules
def get_mixture_properties():
    """Look up every compound name currently shown in ``mixture_listbox``.

    Returns:
        A list holding the result of ``get_compound_properties`` for each
        name, in listbox order; falsy results (failed lookups) are left out.
    """
    names = mixture_listbox.get(0, tk.END)
    lookups = map(get_compound_properties, names)
    return list(filter(None, lookups))

# Empty module-level DataFrame.
df = pd.DataFrame()
# DataFrame built from the contents of `mixture` at import time, with the
# five descriptive columns listed below.
user_df = pd.DataFrame(
    data=mixture,
    columns=['Mixture', 'Boiling_temp_(°C)', 'logP', 'pKa', 'Molecular_mass'],
)


# Function to determine chromatography type
def _pka_range(pka_column):
    """Return ``(min_pKa, max_pKa)`` over *pka_column*, or ``(None, None)``.

    Each entry may be a single value or a list of values. Entries that are
    missing (``None``/NaN) or cannot be converted to ``float`` are skipped.
    """
    values = []
    for entry in pka_column:
        candidates = entry if isinstance(entry, list) else [entry]
        for candidate in candidates:
            try:
                number = float(candidate)
            except (TypeError, ValueError):
                continue
            if not pd.isna(number):
                values.append(number)
    if not values:
        return None, None
    return min(values), max(values)


def _choose_chromatography(df):
    """Return ``(chromatography type, eluent nature, proposed pH)`` for *df*.

    Reads the columns ``BoilingPoint``, ``MolecularWeight``, ``pKa`` and
    ``XLogP``. The proposed pH is ``None`` for GC and whenever no usable pKa
    value is available.
    """
    # NOTE(review): GC is normally chosen for volatile mixtures, so selecting
    # it when the *lowest* boiling point is >= 300 looks inverted. The original
    # condition is kept unchanged; confirm with the domain owner.
    boiling_points = pd.to_numeric(df['BoilingPoint'], errors='coerce')
    if boiling_points.min() >= 300:
        return 'GC', 'gas', None

    # Coerce to numbers so that values delivered as text still compare
    # correctly; anything unparsable becomes NaN and is ignored by min/max.
    max_molar_mass = pd.to_numeric(df['MolecularWeight'], errors='coerce').max()
    min_pKa, max_pKa = _pka_range(df['pKa'])
    log_p = pd.to_numeric(df['XLogP'], errors='coerce')
    max_logP = log_p.max()
    min_logP = log_p.min()

    # A pH can only be proposed when at least one pKa is known.
    low_pH = None if min_pKa is None else min_pKa + 2
    high_pH = None if max_pKa is None else max_pKa + 2

    if max_molar_mass <= 2000:
        if max_logP < 0:
            if high_pH is not None and 3 <= high_pH <= 11:
                return 'IC', 'aqueous', high_pH
            return 'HPLC', 'organic or hydro-organic', low_pH
        # Reverse phase only for -2 <= min_logP < 0; every other value
        # (including exactly 0) selects the normal stationary phase.
        if -2 <= min_logP < 0:
            phase = ' on reverse stationary phase using C18 column'
        else:
            phase = ' on normal stationary phase'
        return 'HPLC' + phase, 'organic or hydro-organic', low_pH

    if max_logP < 0:
        return 'HPLC on reverse stationary phase', 'organic or hydro-organic', low_pH
    return 'SEC on gel filtration', 'aqueous', low_pH


def det_chromato(df):
    """Show the advised chromatography type for the compounds in *df*.

    Args:
        df: DataFrame with one row per compound and the columns
            ``BoilingPoint``, ``MolecularWeight``, ``pKa`` and ``XLogP``.

    An error dialog is shown and nothing else happens when *df* is empty.
    Otherwise the three result labels are updated; when a pH is proposed the
    pH entry widget is displayed as well.
    """
    if df.empty:
        messagebox.showerror("Error", "No compounds found in the mixture.")
        return

    chromato_type, eluent_nature, proposed_pH = _choose_chromatography(df)

    result1_label.config(text=f"The advisable chromatography type is: {chromato_type}")
    result2_label.config(text=f"Eluent nature: {eluent_nature}")
    if proposed_pH is not None:
        result3_label.config(text=f"Proposed pH for the eluent: {proposed_pH}")
        add_entry_widget()
    else:
        # Clear any pH text left over from a previous determination.
        result3_label.config(text="")

# Function to add entry widget for pH value
def add_entry_widget():
    """Display the pH label and entry field below the result labels.

    Rows 0-6 of the grid are taken by the widgets laid out at module level
    (row 3 holds the "Determine chromatography" button), so the pH widgets
    are placed in row 7 to avoid sharing a cell with another widget.
    """
    label.grid(row=7, column=0, padx=5, pady=5)
    entry_widget.grid(row=7, column=1, padx=5, pady=5)
# Function to find pKa of a compound
def find_pka(inchikey_string):
    """Return the pKa reported by ``pka_lookup_pubchem`` for an InChIKey.

    Args:
        inchikey_string: InChIKey of the compound; may be ``None`` or empty
            when the preceding property lookup did not provide one.

    Returns:
        The value stored under ``'pKa'`` in the lookup result, or ``None``
        when no key was given, the lookup returned ``None``, or the result
        has no ``'pKa'`` entry.
    """
    if not inchikey_string:
        # Without a key there is nothing to look up.
        return None
    text_pka = pka_lookup_pubchem(inchikey_string, "inchikey")
    if text_pka is not None and 'pKa' in text_pka:
        return text_pka['pKa']
    return None

# Function to find boiling point of a compound
# A number followed by its unit. The sign belongs to the whole number (so
# "-42 °C" is read as -42), and the lookbehind keeps the dash of a range such
# as "56-57 °C" from being taken for a minus sign.
_CELSIUS_PATTERN = re.compile(r'(?<![\d.])([-+]?(?:\d*\.\d+|\d+))\s*°\s*C')
_FAHRENHEIT_PATTERN = re.compile(r'(?<![\d.])([-+]?(?:\d*\.\d+|\d+))\s*°\s*F')


def find_boiling_point(name):
    """Return the mean boiling point in °C reported for *name*.

    The ``'Boiling Point'`` entries returned by ``get_second_layer_props``
    are scanned for temperatures; Fahrenheit values are converted to Celsius
    before averaging.

    Args:
        name: Compound name passed on to ``get_second_layer_props``.

    Returns:
        The mean of all temperatures found, rounded to two decimals, or
        ``None`` when no boiling-point entry or no temperature was found.
    """
    text_dict = get_second_layer_props(str(name), ['Boiling Point', 'Vapor Pressure'])
    try:
        entries = text_dict['Boiling Point']
    except (KeyError, TypeError):
        # No boiling-point section was returned for this compound.
        return None

    boiling_point_values = []
    for item in entries or []:
        # Only entries carrying a 'Value' -> 'StringWithMarkup' list are usable.
        if 'Value' not in item or 'StringWithMarkup' not in item['Value']:
            continue
        markup = item['Value']['StringWithMarkup']
        if not markup:
            continue
        string_value = markup[0]['String']

        match_celsius = _CELSIUS_PATTERN.search(string_value)
        if match_celsius:
            boiling_point_values.append(float(match_celsius.group(1)))

        # Fahrenheit values are converted to Celsius before being added.
        match_fahrenheit = _FAHRENHEIT_PATTERN.search(string_value)
        if match_fahrenheit:
            fahrenheit_temp = float(match_fahrenheit.group(1))
            boiling_point_values.append(round((fahrenheit_temp - 32) * (5 / 9), 2))

    if not boiling_point_values:
        # Nothing could be parsed; avoid dividing by zero below.
        return None
    return round(sum(boiling_point_values) / len(boiling_point_values), 2)

# Button callback
def _determine_from_listbox():
    """Look up the listed compounds and run the determination on them.

    The properties returned by ``get_mixture_properties`` are turned into a
    DataFrame (one row per compound) and handed to ``det_chromato``; an empty
    list yields an empty frame, which ``det_chromato`` reports as an error.
    """
    det_chromato(pd.DataFrame(get_mixture_properties()))


# Interface widgets
entry_widget = tk.Entry(root)
label = tk.Label(root, text="pH value:")
mixture_entry = ttk.Entry(root)
mixture_label = ttk.Label(root, text="Names of the molecules in the mixture:")
add_button = ttk.Button(root, text="Add molecule", command=add_molecule)
mixture_listbox = tk.Listbox(root)
calculate_button = ttk.Button(root, text="Determine chromatography", command=_determine_from_listbox)
result1_label = ttk.Label(root, text="")
result2_label = ttk.Label(root, text="")
result3_label = ttk.Label(root, text="")

# Interface layout
mixture_label.grid(row=0, column=0, padx=5, pady=5)
mixture_entry.grid(row=0, column=1, padx=5, pady=5)
add_button.grid(row=1, column=0, columnspan=2, padx=5, pady=5)
mixture_listbox.grid(row=2, column=0, columnspan=2, padx=5, pady=5)
calculate_button.grid(row=3, column=0, columnspan=2, padx=5, pady=5)
result1_label.grid(row=4, column=0, columnspan=2, padx=5, pady=5)
result2_label.grid(row=5, column=0, columnspan=2, padx=5, pady=5)
result3_label.grid(row=6, column=0, columnspan=2, padx=5, pady=5)

# Start the Tk event loop; this call blocks until the window is closed.
root.mainloop()
