In [None]:
# Load the autoreload extension and use mode 2, which re-imports modules before
# each cell is executed so that edits to imported source files take effect
# without restarting the kernel.
%load_ext autoreload
%autoreload 2
# Switch off Jedi for IPython's tab completer.
%config Completer.use_jedi = False

In [None]:
import matplotlib.pyplot as plt
import matplotlib as mpl
# Notebook-wide figure defaults: STIX fonts (no external LaTeX), 12 pt tick and
# axis labels, and 300 dpi rendering.
mpl.rcParams.update({
    'mathtext.fontset': 'stix',
    'font.family': 'STIXGeneral',
    'text.usetex': False,
    'xtick.labelsize': 12,
    'ytick.labelsize': 12,
    'axes.labelsize': 12,
    'figure.dpi': 300,
})

In [None]:
from pathlib import Path
import numpy as np
from rdkit import Chem
import pubchempy as pcp

In [None]:
from openai import OpenAI
import os
# Build the API client. The key is read from the CHAT_GPT3_API_KEY environment
# variable; indexing os.environ raises KeyError here if it is not set.
client = OpenAI(api_key=os.environ["CHAT_GPT3_API_KEY"])

In [None]:
# Send a request whose only message is the system prompt.
# NOTE(review): the returned completion is stored in `chat_completion`, which
# the next cell reassigns, and this message list is not passed to any later
# request in this notebook. Confirm whether the system prompt was meant to be
# included in each subsequent call instead.
chat_completion = client.chat.completions.create(
    messages=[
        {
            "role": "system",
            "content": "You are a chemist, knowledgable about converting chemical names into molecular smiles strings",
        }
    ],
    model="gpt-3.5-turbo",
)

In [None]:
# Minimal request with a single user message; the response object is kept in
# `chat_completion`.
chat_completion = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "Say this is a test"}],
)

In [None]:
def get_completion(prompt, model="gpt-3.5-turbo", system_prompt=None, api_client=None):
    """Return the text of a single, deterministic chat completion.

    Parameters
    ----------
    prompt : str
        Text sent as the user message.
    model : str
        Name of the chat model to query.
    system_prompt : str, optional
        If given, sent as a system message ahead of the user message. Every
        request carries its own message list, so a system prompt has to be
        supplied on each call to have any effect.
    api_client : optional
        Object exposing ``chat.completions.create``. Defaults to the
        notebook-level ``client``.

    Returns
    -------
    str
        Content of the first choice's message.
    """
    if api_client is None:
        # Fall back to the client created near the top of the notebook.
        api_client = client

    messages = []
    if system_prompt is not None:
        messages.append({"role": "system", "content": system_prompt})
    messages.append({"role": "user", "content": prompt})

    # The module-level `openai.ChatCompletion` interface is not available here
    # (only the `OpenAI` class is imported); go through the client object.
    response = api_client.chat.completions.create(
        model=model,
        messages=messages,
        temperature=0,  # make the answer as repeatable as the API allows
    )
    # The message is an object with attributes, not a dict.
    return response.choices[0].message.content

In [None]:
# Ask the model for a SMILES string given a formula and a chemical name; the
# reply text is kept in `content`.
content = get_completion("What is the canonical smiles string for alpha formula C5H7Se and chemical name 3-Me-selenophene?")

In [None]:
# Query PubChem using the string "CH3-C4H4Se" as a formula.
# NOTE(review): this is a condensed structural notation rather than a plain
# molecular formula — confirm that the formula search accepts it.
s = pcp.get_compounds("CH3-C4H4Se", 'formula')

In [None]:
# Query PubChem by the molecular formula used in the prompt above (C5H7Se).
s2 = pcp.get_compounds("C5H7Se", "formula")

In [None]:
# Build (and display) an RDKit molecule from the first result of the C5H7Se
# query. Assumes the query returned at least one compound.
Chem.MolFromSmiles(s2[0].canonical_smiles)

In [None]:
# Hand-written SMILES with selenium written as a bracket atom, [Se].
Chem.MolFromSmiles("CC1=C[Se]C=C1")

In [None]:
# Same string with selenium written without brackets.
# NOTE(review): Se is not part of the SMILES organic subset, so this is
# expected to fail to parse — confirm this cell is an intentional negative
# example.
Chem.MolFromSmiles("CC1=CSeC=C1")

In [None]:
# Build (and display) an RDKit molecule from the first result of the
# "CH3-C4H4Se" query. Assumes the query returned at least one compound.
Chem.MolFromSmiles(s[0].canonical_smiles)

In [None]:
import sys
# Make the xview package importable from a source checkout. The path is
# relative, so it is resolved against the directory the kernel runs in.
sys.path.append("../NMC/NSLS-II-ISS-xview")
from xview.xasproject.xasproject import XASDataSet

In [None]:
# Parser for the text spectrum files: "*"-prefixed header lines, then numbers.
def _parse_row(row):
    """Convert one data line (tab- or space-separated) into a list of floats."""
    # Commas are stripped from each token before conversion.
    return [float(token.replace(",", "")) for token in row.split()]


def parse_file(fname):
    """Read one spectrum file and return ``(data, metadata)``.

    The file is expected to start with header lines beginning with ``*``,
    optionally followed by one blank line, and then numeric rows separated by
    tabs or spaces.

    Parameters
    ----------
    fname : str or path-like
        File to read.

    Returns
    -------
    data : numpy.ndarray or None
        2-D float array with one row per data line. Rows shorter than the
        longest row are right-padded with NaN. Blank data lines are skipped.
    metadata : dict or None
        ``"total"`` holds every header line; ``"edge"`` holds the line
        containing "Edge" when one is present.

    ``(None, None)`` is returned when the file is empty, when it holds no data
    rows, or when its "Edge" line is not a C, N or O 1s edge.

    Raises
    ------
    ValueError
        If a data token cannot be converted to float.
    """
    with open(fname, "r") as f:
        lines = [raw.strip() for raw in f]

    if not lines:
        return None, None

    metadata = {"total": []}
    # Default covers a file made only of header lines: no data section at all.
    data_start = len(lines)
    for index, line in enumerate(lines):

        if "Edge" in line:
            # Keep only C / N / O 1s edges; anything else is rejected.
            if not any(tag in line for tag in ("C ", "N ", "O ")):
                return None, None
            if "1s" not in line:
                return None, None
            metadata["edge"] = line

        if line == "":
            # A blank line ends the header; data begins on the next line.
            data_start = index + 1
            break
        if line[0] == "*":
            metadata["total"].append(line)
            continue
        # First line that is neither blank nor a header: data begins here.
        data_start = index
        break

    rows = [_parse_row(line) for line in lines[data_start:] if line]
    if not rows:
        return None, None

    # Pad to the widest row (not the first one) so the array is rectangular.
    width = max(len(row) for row in rows)
    padded = [row + [np.nan] * (width - len(row)) for row in rows]

    return np.array(padded), metadata

In [None]:
# Parse every .os file, keyed by file stem. The paths are sorted because glob
# does not guarantee an order, and later cells slice the collected spectra
# positionally.
files = sorted(Path("experiment/data").glob("*.os"))
data = {}
metadata = {}
for file in files:
    # Files whose stem contains "ab-0nokk" are excluded (reason not recorded here).
    if "ab-0nokk" in file.stem:
        continue
    d, m = parse_file(file)
    if d is None:
        # parse_file returned no data for this file; leave it out.
        continue
    data[file.stem] = d
    metadata[file.stem] = m

# Look at the Carbon K-edge

In [None]:
# Holds the data arrays of the files whose edge line mentions carbon; populated
# in the next cell.
carbon_data = []

In [None]:
# Rebuild the list from scratch so that re-running this cell does not append
# the same spectra again. Files with no recorded "edge" line are skipped
# instead of raising KeyError.
carbon_data = [
    data[key]
    for key, value in metadata.items()
    if " C " in value.get("edge", "")
]

In [None]:
# Print the header lines collected for one file (the first entry in
# `metadata`), then stop.
for key, value in metadata.items():
    print(value["total"])
    break

In [None]:
# Values assigned to the `norm1` / `norm2` attributes of each XASDataSet before
# normalization in the plotting cell below.
# NOTE(review): presumably the bounds of the post-edge normalization window, in
# eV relative to the edge — confirm against the xview implementation.
a_norm1=50
a_norm2=600

In [None]:
fig, ax = plt.subplots(1, 1, figsize=(3, 2))

# Normalize and overlay the first ten carbon spectra.
for index, d in enumerate(carbon_data[:10]):

    try:
        # Column 0 is used as the energy axis and column 1 as mu.
        a = XASDataSet(energy=d[:, 0], mu=d[:, 1])
        a.norm1 = a_norm1
        a.norm2 = a_norm2
        a.normalize_force()
    except ValueError as err:
        # Best effort: skip spectra that cannot be normalized, but report
        # which one failed and why rather than a bare marker.
        print(f"problem: spectrum {index} skipped ({err})")
        continue

    # NOTE(review): `a.flat` is taken to be the normalized spectrum — confirm
    # against the xview implementation.
    norm_spectrum = {"energy": list(a.energy), "mu": list(a.flat)}

    ax.plot(norm_spectrum["energy"], norm_spectrum["mu"], color="black", alpha=0.5)

ax.set_xlim(280, 300)
# NOTE(review): eV assumed from the carbon K-edge window used for the x-limits.
ax.set_xlabel("Energy (eV)")
ax.set_ylabel(r"Normalized $\mu$")
plt.show()