In [16]:
import pandas as pd
import numpy as np
from subprocess import Popen, PIPE

In [70]:
df = pd.read_csv("../../data/raw/TMC-1_inventory.csv")

In [76]:
df = df.loc[(df["Isotopologue"] == 0)].reset_index(drop=True)

In [77]:
reduced = df[["Molecule", "Formula", "SMILES", "Column density (cm^-2)", "DOI"]]

In [78]:
reduced["Column density (cm^-2)"] = np.log10(reduced["Column density (cm^-2)"])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


In [79]:
reduced

Unnamed: 0,Molecule,Formula,SMILES,Column density (cm^-2),DOI
0,Methyltriacetylene,CH3C6H,CC#CC#CC#C,12.491362,10.1086/504918
1,Methyldiacetylene,CH3C4H,CC#CC#C,13.477121,10.1086/184312
2,Methylcyanodiacetylene,CH3C5N,CC#CC#CC#N,11.924279,10.1086/504918
3,Methylcyanoacetylene,CH3C3N,CC#CC#N,12.255273,10.1086/504918
4,Protonated nitrogen,N2H+,N#[NH+],12.698970,10.3847/1538-4365/aa69ba
...,...,...,...,...,...
83,Hydrogen cyanide,HCN,C#N,12.389166,10.1086/306032
84,Indene,C9H8,c1ccc2c(c1)CC=C2,12.982271,
85,Vinylacetylene,CH2CHCCH,C=CC#C,13.079181,10.1051/0004-6361/202140434
86,,HCCN,N#C[CH+],11.643453,10.1051/0004-6361/202140434


In [80]:
entries = reduced["DOI"].dropna().unique().tolist()

In [45]:
results = {}
for entry in entries:
    with Popen(["doi2bib", entry], stdout=PIPE) as proc:
        results[entry] = proc.communicate()[0].decode("utf-8")

In [55]:
print(results["10.1086/504918"])

r


In [117]:
map_dict = dict()

for key, value in results.items():
    citekey = value.split()[0].split("{")[-1].replace(",", "")
    map_dict[key] = f"\citet{{{citekey}}}"

In [118]:
reduced["Reference"] = reduced["DOI"].map(map_dict)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


In [129]:
final = reduced[["Formula", "SMILES", "Column density (cm^-2)", "Reference"]]

In [130]:
final["Formula"] = final["Formula"].apply(lambda x: f"\ce{{{x}}}")

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


In [131]:
def format_smiles(smi):
    for char in ["#"]:
        smi = smi.replace(char, f"\\{char}")
    return smi

In [132]:
final["SMILES"] = final["SMILES"].apply(format_smiles)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


In [133]:
print(final.round(4).to_latex(index=False, escape=False, longtable=True))

\begin{longtable}{llrl}
\toprule
      Formula &                    SMILES &  Column density (cm^-2) &               Reference \\
\midrule
\endfirsthead

\toprule
      Formula &                    SMILES &  Column density (cm^-2) &               Reference \\
\midrule
\endhead
\midrule
\multicolumn{4}{r}{{Continued on next page}} \\
\midrule
\endfoot

\bottomrule
\endlastfoot
  \ce{CH3C6H} &             CC\#CC\#CC\#C &                 12.4914 &    \citet{Remijan_2006} \\
  \ce{CH3C4H} &                 CC\#CC\#C &                 13.4771 &    \citet{MacLeod_1984} \\
  \ce{CH3C5N} &             CC\#CC\#CC\#N &                 11.9243 &    \citet{Remijan_2006} \\
  \ce{CH3C3N} &                 CC\#CC\#N &                 12.2553 &    \citet{Remijan_2006} \\
    \ce{N2H+} &                  N\#[NH+] &                 12.6990 &       \citet{Choi_2017} \\
     \ce{NH3} &                         N &                 14.6998 &    \citet{Gratier_2016} \\
   \ce{CH3OH} &                        

In [116]:
with open("molecules.bib", "w+") as write_file:
    write_file.write('\n'.join(results.values()))