### Generate 3D Download Links Only

In [None]:
import pandas as pd
import re
import argparse
import os

# Matches the numeric identifier inside cells formatted like "CID:12345".
CID_PATTERN = re.compile(r"CID:(\d+)")

# File extensions routed to pandas.read_excel; anything else is read as CSV.
EXCEL_SUFFIXES = (".xlsx", ".xlsm", ".xls")


def build_link_formula(raw_id):
    """Return the Excel cell content for one PubChem ID cell value.

    Args:
        raw_id: The raw cell value. It is converted with ``str()`` before
            matching, so missing values (NaN/None) simply fail to match.

    Returns:
        An ``=HYPERLINK(...)`` formula pointing at the PubChem 3D SDF download
        for the CID when the value contains ``CID:<digits>``, otherwise the
        literal string ``"NotFound"``.
    """
    match = CID_PATTERN.search(str(raw_id).strip())
    if match is None:
        return "NotFound"
    cid = match.group(1)
    url = (
        "https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/CID/"
        f"{cid}/record/SDF?record_type=3d&response_type=save"
        f"&response_basename=Conformer3D_COMPOUND_CID_{cid}"
    )
    return f'=HYPERLINK("{url}", "Download")'


def load_table(path):
    """Read *path* into a DataFrame, choosing the reader by file extension.

    The extension check is case-insensitive; files ending in one of
    ``EXCEL_SUFFIXES`` go through ``pd.read_excel``, everything else through
    ``pd.read_csv``.
    """
    if path.lower().endswith(EXCEL_SUFFIXES):
        return pd.read_excel(path)
    return pd.read_csv(path)


def ensure_output_dir(path):
    """Create the parent directory of *path* if it names one.

    A bare filename has an empty dirname, and ``os.makedirs("")`` raises
    ``FileNotFoundError``, so that case is skipped: the file is then written
    to the current working directory.
    """
    parent = os.path.dirname(path)
    if parent:
        os.makedirs(parent, exist_ok=True)


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Generate Excel with PubChem 3D structure download links")
    parser.add_argument("--input", default="data/example_input.xlsx", help="Input Excel/CSV file")
    parser.add_argument("--output", default="data/imppat_with_3D_links.xlsx", help="Output Excel file")
    parser.add_argument("--cid_column", default="PubChem ID", help="Column name containing PubChem IDs")
    args = parser.parse_args()

    df = load_table(args.input)

    # Fail with a usage message instead of a KeyError traceback when the
    # requested column is not present in the input table.
    if args.cid_column not in df.columns:
        parser.error(
            f"column {args.cid_column!r} not found in {args.input}; "
            f"available columns: {', '.join(map(str, df.columns))}"
        )

    # Only the ID column is needed, so iterate it directly rather than
    # materialising every row with iterrows().
    hyperlinks = [build_link_formula(value) for value in df[args.cid_column]]

    df["3D Structure Link"] = hyperlinks
    ensure_output_dir(args.output)
    with pd.ExcelWriter(args.output, engine="openpyxl") as writer:
        df.to_excel(writer, index=False)

    print(f"Excel file saved: {args.output}")
