# The code here translates CIDs to SMILES
The first cells contain the imports and the necessary function; everything
below is just application.

In [1]:
import pickle
import time

import requests

In [2]:
def CID_to_SMILES(CID, timeout=30, retries=3, backoff=1.0):
    '''
    This looks up the canonical SMILES string of a compound in PubChem.

    A GET request is sent to the PubChem PUG REST endpoint for the
    CanonicalSMILES property of the given CID. A 503 response is retried
    with an exponentially growing pause; any other non-200 status fails
    immediately.

    Arguments:
        CID (int or str) - the PubChem compound ID, interpolated into the URL
        timeout (float) - seconds to wait for the server on each request
        retries (int) - how many extra attempts to make after a 503 response
        backoff (float) - base pause in seconds; doubled after every retry
    Value: the 'CanonicalSMILES' entry of the first record in the response
    Raises: Exception if no attempt returned status code 200
    '''
    url = f"https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/{CID}/property/CanonicalSMILES/JSON"

    # always make at least one request, even if a negative retry count is passed
    attempts = max(int(retries), 0) + 1

    for attempt in range(attempts):
        # without a timeout a stalled connection would block the caller forever
        response = requests.get(url, timeout=timeout)

        if response.status_code == 200:
            data = response.json()
            return data['PropertyTable']['Properties'][0]['CanonicalSMILES']

        # only a 503 is retried; any other failure status is reported right away
        if response.status_code != 503 or attempt == attempts - 1:
            break

        time.sleep(backoff * 2 ** attempt)

    raise Exception(f"Error: Could not retrieve SMILES from this CID. Status code: {response.status_code}")

In [3]:
# load CID dictionary
# NOTE: pickle.load can execute arbitrary code - only load pickle files from a trusted source
with open('pickles/dict_all_cells_v2.pkl', 'rb') as f:
    # the with-statement closes the file on exit, so no explicit close() is needed
    name_to_CID_dict = pickle.load(f)

# initialize new dictionary where the SMILES will be written
name_to_SMILES_dict = {}

# populate the SMILES dictionary
# the progress denominator comes from the loaded data instead of a hard-coded count
total = len(name_to_CID_dict)
for index, (name, CID) in enumerate(name_to_CID_dict.items(), start=1):
    print(f"{index}/{total}. Next up: {name}")
    # names without a CID get a None placeholder, so every name appears in the output
    if CID is not None:
        name_to_SMILES_dict[name] = CID_to_SMILES(CID)
    else:
        name_to_SMILES_dict[name] = None

1/2559. Next up: TiO2-c
2/2559. Next up: Nb2O5-c
3/2559. Next up: BTTI-C8
4/2559. Next up: TT-3,6-TPA
5/2559. Next up: TTz-1
6/2559. Next up: DTS(IIThThHEX)2
7/2559. Next up: Graphene-np
8/2559. Next up: Isopropanol-hydroquinolatolithium
9/2559. Next up: OMeTPA-TPA
10/2559. Next up: Polystyrene
11/2559. Next up: LiNiO-c
12/2559. Next up: PDBT-T1
13/2559. Next up: CuO
14/2559. Next up: 4PA-SAM
15/2559. Next up: CuS
16/2559. Next up: Z25
17/2559. Next up: PTO
18/2559. Next up: CuPcNO2-OMFPh
19/2559. Next up: Ph-TPA-4A
20/2559. Next up: [BMMIm]Cl
21/2559. Next up: CuInS2-QDs
22/2559. Next up: BZnTPP
23/2559. Next up: Perylene
24/2559. Next up: CuPc(tBu)4
25/2559. Next up: Zn(acac)2
26/2559. Next up: pm-TPE-4DPA
27/2559. Next up: Black P-QDs
28/2559. Next up: Cysteine
29/2559. Next up: 3,6-Ben
30/2559. Next up: CrO3
31/2559. Next up: Sb2S3
32/2559. Next up: V1036
33/2559. Next up: PNDI-2T
34/2559. Next up: T5H-OMeDPA
35/2559. Next up: H-3,4
36/2559. Next up: PBDTTT-E-T
37/2559. Next up: PB

Exception: Error: Could not retrieve SMILES from this CID. Status code: 503

In [5]:
# write the name -> SMILES mapping to disk as a pickle file
with open('SMILES_dict.pkl', 'wb') as smiles_file:
    pickle.dump(name_to_SMILES_dict, file=smiles_file)

In [3]:
# tally the names whose stored value in name_to_SMILES_dict is something other than None
count = len([smiles for smiles in name_to_SMILES_dict.values() if smiles is not None])
print(count)

0
