In [5]:
# Import necessary libraries
import requests       # to fetch data from the web API
import pandas as pd   # to organize and store the data

# Look up each drug by name in ChEMBL, resolve it to a ChEMBL ID, fetch the
# molecule record for that ID and collect its canonical SMILES string.
# Produces `drug_data` (list of dicts), `df_smiles` (DataFrame) and the file
# "drug_smiles.csv". Every drug that cannot be resolved is reported and skipped.

# Step 1: Define a list of drug names we are interested in
drug_names = ['paracetamol', 'ibuprofen', 'celecoxib']

# Base URL of the ChEMBL molecule resource, and a per-request timeout (seconds)
# so a stalled connection cannot hang the notebook kernel indefinitely.
CHEMBL_MOLECULE_URL = "https://www.ebi.ac.uk/chembl/api/data/molecule"
CHEMBL_TIMEOUT = 30

# Step 2: Create an empty list to collect drug information
drug_data = []

# Step 3: Loop through each drug to query ChEMBL and retrieve SMILES
for drug in drug_names:
    # Search ChEMBL by name. Passing the term through `params` lets requests
    # URL-encode it, so names containing spaces or '&' stay a single query value.
    try:
        response = requests.get(
            f"{CHEMBL_MOLECULE_URL}/search",
            params={"q": drug},
            headers={"Accept": "application/json"},
            timeout=CHEMBL_TIMEOUT,
        )
    except requests.RequestException as exc:
        print(f"Failed to retrieve data for {drug}: {exc}")
        continue

    if response.status_code != 200:
        print(f"Failed to retrieve data for {drug}")
        continue

    # Take the first search hit; a missing or empty 'molecules' list means no match.
    hits = response.json().get('molecules') or []
    if not hits:
        print(f"No ChEMBL match found for {drug}")
        continue
    chembl_id = hits[0]['molecule_chembl_id']

    # Use the ChEMBL ID to get molecule details (like SMILES)
    try:
        mol_response = requests.get(
            f"{CHEMBL_MOLECULE_URL}/{chembl_id}.json",
            timeout=CHEMBL_TIMEOUT,
        )
    except requests.RequestException as exc:
        print(f"Failed to retrieve molecule record {chembl_id} for {drug}: {exc}")
        continue

    if mol_response.status_code != 200:
        print(f"Failed to retrieve molecule record {chembl_id} for {drug}")
        continue

    # 'molecule_structures' may be absent OR present with a null value; a plain
    # .get(key, {}) only covers the first case, so normalise both to an empty dict.
    structures = mol_response.json().get('molecule_structures') or {}
    smiles = structures.get('canonical_smiles') or 'NA'

    drug_data.append({'drug': drug, 'chembl_id': chembl_id, 'smiles': smiles})

# Step 4: Convert collected data into a pandas DataFrame.
# Explicit columns keep the schema stable even when every lookup failed.
df_smiles = pd.DataFrame(drug_data, columns=['drug', 'chembl_id', 'smiles'])

# Step 5: Display the results
print("SMILES DataFrame:")
print(df_smiles)

# Optional: Save to CSV
df_smiles.to_csv("drug_smiles.csv", index=False)




SMILES DataFrame:
          drug  chembl_id                                             smiles
0  paracetamol  CHEMBL112                                 CC(=O)Nc1ccc(O)cc1
1    ibuprofen  CHEMBL521                         CC(C)Cc1ccc(C(C)C(=O)O)cc1
2    celecoxib  CHEMBL118  Cc1ccc(-c2cc(C(F)(F)F)nn2-c2ccc(S(N)(=O)=O)cc2...


In [None]:
# Download the FASTA records for the proteins listed in `protein_ids` from the
# UniProt REST API, keep the bare sequences in `protein_seqs`, print a preview,
# and write one "<name>.fasta" file per successfully fetched protein.

# Step 1: Define UniProt IDs for COX-1 and COX-2
protein_ids = {
    "COX1": "P23219",  # PTGS1 - Cyclooxygenase-1
    "COX2": "P35354"   # PTGS2 - Cyclooxygenase-2
}

# Step 2: Create a dictionary to store protein sequences
protein_seqs = {}

# Step 3: Fetch sequences using UniProt REST API
for name, uniprot_id in protein_ids.items():
    url = f"https://rest.uniprot.org/uniprotkb/{uniprot_id}.fasta"
    try:
        # Timeout (seconds) so a stalled connection cannot hang the kernel.
        response = requests.get(url, timeout=30)
    except requests.RequestException:
        print(f"Error fetching {name} sequence.")
        continue

    if response.status_code != 200:
        print(f"Error fetching {name} sequence.")
        continue

    # Drop the FASTA header (first line, starting with '>') and join the
    # remaining lines; splitlines()/strip() also remove any '\r' left by
    # CRLF line endings, which split('\n') would keep inside the sequence.
    sequence = ''.join(line.strip() for line in response.text.splitlines()[1:])
    if not sequence:
        print(f"Error fetching {name} sequence.")
        continue
    protein_seqs[name] = sequence

# Step 4: Display a preview of the sequences
for name, seq in protein_seqs.items():
    print(f"\n{name} Sequence (first 100 amino acids):\n{seq[:100]}...\n")

# Optional: Save to FASTA files.
# Iterate over what was actually fetched so a failed download does not raise
# KeyError here, and take the accession from `protein_ids` rather than
# repeating it as a literal in the header.
for name, seq in protein_seqs.items():
    with open(f"{name}.fasta", "w") as f:
        f.write(f">{name}|{protein_ids[name]}\n{seq}")




COX1 Sequence (first 100 amino acids):
MSRSLLLWFLLFLLLLPPLPVLLADPGAPTPVNPCCYYPCQHQGICVRFGLDRYQCDCTRTGYSGPNCTIPGLWTWLRNSLRPSPSFTHFLLTHGRWFWE...


COX2 Sequence (first 100 amino acids):
MLARALLLCAVLALSHTANPCCSHPCQNRGVCMSVGFDQYKCDCTRTGFYGENCSTPEFLTRIKLFLKPTPNTVHYILTHFKGFWNVVNNIPFLRNAIMS...

