In [1]:
import pandas as pd
import numpy as np
from chembl_webresource_client.new_client import new_client
from skimpy import skim
import requests

In [2]:
# Search ChEMBL for targets matching the free-text term 'Neuraminidase' and
# load the hits into a DataFrame; the download loop below reads its
# target_chembl_id column.
target = new_client.target
target_query = target.search('Neuraminidase')
targets = pd.DataFrame.from_dict(target_query)

In [3]:
# Accumulator for the per-target activity DataFrames appended by the download
# loop. Re-run this cell before re-running the loop, otherwise the list keeps
# the frames from the previous run as well.
dataframes = []

In [4]:
# Download the IC50 activity records of every target returned by the search
# and collect one DataFrame per target in `dataframes`.
activity = new_client.activity  # same endpoint for every target, so bind it once

# itertuples yields nothing for an empty search result, so the loop is a no-op
# in that case.
for target_row in targets.itertuples(index=False):
    target_chembl_id = target_row.target_chembl_id
    # Label the rows with the target's preferred name rather than repeating
    # its ChEMBL ID, which is only used as the filter key.
    target_name = target_row.pref_name

    activity_query = activity.filter(target_chembl_id=target_chembl_id).filter(standard_type="IC50")

    df = pd.DataFrame.from_dict(activity_query)
    df['target'] = target_name

    dataframes.append(df)

In [5]:
# Stack at most the first 40 per-target frames into one table with a fresh
# 0..n-1 index, then save it as Data1.csv (path relative to the working
# directory).
# NOTE(review): the 40 is a hard-coded cut-off with no stated reason, and
# to_csv is called without index=False so the row index is written as an
# extra unnamed column - confirm both are intended.
df = pd.concat(dataframes[:40], ignore_index=True)
df.to_csv('Data1.csv')

In [6]:
# Pass the combined activity table to skimpy's skim() to get an overview of it.
skim(df)

In [7]:
def get_uniprot_sequence(uniprot_id, timeout=30):
    """Fetch the sequence of a UniProt entry as one contiguous string.

    Downloads the entry's FASTA record, drops the first (header) line and
    joins the remaining lines without any line breaks or surrounding
    whitespace.

    Args:
        uniprot_id: UniProt accession to look up, e.g. 'P03468'.
        timeout: Seconds to wait for the server before giving up; forwarded
            to requests.get so a stalled connection cannot hang the kernel.

    Returns:
        The sequence as a single string, or None when the request or the
        parsing fails (a message describing the error is printed instead of
        raising).
    """
    try:
        uniprot_url = f'https://www.uniprot.org/uniprot/{uniprot_id}.fasta'
        response = requests.get(uniprot_url, timeout=timeout)
        response.raise_for_status()
        # FASTA layout: one header line followed by the sequence wrapped over
        # several lines. splitlines() also copes with '\r\n' line endings.
        fasta_lines = response.text.splitlines()
        return ''.join(line.strip() for line in fasta_lines[1:])
    except requests.exceptions.HTTPError as http_err:
        print(f'HTTP error occurred: {http_err}')
    except Exception as err:
        # Deliberately broad: callers rely on this function reporting the
        # problem and returning None rather than raising.
        print(f'An error occurred: {err}')
    return None

# Example usage: fetch a single UniProt entry and print what came back.
uniprot_id = 'P03468'  # Replace with the UniProt accession of interest
sequence = get_uniprot_sequence(uniprot_id)
# get_uniprot_sequence returns None when the request fails, in which case the
# line below prints "None" after the label.
print(f'Genetic Sequence from UniProt: {sequence}')

Genetic Sequence from UniProt: MNPNQKIITIGSICLVVGLISLILQIGNIISIWISHSIQTGSQNHTGICNQNIITYKNST
WVKDTTSVILTGNSSLCPIRGWAIYSKDNSIRIGSKGDVFVIREPFISCSHLECRTFFLT
QGALLNDKHSNGTVKDRSPYRALMSCPVGEAPSPYNSRFESVAWSASACHDGMGWLTIGI
SGPDNGAVAVLKYNGIITETIKSWRKKILRTQESECACVNGSCFTIMTDGPSDGLASYKI
FKIEKGKVTKSIELNAPNSHYEECSCYPDTGKVMCVCRDNWHGSNRPWVSFDQNLDYQIG
YICSGVFGDNPRPEDGTGSCGPVYVDGANGVKGFSYRYGNGVWIGRTKSHSSRHGFEMIW
DPNGWTETDSKFSVRQDVVAMTDWSGYSGSFVQHPELTGLDCMRPCFWVELIRGRPKEKT
IWTSASSISFCGVNSDTVDWSWPDGAELPFSIDK



In [8]:
from rdkit import Chem
from rdkit.Chem import AllChem

def smiles_to_pdb(smiles, output_pdb):
    """Build a 3D structure from a SMILES string and write it to a PDB file.

    The molecule is parsed, explicit hydrogens are added, a single conformer
    is embedded with a fixed random seed (so repeated runs give the same
    coordinates) and the result is written to ``output_pdb``.

    Args:
        smiles: SMILES string describing the molecule.
        output_pdb: Path of the PDB file to create.

    Returns:
        True if the PDB file was written. False if the SMILES could not be
        parsed or no 3D coordinates could be generated; in both cases a
        message is printed and no file is written.
    """
    mol = Chem.MolFromSmiles(smiles)
    if mol is None:
        print("Invalid SMILES.")
        return False

    mol = Chem.AddHs(mol)

    # EmbedMolecule returns the ID of the new conformer, or -1 when the
    # embedding fails. Writing the molecule anyway would produce a PDB file
    # without real 3D coordinates, so stop here instead.
    if AllChem.EmbedMolecule(mol, randomSeed=42) == -1:
        print("Could not generate 3D coordinates for this molecule.")
        return False

    Chem.MolToPDBFile(mol, output_pdb)
    return True

# Demo: convert one hard-coded SMILES string to output1.pdb.
# NOTE(review): inside a notebook kernel __name__ is presumably always
# "__main__", so this guard does not skip anything there - confirm whether it
# is still wanted.
if __name__ == "__main__":
    input_smiles = "CCC(CC)O[C@@H]1C=C(C(=O)O)C[C@H](N)[C@H]1NC(C)=O"  # Replace with your SMILES string
    output_pdb = "output1.pdb"  # Specify the output PDB file name
    
    # Parse the SMILES, embed a 3D conformer and write it to output_pdb.
    smiles_to_pdb(input_smiles, output_pdb)