# Import Required Libraries
Import necessary libraries including rdkit, pandas, tqdm, and google.colab for file upload functionality.

In [None]:
pip install rdkit-pypi

In [3]:
# Import necessary libraries
import pandas as pd
from rdkit import Chem
from rdkit.Chem import rdMolDescriptors
from rdkit.Chem.FilterCatalog import FilterCatalog, FilterCatalogParams
from tqdm import tqdm
from google.colab import files
import io
# Enable tqdm pandas integration
tqdm.pandas()

# Setup File Upload
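Enter the path to your bioactivity CSV file in the Colab form field below (for example, the path of a file uploaded to the Colab session storage); when a path is given, the file is loaded into the DataFrame `df`.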

In [16]:
file_path = "" #@param {type:"string"}

# Load the CSV only when a path has been supplied through the form field.
if file_path:
    df = pd.read_csv(file_path)
    # A bare expression nested inside an `if` body is never rendered by the
    # notebook, so show the preview explicitly (and only the first rows).
    display(df.head())
else:
    # Keep the cell non-fatal, but explain why `df` is not defined so that
    # the later analysis cells do not fail with an unexplained NameError.
    print("No file_path provided: set file_path to your bioactivity CSV and re-run this cell.")


# Initialize PAINS Filter
Create the PAINS filter initialization function using RDKit's FilterCatalog.

In [17]:
# Initialize PAINS filter
def initialize_pains_filter():
    """Build a filter catalog restricted to the PAINS filter set.

    Returns:
        A ``FilterCatalog`` constructed from parameters to which only the
        ``FilterCatalogParams.FilterCatalogs.PAINS`` catalog has been added.
    """
    pains_params = FilterCatalogParams()
    pains_set = FilterCatalogParams.FilterCatalogs.PAINS
    pains_params.AddCatalog(pains_set)
    pains_catalog = FilterCatalog(pains_params)
    return pains_catalog

# Create PAINS Checking Function
Define the function to check SMILES strings against PAINS filters.

In [18]:
# Function to check PAINS status
def check_pains(smiles, catalog):
    """Classify a single SMILES string against a PAINS filter catalog.

    Args:
        smiles: SMILES string to check. Non-string values (e.g. NaN from a
            missing CSV cell) and blank strings are reported as "Invalid".
        catalog: Object exposing ``HasMatch(mol)``, e.g. the catalog returned
            by ``initialize_pains_filter``.

    Returns:
        "P" when the catalog matches the molecule, "NP" when it does not,
        "Invalid" when the input is missing or cannot be parsed into a
        molecule, and "Error" when parsing or matching raises an exception.
    """
    # Reject missing/blank input up front: an empty SMILES may parse into an
    # atom-less molecule, which would otherwise be labelled "NP".
    if not isinstance(smiles, str) or not smiles.strip():
        return "Invalid"
    try:
        mol = Chem.MolFromSmiles(smiles)
        if mol is None:
            return "Invalid"
        return "P" if catalog.HasMatch(mol) else "NP"
    except Exception:
        # Catch only Exception so KeyboardInterrupt/SystemExit still propagate
        # and a long-running apply over many rows can be interrupted.
        return "Error"

# Process Dataset with PAINS Analysis
Apply the PAINS check to the `canonical_smiles` column of the loaded DataFrame, with progress bar tracking, and print a summary of the results.

In [None]:
# Setup PAINS detection
catalog = initialize_pains_filter()

# Fail early with an actionable message when the expected SMILES column is absent
if 'canonical_smiles' not in df.columns:
    raise KeyError(
        "Input data must contain a 'canonical_smiles' column; found columns: "
        + ", ".join(map(str, df.columns))
    )

# Add PAINS status with progress bar
tqdm.pandas(desc="Checking PAINS")
df['PAIN_STATUS'] = df['canonical_smiles'].progress_apply(lambda x: check_pains(x, catalog))

# Compute the PAINS mask once and reuse it for the percentage and the preview
is_pains = df['PAIN_STATUS'] == 'P'

# Print summary
print("\nPAINS Analysis Summary:")
print(df['PAIN_STATUS'].value_counts())
# Guard against an empty DataFrame, which would otherwise divide by zero
pains_pct = round(100 * int(is_pains.sum()) / len(df), 2) if len(df) else 0.0
print("\nPercentage PAINS:", pains_pct, "%")

# Display first few PAINS compounds
print("\nExample PAINS compounds:")
print(df.loc[is_pains, ['canonical_smiles', 'PAIN_STATUS']].head())

# Save Results
Save the PAINS analysis results to a CSV file.

In [None]:
# Persist the annotated DataFrame to a CSV file in the working directory,
# omitting the index column, then report where it was written.
output_filename = 'pains_analysis_results.csv'
df.to_csv(
    path_or_buf=output_filename,
    index=False,
)
print(f"Results saved to {output_filename}")