# ChemAI Discovery - Demo Notebook

Welcome to ChemAI Discovery! This notebook demonstrates the platform's capabilities for molecular analysis and generation.

## Features Demonstrated
- Molecular property prediction
- Novel molecule generation
- 3D visualization
- Structure-activity relationships

In [None]:
# Import required libraries
import requests
import pandas as pd
import plotly.graph_objects as go
import plotly.express as px
from IPython.display import display, HTML
import json

## Setup

Make sure the ChemAI Discovery platform is running at http://localhost:8000 before executing the cells below. If it is not, start it with `python src/main.py`.

In [None]:
# Platform configuration
BASE_URL = 'http://localhost:8000/api/v2'

# Test connection: query the health endpoint and report what it returns.
# Any failure (connection error, HTTP error status, malformed body) is
# reported as "not accessible" together with the start-up hint.
try:
    # A timeout keeps the cell from hanging indefinitely if the server stalls.
    response = requests.get('http://localhost:8000/health', timeout=10)
    # Do not claim the platform is running when it answered with an HTTP error.
    response.raise_for_status()
    health = response.json()  # parse the body once and reuse it below
    print('✅ Platform is running!')
    print(f'Status: {health["status"]}')
    print(f'Version: {health["version"]}')
except Exception as e:
    print(f'❌ Platform not accessible: {e}')
    print('Please start the platform with: python src/main.py')

## 1. Molecular Property Prediction

Let's analyze some well-known pharmaceutical compounds.

In [None]:
# Sample pharmaceutical compounds (display name -> SMILES string)
compounds = {
    'Aspirin': 'CC(=O)Oc1ccccc1C(=O)O',
    'Caffeine': 'CN1C=NC2=C1C(=O)N(C(=O)N2C)C',
    'Ibuprofen': 'CC(C)Cc1ccc(C(C)C(=O)O)cc1',
    'Paracetamol': 'CC(=O)Nc1ccc(O)cc1',
    'Penicillin': 'CC1([C@@H](N2[C@H](S1)[C@@H](C2=O)NC(=O)Cc3ccccc3)C(=O)O)C'
}

# Predicted properties copied out of each API response, in output-column order
property_names = ['solubility', 'toxicity', 'bioavailability', 'drug_likeness']
result_columns = ['name', 'smiles', *property_names, 'confidence']

# Analyze each compound: POST its SMILES to the analysis endpoint and keep
# one flat record per successful response.
results = []

for name, smiles in compounds.items():
    print(f'Analyzing {name}...')

    response = requests.post(f'{BASE_URL}/analyze-molecule',
                             json={'smiles': smiles})

    if response.status_code != 200:
        # Report the failure and move on to the next compound.
        print(f'  ❌ Error: {response.status_code}')
        continue

    data = response.json()
    predictions = data['predictions']

    record = {'name': name, 'smiles': smiles}
    record.update({prop: predictions[prop]['value'] for prop in property_names})
    record['confidence'] = data['overall_confidence']
    results.append(record)

    print(f'  ✅ Success (Confidence: {data["overall_confidence"]:.1%})')

# Create DataFrame. Passing the columns explicitly keeps the schema stable
# even when every request failed, so later lookups such as df['name'] do not
# raise KeyError on an empty, column-less frame.
df = pd.DataFrame(results, columns=result_columns)
display(df)

In [None]:
# Visualize property comparison as a grouped bar chart:
# one bar series per predicted property, one group per compound.
properties = ['solubility', 'toxicity', 'bioavailability', 'drug_likeness']

# Legend labels are derived from the column names, e.g. 'drug_likeness' -> 'Drug Likeness'.
property_bars = [
    go.Bar(name=prop.replace('_', ' ').title(), x=df['name'], y=df[prop])
    for prop in properties
]

fig = go.Figure(data=property_bars)
fig.update_layout(title='Pharmaceutical Compound Properties', barmode='group', height=500)

fig.show()

## 2. Novel Molecule Generation

Generate new molecules with specific target properties.

In [None]:
# Target property profile sent to the generation endpoint
target_properties = dict(
    solubility=-2.0,       # Good solubility
    bioavailability=75.0,  # High bioavailability
    drug_likeness=0.8,     # Very drug-like
    toxicity=0.2,          # Low toxicity
)

print('🧪 Generating novel drug-like molecules...')
print(f'Target properties: {target_properties}')

# Ask the platform for 10 candidates matching the profile above.
generation_request = {'target_properties': target_properties, 'count': 10}
response = requests.post(f'{BASE_URL}/generate-molecules', json=generation_request)

if response.status_code != 200:
    print(f'❌ Generation failed: {response.status_code}')
else:
    generation_data = response.json()
    molecules = generation_data['molecules']
    stats = generation_data['statistics']

    # Summary of the run as reported in the response's 'statistics' section.
    print(f'✅ Generated {len(molecules)} molecules')
    print(f'Average novelty: {stats["average_novelty"]:.1%}')
    print(f'Average validity: {stats["average_validity"]:.1%}')
    print(f'Generation time: {stats["generation_time"]:.2f}s')

In [None]:
# Display generated molecules as a table.
# The guard skips this cell's work when no `molecules` variable exists.
if 'molecules' in locals():
    preview_rows = []
    for mol in molecules[:5]:  # Show first 5
        preview_rows.append({
            'Name': mol['name'],
            'SMILES': mol['smiles'],
            # Scores are rendered as percentage strings with one decimal place.
            'Novelty': format(mol['novelty_score'], '.1%'),
            'Validity': format(mol['validity_score'], '.1%'),
            'Confidence': format(mol['confidence'], '.1%'),
        })

    gen_df = pd.DataFrame(preview_rows)
    display(gen_df)

In [None]:
# Visualize generation results: novelty (x) against validity (y),
# with each point colored by the molecule's confidence value.
if 'molecules' in locals():
    novelty_scores = [entry['novelty_score'] for entry in molecules]
    validity_scores = [entry['validity_score'] for entry in molecules]
    confidence_scores = [entry['confidence'] for entry in molecules]
    molecule_names = [entry['name'] for entry in molecules]

    # Marker styling: fixed size, Viridis color scale keyed on confidence.
    confidence_marker = {
        'size': 12,
        'color': confidence_scores,
        'colorscale': 'Viridis',
        'showscale': True,
        'colorbar': {'title': 'Confidence'},
    }
    # Hover box shows the molecule name plus both scores as percentages.
    hover_text = '<b>%{text}</b><br>Novelty: %{x:.1%}<br>Validity: %{y:.1%}<extra></extra>'

    fig = go.Figure()
    fig.add_trace(go.Scatter(
        x=novelty_scores,
        y=validity_scores,
        mode='markers',
        marker=confidence_marker,
        text=molecule_names,
        hovertemplate=hover_text,
    ))
    fig.update_layout(
        title='Generated Molecules: Novelty vs Validity',
        xaxis_title='Novelty Score',
        yaxis_title='Validity Score',
        height=500,
    )

    fig.show()

## 3. Structure-Activity Relationship Analysis

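Analyze how molecular structure affects predicted properties, using a series of straight-chain alcohols of increasing carbon chain length (methanol through hexanol).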

In [None]:
# Create a series of related molecules (alcohol series)
alcohol_series = {
    'Methanol': 'CO',
    'Ethanol': 'CCO',
    'Propanol': 'CCCO',
    'Butanol': 'CCCCO',
    'Pentanol': 'CCCCCO',
    'Hexanol': 'CCCCCCO'
}

print('🔬 Analyzing alcohol series for SAR trends...')

# Fixed output schema so sar_df always has these columns, even with no rows.
sar_columns = ['name', 'carbon_count', 'solubility', 'bioavailability']
sar_results = []

for name, smiles in alcohol_series.items():
    response = requests.post(f'{BASE_URL}/analyze-molecule',
                             json={'smiles': smiles})

    if response.status_code != 200:
        # Surface the failure instead of silently dropping the molecule.
        print(f'  ❌ {name}: Error {response.status_code}')
        continue

    data = response.json()
    predictions = data['predictions']
    sar_results.append({
        'name': name,
        # Counting 'C' characters matches the carbon count for the SMILES
        # above, which contain only 'C' and 'O'. It would miscount strings
        # with 'Cl' or aromatic lowercase 'c'.
        'carbon_count': smiles.count('C'),
        'solubility': predictions['solubility']['value'],
        'bioavailability': predictions['bioavailability']['value']
    })

# Explicit columns keep lookups like sar_df['carbon_count'] valid when every
# request failed and sar_results is empty.
sar_df = pd.DataFrame(sar_results, columns=sar_columns)
display(sar_df)

In [None]:
# Plot SAR trends: both properties against carbon count on a dual-axis chart.
# Settings common to both traces (same x values, same line/marker mode).
shared_trace_style = dict(
    x=sar_df['carbon_count'],
    mode='lines+markers',
    marker=dict(size=8),
)

# Solubility trend (left-hand y axis)
solubility_trace = go.Scatter(
    y=sar_df['solubility'],
    name='Solubility (LogS)',
    line=dict(color='blue', width=3),
    **shared_trace_style,
)

# Bioavailability trend (bound to the secondary axis 'y2')
bioavailability_trace = go.Scatter(
    y=sar_df['bioavailability'],
    name='Bioavailability (%)',
    yaxis='y2',
    line=dict(color='red', width=3),
    **shared_trace_style,
)

fig = go.Figure(data=[solubility_trace, bioavailability_trace])

# The secondary axis overlays the primary one and is drawn on the right.
secondary_axis = {'title': 'Bioavailability (%)', 'overlaying': 'y', 'side': 'right'}
fig.update_layout(
    title='Structure-Activity Relationship: Alcohol Series',
    xaxis_title='Carbon Chain Length',
    yaxis_title='Solubility (LogS)',
    yaxis2=secondary_axis,
    height=500,
)

fig.show()

## 4. Platform Statistics

Check platform performance and usage statistics.

In [None]:
# Get platform statistics and print a short text report
response = requests.get(f'{BASE_URL}/stats')

if response.status_code != 200:
    print(f'❌ Failed to get statistics: {response.status_code}')
else:
    stats_data = response.json()

    # The three sections of the response used in the report below.
    platform_stats = stats_data['platform_stats']
    model_perf = stats_data['model_performance']
    system_info = stats_data['system_info']

    print('📊 Platform Statistics')
    print('=' * 30)

    # Usage counters are printed with thousands separators.
    print(f'Total Analyses: {platform_stats["total_analyses"]:,}')
    print(f'Molecules Generated: {platform_stats["molecules_generated"]:,}')
    print(f'Average Accuracy: {platform_stats["average_accuracy"]}%')

    molecular_ai = model_perf["molecular_ai"]
    initialized_icon = "✅" if molecular_ai["initialized"] else "❌"
    print(f'\nAI Model Performance:')
    print(f'- Molecular AI: {initialized_icon} Initialized')
    print(f'- Total Predictions: {molecular_ai["total_predictions"]}')

    print(f'\nSystem Information:')
    print(f'- Version: {system_info["version"]}')
    print(f'- GPU Enabled: {system_info["gpu_enabled"]}')
    print(f'- Max Workers: {system_info["max_workers"]}')

## Conclusion

This notebook demonstrated the key capabilities of ChemAI Discovery:

1. **Molecular Analysis**: Accurate prediction of key pharmaceutical properties
2. **Novel Generation**: AI-powered creation of new molecules with target properties
3. **SAR Analysis**: Understanding structure-activity relationships
4. **Performance Monitoring**: Real-time platform statistics

### Next Steps

- Explore the web interface at http://localhost:8000
- Try the API endpoints directly
- Integrate ChemAI Discovery into your drug discovery workflow
- Contribute to the open-source project

**Happy Drug Discovery! 🧬💊**