# Synthesis Planning with OChem Helper

This notebook demonstrates retrosynthetic analysis and reaction prediction capabilities.

In [None]:
# Setup
import sys
sys.path.append('../src')
sys.path.append('../mcp')

import asyncio

import matplotlib.pyplot as plt
import networkx as nx
import pandas as pd
from IPython.display import Image, display
from rdkit import Chem
from rdkit.Chem import Descriptors, Draw

# Import synthesis tools
from tools.suggest_synthesis import retrosynthetic_analysis
from tools.reaction_prediction import check_feasibility

# Set up async environment
import nest_asyncio
nest_asyncio.apply()

%matplotlib inline

## 1. Retrosynthetic Analysis

Analyze a target molecule to find synthetic routes.

In [None]:
# Target for this walkthrough: aspirin, written as a SMILES string
target_name = "Aspirin (Acetylsalicylic acid)"
target_smiles = "CC(=O)Oc1ccccc1C(=O)O"

# Parse the SMILES, render a 400x300 depiction, then show it under its name
target_mol = Chem.MolFromSmiles(target_smiles)
img = Draw.MolToImage(target_mol, size=(400, 300))
print(f"Target: {target_name}")
display(img)

In [None]:
# Perform retrosynthetic analysis
print("Performing retrosynthetic analysis...")
retro_results = await retrosynthetic_analysis(
    target_smiles=target_smiles,
    max_steps=3,
    starting_materials=None  # Let the system find routes
)

if 'error' not in retro_results:
    print(f"Found {len(retro_results.get('routes', []))} synthetic routes")
    print(f"Analysis time: {retro_results.get('analysis_time', 'N/A')}")

## 2. Visualize Synthetic Routes

Display the discovered synthetic routes with reaction steps.

In [None]:
# Summarise the highest-ranked routes in a table
if 'routes' in retro_results and retro_results['routes']:
    # One row per route, limited to the first five entries; the "key
    # reaction" is the type of the route's first step when it has any steps.
    route_data = [
        {
            'Route': rank,
            'Score': route['score'],
            'Steps': route['num_steps'],
            'Complexity': route.get('complexity', 'N/A'),
            'Key Reaction': route['steps'][0]['reaction_type'] if route['steps'] else 'N/A',
        }
        for rank, route in enumerate(retro_results['routes'][:5], start=1)
    ]

    df_routes = pd.DataFrame(route_data)
    print("Top Synthetic Routes:")
    display(df_routes)

In [None]:
# Walk through the first (top-listed) route one step at a time
if 'routes' in retro_results and retro_results['routes']:
    best_route = retro_results['routes'][0]

    print(f"\nBest Route Details (Score: {best_route['score']:.3f})")
    print("=" * 50)

    for step_number, step in enumerate(best_route['steps'], start=1):
        # Text summary of the step
        print(f"\nStep {step_number}: {step['reaction_type']}")
        print(f"Reactants: {', '.join(step['reactants'])}")
        print(f"Products: {', '.join(step['products'])}")
        print(f"Conditions: {step.get('conditions', 'Standard conditions')}")

        # Collect drawable molecules, reactants first and then products,
        # skipping any SMILES that RDKit cannot parse.
        mols = []
        labels = []
        for role, smiles_group in (("Reactant", step['reactants']),
                                   ("Product", step['products'])):
            for smiles in smiles_group:
                mol = Chem.MolFromSmiles(smiles)
                if mol:
                    mols.append(mol)
                    labels.append(role)

        # Nothing parsed for this step: move on without drawing a grid
        if not mols:
            continue

        # Single-row grid with one cell per molecule
        img = Draw.MolsToGridImage(
            mols,
            molsPerRow=len(mols),
            subImgSize=(200, 200),
            legends=labels
        )
        display(img)

## 3. Reaction Feasibility Check

Check the feasibility of specific reactions in the synthesis.

In [None]:
# Check feasibility of the first reaction step
if 'routes' in retro_results and retro_results['routes']:
    first_step = retro_results['routes'][0]['steps'][0]
    
    print("Checking reaction feasibility...")
    feasibility = await check_feasibility(
        reactants=first_step['reactants'],
        products=first_step['products'],
        conditions={'temperature': 'room temperature', 'solvent': 'DCM'}
    )
    
    if 'error' not in feasibility:
        print(f"\nFeasibility Analysis:")
        print(f"Feasible: {feasibility['feasible']}")
        print(f"Confidence: {feasibility['confidence']:.2f}")
        print(f"Reaction Type: {feasibility.get('reaction_type', 'Unknown')}")
        
        if feasibility.get('issues'):
            print("\nIssues:")
            for issue in feasibility['issues']:
                print(f"  • {issue}")
        
        if feasibility.get('suggestions'):
            print("\nSuggestions:")
            for suggestion in feasibility['suggestions']:
                print(f"  • {suggestion}")

## 4. Alternative Target Analysis

Let's analyze a second target, acetaminophen, this time restricting the search to a specified set of starting materials.

In [None]:
# Second target: Acetaminophen (Paracetamol), written as a SMILES string
complex_name = "Acetaminophen"
complex_target = "CC(=O)Nc1ccc(O)cc1"

# Parse the SMILES, render a 400x300 depiction, then show it under its name
complex_mol = Chem.MolFromSmiles(complex_target)
img = Draw.MolToImage(complex_mol, size=(400, 300))
print(f"Target: {complex_name}")
display(img)

In [None]:
# Analyze with specific starting materials
starting_materials = ['c1ccc(O)cc1', 'CC(=O)Cl']  # Phenol and Acetyl chloride

complex_results = await retrosynthetic_analysis(
    target_smiles=complex_target,
    max_steps=3,
    starting_materials=starting_materials
)

if 'routes' in complex_results:
    print(f"Found {len(complex_results['routes'])} routes using specified starting materials")

## 5. Reaction Network Visualization

Create a network diagram of the synthetic routes.

In [None]:
def create_synthesis_network(routes, target_smiles, max_routes=3):
    """Create a directed network graph of synthesis routes.

    Nodes are keyed by the raw SMILES strings found in the routes, so a
    product only coincides with the target node when its string equals
    ``target_smiles`` exactly. Every node carries a ``type`` attribute
    ('target', 'reactant' or 'intermediate') and a short ``label``.

    Args:
        routes: Sequence of route dicts. Each route has a 'steps' list whose
            entries provide 'reactants', 'products' and 'reaction_type'.
        target_smiles: SMILES string used as the key of the target node.
        max_routes: Number of leading routes to include (default 3).

    Returns:
        A ``networkx.DiGraph`` with one edge from every reactant to every
        product of each step; edges carry ``reaction`` and ``route``
        attributes.
    """
    G = nx.DiGraph()

    # Add target node
    G.add_node(target_smiles, type='target', label='Target')

    for i, route in enumerate(routes[:max_routes]):
        for j, step in enumerate(route['steps']):
            # Molecules first seen as reactants start out as 'reactant'
            for reactant in step['reactants']:
                if not G.has_node(reactant):
                    G.add_node(reactant, type='reactant', label=f'R{i}_{j}')

            for product in step['products']:
                if not G.has_node(product):
                    G.add_node(product, type='intermediate', label=f'I{i}_{j}')
                elif G.nodes[product].get('type') == 'reactant':
                    # The molecule was consumed by an earlier-listed step and
                    # is produced here, so it is an intermediate rather than
                    # a starting material. Without this promotion the node
                    # type depended on the order in which steps are listed.
                    G.nodes[product].update(type='intermediate', label=f'I{i}_{j}')

                # Add edges from reactants to products
                for reactant in step['reactants']:
                    G.add_edge(reactant, product,
                               reaction=step['reaction_type'],
                               route=i)

    return G

# Create and visualize network
if 'routes' in retro_results and retro_results['routes']:
    G = create_synthesis_network(retro_results['routes'], target_smiles)

    # Set up the plot
    plt.figure(figsize=(12, 8))

    # Force-directed layout. The seed pins the random initial positions so
    # the figure is identical on every Restart & Run All.
    pos = nx.spring_layout(G, k=2, iterations=50, seed=42)

    # Draw nodes one type at a time so each type gets its own colour
    node_colors = {'target': 'red', 'reactant': 'green', 'intermediate': 'yellow'}
    for node_type, color in node_colors.items():
        nodes = [n for n, d in G.nodes(data=True) if d.get('type') == node_type]
        nx.draw_networkx_nodes(G, pos, nodelist=nodes,
                               node_color=color, node_size=500, alpha=0.8)

    # Draw edges
    nx.draw_networkx_edges(G, pos, alpha=0.5, arrows=True, arrowsize=20)

    # Draw the short node labels instead of the full SMILES keys. A distinct
    # name avoids clobbering the `labels` list used by the route-detail cell.
    node_labels = nx.get_node_attributes(G, 'label')
    nx.draw_networkx_labels(G, pos, node_labels, font_size=8)

    plt.title("Synthesis Route Network")
    plt.axis('off')
    plt.tight_layout()
    plt.show()

## 6. Reaction Conditions Optimization

Analyze optimal conditions for key reactions.

In [None]:
# Test different conditions for a reaction
if 'routes' in retro_results and retro_results['routes']:
    test_step = retro_results['routes'][0]['steps'][0]
    
    # Test different conditions
    conditions_to_test = [
        {'solvent': 'DCM', 'temperature': '0°C', 'catalyst': 'None'},
        {'solvent': 'THF', 'temperature': 'RT', 'catalyst': 'DMAP'},
        {'solvent': 'DMF', 'temperature': '60°C', 'catalyst': 'K2CO3'},
        {'solvent': 'Toluene', 'temperature': 'reflux', 'catalyst': 'p-TsOH'}
    ]
    
    condition_results = []
    
    for conditions in conditions_to_test:
        result = await check_feasibility(
            reactants=test_step['reactants'],
            products=test_step['products'],
            conditions=conditions
        )
        
        if 'error' not in result:
            condition_results.append({
                'Solvent': conditions['solvent'],
                'Temperature': conditions['temperature'],
                'Catalyst': conditions['catalyst'],
                'Feasible': result['feasible'],
                'Confidence': result['confidence']
            })
    
    # Display results
    df_conditions = pd.DataFrame(condition_results)
    df_conditions = df_conditions.sort_values('Confidence', ascending=False)
    
    print("Condition Optimization Results:")
    display(df_conditions)

## 7. Starting Material Analysis

Tabulate the starting materials used across all routes, with molecular weight, atom count, and a rough availability heuristic based on atom count.

In [None]:
# Collect all starting materials from routes.
# Both containers are created unconditionally so the visualisation cell that
# follows can test `material_data` even when no routes were returned.
all_starting_materials = set()
material_data = []

if 'routes' in retro_results:
    for route in retro_results['routes']:
        all_starting_materials.update(route.get('starting_materials', []))

    # Iterate in sorted order so the table rows (and the "first six" shown in
    # the next cell) do not depend on set iteration order.
    for material in sorted(all_starting_materials):
        mol = Chem.MolFromSmiles(material)
        if mol:
            material_data.append({
                'SMILES': material,
                # Descriptors must be imported explicitly; `Chem.Descriptors`
                # is not guaranteed to exist after `from rdkit import Chem`.
                'MW': Descriptors.MolWt(mol),
                'Atoms': mol.GetNumAtoms(),
                # Crude heuristic: molecules under 10 atoms are called common
                'Availability': 'Common' if mol.GetNumAtoms() < 10 else 'Check supplier'
            })

    print(f"Total unique starting materials: {len(all_starting_materials)}")

    # An empty frame has no 'MW' column, so only build and sort when rows exist
    if material_data:
        df_materials = pd.DataFrame(material_data)
        df_materials = df_materials.sort_values('MW')
        display(df_materials)
    else:
        print("No parseable starting materials were reported for these routes.")

In [None]:
# Draw a grid of the starting materials, captioned with molecular weight
if material_data:
    # Re-parse the first six table entries; keep (molecule, caption) pairs
    # only for SMILES that RDKit can parse.
    drawable = []
    for entry in material_data[:6]:
        parsed = Chem.MolFromSmiles(entry['SMILES'])
        if parsed:
            drawable.append((parsed, f"MW: {entry['MW']:.1f}"))

    sm_mols = [pair[0] for pair in drawable]
    sm_labels = [pair[1] for pair in drawable]

    if sm_mols:
        # Three molecules per row, each in a 250x200 cell
        img = Draw.MolsToGridImage(
            sm_mols,
            molsPerRow=3,
            subImgSize=(250, 200),
            legends=sm_labels
        )
        print("Key Starting Materials:")
        display(img)

## 8. Export Synthesis Plan

Export the synthesis plan for laboratory implementation.

In [None]:
# Create synthesis plan document
def create_synthesis_plan(target_name, target_smiles, route):
    """Render one route as a plain-text synthesis plan.

    Args:
        target_name: Display name of the target, printed in the header.
        target_smiles: SMILES string of the target, printed in the header.
        route: Route dict. Reads 'score', 'num_steps' and 'steps'; each step
            supplies 'reaction_type', 'reactants' and 'products', plus
            optional 'conditions' and 'yield'. An optional
            'starting_materials' list is printed at the end.

    Returns:
        The complete plan as a single newline-terminated string.
    """
    heavy_rule = '=' * 50
    light_rule = '-' * 30

    # Header block
    chunks = [
        "SYNTHESIS PLAN\n",
        f"{heavy_rule}\n",
        "\n",
        f"Target: {target_name}\n",
        f"SMILES: {target_smiles}\n",
        f"Route Score: {route['score']:.3f}\n",
        f"Total Steps: {route['num_steps']}\n",
        "\n",
        "REACTION STEPS:\n",
        f"{heavy_rule}\n",
    ]

    # One section per step, numbered from 1
    for number, step in enumerate(route['steps'], start=1):
        chunks.append(f"\nStep {number}: {step['reaction_type']}\n")
        chunks.append(f"{light_rule}\n")
        chunks.append("Reactants:\n")
        chunks.extend(f"  - {smiles}\n" for smiles in step['reactants'])
        chunks.append("\nProducts:\n")
        chunks.extend(f"  - {smiles}\n" for smiles in step['products'])
        chunks.append(f"\nConditions: {step.get('conditions', 'Standard conditions')}\n")

        # The yield line is printed only for a truthy 'yield' value
        if step.get('yield'):
            chunks.append(f"Expected Yield: {step['yield']}%\n")

    # Trailing list of starting materials (may be empty)
    chunks.append(f"\n{heavy_rule}\n")
    chunks.append("STARTING MATERIALS:\n")
    chunks.append(f"{heavy_rule}\n")
    chunks.extend(f"  - {smiles}\n" for smiles in route.get('starting_materials', []))

    return "".join(chunks)

# Generate and save plan for the first (top-listed) route
if 'routes' in retro_results and retro_results['routes']:
    best_route = retro_results['routes'][0]
    synthesis_plan = create_synthesis_plan(target_name, target_smiles, best_route)

    # Save to file. The encoding is explicit because the plan can contain
    # non-ASCII characters (for example '°' in reaction conditions) and the
    # platform default encoding varies.
    with open('synthesis_plan.txt', 'w', encoding='utf-8') as f:
        f.write(synthesis_plan)

    print("Synthesis plan saved to synthesis_plan.txt")
    print("\nPreview:")
    # Show the first 500 characters; add an ellipsis only if text was cut off
    preview = synthesis_plan[:500]
    if len(synthesis_plan) > 500:
        preview += "..."
    print(preview)

## Summary

In this notebook, we demonstrated:

1. **Retrosynthetic Analysis**: Breaking down target molecules into simpler precursors
2. **Route Visualization**: Displaying synthetic routes with reaction steps
3. **Feasibility Checking**: Evaluating reaction feasibility and conditions
4. **Network Analysis**: Creating synthesis route networks
5. **Condition Optimization**: Testing different reaction conditions
6. **Starting Material Analysis**: Evaluating availability of precursors
7. **Plan Export**: Creating laboratory-ready synthesis plans

Key insights:
- Multiple synthetic routes often exist for a target molecule
- Route scoring considers feasibility, step count, and complexity
- Reaction conditions significantly impact feasibility
- Starting material availability is crucial for practical synthesis

Next steps:
- Validate routes experimentally
- Optimize reaction conditions in the lab
- Consider alternative routes if issues arise
- Use MCP integration for AI-assisted planning (see notebook 04)