# Tutorial 3: CO2RR Multi-Pathway Analysis

## Comparing Catalyst Performance Across Different CO2 Reduction Pathways

This tutorial demonstrates how to analyze catalysts for the CO2 Reduction Reaction (CO2RR) across multiple reaction pathways:

1. **CO pathway** - Direct CO production
2. **CHO pathway** - Formyl intermediate route
3. **COCOH pathway** - Coupling intermediate route

You will learn how to:
- Analyze multiple reaction pathways
- Compare catalyst performance across pathways
- Identify versatile catalysts that perform well across pathways

---

## 1. Setup and Configuration

In [None]:
import sys
sys.path.insert(0, '..')

from ascicat import ASCICalculator
from ascicat.visualizer import Visualizer
from ascicat.analyzer import Analyzer
from ascicat.config import REACTION_CONFIGS

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os

# Plot defaults shared by every figure in this tutorial
plt.style.use('seaborn-v0_8-whitegrid')
plt.rcParams.update({'figure.dpi': 100})

print("Setup complete!")

### Understanding CO2RR Pathways

Each pathway has a different optimal adsorption energy (Sabatier optimum):

In [None]:
# Pathway keys used to index REACTION_CONFIGS; later cells reuse this list
pathways = ['CO2RR-CO', 'CO2RR-CHO', 'CO2RR-COCOH']

# Target product shown next to each pathway (display only)
products = {
    'CO2RR-CO': 'Carbon Monoxide',
    'CO2RR-CHO': 'Formaldehyde/Methanol',
    'CO2RR-COCOH': 'C2+ Products',
}

# Table header
rule_width = 60
print("CO2RR Pathway Configurations")
print("=" * rule_width)
print(f"{'Pathway':<15} {'Optimal E (eV)':<15} {'Width (eV)':<12} {'Product'}")
print("-" * rule_width)

# One row per pathway: optimal energy and activity width from its config
for pathway, product in products.items():
    config = REACTION_CONFIGS[pathway]
    columns = (
        f"{pathway:<15}",
        f"{config.optimal_energy:<15.2f}",
        f"{config.activity_width:<12.2f}",
        product,
    )
    print(" ".join(columns))

## 2. Load and Analyze Each Pathway

In [None]:
# Input CSV for each pathway, keyed by pathway name
pathway_files = {
    'CO2RR-CO': '../data/CO2RR_CO_clean.csv',
    'CO2RR-CHO': '../data/CO2RR_CHO_clean.csv',
    'CO2RR-COCOH': '../data/CO2RR_COCOH_clean.csv'
}

# Per-pathway calculator objects and ASCI result tables, reused by later cells
calculators = {}
results = {}

for pathway in pathway_files:
    print(f"\nProcessing {pathway}...")
    print("-"*40)

    # NOTE(review): the full key (e.g. 'CO2RR-CO') is passed as `pathway`
    # together with reaction='CO2RR' -- confirm ASCICalculator expects the
    # full key here rather than a short name such as 'CO'.
    calculator = ASCICalculator(reaction='CO2RR', pathway=pathway, scoring_method='linear')

    calculator.load_data(pathway_files[pathway])
    print(f"  Loaded {len(calculator.data)} catalysts")

    # Near-equal weights (0.33 / 0.33 / 0.34, summing to 1); presumably
    # w_a / w_s / w_c weight activity / stability / cost -- confirm in ascicat.
    scores = calculator.calculate_asci(w_a=0.33, w_s=0.33, w_c=0.34)
    asci = scores['ASCI']
    print(f"  ASCI calculated (mean: {asci.mean():.3f}, max: {asci.max():.3f})")

    calculators[pathway], results[pathway] = calculator, scores

print("\n" + "="*50)
print("All pathways analyzed successfully!")

## 3. Top Catalysts Per Pathway

In [None]:
# Columns to show, in this order; any column missing from the table is skipped
display_cols = ['symbol', 'DFT_ads_E', 'activity_score', 'stability_score', 'cost_score', 'ASCI']

for pathway in pathways:
    print(f"\nTop 10 Catalysts: {pathway}")
    print("="*70)

    top_10 = calculators[pathway].get_top_catalysts(n=10)
    available = [col for col in display_cols if col in top_10.columns]
    table_text = top_10[available].round(3).to_string()

    print(table_text)
    print()

## 4. Cross-Pathway Comparison Visualization

In [None]:
# Four-panel comparison of the three pathways on a 2x2 grid
fig, axes = plt.subplots(2, 2, figsize=(14, 12))
ax_asci, ax_energy = axes[0]
ax_cost, ax_box = axes[1]

# One fixed color per pathway, shared by all panels
colors = {'CO2RR-CO': '#3498db', 'CO2RR-CHO': '#2ecc71', 'CO2RR-COCOH': '#e74c3c'}

# Panel A: ASCI Score Distribution (overlaid histograms)
for pathway in pathways:
    ax_asci.hist(results[pathway]['ASCI'], bins=30, alpha=0.5,
                 label=pathway, color=colors[pathway], edgecolor='white')
ax_asci.set_xlabel('ASCI Score', fontsize=12)
ax_asci.set_ylabel('Count', fontsize=12)
ax_asci.set_title('A. ASCI Distribution by Pathway', fontsize=13, fontweight='bold')
ax_asci.legend()

# Panel B: Adsorption Energy Distribution, with each pathway's optimum
# drawn as a dashed vertical line in the pathway's color
for pathway in pathways:
    ax_energy.hist(results[pathway]['DFT_ads_E'], bins=30, alpha=0.4,
                   label=pathway, color=colors[pathway], edgecolor='white')
    opt_e = REACTION_CONFIGS[pathway].optimal_energy
    ax_energy.axvline(opt_e, color=colors[pathway], linestyle='--', linewidth=2)

ax_energy.set_xlabel('Adsorption Energy (eV)', fontsize=12)
ax_energy.set_ylabel('Count', fontsize=12)
ax_energy.set_title('B. Adsorption Energy Distribution (dashed = optimum)', fontsize=13, fontweight='bold')
ax_energy.legend()

# Panel C: Activity vs Cost Score
for pathway in pathways:
    ax_cost.scatter(results[pathway]['activity_score'],
                    results[pathway]['cost_score'],
                    alpha=0.4, s=20, label=pathway, color=colors[pathway])
ax_cost.set_xlabel('Activity Score', fontsize=12)
ax_cost.set_ylabel('Cost Score', fontsize=12)
ax_cost.set_title('C. Activity vs Cost by Pathway', fontsize=13, fontweight='bold')
ax_cost.legend()

# Panel D: Box plot of ASCI scores
box_data = [results[p]['ASCI'] for p in pathways]
bp = ax_box.boxplot(box_data, patch_artist=True)
# The `labels=` keyword of boxplot() is deprecated since Matplotlib 3.9 (renamed
# to `tick_labels`). Setting the tick labels on the axes afterwards works on
# both older and newer Matplotlib versions. Labels are the short pathway names.
ax_box.set_xticklabels([p.split('-')[1] for p in pathways])
for patch, pathway in zip(bp['boxes'], pathways):
    patch.set_facecolor(colors[pathway])
    patch.set_alpha(0.6)
ax_box.set_xlabel('Pathway', fontsize=12)
ax_box.set_ylabel('ASCI Score', fontsize=12)
ax_box.set_title('D. ASCI Score Distribution', fontsize=13, fontweight='bold')

plt.tight_layout()
plt.show()

## 5. Identifying Versatile Catalysts

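Find catalysts that perform well across multiple pathways — these are particularly valuable for practical applications. Here, a catalyst counts as versatile if it ranks in the top 50 for at least two of the three pathways.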

In [None]:
# Size of the per-pathway shortlist used to define "versatile"
top_n = 50

# Shortlisted catalyst identifiers for each pathway
top_sets = {}
for pathway in pathways:
    shortlist = calculators[pathway].get_top_catalysts(n=top_n)
    # Identify catalysts by the 'symbol' column when present, else by index label
    identifiers = shortlist['symbol'] if 'symbol' in shortlist.columns else shortlist.index
    top_sets[pathway] = set(identifiers.tolist())

# Catalysts shortlisted in every pathway
versatile_all = set.intersection(*(top_sets[p] for p in pathways))

# Catalysts shortlisted in at least two pathways: union of all pairwise overlaps
versatile_two = set()
for position, first in enumerate(pathways):
    for second in pathways[position + 1:]:
        versatile_two |= top_sets[first] & top_sets[second]

print("Versatile Catalyst Analysis")
print("="*60)
print(f"\nCatalysts in top {top_n} of ALL three pathways: {len(versatile_all)}")
if versatile_all:
    print(f"  {sorted(versatile_all)}")

print(f"\nCatalysts in top {top_n} of AT LEAST two pathways: {len(versatile_two)}")
if versatile_two:
    # Show at most the first 20; add an ellipsis only when the list is truncated
    max_shown = 20
    preview = sorted(versatile_two)[:max_shown]
    ellipsis = "..." if len(versatile_two) > max_shown else ""
    print(f"  {preview}{ellipsis}")

In [None]:
# Build one row per versatile catalyst with its rank and ASCI in every pathway.
# versatile_df is only created when at least one versatile catalyst exists.
if versatile_two:
    versatile_rankings = []

    for catalyst in versatile_two:
        row = {'catalyst': catalyst}
        ranks_found = []  # ranks from the pathways where the catalyst was found

        for pathway in pathways:
            short = pathway.split("-")[1]
            table = results[pathway]

            # Look the catalyst up by 'symbol' column, falling back to the index
            if 'symbol' in table.columns:
                hit = table[table['symbol'] == catalyst]
            elif catalyst in table.index:
                hit = table.loc[[catalyst]]
            else:
                hit = pd.DataFrame()

            if hit.empty:
                # Catalyst absent from this pathway's results
                row[f'{short}_rank'] = '-'
                row[f'{short}_ASCI'] = '-'
                continue

            # If several rows match, the first one is used
            rank = hit['rank'].values[0]
            row[f'{short}_rank'] = int(rank)
            row[f'{short}_ASCI'] = round(hit['ASCI'].values[0], 3)
            ranks_found.append(rank)

        # Mean rank over the pathways where the catalyst appears
        if ranks_found:
            row['avg_rank'] = round(sum(ranks_found) / len(ranks_found), 1)
        else:
            row['avg_rank'] = np.nan
        versatile_rankings.append(row)

    versatile_df = pd.DataFrame(versatile_rankings).sort_values('avg_rank')

    print("\nTop 15 Versatile Catalysts (sorted by average rank)")
    print("="*80)
    print(versatile_df.head(15).to_string(index=False))

## 6. Pathway-Specific Analysis: ASCI vs Adsorption Energy

In [None]:
# One ASCI-vs-adsorption-energy panel per pathway, side by side
fig, axes = plt.subplots(1, 3, figsize=(15, 5))

for panel, pathway in zip(axes, pathways):
    data = results[pathway]
    optimum = REACTION_CONFIGS[pathway].optimal_energy
    best = data.nsmallest(10, 'rank')  # the ten rows with the lowest rank value

    # All catalysts, colored by their ASCI score
    points = panel.scatter(
        data['DFT_ads_E'], data['ASCI'],
        c=data['ASCI'], cmap='viridis', alpha=0.6, s=30,
    )

    # Dashed line at the pathway's optimal adsorption energy
    panel.axvline(
        optimum, color='red', linestyle='--', linewidth=2,
        label=f'Optimum: {optimum:.2f} eV',
    )

    # Star markers on top (zorder=5) for the ten best-ranked catalysts
    panel.scatter(
        best['DFT_ads_E'], best['ASCI'],
        c='red', s=100, marker='*', edgecolors='black',
        linewidths=0.5, label='Top 10', zorder=5,
    )

    panel.set_xlabel('Adsorption Energy (eV)', fontsize=12)
    panel.set_ylabel('ASCI Score', fontsize=12)
    panel.set_title(f'{pathway}', fontsize=13, fontweight='bold')
    panel.legend(loc='lower right')

    # Each panel gets its own colorbar for the ASCI color scale
    plt.colorbar(points, ax=panel, label='ASCI')

plt.tight_layout()
plt.show()

## 7. Statistical Summary

In [None]:
# Collect one summary row per pathway
summary_data = []

for pathway in pathways:
    scores = results[pathway]
    asci = scores['ASCI']

    # Pareto-optimal subset, computed by Analyzer from the results table and
    # the calculator's reaction config
    pareto_front = Analyzer(scores, calculators[pathway].config).get_pareto_optimal()

    row = {
        'Pathway': pathway,
        'N Catalysts': len(scores),
        'Optimal E (eV)': REACTION_CONFIGS[pathway].optimal_energy,
        'Mean ASCI': asci.mean(),
        'Max ASCI': asci.max(),
        'Std ASCI': asci.std(),
        'N Pareto': len(pareto_front),
    }
    summary_data.append(row)

summary_df = pd.DataFrame(summary_data)

print("\nPathway Summary Statistics")
print("="*80)
print(summary_df.round(3).to_string(index=False))

## 8. Generate Figures

In [None]:
# Root folder; each pathway gets its own sub-folder of figures
output_base = '../results/tutorial_CO2RR_figures'
figure_dirs = {pathway: f'{output_base}/{pathway}' for pathway in pathways}

for pathway, output_dir in figure_dirs.items():
    # Create the folder (and parents) if it does not exist yet
    os.makedirs(output_dir, exist_ok=True)

    visualizer = Visualizer(results[pathway], REACTION_CONFIGS[pathway])
    visualizer.generate_publication_figures(output_dir)

    print(f"Generated figures for {pathway}: {output_dir}")

print(f"\nAll figures saved to: {output_base}")

## 9. Export Results

In [None]:
# Export all pathway results as CSV files into one folder
results_dir = '../results/tutorial_CO2RR_results'
os.makedirs(results_dir, exist_ok=True)

# Paths written by this cell, in write order. Listing these instead of the
# folder contents keeps stale files from earlier runs out of the report and
# makes the output order deterministic.
written_files = []

for pathway in pathways:
    pathway_short = pathway.replace('CO2RR-', '')

    # Full results
    full_path = f'{results_dir}/{pathway_short}_full_results.csv'
    results[pathway].to_csv(full_path, index=False)
    written_files.append(full_path)

    # Top 20
    top_20 = calculators[pathway].get_top_catalysts(n=20)
    top20_path = f'{results_dir}/{pathway_short}_top20.csv'
    top_20.to_csv(top20_path, index=False)
    written_files.append(top20_path)

# Save versatile catalyst rankings. versatile_df only exists if the ranking
# cell found at least one versatile catalyst, hence the existence check.
if 'versatile_df' in globals():
    versatile_path = f'{results_dir}/versatile_catalysts.csv'
    versatile_df.to_csv(versatile_path, index=False)
    written_files.append(versatile_path)

# Save summary
summary_path = f'{results_dir}/pathway_summary.csv'
summary_df.to_csv(summary_path, index=False)
written_files.append(summary_path)

print(f"Results exported to: {results_dir}")
print("\nFiles created:")
for path in written_files:
    print(f"  - {os.path.basename(path)}")

## 10. Summary

In this tutorial, we performed a comprehensive CO2RR multi-pathway analysis:

1. **Analyzed three pathways**: CO, CHO, and COCOH with different Sabatier optima
2. **Compared catalyst performance** across pathways using visualizations
3. **Identified versatile catalysts** that excel across multiple pathways
4. **Generated pathway-specific figures** for each reaction route
5. **Created comprehensive rankings** for each reaction product

### Key Insights
- Each pathway has unique optimal catalysts
- Versatile catalysts are valuable for multi-product processes
- ASCI enables direct comparison across different reaction pathways

---

**Next:** Tutorial 4 - Advanced Visualization Guide