# Multi-Omics Analysis - Interactive Notebook

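This notebook provides an interactive interface to the outputs of the multi-omics analysis pipeline. It does not run the pipeline itself: it reads the result files the pipeline has already written under `../results`, so run the pipeline first.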

## Workflow Overview
1. Load results from completed analyses
2. Create custom visualizations
3. Perform additional statistical tests
4. Generate insights and interpretations

## 1. Setup and Load Data

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
import warnings
# Silence every warning category for the rest of the kernel session.
# NOTE(review): this also hides pandas/matplotlib deprecation and dtype
# warnings - consider narrowing the filter once the notebook is stable.
warnings.filterwarnings('ignore')

# Plot defaults: seaborn grid theme and a 12x8 inch default figure size
sns.set_style('whitegrid')
plt.rcParams.update({'figure.figsize': (12, 8)})

# Root folder of the pipeline outputs, relative to the notebook's working directory
results_dir = Path('..') / 'results'

print(
    "Loading analysis results...",
    f"Results directory: {results_dir}",
    sep="\n",
)

## 2. Load RNA-seq Results

In [None]:
# Read the two DESeq2 tables written by the RNA-seq step; the first CSV column
# becomes the row index in both frames.
rna_seq_dir = results_dir / 'rna_seq'
rnaseq_results = pd.read_csv(rna_seq_dir / 'deseq2_results.csv', index_col=0)
sig_genes = pd.read_csv(rna_seq_dir / 'significant_genes.csv', index_col=0)

# Headline counts. The thresholds quoted in the second line are only a label:
# this cell does not apply them, it reports the size of the pre-filtered file.
print("RNA-seq Results:")
print(f"  Total genes: {len(rnaseq_results)}")
print(f"  Significant genes (padj<0.05, |log2FC|>1): {len(sig_genes)}")

# Ten rows with the largest log2 fold change
top_upregulated = sig_genes.nlargest(10, 'log2FoldChange')
print("\nTop 10 upregulated genes:")
print(top_upregulated[['log2FoldChange', 'padj']])

## 3. Load and Compare Omics Layers

In [None]:
# Every table in this cell is optional: a missing or unreadable file is
# reported and the notebook carries on. When a load fails, the corresponding
# variable is left undefined so later cells can tell it was never loaded.
#
# FileNotFoundError is handled separately from other failures so that a file
# which exists but cannot be parsed is not misreported as "not found", and so
# that KeyboardInterrupt / SystemExit are no longer swallowed.

# Load proteomics
try:
    proteomics_results = pd.read_csv(results_dir / 'mass_spec' / 'proteomics_results.csv')
    print(f"Proteomics: {len(proteomics_results)} proteins")
except FileNotFoundError:
    print("Proteomics file not found")
except Exception as e:
    print(f"Error loading proteomics data: {e}")

# Load metabolomics
try:
    metabolomics_results = pd.read_csv(results_dir / 'metabolomics' / 'metabolomics_results.csv')
    print(f"Metabolomics: {len(metabolomics_results)} metabolites")
except FileNotFoundError:
    print("Metabolomics file not found")
except Exception as e:
    print(f"Error loading metabolomics data: {e}")

# Load integration results
try:
    omics_corr = pd.read_csv(results_dir / 'integration' / 'omics_correlations.csv')
    print(f"\nOmics Correlations:")
    print(omics_corr.to_string())
except FileNotFoundError:
    print("Integration results not found")
except Exception as e:
    print(f"Error loading integration results: {e}")

## 4. Pathway Analysis Visualization

In [None]:
# Load the protein -> pathway mapping and chart the ten pathways with the most
# mapped proteins. Any failure (missing file, missing 'pathway' column,
# plotting error) is reported and the notebook carries on.
try:
    pathway_mapping = pd.read_csv(results_dir / 'pathways' / 'protein_pathway_mapping.csv')

    # Count rows per pathway; value_counts() returns them in descending order.
    # NOTE(review): the axis label calls these "significant proteins" - that
    # assumes the mapping file only contains significant proteins; confirm.
    pathway_counts = pathway_mapping['pathway'].value_counts().head(10)

    fig, ax = plt.subplots(figsize=(12, 6))
    # barh draws the first row at the bottom of the axis, so plot the counts in
    # reverse to put the highest-count pathway at the top of the chart (the
    # same reading order the hub-protein chart uses).
    pathway_counts.iloc[::-1].plot(kind='barh', ax=ax, color='steelblue')
    ax.set_xlabel('Number of Significant Proteins')
    ax.set_title('Top 10 Dysregulated Pathways')
    plt.tight_layout()
    # Written to the notebook's current working directory
    plt.savefig('pathway_summary.png', dpi=150, bbox_inches='tight')
    plt.show()

except Exception as e:
    print(f"Error loading pathway data: {e}")

## 5. Network Analysis - Top Hub Proteins

In [None]:
# Hub proteins of the PPI network: print the first 15 rows and chart the first 10.
# NOTE(review): "top" here means "first rows of the file" - this presumes the
# CSV is already ordered by descending degree; confirm against the pipeline.
try:
    hub_csv = results_dir / 'interactions' / 'hub_proteins.csv'
    hub_proteins = pd.read_csv(hub_csv)

    print("Top 15 Hub Proteins (by degree centrality):")
    print(hub_proteins.head(15).to_string())

    # Visualize top 10: ascending sort so the highest-degree protein is drawn
    # last, i.e. at the top of the horizontal bar chart.
    fig, ax = plt.subplots(figsize=(10, 8))
    top_10 = hub_proteins.head(10).sort_values('degree')
    ax.barh(top_10['protein'], top_10['degree'], color='coral')
    ax.set(
        xlabel='Degree (Number of Interactions)',
        title='Top 10 Hub Proteins in PPI Network',
    )
    fig.tight_layout()
    # Written to the notebook's current working directory
    fig.savefig('hub_proteins.png', dpi=150, bbox_inches='tight')
    plt.show()

except Exception as e:
    print(f"Error: {e}")

## 6. Model Performance Comparison

In [None]:
# Compare the trained models: print the metrics table, then draw accuracy and
# AUC side by side with a dashed reference line at 0.5.
try:
    model_perf = pd.read_csv(results_dir / 'predictions' / 'model_performance.csv')

    print("Model Performance Comparison:")
    print(model_perf.to_string(index=False))

    # One panel per metric: (column, y-axis label, panel title, bar colour)
    panel_specs = [
        ('accuracy', 'Accuracy', 'Model Accuracy Comparison', 'steelblue'),
        ('auc', 'AUC-ROC', 'Model AUC Comparison', 'coral'),
    ]

    fig, axes = plt.subplots(1, 2, figsize=(14, 6))
    for panel, (metric, y_label, panel_title, bar_color) in zip(axes, panel_specs):
        model_perf.set_index('model')[metric].plot(kind='bar', ax=panel, color=bar_color)
        panel.set_ylabel(y_label)
        panel.set_title(panel_title)
        # Both metrics are shown on a fixed 0-1 scale
        panel.set_ylim([0, 1])
        panel.axhline(y=0.5, color='r', linestyle='--', alpha=0.3, label='Random')
        panel.legend()

    fig.tight_layout()
    # Written to the notebook's current working directory
    fig.savefig('model_comparison.png', dpi=150, bbox_inches='tight')
    plt.show()

except Exception as e:
    print(f"Error: {e}")

## 7. Clinical Trials - Biomarker Matching

In [None]:
# Clinical trials matched to the biomarkers: report how many were found and
# print a compact table of the first ten rows.
try:
    trials_csv = results_dir / 'clinical_trials' / 'matched_trials.csv'
    matched_trials = pd.read_csv(trials_csv)

    n_matched = len(matched_trials)
    print(f"Found {n_matched} clinical trials matching your biomarkers\n")

    # Show top matches.
    # NOTE(review): rows are taken in file order - presumably the file is
    # already ranked by match_percentage; confirm against the pipeline.
    print("Top 10 Matching Trials:")
    display_cols = ['nct_id', 'title', 'match_percentage', 'phase', 'status']
    top_matches = matched_trials[display_cols].head(10)
    print(top_matches.to_string(index=False))

except Exception as e:
    print(f"Error: {e}")

## 8. Custom Analysis - Combine Omics Data

In [None]:
# Example: Find genes where RNA and protein both upregulated

# Cut-offs used to call a feature "upregulated" in each layer
RNA_LOG2FC_MIN = 1
PROTEIN_LOG2FC_MIN = 0.5
PROTEIN_PADJ_MAX = 0.05

# Both inputs are created by earlier cells and may be absent if a load failed.
# Check for them explicitly rather than catching NameError: that way the
# message names the input that is actually missing, and an unrelated NameError
# (e.g. a typo) is not misreported as "load proteomics data".
required_inputs = {'sig_genes': 'RNA-seq', 'proteomics_results': 'proteomics'}
missing_inputs = [label for name, label in required_inputs.items() if name not in globals()]

if missing_inputs:
    print(f"Please load {' and '.join(missing_inputs)} data first")
else:
    # Get upregulated genes from RNA-seq (identifiers are the frame's index)
    upregulated_genes = sig_genes[sig_genes['log2FoldChange'] > RNA_LOG2FC_MIN].index.tolist()

    # Get upregulated proteins from proteomics
    upregulated_proteins = proteomics_results[
        (proteomics_results['log2FoldChange'] > PROTEIN_LOG2FC_MIN) &
        (proteomics_results['padj'] < PROTEIN_PADJ_MAX)
    ]['protein'].tolist()

    # Find overlaps (genes where both RNA and protein upregulated).
    # NOTE(review): this is an exact match between the sig_genes index and the
    # 'protein' column - it assumes both use the same identifier namespace
    # (e.g. gene symbols); confirm, otherwise the overlap will be empty.
    concordant = list(set(upregulated_genes) & set(upregulated_proteins))

    print(f"Genes upregulated at RNA level: {len(upregulated_genes)}")
    print(f"Proteins upregulated: {len(upregulated_proteins)}")
    print(f"Concordant (both RNA and protein upregulated): {len(concordant)}")
    # Only the first ten identifiers (alphabetically) are listed
    print(f"\nConcordant genes/proteins: {', '.join(sorted(concordant)[:10])}")

## 9. Biomarker Summary Report

In [None]:
# Summarise the RNA-seq layer: feature counts plus the direction and range of
# the fold changes among the significant genes.
n_total_features = len(rnaseq_results)
n_significant = len(sig_genes)
log2fc = sig_genes['log2FoldChange']

rna_seq_summary = {
    'total_features': n_total_features,
    'significant_features': n_significant,
    # A boolean mask sums to the number of rows that satisfy the condition
    'upregulated': int((log2fc > 0).sum()),
    'downregulated': int((log2fc < 0).sum()),
    'max_log2fc': log2fc.max(),
    'min_log2fc': log2fc.min(),
}

# Keyed by omics layer; only RNA-seq is summarised in this cell
summary_stats = {'RNA-seq': rna_seq_summary}

banner = "=" * 60
print(banner)
print("BIOMARKER DISCOVERY SUMMARY")
print(banner)

for omics_type, stats in summary_stats.items():
    print(f"\n{omics_type}:")
    for key, value in stats.items():
        print(f"  {key}: {value}")

print("\n" + banner)

## 10. Export Summary Report

In [None]:
# Create comprehensive report
#
# Requires sig_genes (RNA-seq cell). Pathway, model and clinical-trial results
# are optional: each section is filled from the data loaded earlier in the
# notebook when it is available, and falls back to a placeholder otherwise, so
# the report never states findings that were not computed from the results.

# Number of pathways listed in the report
N_REPORT_PATHWAYS = 3

# Gene identifiers are the index of sig_genes; cast to str so join() also works
# when the IDs were parsed as numbers.
top_up_genes = ', '.join(map(str, sig_genes.nlargest(5, 'log2FoldChange').index))
top_down_genes = ', '.join(map(str, sig_genes.nsmallest(5, 'log2FoldChange').index))

# Pathways with the most mapped proteins (same ranking as the pathway chart)
loaded_pathways = globals().get('pathway_mapping')
top_pathways = []
if loaded_pathways is not None and 'pathway' in loaded_pathways.columns:
    top_pathways = loaded_pathways['pathway'].value_counts().head(N_REPORT_PATHWAYS).index.tolist()
if top_pathways:
    pathway_lines = '\n'.join(f"- {pathway}" for pathway in top_pathways)
else:
    pathway_lines = "- [pathway mapping not loaded - see results/pathways/protein_pathway_mapping.csv]"

# Best model = highest AUC among the loaded models (rows with a missing AUC are skipped)
loaded_models = globals().get('model_perf')
if (
    loaded_models is not None
    and {'model', 'auc'} <= set(loaded_models.columns)
    and loaded_models['auc'].notna().any()
):
    best_model = loaded_models.loc[loaded_models['auc'].idxmax()]
    best_model_line = f"- Best model: {best_model['model']}"
    best_auc_line = f"- AUC-ROC: {best_model['auc']:.3f}"
else:
    best_model_line = "- Best model: [check results/predictions/model_performance.csv]"
    best_auc_line = "- AUC-ROC: [see ROC curves]"

# Zero when the clinical-trials cell did not load anything
n_trials = len(matched_trials) if 'matched_trials' in globals() else 0

report = f"""
MULTI-OMICS ANALYSIS REPORT
Generated: {pd.Timestamp.now()}

BIOMARKERS IDENTIFIED:
- {len(sig_genes)} significant genes (RNA-seq)
- Top upregulated genes: {top_up_genes}
- Top downregulated genes: {top_down_genes}

PATHWAY ANALYSIS:
{pathway_lines}

CLINICAL RELEVANCE:
- {n_trials} relevant clinical trials identified

PREDICTIVE MODELS:
{best_model_line}
{best_auc_line}
"""

print(report)

# Save report to the notebook's current working directory. The encoding is
# explicit so the file is identical across platforms even if an identifier
# contains non-ASCII characters.
with open('analysis_summary_report.txt', 'w', encoding='utf-8') as f:
    f.write(report)

print("\nReport saved to: analysis_summary_report.txt")