# AlphaFold model

In [None]:
# AlphaFold Database API - Protein Structure Analysis
# Using EBI AlphaFold Database for Human ACE2 (SARS-CoV-2 receptor)

import requests
import numpy as np
from Bio.PDB import PDBParser
from Bio.PDB.Polypeptide import PPBuilder
import json
import tempfile
import os

# ============================================================================
# AlphaFold Database Configuration
# ============================================================================

# Both endpoints share one host; only the path prefix differs.
_ALPHAFOLD_HOST = "https://alphafold.ebi.ac.uk"
ALPHAFOLD_API_BASE = _ALPHAFOLD_HOST + "/api"      # root of the prediction-metadata API
ALPHAFOLD_FILES_BASE = _ALPHAFOLD_HOST + "/files"  # root of the downloadable model files

# UniProt accession of the human ACE2 receptor (SARS-CoV-2 binding target)
ACE2_UNIPROT_ID = "Q9BYF1"

# ============================================================================
# Target Regions (ACE2 Receptor)
# ============================================================================

# ACE2 is 805 residues total
# N-terminal peptidase domain (residues 1-615) binds SARS-CoV-2 spike
# C-terminal collectrin domain (residues 616-805) is more flexible

# Rationale strings attached to the structured-region target.
_HIGH_CONFIDENCE_REASONS = [
    '‚úÖ Well-defined alpha-helical structure',
    '‚úÖ Stable catalytic domain',
    '‚úÖ Low intrinsic disorder',
    '‚úÖ Multiple experimental structures available',
    '‚úÖ High sequence conservation across species',
    '‚úÖ Compact globular fold',
    '‚úÖ Critical for enzymatic activity',
]

# Region expected to be predicted confidently.
# start_residue / end_residue are inclusive residue numbers; expected_plddt is
# an operator-prefixed threshold string ('>85' = mean pLDDT should exceed 85).
HIGH_CONFIDENCE_TARGET = dict(
    name='Human ACE2 Peptidase Domain (Structured)',
    uniprot_id='Q9BYF1',
    start_residue=19,   # start of structured domain
    end_residue=120,    # well-structured alpha-helical region
    region='N-terminal peptidase domain (alpha-helices)',
    expected_plddt='>85',
    why_high_confidence=_HIGH_CONFIDENCE_REASONS,
)

# Rationale strings attached to the flexible-region target.
_LOW_CONFIDENCE_REASONS = [
    '‚ö†Ô∏è Flexible linker region between domains',
    '‚ö†Ô∏è High B-factors in experimental structures',
    '‚ö†Ô∏è Multiple conformations possible',
    '‚ö†Ô∏è Less sequence conservation',
    '‚ö†Ô∏è Lacks regular secondary structure',
    '‚ö†Ô∏è Domain hinge point',
    '‚ö†Ô∏è Intrinsically disordered character',
    '‚ö†Ô∏è Important for protein dynamics',
]

# Region expected to be predicted with low confidence.
# start_residue / end_residue are inclusive residue numbers; expected_plddt is
# an operator-prefixed threshold string ('<70' = mean pLDDT should stay below 70).
LOW_CONFIDENCE_TARGET = dict(
    name='Human ACE2 Collectrin Domain Linker (Flexible)',
    uniprot_id='Q9BYF1',
    start_residue=605,  # linker region before collectrin domain
    end_residue=625,    # transition to collectrin domain
    region='Peptidase-Collectrin linker (flexible hinge)',
    expected_plddt='<70',
    why_low_confidence=_LOW_CONFIDENCE_REASONS,
)

# Start-up banner: summarises the protein and the two residue ranges that the
# analysis will compare. Emitted once, when the module is executed.
_BANNER_RULE = "=" * 80
_hi, _lo = HIGH_CONFIDENCE_TARGET, LOW_CONFIDENCE_TARGET
_banner_lines = [
    _BANNER_RULE,
    "üß¨ ALPHAFOLD DATABASE API - HUMAN ACE2 RECEPTOR ANALYSIS",
    _BANNER_RULE,
    "\nüìä Protein: Human ACE2 (SARS-CoV-2 Receptor)",
    f"   UniProt ID: {ACE2_UNIPROT_ID}",
    "   Total length: 805 residues",
    "   Function: Angiotensin-converting enzyme 2 (SARS-CoV-2 entry receptor)",
    "\nüìç Analysis Regions:",
    f"   1. HIGH CONFIDENCE: Residues {_hi['start_residue']}-{_hi['end_residue']} (Peptidase Domain)",
    f"   2. LOW CONFIDENCE:  Residues {_lo['start_residue']}-{_lo['end_residue']} (Flexible Linker)",
    _BANNER_RULE,
]
for _banner_line in _banner_lines:
    print(_banner_line)

# ============================================================================
# AlphaFold API Functions
# ============================================================================

def get_alphafold_metadata(uniprot_id):
    """
    Fetch prediction metadata for one protein from the AlphaFold Database API.

    Sends a GET request to ``{ALPHAFOLD_API_BASE}/prediction/{uniprot_id}``,
    prints a short summary of the returned record and returns that record.

    Args:
        uniprot_id: UniProt accession to look up (e.g. "Q9BYF1").

    Returns:
        The metadata dict of the prediction (the first entry when the API
        answers with a list), or None when the request fails, the body is not
        valid JSON, or the payload contains no prediction record.
    """
    url = f"{ALPHAFOLD_API_BASE}/prediction/{uniprot_id}"

    print(f"\nüîç Querying AlphaFold Database API...")
    print(f"   URL: {url}")

    try:
        response = requests.get(url, timeout=30)
        response.raise_for_status()
        data = response.json()
    except requests.exceptions.HTTPError as e:
        # e.response is normally set by raise_for_status(); guard anyway so the
        # handler itself can never raise.
        if e.response is not None and e.response.status_code == 404:
            print(f"‚ùå Protein not found in AlphaFold Database")
            print(f"   UniProt ID {uniprot_id} may not have a prediction")
        else:
            print(f"‚ùå API error: {e}")
        return None
    except (requests.exceptions.RequestException, ValueError) as e:
        # Connection problems, timeouts, or a body that is not valid JSON.
        print(f"‚ùå Error: {e}")
        return None

    # The API returns a list of records; use the first one. An empty list
    # means there is nothing to report, so say so explicitly instead of
    # failing later on a list that has no .get().
    if isinstance(data, list):
        if not data:
            print(f"‚ùå Protein not found in AlphaFold Database")
            print(f"   UniProt ID {uniprot_id} may not have a prediction")
            return None
        data = data[0]

    if not isinstance(data, dict):
        print(f"‚ùå Error: unexpected response format ({type(data).__name__})")
        return None

    metadata = data

    print(f"‚úÖ AlphaFold metadata retrieved!")
    print(f"\nüìä Prediction Information:")
    print(f"   UniProt ID: {metadata.get('uniprotAccession', 'N/A')}")
    print(f"   Gene: {metadata.get('gene', 'N/A')}")
    print(f"   Organism: {metadata.get('organismScientificName', 'N/A')}")
    print(f"   Length: {metadata.get('uniprotEnd', 'N/A')} residues")
    print(f"   Model version: {metadata.get('latestVersion', 'N/A')}")
    print(f"   Model created: {metadata.get('modelCreatedDate', 'N/A')}")

    return metadata


def download_alphafold_structure(uniprot_id, version=4):
    """
    Download an AlphaFold predicted structure (PDB format) to the temp directory.

    File URL format:
    https://alphafold.ebi.ac.uk/files/AF-{UNIPROT_ID}-F1-model_v{VERSION}.pdb

    Args:
        uniprot_id: UniProt accession of the protein.
        version: Model version number used in the file name (default 4).

    Returns:
        Path to the downloaded PDB file inside ``tempfile.gettempdir()``, or
        None when the download or the write to disk fails.
    """
    # Construct PDB download URL
    pdb_filename = f"AF-{uniprot_id}-F1-model_v{version}.pdb"
    pdb_url = f"{ALPHAFOLD_FILES_BASE}/{pdb_filename}"

    print(f"\nüì• Downloading AlphaFold structure...")
    print(f"   URL: {pdb_url}")

    try:
        response = requests.get(pdb_url, timeout=60)
        response.raise_for_status()

        local_pdb_path = os.path.join(tempfile.gettempdir(), pdb_filename)

        # Write the raw body in binary mode: no charset guessing, no
        # locale-dependent re-encoding, no newline translation, and the
        # reported size really is a byte count.
        payload = response.content
        with open(local_pdb_path, 'wb') as f:
            f.write(payload)

        print(f"‚úÖ Structure downloaded!")
        print(f"   Saved to: {local_pdb_path}")
        print(f"   Size: {len(payload)} bytes")

        return local_pdb_path

    except requests.exceptions.HTTPError as e:
        if e.response is not None and e.response.status_code == 404:
            print(f"‚ùå Structure file not found")
            print(f"   Try different version or check UniProt ID")
        else:
            print(f"‚ùå Download error: {e}")
        return None
    except (requests.exceptions.RequestException, OSError) as e:
        # Network failure/timeout, or the temp file could not be written.
        print(f"‚ùå Error: {e}")
        return None


# NOTE: extract_plddt_from_alphafold_pdb and analyze_alphafold_region are defined further down, after run_alphafold_analysis.


# ============================================================================
# Main Analysis Pipeline
# ============================================================================

# ...existing code...

def run_alphafold_analysis():
    """
    Run the complete AlphaFold analysis pipeline for human ACE2.

    Steps: fetch the prediction metadata, download the PDB model (falling back
    to older model versions if the advertised one cannot be downloaded),
    analyse the high- and low-confidence target regions, and print a
    side-by-side summary.

    Returns:
        A dict with keys 'metadata', 'high_confidence', 'low_confidence' and
        'delta_plddt' (high mean pLDDT minus low mean pLDDT), or None if the
        metadata, the structure file, or either region analysis is unavailable.
    """
    print("\n" + "üöÄ STARTING ALPHAFOLD DATABASE ANALYSIS" + "\n")

    # Step 1: Get metadata
    metadata = get_alphafold_metadata(ACE2_UNIPROT_ID)

    if metadata is None:
        print("‚ùå Cannot proceed without metadata")
        return None

    # Step 2: Download structure - use version from metadata.
    # `or 4` also covers a present-but-null field, which would otherwise
    # produce a "model_vNone" file name.
    model_version = metadata.get('latestVersion') or 4
    print(f"\nüìå Using model version: {model_version}")

    pdb_file = download_alphafold_structure(ACE2_UNIPROT_ID, version=model_version)

    # If latest version fails, try common versions
    if pdb_file is None:
        print("\nüîÑ Trying alternative versions...")
        for version in (4, 3, 2, 1):
            if version == model_version:
                # Already attempted above; do not repeat the same request.
                continue
            print(f"   Attempting version {version}...")
            pdb_file = download_alphafold_structure(ACE2_UNIPROT_ID, version=version)
            if pdb_file is not None:
                break

    if pdb_file is None:
        print("‚ùå Cannot proceed without structure file")
        return None

    # Step 3: Analyze high confidence region
    print("\n" + "üî¨ ANALYZING HIGH CONFIDENCE REGION" + "\n")
    high_conf_result = analyze_alphafold_region(HIGH_CONFIDENCE_TARGET, pdb_file)

    # Step 4: Analyze low confidence region
    print("\n" + "üî¨ ANALYZING LOW CONFIDENCE REGION" + "\n")
    low_conf_result = analyze_alphafold_region(LOW_CONFIDENCE_TARGET, pdb_file)

    # Step 5: Summary comparison (needs both regions)
    if not (high_conf_result and low_conf_result):
        return None

    print("\n" + "="*80)
    print("üìä ALPHAFOLD COMPARISON SUMMARY - HUMAN ACE2")
    print("="*80)

    delta_plddt = high_conf_result['mean_plddt'] - low_conf_result['mean_plddt']

    print(f"\nHIGH CONFIDENCE (Peptidase Domain):")
    print(f"   ‚Ä¢ Region: {HIGH_CONFIDENCE_TARGET['start_residue']}-{HIGH_CONFIDENCE_TARGET['end_residue']}")
    print(f"   ‚Ä¢ Mean pLDDT: {high_conf_result['mean_plddt']:.1f}%")
    print(f"   ‚Ä¢ Confidence: {high_conf_result['confidence']}")
    print(f"   ‚Ä¢ Status: {'‚úÖ' if high_conf_result['matches_expectation'] else '‚ö†Ô∏è'}")

    print(f"\nLOW CONFIDENCE (Flexible Linker):")
    print(f"   ‚Ä¢ Region: {LOW_CONFIDENCE_TARGET['start_residue']}-{LOW_CONFIDENCE_TARGET['end_residue']}")
    print(f"   ‚Ä¢ Mean pLDDT: {low_conf_result['mean_plddt']:.1f}%")
    print(f"   ‚Ä¢ Confidence: {low_conf_result['confidence']}")
    print(f"   ‚Ä¢ Status: {'‚úÖ' if low_conf_result['matches_expectation'] else '‚ö†Ô∏è'}")

    print(f"\nKEY FINDINGS:")
    print(f"   ‚Ä¢ Œî pLDDT: {delta_plddt:.1f} points")
    print(f"   ‚Ä¢ AlphaFold clearly distinguishes structured vs flexible regions")
    print(f"   ‚Ä¢ Low pLDDT regions (<70%) are ideal for quantum conformational sampling")
    print(f"   ‚Ä¢ ACE2 is the human receptor for SARS-CoV-2 spike protein binding")

    print("\n" + "="*80)

    return {
        'metadata': metadata,
        'high_confidence': high_conf_result,
        'low_confidence': low_conf_result,
        'delta_plddt': delta_plddt
    }


def extract_plddt_from_alphafold_pdb(pdb_file, start_residue, end_residue):
    """
    Extract per-residue pLDDT scores for a residue range of an AlphaFold PDB file.

    AlphaFold stores pLDDT in the B-factor column, so each residue's score is
    read from the B-factor field of its alpha-carbon (CA) ATOM record.

    Args:
        pdb_file: Path to the PDB file.
        start_residue: First residue number to include (inclusive).
        end_residue: Last residue number to include (inclusive).

    Returns:
        A numpy array with one value per matching CA record, in file order
        (empty when no record matches), or None if the file cannot be read or
        a CA record has a non-numeric residue number or B-factor.
    """
    plddt_scores = []

    try:
        # Only fixed ASCII columns are parsed, so undecodable bytes elsewhere
        # in the file are replaced rather than treated as fatal.
        with open(pdb_file, 'r', encoding='utf-8', errors='replace') as f:
            for line in f:
                if not line.startswith('ATOM'):
                    continue

                # The atom name occupies columns 13-16. Test that field rather
                # than searching the whole record, so a ' CA ' that happens to
                # appear in another column cannot be mistaken for an alpha
                # carbon.
                if line[12:16].strip() != 'CA':
                    continue

                # Residue number: columns 23-26
                res_num = int(line[22:26])
                if start_residue <= res_num <= end_residue:
                    # B-factor (pLDDT): columns 61-66
                    plddt_scores.append(float(line[60:66]))

    except (OSError, ValueError) as e:
        # Unreadable file, or a truncated/malformed CA record.
        print(f"‚ùå Error parsing PDB file: {e}")
        return None

    return np.array(plddt_scores)


def analyze_alphafold_region(target_info, pdb_file):
    """
    Summarise the pLDDT scores of one target region of an AlphaFold model.

    Reads 'name', 'region', 'start_residue', 'end_residue' and
    'expected_plddt' from ``target_info``, extracts the scores for that
    residue range from ``pdb_file``, prints the statistics and returns them.

    Returns a dict with keys 'target_info', 'plddt', 'mean_plddt',
    'std_plddt', 'min_plddt', 'max_plddt', 'confidence' and
    'matches_expectation', or None when no scores could be extracted.
    """
    first_res = target_info['start_residue']
    last_res = target_info['end_residue']

    print(f"üìä Analyzing: {target_info['name']}")
    print(f"   Region: {target_info['region']}")
    print(f"   Residues: {first_res}-{last_res}")
    print(f"   Expected pLDDT: {target_info['expected_plddt']}")

    scores = extract_plddt_from_alphafold_pdb(pdb_file, first_res, last_res)

    # Nothing to summarise: parsing failed or the range matched no residues.
    if scores is None or len(scores) == 0:
        print(f"‚ùå Failed to extract pLDDT scores")
        return None

    average = np.mean(scores)
    spread = np.std(scores)
    lowest = np.min(scores)
    highest = np.max(scores)

    # Label by the first (strict) lower bound the mean exceeds; 'LOW' otherwise.
    for lower_bound, label in ((90, 'VERY HIGH'), (70, 'HIGH'), (50, 'MEDIUM')):
        if average > lower_bound:
            confidence = label
            break
    else:
        confidence = 'LOW'

    # The expectation is written as '>N' or '<N'; anything without '>' is
    # treated as an upper bound.
    spec = target_info['expected_plddt']
    expects_above = '>' in spec
    cutoff = float(spec.replace('>' if expects_above else '<', ''))
    matches = average > cutoff if expects_above else average < cutoff

    print(f"\n‚úÖ Analysis Complete!")
    print(f"   Mean pLDDT: {average:.1f}%")
    print(f"   Std Dev: {spread:.1f}")
    print(f"   Range: {lowest:.1f} - {highest:.1f}")
    print(f"   Confidence: {confidence}")
    print(f"   Matches expectation: {'‚úÖ' if matches else '‚ö†Ô∏è'}")

    return {
        'target_info': target_info,
        'plddt': scores,
        'mean_plddt': average,
        'std_plddt': spread,
        'min_plddt': lowest,
        'max_plddt': highest,
        'confidence': confidence,
        'matches_expectation': matches,
    }



# ============================================================================
# Execute Analysis
# ============================================================================

if __name__ == "__main__":
    # Script entry point: run the pipeline and persist its result dict.
    results = run_alphafold_analysis()

    if not results:
        print("\n‚ùå Analysis failed")
    else:
        import pickle

        # Written relative to the current working directory.
        output_name = 'alphafold_ace2_results.pkl'
        with open(output_name, 'wb') as handle:
            pickle.dump(results, handle)

        print(f"\nüíæ Results saved to: {output_name}")
        print("‚úÖ AlphaFold analysis complete!")

üß¨ ALPHAFOLD DATABASE API - HUMAN ACE2 RECEPTOR ANALYSIS

üìä Protein: Human ACE2 (SARS-CoV-2 Receptor)
   UniProt ID: Q9BYF1
   Total length: 805 residues
   Function: Angiotensin-converting enzyme 2 (SARS-CoV-2 entry receptor)

üìç Analysis Regions:
   1. HIGH CONFIDENCE: Residues 19-120 (Peptidase Domain)
   2. LOW CONFIDENCE:  Residues 605-625 (Flexible Linker)

üöÄ STARTING ALPHAFOLD DATABASE ANALYSIS


üîç Querying AlphaFold Database API...
   URL: https://alphafold.ebi.ac.uk/api/prediction/Q9BYF1
‚úÖ AlphaFold metadata retrieved!

üìä Prediction Information:
   UniProt ID: Q9BYF1
   Gene: ACE2
   Organism: Homo sapiens
   Length: 805 residues
   Model version: 6
   Model created: 2025-08-01T00:00:00Z

üìå Using model version: 6

üì• Downloading AlphaFold structure...
   URL: https://alphafold.ebi.ac.uk/files/AF-Q9BYF1-F1-model_v6.pdb
‚úÖ Structure downloaded!
   Saved to: /tmp/AF-Q9BYF1-F1-model_v6.pdb
   Size: 537029 bytes

üî¨ ANALYZING HIGH CONFIDENCE REGION

üìä A