# HP Model Deep Dive

Comprehensive tutorial on the Hydrophobic-Polar (HP) lattice model for protein folding.

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from quantum_protein_folding.data import load_hp_sequence
from quantum_protein_folding.data.preprocess import map_to_lattice
from quantum_protein_folding.classical import exact_enumeration_fold
from quantum_protein_folding.analysis import compute_contact_map, compute_rmsd

## Understanding the HP Model

### Energy Function

The HP model simplifies protein folding to:

$$E = \sum_{\substack{(i,j) \text{ in contact} \\ |i-j| > 2}} \epsilon_{ij}$$

where $\epsilon_{HH} = -1$ (favorable) and $\epsilon_{HP} = \epsilon_{PP} = 0$ (neutral).

## Example 1: Short Sequence (Exact Solution)

In [None]:
# A five-residue chain: short enough for the exact enumeration used below
short_seq = "HPHPH"
sequence = load_hp_sequence(short_seq)

# Report the raw string and the contact matrix carried by the loaded sequence
print(
    f"Sequence: {short_seq}",
    f"Contact Matrix:\n{sequence.contact_matrix}",
    sep="\n",
)

In [None]:
# Map the loaded chain onto a lattice (lattice_dim=2) using the turn-direction encoding
encoding = map_to_lattice(
    sequence,
    lattice_dim=2,
    encoding_type='turn_direction',
)

# Size of the resulting problem: qubit count and number of Hamiltonian terms
print(
    f"Number of qubits: {encoding.n_qubits}",
    f"Hamiltonian terms: {len(encoding.hamiltonian)}",
    sep="\n",
)

In [None]:
# Ground state by exact enumeration; max_conformations is set to 10000
exact_result = exact_enumeration_fold(
    encoding,
    max_conformations=10000,
)

# Energy, number of iterations reported by the solver, and the best conformation
print(
    f"Exact ground state energy: {exact_result.energy:.4f}",
    f"Conformations checked: {exact_result.n_iterations}",
    f"Optimal conformation:\n{exact_result.conformation}",
    sep="\n",
)

## Example 2: Benchmark Sequences

Standard HP sequences from literature:

In [None]:
def hh_contact_upper_bound(seq):
    """Upper bound on the number of H-H contacts of `seq` on the 2D square lattice.

    Bonded neighbours alternate between the two sublattices, so residues i and j
    can only occupy adjacent sites when |i - j| is odd. Every H-H contact
    therefore pairs one even-index H with one odd-index H. An interior residue
    has 2 lattice neighbours not taken by chain bonds, a chain end has 3.

    Args:
        seq: HP string made of the characters 'H' and 'P'.

    Returns:
        The smaller of the two per-parity contact capacities (an int >= 0).
    """
    last = len(seq) - 1
    capacity = [0, 0]  # [even-index H residues, odd-index H residues]
    for idx, residue in enumerate(seq):
        if residue == 'H':
            capacity[idx % 2] += 3 if idx in (0, last) else 2
    return min(capacity)


# Benchmark HP sequences; the numeric suffix of each key is the chain length.
benchmark_sequences = {
    'S1_10': 'HPHPPHHPHH',  # 10-mer
    'S2_12': 'HPHPHPHPHPHP',  # 12-mer
    'S3_16': 'HPHPPHHPPHHPPHPH',  # 16-mer
    # Standard 2D 20-mer benchmark (Unger & Moult), reported optimum E = -9
    'S4_20': 'HPHPPHHPHPPHPHHPPHPH',  # 20-mer
}

# Known ground state energies (2D square lattice, E = -(number of H-H contacts))
known_energies = {
    'S1_10': -4,
    'S2_12': 0,  # every H sits on an even index, so no H-H contact can form
    'S3_16': -7,  # NOTE(review): not independently verified here - confirm against the source
    'S4_20': -9,
}

for name, seq in benchmark_sequences.items():
    energy = known_energies[name]
    # Cheap sanity checks so a typo in either table fails loudly on Run All
    assert len(seq) == int(name.rsplit('_', 1)[1]), f"{name}: length does not match key"
    assert -energy <= hh_contact_upper_bound(seq), f"{name}: E = {energy} is unreachable"
    print(f"{name}: {seq} (known E = {energy})")

## Example 3: Contact Map Analysis

In [None]:
# Contact map of the conformation returned by the exact enumeration
# NOTE(review): if coordinates are unit-spaced lattice sites, diagonal sites are
# ~1.414 apart and fall below cutoff=1.5 - confirm compute_contact_map's convention
contact_map = compute_contact_map(exact_result.conformation, cutoff=1.5)

# Render the map as a heat-map with values clipped to [0, 1]
fig, ax = plt.subplots(figsize=(6, 6))
im = ax.imshow(contact_map, cmap='Blues', vmin=0, vmax=1)
ax.set_title('Contact Map', fontweight='bold')
ax.set(xlabel='Residue Index', ylabel='Residue Index')
fig.colorbar(im, ax=ax)
fig.tight_layout()
plt.show()

# Keep only entries at least three positions above the diagonal (j - i >= 3)
upper_nonlocal = np.triu(contact_map, k=3)
n_contacts = np.sum(upper_nonlocal)
print(f"Number of non-local contacts: {int(n_contacts)}")

## Example 4: Encoding Comparison

Compare binary position vs turn direction encoding:

In [None]:
# Compare the qubit cost of the two lattice encodings on the same 10-residue chain
test_seq = "HPHPPHHPHH"
# Bind to a fresh name: `sequence` still refers to the short chain that
# `encoding` and `exact_result` were built from, so re-running those cells
# after this one keeps giving the same results.
test_sequence = load_hp_sequence(test_seq)

# Turn encoding
enc_turn = map_to_lattice(test_sequence, lattice_dim=2, encoding_type='turn_direction')

# Binary encoding
enc_binary = map_to_lattice(test_sequence, lattice_dim=2, encoding_type='binary_position')

print("Encoding Comparison:")
print(f"Turn Direction: {enc_turn.n_qubits} qubits")
print(f"Binary Position: {enc_binary.n_qubits} qubits")
print(f"\nTurn encoding is {enc_binary.n_qubits / enc_turn.n_qubits:.1f}x more efficient!")

## Summary

Key takeaways:
- HP model provides a simplified but meaningful protein folding problem
- Turn direction encoding is more qubit-efficient than binary position encoding
- Exact solutions are only feasible for N ≤ 10
- Contact maps reveal structural features