# NetMHCII Peptide Binding Prediction Demo

This notebook demonstrates the core functionality of the NetMHCII pipeline for predicting peptide binding to MHC class II molecules.

## Setup and Imports

In [None]:
import os
import pandas as pd
from pathlib import Path

from src.predictor import (
    NetMHCIIPredictor,
    PeptideProcessor,
    AlleleMapper,
    SampleDataProcessor
)
from src.predictor.utils import FileManager

## Configuration

In [None]:
# Filesystem locations used by this notebook (placeholder values -- edit
# them for your environment before running).
RESULTS_PATH = "/path/to/results"
MODEL_PATH = "/path/to/netMHCIIpan-4.3"
# NOTE(review): BASE_PATH is not referenced by any cell shown here; the
# predictor below receives RESULTS_PATH as its base path -- confirm intent.
BASE_PATH = "/path/to/base/directory"

# Build the predictor from its keyword configuration
predictor_config = {
    "base_path": RESULTS_PATH,
    "model_path": MODEL_PATH,
}
predictor = NetMHCIIPredictor(**predictor_config)

## Quick Start Example

In [None]:
# Smoke-test the predictor on a small peptide / allele panel
test_alleles = ['DRB1_0101', 'DRB1_0301']
test_peptides = ['FVNQHLCGSHLVEAL', 'PKYVKQNTLKLAT']

results = predictor.run_prediction(test_peptides, test_alleles)

# Header and results on separate lines, emitted with a single call
print("\nSingle prediction results:", results, sep="\n")

## Process MS Data

In [None]:
# Read the MS table from CSV (placeholder path -- edit before running)
ms_data = pd.read_csv("path/to/ms_data.csv")

# Build the sample processor with a minimum peptide length of 13 and run
# it over the MS table
processor = SampleDataProcessor(min_peptide_length=13)
processed_samples = processor.process_ms_data(ms_data)

# Report how many samples came out of processing
n_samples = len(processed_samples)
print(f"\nProcessed {n_samples} samples")

## Map HLA Alleles

In [None]:
# Build the mapper from the allele list stored under the model directory
allele_list_file = os.path.join(MODEL_PATH, "data/allele.list")
allele_mapper = AlleleMapper(allele_list_file)

# Read the HLA typing table (placeholder path -- edit before running)
hla_typing = pd.read_csv("path/to/hla_typing.csv")

# For each typing row whose SampleID is among the processed samples, store
# the first value returned by process_hla_typing on that sample.
for _idx, typing_row in hla_typing.iterrows():
    sample_id = typing_row['SampleID']
    if sample_id not in processed_samples:
        # Typing rows without a matching processed sample are skipped
        continue
    valid_alleles, _ = allele_mapper.process_hla_typing(typing_row)
    processed_samples[sample_id].alleles = valid_alleles

## Batch Prediction

In [None]:
# Predict for every processed sample in one call
results = predictor.process_samples(processed_samples)

# Per-sample summary; entries whose result is None are skipped
for sample_id, result_df in results.items():
    if result_df is None:
        continue
    print(f"\nSample {sample_id}:")
    n_predictions = len(result_df)
    print(f"  Total predictions: {n_predictions}")
    # Count rows whose '%Rank_EL' value is below 2
    below_cutoff = result_df['%Rank_EL'] < 2
    n_strong = below_cutoff.sum()
    print(f"  Strong binders (<2%): {n_strong}")

## Save Results


In [None]:
# Persist each sample's result table; None entries are skipped
for sample_id, result_df in results.items():
    if result_df is None:
        continue
    predictor.save_sample_results(result_df, sample_id)

print("\nResults saved to:", RESULTS_PATH)