# BasicFeatureGenerator Demo

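Demonstrates basic peptide-level feature generation with `BasicFeatureGenerator` on a small sample of PSMs: the generator adds charge-state indicator columns (`charge_1` … `charge_6`, `charge_gt_6`) and a `mod_num` column.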


In [1]:
import pandas as pd
from dia_aspire_rescore.features import BasicFeatureGenerator
from dia_aspire_rescore.io import read_diann2


In [2]:
# Read the first-pass result table, then keep a small working sample:
# the first 100 rows whose `fdr1_search1` value is below 0.01.
psm_df = read_diann2(
    "../data/raw/SYS026_RA957/DDA_SYSMHC_bynam/lib-base-result-first-pass.parquet"
)
psm_sample = (
    psm_df.loc[psm_df["fdr1_search1"] < 0.01]
    .head(100)
    .copy()  # independent copy so later column additions do not touch psm_df
)
print(f"Loaded {len(psm_sample)} PSMs")




Loaded 100 PSMs


In [3]:
# Build the generator with no arguments and show the names of the
# feature columns it reports via `feature_names`.
generator = BasicFeatureGenerator()
planned_features = generator.feature_names
print(f"Features to generate: {planned_features}")


Features to generate: ['charge_1', 'charge_2', 'charge_3', 'charge_4', 'charge_5', 'charge_6', 'charge_gt_6', 'mod_num']


In [4]:
# Run the generator on the sample and preview the first ten rows of the
# generated feature columns.
# NOTE(review): `psm_sample` is rebound to the generator's return value, so
# re-running this cell feeds the already-augmented frame back into
# `generate` -- confirm that is harmless.
psm_sample = generator.generate(psm_sample)
preview_columns = generator.feature_names
psm_sample.loc[:, preview_columns].head(10)


Unnamed: 0,charge_1,charge_2,charge_3,charge_4,charge_5,charge_6,charge_gt_6,mod_num
1,0,1,0,0,0,0,0,0
2,0,1,0,0,0,0,0,0
13,0,1,0,0,0,0,0,0
14,0,1,0,0,0,0,0,0
15,1,0,0,0,0,0,0,0
16,1,0,0,0,0,0,0,0
19,0,1,0,0,0,0,0,0
21,0,1,0,0,0,0,0,0
22,0,1,0,0,0,0,0,0
27,0,1,0,0,0,0,0,0


In [5]:
# Summary statistics (count, mean, std, quartiles, min/max) for every
# generated feature column; the bare last expression renders the table.
print("Feature Statistics:")
feature_stats = psm_sample[generator.feature_names].describe()
feature_stats


Feature Statistics:


Unnamed: 0,charge_1,charge_2,charge_3,charge_4,charge_5,charge_6,charge_gt_6,mod_num
count,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0
mean,0.12,0.88,0.0,0.0,0.0,0.0,0.0,0.04
std,0.326599,0.326599,0.0,0.0,0.0,0.0,0.0,0.196946
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
max,1.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0
