# MultiPepGen Tutorial: Reproducibility of Results

This notebook demonstrates how to use the **MultiPepGen** package to load a pre-trained model, generate targeted peptide sequences, and visualize their physicochemical properties.

In [None]:
import os
import sys
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Make the in-repo package importable when MultiPepGen has not been installed.
# NOTE: '../src' is a relative entry, so it is resolved against the kernel's
# current working directory. This assumes the notebook is launched from its
# own folder, one level below the repository root (TODO confirm the layout).
sys.path.append('../src')

# Project-local imports; when the package is not installed they rely on the
# sys.path entry added above, so they must stay below it.
from multipepgen.models.cgan import ConditionalGAN
from multipepgen.utils.descriptors import get_descriptors
from multipepgen.config import LABELS

# Reaching this line means every import above succeeded.
print("Libraries loaded successfully.")

## 1. Model Initialization

We configure the CGAN model with default parameters.

In [None]:
# Hyperparameters for the conditional GAN, gathered in one mapping so they are
# easy to find and tune before the model is built.
cgan_params = {
    'sequence_length': 35,
    'vocab_size': 21,
    'latent_dim': 100,
    # One conditioning class per entry in LABELS.
    'num_classes': len(LABELS),
}
gan = ConditionalGAN(**cgan_params)

# No weights are loaded by default. To use pre-trained weights, uncomment:
# gan.load_model_weights('../models/pretrained/multipepgen')

print("Model initialized.")

## 2. Batch Peptide Generation

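We will generate 50 peptides conditioned on the `microbiano` ('microbial') and `bacteriano` ('bacterial') activity classes. Note that the class labels are passed to the model using their Spanish identifiers.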

In [None]:
# Class labels are passed to the generator exactly as written here.
# NOTE(review): presumably they must match entries in LABELS; confirm.
target_classes = ['microbiano', 'bacteriano']
num_seqs = 50

generated_df = gan.generate_class(num_seqs, target_classes)

# Report the number of rows actually returned rather than the number requested,
# so the message stays truthful if the generator yields a different count.
print(f"Generated {len(generated_df)} peptides for classes: {target_classes}")

# Bare last expression: Jupyter renders the preview as a rich table.
generated_df.head()

## 3. Property Analysis (Descriptors)

We will calculate physicochemical properties to validate the quality of the sequences.

In [None]:
def calculate_properties(df):
    """Build a table of descriptors for every sequence in ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a ``'sequence'`` column. Each value in that column is
        passed unchanged to ``get_descriptors``.

    Returns
    -------
    pandas.DataFrame
        One row per input sequence, in input order, built from whatever
        ``get_descriptors`` returns for each sequence. The result carries the
        same index as ``df`` so it can be joined back onto the input rows.

    Raises
    ------
    KeyError
        If ``df`` has no ``'sequence'`` column.
    """
    sequences = df['sequence']
    descriptor_rows = [get_descriptors(seq) for seq in sequences]
    # Reuse the input index: with the default RangeIndex this is identical to
    # the plain constructor, and with any other index it keeps each property
    # row aligned with the sequence it was computed from.
    return pd.DataFrame(descriptor_rows, index=sequences.index)

# One row of descriptors per generated peptide.
properties_df = calculate_properties(generated_df)
print("Properties calculated.")

# Bare last expression: Jupyter renders the summary statistics as a rich table.
properties_df.describe()

## 4. Result Visualization

We replicate a typical distribution plot for Net Charge and Hydrophobicity.

In [None]:
# Each panel is described once: (column in properties_df, axis label, bar colour).
# The title is derived from the label, so both panels share one code path.
panels = [
    ('NetCharge', 'Net Charge', 'skyblue'),
    ('Hydrophobicity', 'Hydrophobicity', 'salmon'),
]

# Explicit Figure/Axes objects instead of the implicit pyplot "current axes";
# squeeze=False keeps `axes` a 2-D array regardless of the number of panels.
fig, axes = plt.subplots(1, len(panels), figsize=(10, 5), squeeze=False)

for ax, (column, label, color) in zip(axes.ravel(), panels):
    sns.histplot(properties_df[column], kde=True, color=color, ax=ax)
    ax.set_title(f'{label} Distribution')
    ax.set_xlabel(label)

fig.tight_layout()
plt.show()