# VCF Subset Extraction Demo - pyMut




In [None]:
import sys
import os
import logging

# Locate the package sources: the 'src' directory two levels above the
# current working directory.
project_root = os.path.abspath(
    os.path.join(os.getcwd(), os.pardir, os.pardir, 'src')
)

# Register the directory on the import path only once, so re-running this
# cell does not append duplicate entries.
if sys.path.count(project_root) == 0:
    sys.path.append(project_root)

print('✅ PYTHONPATH configurado para incluir:', project_root)


## Configuración de Logging

Configuramos el logging a nivel `INFO` para ver **los detalles** del proceso de extracción (entrada, salida, filtros aplicados y tamaños de archivo).


In [2]:
# Reset the root logger so INFO-level records are printed with a timestamp,
# level, logger name and message. force=True replaces any handlers that were
# already installed on the root logger.
logging.basicConfig(
    format="%(asctime)s | %(levelname)s | %(name)s | %(message)s",
    level=logging.INFO,
    force=True,
)

# Explicitly set the 'pyMut.input' logger to INFO as well.
# NOTE(review): the extraction logs shown later in this notebook are emitted
# by 'pyMut.utils.create_subset'; confirm this logger name is still the one
# intended here.
logger = logging.getLogger('pyMut.input')
logger.setLevel(logging.INFO)

## Importar la Función de Extracción

Importamos la función que nos permite extraer subconjuntos del VCF.


In [4]:
# Import the VCF subset helper from the pyMut package. Unless pyMut is
# installed in the environment, this import is presumably resolved through the
# 'src' directory that the setup cell added to sys.path — run that cell first.
from pyMut.utils.create_subset import extract_vcf_subset

print("✅ Función extract_vcf_subset importada correctamente")


✅ Función extract_vcf_subset importada correctamente


## Extracción

In [5]:
from pathlib import Path

# Paths are relative to the current working directory. The setup cell assumes
# that directory sits two levels below the repository root (it adds
# '../../src' to sys.path), so the example VCF must be reached through
# '../../src/...' as well. The previous '../src/...' input and
# '../docs/examples/...' output pointed one level too shallow and did not
# match the paths logged by the recorded run.
vcf_file = "../../src/pyMut/data/examples/ALL.chr10.shapeit2_integrated_snvindels_v2a_27022019.GRCh38.phased.vcf.gz"

# Extract at most 1000 variants into a VCF in the current working directory.
output_file_2 = "subset_demo_1000_python.vcf"

success = extract_vcf_subset(
    input_vcf_path=vcf_file,
    output_vcf_path=output_file_2,
    max_variants=1000,
)

# Report the outcome; the size is only shown when the call succeeded and the
# output file is actually present on disk.
print(f"\n📊 Resultado: {'✅ ÉXITO' if success else '❌ FALLÓ'}")
output_path = Path(output_file_2)
if success and output_path.exists():
    size_kb = output_path.stat().st_size / 1024
    print(f"📄 Archivo creado: {output_file_2} ({size_kb:.1f} KB)")


2025-07-06 23:32:25,542 | INFO | pyMut.utils.create_subset | Extracting VCF subset
2025-07-06 23:32:25,542 | INFO | pyMut.utils.create_subset | Input: ../../src/pyMut/data/examples/ALL.chr10.shapeit2_integrated_snvindels_v2a_27022019.GRCh38.phased.vcf.gz
2025-07-06 23:32:25,542 | INFO | pyMut.utils.create_subset | Output: subset_demo_1000_python.vcf
2025-07-06 23:32:25,543 | INFO | pyMut.utils.create_subset | Max variants: 1000
2025-07-06 23:32:25,543 | INFO | pyMut.utils.create_subset | Chromosome filter: None
2025-07-06 23:32:25,543 | INFO | pyMut.utils.create_subset | Position range: None-None
2025-07-06 23:32:25,621 | INFO | pyMut.utils.create_subset | VCF subset extraction completed successfully
2025-07-06 23:32:25,621 | INFO | pyMut.utils.create_subset | Variants extracted: 1000
2025-07-06 23:32:25,621 | INFO | pyMut.utils.create_subset | Input file size: 641.30 MB
2025-07-06 23:32:25,622 | INFO | pyMut.utils.create_subset | Output file size: 9.85 MB
2025-07-06 23:32:25,622 | INF


📊 Resultado: ✅ ÉXITO
📄 Archivo creado: subset_demo_1000_python.vcf (10090.9 KB)
