In [None]:
import pandas as pd

def find_variant_rows(df, variant, column='cdna'):
    """Return the rows of *df* whose *column* contains *variant* literally.

    The match is a case-insensitive substring test, so a value that carries
    extra text around the variant still matches.

    Args:
        df: DataFrame holding the variant table.
        variant: cDNA variant text to look for, e.g. ``'c.548-17G>T'``.
        column: Name of the column to search (defaults to ``'cdna'``).

    Returns:
        A DataFrame with the matching rows (empty when nothing matches).
    """
    # regex=False is essential: HGVS strings contain '.', which as a regular
    # expression would match any character and yield false positives.
    mask = df[column].astype(str).str.contains(
        variant, case=False, na=False, regex=False
    )
    return df[mask]


def format_variant_report(row, variant):
    """Build the report lines for one matching row.

    Args:
        row: A row of the variant table (as yielded by ``iterrows``); read
            via the keys ``cdna``, ``ref``, ``alt``, ``pos``, ``chr``,
            ``genomic_hgvs_38`` and, optionally, ``genomic_hgvs_37``.
        variant: The variant text that was searched for.

    Returns:
        A list of strings, one per output line.
    """
    lines = [
        f'Variant: {row["cdna"]}',
        f'Reference base (ref): {row["ref"]}',
        f'Alternative base (alt): {row["alt"]}',
        f'Genomic position: {row["pos"]}',
        f'Chromosome: {row["chr"]}',
        f'Genomic HGVS (GRCh38): {row["genomic_hgvs_38"]}',
    ]
    # The GRCh37 value is only reported when the key exists and is not NA.
    if pd.notna(row.get("genomic_hgvs_37")):
        lines.append(f'Genomic HGVS (GRCh37): {row["genomic_hgvs_37"]}')
    lines.extend([
        '',
        'Summary:',
        f'  • Reference DNA base: {row["ref"]}',
        f'  • Alternative DNA base: {row["alt"]}',
        f'  • This corresponds to the cDNA change: {variant}',
    ])
    return lines


# The guard is true both in a notebook cell and when run as a script; the
# names assigned below stay at module level, so later code can still use them.
if __name__ == '__main__':
    # Read the TSV file
    df = pd.read_csv('~/brca_test_data/output/variants_output.tsv', sep='\t')

    # Search for the specific variant c.548-17G>T
    target_variant = 'c.548-17G>T'
    print(f'Searching for variant: {target_variant}')
    print('=' * 50)

    # Find the variant in the cdna column
    matches = find_variant_rows(df, target_variant)

    if matches.empty:
        print(f'Variant {target_variant} not found in the dataset.')
    else:
        for _, row in matches.iterrows():
            print('\n'.join(format_variant_report(row, target_variant)))