# Variant Effect Prediction with Motif Interpretation with Attributions

## CLI API

In [1]:
# Show the command-line options of the `vep-attribution` subcommand.
! decima vep-attribution --help

Usage: decima vep-attribution [OPTIONS]

  Predict variant effect and save to parquet

  Examples:

      >>> decima vep-attribution -v "data/sample.vcf" -o "vep_results.h5"

Options:
  -v, --variants PATH             Path to the variant file .vcf file. VCF file
                                  need to be normalized. Try normalizing th
                                  vcf file incase of an error. `bcftools norm
                                  -f ref.fasta input.vcf.gz -o output.vcf.gz`
  -o, --output_h5 PATH            Path to the output h5 file.
  --tasks TEXT                    Tasks to predict. If not provided, all tasks
                                  will be predicted.
  --off-tasks TEXT                Tasks to contrast against. If not provided,
                                  no contrast will be performed.
  --model INTEGER                 Model to use for attribution analysis.
                                  Available options: ['v1_rep0', 'v1_rep1',
                   

In [None]:
# Run the `vep-attribution` subcommand (the one documented by the help output above)
# on the sample VCF; `-o` is the path of the output h5 file.
! decima vep-attribution -v "data/sample.vcf" -o "vep_vcf_attributions.h5"

In [None]:
# List every file whose name starts with the output prefix used in the command above.
! ls vep_vcf_attributions.*


## Python API

In [None]:
import h5py
import torch
from decima.core.attribution import VariantAttributionResult
from decima.vep import variant_effect_attribution

# Use the GPU when torch reports one is available, otherwise fall back to the CPU.
# NOTE(review): `device` is not passed to any call in the cells visible here —
# confirm it is consumed further down, or drop it.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Render matplotlib figures inline in the notebook output.
%matplotlib inline

In [None]:
# Input variants and the h5 file that the following cells open for reading.
vcf_path = "tests/data/test.vcf"
attributions_h5 = "vep_vcf_attributions.h5"

# Same call as the CLI section, made through the Python API.
variant_effect_attribution(vcf_path, attributions_h5, method="inputxgradient", model=0)

In [None]:
# Open the result file read-only and print its top-level keys.
with h5py.File("vep_vcf_attributions.h5", "r") as h5_file:
    top_level_keys = h5_file.keys()
    print(top_level_keys)

In [None]:
# Read the `genes` and `variants` attributes from the result file; both names
# are reused by the cells below.
with VariantAttributionResult(
    "vep_vcf_attributions.h5",
    tss_distance=10_000,
    num_workers=1,
) as ar:
    genes, variants = ar.genes, ar.variants
    print(genes)
    print(variants)

In [None]:
# Call `ar.load` for the variants/genes read above and unpack the result into
# four names; only the two reference-side values are printed.
with VariantAttributionResult(
    "vep_vcf_attributions.h5",
    tss_distance=10_000,
    num_workers=1,
) as ar:
    loaded = ar.load(variants, genes)
    seqs_ref, attrs_ref, seqs_alt, attrs_alt = loaded
    print(seqs_ref)
    print(attrs_ref)

In [None]:
# Re-open the results and unpack `ar.load(...)` into the two objects that the
# plotting cells at the end of the notebook call `.plot_seqlogo(...)` on.
# NOTE(review): the previous cell unpacks the very same `ar.load(variants, genes)`
# call into four values (seqs_ref, attrs_ref, seqs_alt, attrs_alt), while this cell
# unpacks two — so one of the two cells presumably raises ValueError. A different
# loader that returns plottable attribution objects may have been intended here;
# TODO confirm against the VariantAttributionResult API.
with VariantAttributionResult("vep_vcf_attributions.h5", tss_distance=10_000, num_workers=1) as ar:
    attribution_ref, attribution_alt = ar.load(variants, genes)

In [None]:
# Run recursive seqlet calling for the variants/genes read above; the call is
# unpacked into two results (`df_peaks`, `df_motifs`) that the next cells display.
with VariantAttributionResult("vep_vcf_attributions.h5", tss_distance=10_000, num_workers=1) as ar:
    df_peaks, df_motifs = ar.recursive_seqlet_calling(variants, genes)

In [None]:
# Display the first result of `recursive_seqlet_calling`.
df_peaks

In [None]:
# Display the second result of `recursive_seqlet_calling`.
df_motifs

In [None]:
# Plot a sequence logo from `attribution_ref`.
# NOTE(review): relative_loc=291 is an unexplained magic number — presumably the
# position of interest for this test variant; confirm and document where it comes from.
attribution_ref.plot_seqlogo(relative_loc=291)

In [None]:
# Plot a sequence logo from `attribution_alt` at the same `relative_loc` as the
# `attribution_ref` plot, so the two logos can be compared.
# NOTE(review): relative_loc=291 is an unexplained magic number — confirm its origin.
attribution_alt.plot_seqlogo(relative_loc=291)