In [2]:
import vmc

# Copy Number
This notebook demonstrates a *proposed* Copy Number model for GA4GH VR.

CNVs of a region may be:
* empirical (location not known) or located (location known)
* absolute or relative copy number (CN)
* defined at any valid Location


## ① CNV of a simple SequenceLocation, copy location unknown/unspecified

In [3]:
# Copy-number state: 3 to 5 copies, measured as a TOTAL count.
# No location is attached to the state, so where the copies reside is
# left unspecified.
cnvstate = vmc.models.CNVState(
    copy_measure="TOTAL",
    min_copies=3,
    max_copies=5,
)

# The region being counted: start=20, end=30 on sequence NM_0001234.5.
sr = vmc.models.SimpleInterval(start=20, end=30)
sl = vmc.models.SequenceLocation(region=sr, sequence_id="NM_0001234.5")

# Pair the location with the state, then assign the identifier returned
# by vmc.computed_id before displaying the Allele as a dict.
a = vmc.models.Allele(state=cnvstate, location=sl)
a.id = vmc.computed_id(a)
a.as_dict()

{'id': 'VMC:GA_jfxtN9iWJDjh1qt8i7sOuFAcSvhDhdvp',
 'location': {'region': {'end': 30, 'start': 20, 'type': 'SimpleInterval'},
  'sequence_id': 'NM_0001234.5',
  'type': 'SequenceLocation'},
 'state': {'copy_measure': 'TOTAL',
  'max_copies': 5,
  'min_copies': 3,
  'type': 'CNVState'},
 'type': 'Allele'}

## ② Same CNV, now with known location

In [22]:
# The region being counted: start=20, end=30 on sequence NM_0001234.5.
# Built with SimpleInterval, the same constructor used in section ① and the
# type reported for 'region' in this cell's output.
sr = vmc.models.SimpleInterval(start=20, end=30)
sl = vmc.models.SequenceLocation(sequence_id="NM_0001234.5", region=sr)

# 👉 Note addition of location in CNVState
# When CNVState.location == Allele.location, CN is total copy number and
# copies are tandem.
cnvstate = vmc.models.CNVState(
    min_copies=3,
    max_copies=5,
    copy_measure="TOTAL",
    location=sl,
)

# Pair the location with the state, then assign the identifier returned
# by vmc.computed_id before displaying the Allele as a dict.
a = vmc.models.Allele(location=sl, state=cnvstate)
a.id = vmc.computed_id(a)
a.as_dict()

{'id': 'VMC:GA_deCg4f_LrHcsfDTAg9y7AnWGKaPzhXby',
 'location': {'region': {'end': 30, 'start': 20, 'type': 'SimpleInterval'},
  'sequence_id': 'NM_0001234.5',
  'type': 'SequenceLocation'},
 'state': {'copy_measure': 'TOTAL',
  'location': {'region': {'end': 30, 'start': 20, 'type': 'SimpleInterval'},
   'sequence_id': 'NM_0001234.5',
   'type': 'SequenceLocation'},
  'max_copies': 5,
  'min_copies': 3,
  'type': 'CNVState'},
 'type': 'Allele'}

## ③ CNV at a Gene Location
Because any Location may be used to define an Allele, it's straightforward to define gene copy number.

In [17]:
# Copy-number state: 3 to 5 copies, with copy_measure set to RELATIVE.
cnvstate = vmc.models.CNVState(
    copy_measure="RELATIVE",
    min_copies=3,
    max_copies=5,
)

# The Allele is located at a gene (identified as HGNC:MSH2) rather than at a
# sequence interval.
gl = vmc.models.GeneLocation(gene="HGNC:MSH2")

# Pair the gene location with the state, then assign the identifier returned
# by vmc.computed_id before displaying the Allele as a dict.
a = vmc.models.Allele(state=cnvstate, location=gl)
a.id = vmc.computed_id(a)
a.as_dict()

{'id': 'VMC:GA_TraGwt0_Ks5VSR7_DeLJnuIwHWEpJTov',
 'location': {'gene': 'HGNC:MSH2', 'type': 'GeneLocation'},
 'state': {'copy_measure': 'RELATIVE',
  'max_copies': 5,
  'min_copies': 3,
  'type': 'CNVState'},
 'type': 'Allele'}