In [32]:
import gumpy, copy, pandas

In [2]:
# Build the reference genome object from the gzipped GenBank file for
# accession NC_000962.3; the cells below look genes and bases up in it.
reference = gumpy.Genome(
    './gpas_testing/data/NC_000962.3.gbk.gz',
)

## Sample 1, MDR

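Isoniazid- and rifampicin-resistant.

NOTE(review): katG is on the reverse strand (`reverse_complement` is `True` in the katG cell below), so the `a` listed at 2155169 is the gene-strand base. Confirm which strand the two bases in the list below are meant to be given on.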

```
761155 c t
2155169 a t
```

In [23]:
# Inspect the rpoB codon that contains genome index 761155 from the
# Sample 1 variant list.
gene_name = 'rpoB'
amino_acid_number = 450

gene = reference.build_gene(gene_name)

# mask1 picks the residue out of the per-amino-acid arrays; mask2 picks the
# bases of that codon out of the per-nucleotide arrays.
mask1 = gene.amino_acid_number == amino_acid_number
mask2 = gene.gene_position == amino_acid_number

# Displayed: (reverse-strand flag, reference amino acid, genome indices, bases)
(
    gene.reverse_complement,
    gene.amino_acid_sequence[mask1],
    gene.nucleotide_index[mask2],
    gene.nucleotide_sequence[mask2],
)

(False,
 array(['S'], dtype='<U1'),
 array([761154, 761155, 761156]),
 array(['t', 'c', 'g'], dtype='<U1'))

In [24]:
# Inspect the katG codon that contains genome index 2155169 from the
# Sample 1 variant list.
gene_name = 'katG'
amino_acid_number = 315

gene = reference.build_gene(gene_name)

# mask1 picks the residue out of the per-amino-acid arrays; mask2 picks the
# bases of that codon out of the per-nucleotide arrays.
mask1 = gene.amino_acid_number == amino_acid_number
mask2 = gene.gene_position == amino_acid_number

# Displayed: (reverse-strand flag, reference amino acid, genome indices, bases)
(
    gene.reverse_complement,
    gene.amino_acid_sequence[mask1],
    gene.nucleotide_index[mask2],
    gene.nucleotide_sequence[mask2],
)


(True,
 array(['S'], dtype='<U1'),
 array([2155169, 2155168, 2155167]),
 array(['a', 'g', 'c'], dtype='<U1'))

## Sample 2, pre-XDR

Resistant to rifampicin and a fluoroquinolone; the variant list also includes gyrA S95T (7585 g c).

```
761155 c t
7570 c t
7585 g c
```

In [26]:
# Inspect the gyrA codon that contains genome index 7585 from the
# Sample 2 variant list.
gene_name = 'gyrA'
amino_acid_number = 95

gene = reference.build_gene(gene_name)

# mask1 picks the residue out of the per-amino-acid arrays; mask2 picks the
# bases of that codon out of the per-nucleotide arrays.
mask1 = gene.amino_acid_number == amino_acid_number
mask2 = gene.gene_position == amino_acid_number

# Displayed: (reverse-strand flag, reference amino acid, genome indices, bases)
(
    gene.reverse_complement,
    gene.amino_acid_sequence[mask1],
    gene.nucleotide_index[mask2],
    gene.nucleotide_sequence[mask2],
)

(False,
 array(['S'], dtype='<U1'),
 array([7584, 7585, 7586]),
 array(['a', 'g', 'c'], dtype='<U1'))

## Sample 3, XDR

rifampicin, a fluoroquinolone and a Group A agent (linezolid)

```
761155 c t
7570 c t
7585 g c
801268 t c
```

In [27]:
# Inspect the rplC codon that contains genome index 801268 from the
# Sample 3 variant list.
gene_name = 'rplC'
amino_acid_number = 154

gene = reference.build_gene(gene_name)

# mask1 picks the residue out of the per-amino-acid arrays; mask2 picks the
# bases of that codon out of the per-nucleotide arrays.
mask1 = gene.amino_acid_number == amino_acid_number
mask2 = gene.gene_position == amino_acid_number

# Displayed: (reverse-strand flag, reference amino acid, genome indices, bases)
(
    gene.reverse_complement,
    gene.amino_acid_sequence[mask1],
    gene.nucleotide_index[mask2],
    gene.nucleotide_sequence[mask2],
)

(False,
 array(['C'], dtype='<U1'),
 array([801268, 801269, 801270]),
 array(['t', 'g', 't'], dtype='<U1'))

## Sample 4, Lineage 2

In [51]:
# Read the SNP list from SNP-IT's `beijing` file. It is tab-separated and is
# read without a header row, so the two columns are named here.
lineage2 = pandas.read_csv(
    '../SNP-IT/lib/beijing',
    sep='\t',
    names=['nucleotide_index', 'alt'],
)
def find_ref_base(row):
    """Return the reference base and the lower-cased alternate base for a SNP.

    Parameters
    ----------
    row
        A record exposing ``nucleotide_index`` (genome position, convertible
        to ``int``) and ``alt`` (alternate base as a string), e.g. one row of
        the frame passed to ``DataFrame.apply(..., axis=1)``.

    Returns
    -------
    pandas.Series
        Two values, in order: the base found in the module-level ``reference``
        at that position, and ``row.alt`` lower-cased.

    Raises
    ------
    IndexError
        If the position does not occur in ``reference.nucleotide_index``.
    AssertionError
        If the alternate base equals the reference base.
    """
    alt_base = row.alt.lower()
    index = int(row.nucleotide_index)

    # NOTE: this compares against the whole reference index array on every call.
    mask = reference.nucleotide_index == index
    matches = reference.nucleotide_sequence[mask]
    if len(matches) == 0:
        # Same exception type as indexing an empty array, but naming the position.
        raise IndexError(
            'genome position {} not found in the reference'.format(index)
        )
    ref_base = matches[0]

    # A listed SNP must actually differ from the reference at that position.
    assert ref_base != alt_base, (
        'alt base {!r} at position {} equals the reference base'.format(alt_base, index)
    )
    return pandas.Series([ref_base, alt_base])

# find_ref_base returns a (reference base, lower-cased alt base) pair per row;
# the pairs become two new columns.
lineage2[['ref_base', 'alt_base']] = lineage2.apply(find_ref_base, axis=1)

# The raw `alt` column is superseded by `alt_base`; drop it by reassignment
# rather than mutating the frame in place.
lineage2 = lineage2.drop(columns=['alt'])

# Write space-separated rows with no header line and no row index.
lineage2.to_csv('tb-test-lineage2-vanilla.txt', header=False, index=False, sep=' ')
lineage2

Unnamed: 0,nucleotide_index,ref_base,alt_base
0,1011511,a,c
1,1022003,a,c
2,1028217,g,a
3,1034758,c,t
4,1071966,a,g
...,...,...,...
299,940602,c,g
300,94388,g,a
301,96894,c,t
302,97350,a,c


In [50]:
# Read the SNP list from SNP-IT's `EAI` file. It is tab-separated and is read
# without a header row, so the two columns are named here.
lineage3 = pandas.read_csv(
    '../SNP-IT/lib/EAI',
    sep='\t',
    names=['nucleotide_index', 'alt'],
)

# find_ref_base returns a (reference base, lower-cased alt base) pair per row;
# the pairs become two new columns.
lineage3[['ref_base', 'alt_base']] = lineage3.apply(find_ref_base, axis=1)

# The raw `alt` column is superseded by `alt_base`; drop it by reassignment
# rather than mutating the frame in place.
lineage3 = lineage3.drop(columns=['alt'])

# Write space-separated rows with no header line and no row index.
lineage3.to_csv('tb-test-lineage3-vanilla.txt', header=False, index=False, sep=' ')
lineage3

Unnamed: 0,nucleotide_index,ref_base,alt_base
0,1009490,c,t
1,1027727,g,c
2,1029997,a,g
3,1032524,t,c
4,1043136,c,t
...,...,...,...
420,952597,c,t
421,976043,g,t
422,987601,c,t
423,98966,g,c
