# Introduction

In this tutorial we will see how to analyse the data we just retrieved!

In [1]:
from CodonU import analyzer as an

# Setting file path

In [2]:
# Path to the nucleotide FASTA file that every analysis cell below reads.
# To analyse a different file, point `in_file` at it instead, e.g.:
#     in_file = ''    # your choice of file
in_file = 'Nucleotide/Staphylococcus_agnetis_nucleotide.fasta'

# Setting up necessary parameters

Here we are going to set 3 parameters, viz.:
- `genetic_code_num`: Genetic table number for codon table. To know more about genetic table number, click [here](https://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi)
- `gene_analysis`: Whether to perform gene-level or genome-level analysis. Set `True` to analyse each gene individually; set `False` to analyse all genes together as a whole genome
- `min_len_threshold`: Minimum length of nucleotide sequence to be considered as a gene

In [3]:
# Shared settings passed to the analyzer calls in the cells below.
genetic_code_num = 11     # NCBI genetic (codon) table number
min_len_threshold = 200   # minimum nucleotide sequence length to be considered a gene
gene_analysis = True      # True: analyse each gene individually; False: treat as a whole genome

# Calculating CAI

## Gene Analysis

A word of caution: some values in the output may appear as `nan`. For an explanation, see the first part of [Generate Summary](#generate_summary).

In [4]:
# Gene-level CAI: with `gene_analysis` set to True above, the result is a
# mapping of gene name -> {codon: CAI value}.
cai = an.calculate_cai(in_file, genetic_code_num, min_len_threshold, gene_analysis=gene_analysis)

# The loop below is only for visualization: one header line per gene,
# followed by its indented codon values (some of which may print as `nan`).
for gene in cai:
    print(f'{gene}:')
    codon_values = cai[gene]
    for codon, cai_val in codon_values.items():
        print(f'    {codon}: {cai_val}')

gene_1:
    TTT: 1.0
    TTC: 0.5
    TTA: 1.0
    TTG: 0.5
    TCT: 0.6956521739130435
    TCC: 0.43478260869565216
    TCA: 1.0
    TCG: 0.043478260869565216
    TAT: 1.0
    TAC: 1.0
    TGT: 1.0
    TGC: 1.0
    TGG: nan
    CTT: 0.5
    CTC: 0.5
    CTA: 0.5
    CTG: 1.0
    CCT: 1.0
    CCC: 0.25
    CCA: 0.5
    CCG: 0.5
    CAT: 1.0
    CAC: 0.5
    CAA: 1.0
    CAG: 0.0625
    CGT: 1.0
    CGC: 0.25
    CGA: 0.12500000000000003
    CGG: 0.12500000000000003
    ATT: 0.5
    ATC: 1.0
    ATA: 0.5
    ATG: nan
    ACT: 1.0
    ACC: 0.2
    ACA: 0.9333333333333333
    ACG: 0.3333333333333333
    AAT: 1.0
    AAC: 0.2857142857142857
    AAA: 1.0
    AAG: 0.1346153846153846
    AGT: 0.13043478260869565
    AGC: 0.021739130434782608
    AGA: 0.75
    AGG: 0.12500000000000003
    GTT: 1.0
    GTC: 0.12500000000000003
    GTA: 0.7500000000000001
    GTG: 0.25
    GCT: 1.0
    GCC: 0.14285714285714285
    GCA: 0.42857142857142855
    GCG: 0.3571428571428572
    GAT: 1.0
    GAC: 0.92857

    GCC: 0.08333333333333333
    GCA: 1.0
    GCG: 0.3333333333333333
    GAT: 1.0
    GAC: 0.36363636363636365
    GAA: 1.0
    GAG: 0.04166666666666668
    GGT: 1.0
    GGC: 0.6
    GGA: 0.39999999999999997
    GGG: 0.39999999999999997
gene_647:
    TTT: 0.5
    TTC: 1.0
    TTA: 1.0
    TTG: 0.7499999999999999
    TCT: 0.75
    TCC: 0.25
    TCA: 1.0
    TCG: 0.12500000000000003
    TAT: 1.0
    TAC: 0.25
    TGT: 1.0
    TGC: 0.5
    TGG: nan
    CTT: 1.0
    CTC: 0.5
    CTA: 0.25
    CTG: 0.12500000000000003
    CCT: 1.0
    CCC: 1.0
    CCA: 0.5
    CCG: 0.5
    CAT: 1.0
    CAC: 1.0
    CAA: 1.0
    CAG: 0.06666666666666667
    CGT: 0.3333333333333333
    CGC: 0.16666666666666669
    CGA: 0.16666666666666669
    CGG: 0.16666666666666669
    ATT: 1.0
    ATC: 0.2
    ATA: 0.8
    ATG: nan
    ACT: 1.0
    ACC: 0.12500000000000003
    ACA: 0.75
    ACG: 1.0
    AAT: 1.0
    AAC: 0.12500000000000003
    AAA: 1.0
    AAG: 0.1714285714285714
    AGT: 0.5
    AGC: 0.12500000000000003

    GTA: 0.8571428571428571
    GTG: 0.42857142857142855
    GCT: 1.0
    GCC: 0.4
    GCA: 0.6000000000000001
    GCG: 1.0
    GAT: 1.0
    GAC: 0.30769230769230765
    GAA: 1.0
    GAG: 0.25
    GGT: 1.0
    GGC: 0.12500000000000003
    GGA: 0.5
    GGG: 0.25
gene_1301:
    TTT: 1.0
    TTC: 0.2727272727272727
    TTA: 1.0
    TTG: 0.7272727272727272
    TCT: 0.3636363636363636
    TCC: 0.04545454545454544
    TCA: 1.0
    TCG: 0.0909090909090909
    TAT: 1.0
    TAC: 0.11111111111111115
    TGT: 1.0
    TGC: 1.0
    TGG: nan
    CTT: 0.8181818181818181
    CTC: 0.2727272727272727
    CTA: 0.2727272727272727
    CTG: 0.0909090909090909
    CCT: 1.0
    CCC: 0.25
    CCA: 0.7500000000000001
    CCG: 0.12500000000000003
    CAT: 1.0
    CAC: 0.2857142857142857
    CAA: 1.0
    CAG: 0.11764705882352941
    CGT: 1.0
    CGC: 0.4285714285714286
    CGA: 0.14285714285714288
    CGG: 0.07142857142857144
    ATT: 1.0
    ATC: 0.3529411764705882
    ATA: 0.02941176470588235
    ATG: nan
    A

    TTC: 1.0
    TTA: 1.0
    TTG: 0.6
    TCT: 1.0
    TCC: 1.0
    TCA: 0.5
    TCG: 0.5
    TAT: 1.0
    TAC: 0.25
    TGT: 0.5
    TGC: 1.0
    TGG: nan
    CTT: 0.2
    CTC: 0.2
    CTA: 0.10000000000000002
    CTG: 0.10000000000000002
    CCT: 1.0
    CCC: 1.0
    CCA: 0.5
    CCG: 0.5
    CAT: 1.0
    CAC: 0.3333333333333333
    CAA: 1.0
    CAG: 1.0
    CGT: 0.25
    CGC: 0.25
    CGA: 1.0
    CGG: 1.0
    ATT: 1.0
    ATC: 0.25
    ATA: 0.5
    ATG: nan
    ACT: 0.25
    ACC: 0.25
    ACA: 0.25
    ACG: 1.0
    AAT: 1.0
    AAC: 0.5
    AAA: 1.0
    AAG: 1.0
    AGT: 0.5
    AGC: 1.0
    AGA: 0.25
    AGG: 0.25
    GTT: 1.0
    GTC: 1.0
    GTA: 1.0
    GTG: 1.0
    GCT: 0.5
    GCC: 0.25
    GCA: 1.0
    GCG: 0.25
    GAT: 1.0
    GAC: 0.0625
    GAA: 1.0
    GAG: 0.6666666666666667
    GGT: 0.5
    GGC: 0.25
    GGA: 1.0
    GGG: 0.25
gene_1930:
    TTT: 1.0
    TTC: 0.25
    TTA: 0.5
    TTG: 1.0
    TCT: 0.375
    TCC: 0.0625
    TCA: 1.0
    TCG: 0.12500000000000003
    T

## Genome analysis

In [5]:
# Genome-level CAI: `gene_analysis=False` treats all sequences as one genome,
# so the result is a flat mapping of codon -> CAI value.
# NOTE: this rebinds `cai`, replacing the per-gene result from the previous cell.
cai = an.calculate_cai(in_file, genetic_code_num, min_len_threshold, gene_analysis=False)

# The loop below is only for visualization: one line per codon.
for codon in cai:
    cai_val = cai[codon]
    print(f'{codon}: {cai_val}')

TTT: 1.0
TTC: 0.3111422862730094
TTA: 1.0
TTG: 0.23731475400474006
TCT: 0.776226021350028
TCC: 0.1724054900072237
TCA: 1.0
TCG: 0.24496348021510556
TAT: 1.0
TAC: 0.3058442831927829
TGT: 1.0
TGC: 0.21599999999999997
TGG: nan
CTT: 0.291453699209046
CTC: 0.12292624425344793
CTA: 0.13391964821107338
CTG: 0.06047799891493677
CCT: 0.8883158103550883
CCC: 0.1428288038087681
CCA: 1.0
CCG: 0.3528069827415195
CAT: 1.0
CAC: 0.3163272210569051
CAA: 1.0
CAG: 0.12275955360162324
CGT: 1.0
CGC: 0.31975716591101205
CGA: 0.2684238838084992
CGG: 0.06432029508952584
ATT: 1.0
ATC: 0.35400293635028934
ATA: 0.29239715577050407
ATG: nan
ACT: 0.4592800082996161
ACC: 0.16651104886399
ACA: 1.0
ACG: 0.5451810353771138
AAT: 1.0
AAC: 0.3886311389759665
AAA: 1.0
AAG: 0.1867115010239426
AGT: 0.8015089493538807
AGC: 0.25451480857211656
AGA: 0.33504956581879664
AGG: 0.05409974640743871
GTT: 1.0
GTC: 0.4962616214810481
GTA: 0.9237370782133801
GTG: 0.6747935764904752
GCT: 0.6145768993205682
GCC: 0.2512868025530163
GCA: 1

# Calculating CBI

In [None]:
# Gene-level CBI, computed with the same input file and parameters as the CAI cells.
cbi = an.calculate_cbi(in_file, genetic_code_num, min_len_threshold, gene_analysis)

# The loop below is only for visualization.
# It iterates over `cbi` (the result just computed). The original looped over
# `cai`, which at this point holds the flat genome-level CAI mapping, so the
# inner `.items()` call would fail and the CBI values would never be shown.
# NOTE(review): assumes that with gene_analysis=True the result is a mapping of
# gene -> {key: CBI value}, mirroring the per-gene CAI result - confirm against
# the CodonU documentation.
for gene, cbi_dct in cbi.items():
    print(f'{gene}:')
    for key, cbi_val in cbi_dct.items():
        print(f'    {key}: {cbi_val}')

# Generate Summary

## generate_summary