In [1]:
import collections

import pandas as pd

## Global Variables

#### Nucleotides

In [None]:
# The four DNA bases, in the order in which codons are enumerated.
Nucleotides = list("ATCG")

#### Resistance Table

In [None]:
# Resistance table: maps a position (int) to the list of one-letter amino-acid
# codes recorded for that position.
# NOTE(review): presumably these are positions in a viral protein sequence —
# confirm which protein/numbering against the data source.
#
# Every value must be a plain list.  Trailing commas after the list literals
# used to turn most entries into 1-tuples wrapping the list, which made
# membership tests such as `'S' in Resistance[29]` silently return False.
Resistance = collections.OrderedDict([
    (28, ['T', 'V', 'A', 'S']),
    (29, ['S']),
    (30, ['E', 'H', 'R', 'D', 'G', 'K', 'T', 'Q', 'P', 'S']),
    (31, ['M', 'I', 'V', 'F']),
    (32, ['L']),
    (58, ['D', 'S']),
    (92, ['K']),
    (93, ['H', 'N', 'C', 'S']),
])

#### Genetic Code

In [None]:
# Genetic code: DNA codon -> one-letter amino acid, with '_' marking the
# TAA / TAG / TGA codons.  Each text row lists four "CODON=AMINO" pairs; the
# rows are expanded in order, so insertion order follows the rows left to right.
codontable = {
    codon: amino_acid
    for row in (
        "ATA=I ATC=I ATT=I ATG=M",
        "ACA=T ACC=T ACG=T ACT=T",
        "AAC=N AAT=N AAA=K AAG=K",
        "AGC=S AGT=S AGA=R AGG=R",
        "CTA=L CTC=L CTG=L CTT=L",
        "CCA=P CCC=P CCG=P CCT=P",
        "CAC=H CAT=H CAA=Q CAG=Q",
        "CGA=R CGC=R CGG=R CGT=R",
        "GTA=V GTC=V GTG=V GTT=V",
        "GCA=A GCC=A GCG=A GCT=A",
        "GAC=D GAT=D GAA=E GAG=E",
        "GGA=G GGC=G GGG=G GGT=G",
        "TCA=S TCC=S TCG=S TCT=S",
        "TTC=F TTT=F TTA=L TTG=L",
        "TAC=Y TAT=Y TAA=_ TAG=_",
        "TGC=C TGT=C TGA=_ TGG=W",
    )
    for codon, amino_acid in (entry.split("=") for entry in row.split())
}

#### Codon Usage

In [17]:
# Load the semicolon-separated codon-usage table; later cells read its
# 'Codon', 'HCV-G1' and 'HCV-G3' columns.
codon_usage_table = pd.read_csv("../Data/Codon_Usage.csv", sep=";")

In [18]:
# Codon -> value of the 'HCV-G1' column, paired row by row.
codon_usage_g1 = {
    codon: usage
    for codon, usage in zip(codon_usage_table["Codon"], codon_usage_table["HCV-G1"])
}

In [19]:
# Codon -> value of the 'HCV-G3' column, paired row by row.
codon_usage_g3 = {
    codon: usage
    for codon, usage in zip(codon_usage_table["Codon"], codon_usage_table["HCV-G3"])
}

 #### Drugs

In [2]:
# Three-letter drug abbreviation -> lower-case drug name.
drugs_3letters = dict(
    ASV="asunaprevir",
    DCV="daclatasvir",
    DSV="dasabuvir",
    EBR="elbasvir",
    GZR="grazoprevir",
    LDV="ledipasvir",
    # NOTE(review): "nebusvir" may be a misspelling of "nesbuvir" — confirm
    # before changing, since other code may match on this exact string.
    NSV="nebusvir",
    OMV="ombitasvir",
    PIB="pibrentasvir",
    PTV="paritaprevir",
    SOF="sofosbuvir",
    VEL="velpatasvir",
    VOX="voxilaprevir",
)

## Functions

### Function to generate all intermediary codons between a pair of codons

In [None]:
def intermediaries(codon, rcodon):
    """Return the codons obtained by mixing positions of two codons.

    Both inputs are upper-cased first.  For three-letter codons the result
    holds, in a fixed order, the seven combinations in which at least one
    position is taken from ``rcodon`` (the last entry is ``rcodon`` itself).
    Duplicates are kept when the two codons share bases.

    Args:
        codon: Starting codon (string).
        rcodon: Target codon (string).

    Returns:
        list of seven upper-case strings.
    """
    start = codon.upper()
    target = rcodon.upper()
    return [
        start[:2] + target[2],               # third position replaced
        start[0] + target[1:],               # second and third replaced
        target[0] + start[1:],               # first position replaced
        target[:2] + start[2],               # first and second replaced
        start[0] + target[1] + start[2],     # second position replaced
        target[0] + start[1] + target[2],    # first and third replaced
        target[:],                           # the target codon itself
    ]

#### Genetic Barrier Function

In [None]:
def genetic_barrier(codon):
    """Score all 64 codons by their substitution cost from a reference codon.

    Each of the three codon positions is compared independently with the
    reference base at that position:

    * unchanged base: 0
    * transition (A <-> G or C <-> T): 1
    * transversion (any other change): 2.5

    A codon's score is the sum of its three per-position costs.

    Args:
        codon: Reference codon as a string (case-insensitive).  Only the
            first three characters are used; any extra characters are ignored.

    Returns:
        dict mapping each of the 64 three-letter codons over A/T/C/G to its
        score.  Keys are inserted with the first position varying slowest and
        each position running through A, T, C, G.  Scores are ``int`` when no
        transversion is involved and ``float`` otherwise.  An empty dict is
        returned when one of the first three characters is not A, T, C or G.

    Raises:
        IndexError: if ``codon`` has fewer than three characters and every
            character present is a valid base.
    """
    bases = ("A", "T", "C", "G")
    purines = ("A", "G")

    def substitution_cost(ref_base, new_base):
        """Cost of replacing ``ref_base`` by ``new_base`` at one position."""
        if new_base == ref_base:
            return 0
        # Both purines or both pyrimidines => transition; otherwise transversion.
        if (ref_base in purines) == (new_base in purines):
            return 1
        return 2.5

    reference = codon.upper()

    # One {new_base: cost} table per codon position.
    position_costs = []
    for position in range(3):
        ref_base = reference[position]  # IndexError when the codon is too short
        if ref_base not in bases:
            # A reference base outside A/T/C/G cannot be scored; callers have
            # always received an empty result in this case.
            return {}
        position_costs.append(
            {new_base: substitution_cost(ref_base, new_base) for new_base in bases}
        )

    first_costs, second_costs, third_costs = position_costs
    return {
        first + second + third: first_costs[first] + second_costs[second] + third_costs[third]
        for first in bases
        for second in bases
        for third in bases
    }

#### Precomputed dictionary, so the scores do not have to be recalculated every time
Talvez um arquivo seja melhor(?)

In [None]:
# Precompute the score table once for every reference codon, so that
# Genetic_Barrier_Scores[reference][target] is a plain dictionary lookup.
Genetic_Barrier_Scores = {
    first + second + third: genetic_barrier(first + second + third)
    for first in Nucleotides
    for second in Nucleotides
    for third in Nucleotides
}

#### Genetic Barrier Calculator 

Função que consulta o dicionário e traz o valor de barreira genética de dois códons

In [None]:
def Genetic_Barrier_Calc(first_codon,second_codon):
    """Print the precomputed genetic barrier between two codons.

    Both codons are upper-cased and looked up in the module-level
    ``Genetic_Barrier_Scores`` table as
    ``Genetic_Barrier_Scores[first_codon][second_codon]``.  The score is
    printed with one decimal place; if either codon is missing from the
    table, ``Invalid codons!`` is printed instead.

    Args:
        first_codon: Reference codon (string, case-insensitive).
        second_codon: Target codon (string, case-insensitive).

    Returns:
        None.  The result is only printed.
    """
    first_codon = first_codon.upper()
    second_codon = second_codon.upper()
    try:
        score = Genetic_Barrier_Scores[first_codon][second_codon]
    except KeyError:
        # Only a failed lookup means "invalid codon".  Other problems (for
        # example the score table not having been built yet) must surface
        # instead of being reported as bad input.
        print("Invalid codons!")
        return
    print("Genetic Barrier: %s -> %s = %.1f" % (first_codon, second_codon, score))

# NOTE(review): the description below appears to belong to List_Info_Seqs,
# which is defined in the next cell — consider moving it into that docstring.
# That function prints the information about the sequences in a more organized way.

In [None]:
def List_Info_Seqs(Data_Seq):
    Frequencies = collections.OrderedDict()
    for Sequence in Data_Seq:
        print("Sequence: %s " % Sequence[0])
        for info_seq in Sequence[1]:
            print("Position: %s " % info_seq[0])
            if info_seq[1] not in Frequencies:
                Frequencies[info_seq[1]] = 0
            Frequencies[info_seq[1]] += 1
            print("codon: %s AminoAcid: %s Resistance: %r" % (info_seq[1],info_seq[2],info_seq[3]))