### Read CSV files

In [120]:
import csv

# One dict per data row of the side-by-side CSV; values are kept as the
# raw strings produced by csv.reader.
codons = []

with open('data/side-by-side.csv', newline='') as handle:
    rows = csv.reader(handle, delimiter=',')
    next(rows)  # skip the first row

    # Each remaining row must unpack into exactly three columns.
    codons.extend(
        {'abspos': position, 'virus': virus_codon, 'vaccine': vaccine_codon}
        for position, virus_codon, vaccine_codon in rows
    )

print(len(codons))


1274


In [121]:
# One dict per data row of the codon table CSV: amino acid plus codon.
codon_table = []

with open('data/codon-table-grouped.csv', newline='') as handle:
    rows = csv.reader(handle, delimiter=',')
    next(rows)  # skip the first row

    # Each remaining row must unpack into exactly two columns.
    codon_table.extend(
        {'aminoacid': amino, 'codon': triplet}
        for amino, triplet in rows
    )

print(len(codon_table))

64


### Group the codon table by number of 'good' characters (G or C)


In [122]:
def calculate_codon_score(codon):
    """Return how many characters of ``codon`` are 'G' or 'C'.

    Args:
        codon: A string (or other iterable of single-character strings),
            e.g. ``'GTC'``. Matching is case-sensitive.

    Returns:
        int: The number of 'G'/'C' characters; 0 for an empty codon.
    """
    # Compare by value, not identity: ``char is 'G'`` only works when the
    # interpreter happens to reuse the same string object, and it raises a
    # SyntaxWarning on Python 3.8+.
    return sum(1 for char in codon if char in ('G', 'C'))

In [123]:
# Bucket every codon-table row under its score (0 to 3) as computed by
# calculate_codon_score on the row's codon.
grouped_codon_table = {bucket: [] for bucket in range(4)}

for row in codon_table:
    grouped_codon_table[calculate_codon_score(row['codon'])].append(row)

### Find a suggested codon for each position

In [124]:
def find_codon_with_score(score, target_aminoacid):
    """Find a codon row with the given score that encodes ``target_aminoacid``.

    Args:
        score: Key into ``grouped_codon_table`` selecting the bucket to search.
        target_aminoacid: Amino acid value to match against each row's
            'aminoacid' entry.

    Returns:
        The first matching row dict from ``grouped_codon_table[score]``, or
        ``False`` when the bucket holds no row for that amino acid.

    Raises:
        KeyError: If ``score`` is not a key of ``grouped_codon_table``.
    """
    for codon_row in grouped_codon_table[score]:
        # Compare by value: ``is`` checks object identity, so two equal
        # strings read from different sources are not guaranteed to match.
        if codon_row['aminoacid'] == target_aminoacid:
            return codon_row

    # Callers test the result for truthiness, so keep False as the sentinel.
    return False

In [125]:
def find_target_aminoacid(codon):
    """Look up the amino acid that ``codon`` maps to in ``codon_table``.

    Returns the 'aminoacid' value of the first row whose 'codon' equals
    ``codon``, or ``None`` when no row matches.
    """
    matches = (row['aminoacid'] for row in codon_table if row['codon'] == codon)
    return next(matches, None)

In [126]:
# Attach a 'suggestion' codon to every entry: the first codon found for the
# same amino acid in the highest-scoring bucket above the virus codon's own
# score, or the virus codon itself when no better one exists.
for entry in codons:
    virus_codon = entry['virus']
    virus_codon_score = calculate_codon_score(virus_codon)
    target_aminoacid = find_target_aminoacid(virus_codon)

    # Fall back to the unchanged virus codon unless a better one turns up.
    suggestion = virus_codon

    # Walk the buckets from the best score (3) down to just above the
    # virus codon's score; the first hit wins.
    for candidate_score in range(3, virus_codon_score, -1):
        best_codon = find_codon_with_score(candidate_score, target_aminoacid)
        if best_codon:
            suggestion = best_codon['codon']
            break

    entry['suggestion'] = suggestion

### Check mismatches

In [127]:
# Report every position where the vaccine codon and the suggested codon
# translate to different amino acids.
for codon in codons:
    virus_target, vaccine_target, suggestion_target = (
        find_target_aminoacid(codon[column])
        for column in ('virus', 'vaccine', 'suggestion')
    )

    if vaccine_target == suggestion_target:
        continue

    print(f"MISMATCH - virus: {codon['virus']} -> {virus_target}, vaccine: {codon['vaccine']} -> {vaccine_target}, suggestion: {codon['suggestion']} -> {suggestion_target}")

MISMATCH - virus: AAA -> K, vaccine: CCT -> P, suggestion: AAG -> K
MISMATCH - virus: GTT -> V, vaccine: CCT -> P, suggestion: GTC -> V


### Calculate score

In [128]:
# Count the positions where the vaccine codon equals the suggested codon
# (each True comparison contributes 1 to the sum).
match_count = sum(row['vaccine'] == row['suggestion'] for row in codons)

# Share of matching positions, expressed as a percentage of all codons.
percentage = 100 / len(codons) * match_count

f"Result: {percentage}%"


'Result: 62.71585557299843%'

### Print results

In [129]:
# Comma-separated listing of every position, with a final column telling
# whether the vaccine codon equals the suggested codon.
print("abspos, virus, vaccine, suggestion, vaccine_match")

for codon in codons:
    vaccine_match = codon['vaccine'] == codon['suggestion']
    print(
        codon['abspos'], codon['virus'], codon['vaccine'], codon['suggestion'],
        vaccine_match,
        sep=", ",
    )

abspos, virus, vaccine, suggestion, vaccine_match
0, ATG, ATG, ATG, True
3, TTT, TTC, TTC, True
6, GTT, GTG, GTC, False
9, TTT, TTC, TTC, True
12, CTT, CTG, CTC, False
15, GTT, GTG, GTC, False
18, TTA, CTG, CTC, False
21, TTG, CTG, CTC, False
24, CCA, CCT, CCC, False
27, CTA, CTG, CTC, False
30, GTC, GTG, GTC, False
33, TCT, TCC, AGC, False
36, AGT, AGC, AGC, True
39, CAG, CAG, CAG, True
42, TGT, TGT, TGC, False
45, GTT, GTG, GTC, False
48, AAT, AAC, AAC, True
51, CTT, CTG, CTC, False
54, ACA, ACC, ACC, True
57, ACC, ACC, ACC, True
60, AGA, AGA, CGC, False
63, ACT, ACA, ACC, False
66, CAA, CAG, CAG, True
69, TTA, CTG, CTC, False
72, CCC, CCT, CCC, False
75, CCT, CCA, CCC, False
78, GCA, GCC, GCC, True
81, TAC, TAC, TAC, True
84, ACT, ACC, ACC, True
87, AAT, AAC, AAC, True
90, TCT, AGC, AGC, True
93, TTC, TTT, TTC, False
96, ACA, ACC, ACC, True
99, CGT, AGA, CGC, False
102, GGT, GGC, GGC, True
105, GTT, GTG, GTC, False
108, TAT, TAC, TAC, True
111, TAC, TAC, TAC, True
114, CCT, CCC, CCC

### Find differences

In [130]:
# Running score totals, accumulated only over positions where the vaccine
# codon and the suggested codon differ.
total_pfizer = 0
total_suggestion = 0

for codon in codons:
    if codon['vaccine'] != codon['suggestion']:
        pfizer_score, suggestion_score = (
            calculate_codon_score(codon[column])
            for column in ('vaccine', 'suggestion')
        )

        total_pfizer += pfizer_score
        total_suggestion += suggestion_score

        # Positive means the suggestion scores higher than the vaccine codon.
        difference = suggestion_score - pfizer_score

        print(f"Pfizer: {codon['vaccine']}, Suggestion: {codon['suggestion']}, Score difference: {difference}")

Pfizer: GTG, Suggestion: GTC, Score difference: 0
Pfizer: CTG, Suggestion: CTC, Score difference: 0
Pfizer: GTG, Suggestion: GTC, Score difference: 0
Pfizer: CTG, Suggestion: CTC, Score difference: 0
Pfizer: CTG, Suggestion: CTC, Score difference: 0
Pfizer: CCT, Suggestion: CCC, Score difference: 1
Pfizer: CTG, Suggestion: CTC, Score difference: 0
Pfizer: GTG, Suggestion: GTC, Score difference: 0
Pfizer: TCC, Suggestion: AGC, Score difference: 0
Pfizer: TGT, Suggestion: TGC, Score difference: 1
Pfizer: GTG, Suggestion: GTC, Score difference: 0
Pfizer: CTG, Suggestion: CTC, Score difference: 0
Pfizer: AGA, Suggestion: CGC, Score difference: 2
Pfizer: ACA, Suggestion: ACC, Score difference: 1
Pfizer: CTG, Suggestion: CTC, Score difference: 0
Pfizer: CCT, Suggestion: CCC, Score difference: 1
Pfizer: CCA, Suggestion: CCC, Score difference: 1
Pfizer: TTT, Suggestion: TTC, Score difference: 1
Pfizer: AGA, Suggestion: CGC, Score difference: 2
Pfizer: GTG, Suggestion: GTC, Score difference: 0


### Total scores

_The totals can differ slightly because Pfizer changed 2 codons so that they encode a different amino acid (see the mismatches listed above)._

In [131]:
# Emit both totals, one per line.
summary_lines = [
    f"Total pfizer score: {total_pfizer} (higher is better)",
    f"Total suggestion score: {total_suggestion} (higher is better)",
]
print("\n".join(summary_lines))

Total pfizer score: 733 (higher is better)
Total suggestion score: 993 (higher is better)


### Print entire result

In [132]:
# Concatenate every suggested codon, in order, into one sequence string.
result = "".join(codon['suggestion'] for codon in codons)

print(result)

ATGTTCGTCTTCCTCGTCCTCCTCCCCCTCGTCAGCAGCCAGTGCGTCAACCTCACCACCCGCACCCAGCTCCCCCCCGCCTACACCAACAGCTTCACCCGCGGCGTCTACTACCCCGACAAGGTCTTCCGCTCCAGCGTCCTCCACAGCACCCAGGACCTCTTCCTCCCCTTCTTCTCCAACGTCACCTGGTTCCACGCCATCCACGTCAGCGGGACCAACGGCACCAAGCGCTTCGACAACCCCGTCCTCCCCTTCAACGACGGCGTCTACTTCGCCTCCACCGAGAAGAGCAACATCATCCGCGGCTGGATCTTCGGCACCACCCTCGACTCGAAGACCCAGTCCCTCCTCATCGTCAACAACGCCACCAACGTCGTCATCAAGGTCTGCGAGTTCCAGTTCTGCAACGACCCCTTCCTCGGCGTCTACTACCACAAGAACAACAAGAGCTGGATGGAGAGCGAGTTCCGCGTCTACAGCAGCGCGAACAACTGCACCTTCGAGTACGTCAGCCAGCCCTTCCTCATGGACCTCGAGGGCAAGCAGGGCAACTTCAAGAACCTCCGCGAGTTCGTGTTCAAGAACATCGACGGCTACTTCAAGATCTACAGCAAGCACACGCCCATCAACCTCGTGCGCGACCTCCCCCAGGGCTTCTCGGCCCTCGAGCCCCTCGTCGACCTCCCCATCGGCATCAACATCACCCGCTTCCAGACCCTCCTCGCCCTCCACCGCAGCTACCTCACCCCCGGCGACAGCAGCAGCGGCTGGACCGCCGGCGCCGCCGCCTACTACGTGGGCTACCTCCAGCCCCGCACCTTCCTCCTCAAGTACAACGAGAACGGCACCATCACCGACGCCGTCGACTGCGCCCTCGACCCCCTCAGCGAGACCAAGTGCACGCTCAAGTCCTTCACCGTCGAGAAGGGCATCTACCAGACCAGCAACTTCCGCGTCCAGCCCACCGAGAGCATCGTCCGCTTCCCCAACATCACCA