In [7]:
import pandas as pd
import numpy as np
import os
import seaborn as sns
from matplotlib import pyplot as plt
from abnumber import Chain
from bin.evaluation import plot_overlap_venn, evaluate_forward_mutation_overlap, evaluate_back_mutation_overlap, \
    evaluate_vernier_forward_mutation_overlap, evaluate_vernier_back_mutation_overlap

%config InlineBackend.figure_format = 'retina'
%matplotlib inline

In [8]:
# Parental sequence (the "Parental" row of the alignments printed further
# down in this notebook), numbered with the Kabat scheme.
parental = Chain(
    'EVQLQQSGPELVKPGASVKMSCKASGYTFTDYYMKWVKQSHGKSLEWIGDIIPSNGATFYNQKFKGKATLTVDRSSSTAYMHLNSLTSEDSAVYYCTRSHLLRASWFAYWGQGTLVTVSA',
    scheme='kabat',
    name='Example',
)
# Print the sequence together with its position numbering.
parental.print(numbering=True)

0        1         2         3         4         5          6         7         8            9         10           11  
123456789012345678901234567890123456789012345678901223456789012345678901234567890122223456789012345678900001234567890123
                                                    A                              ABC                  ABC             
EVQLQQSGPELVKPGASVKMSCKASGYTFTDYYMKWVKQSHGKSLEWIGDIIPSNGATFYNQKFKGKATLTVDRSSSTAYMHLNSLTSEDSAVYYCTRSHLLRASWFAYWGQGTLVTVSA
 °                        °°°°^^^^^           °°°^^^^^^^^^^^^^^^^^ ° ° ° °    °                 °°^^^^^^^^^^^°          


In [9]:
# Baseline sequence: the "Humanized Experimental" row of the forward-mutation
# alignment printed further down. Numbered with the Kabat scheme.
baseline = Chain(
    'EVQLVQSGAEVKKPGESLKISCKGSGYSFTDYYMKWARQMPGKGLEWMGDIIPSNGATFYNQKFKGQVTISADKSISTTYLQWSSLKASDTAMYYCARSHLLRASWFAYWGQGTMVTVSS',
    scheme='kabat',
    name='Example',
)
# Print the sequence together with its position numbering.
baseline.print(numbering=True)

0        1         2         3         4         5          6         7         8            9         10           11  
123456789012345678901234567890123456789012345678901223456789012345678901234567890122223456789012345678900001234567890123
                                                    A                              ABC                  ABC             
EVQLVQSGAEVKKPGESLKISCKGSGYSFTDYYMKWARQMPGKGLEWMGDIIPSNGATFYNQKFKGQVTISADKSISTTYLQWSSLKASDTAMYYCARSHLLRASWFAYWGQGTMVTVSS
 °                        °°°°^^^^^           °°°^^^^^^^^^^^^^^^^^ ° ° ° °    °                 °°^^^^^^^^^^^°          


In [10]:
# Predicted sequence: the "Humanized Predicted" row of the forward-mutation
# alignment printed further down. Numbered with the Kabat scheme.
predicted = Chain(
    'QVQLVQSGAEVKKPGASVKVSCKASGYTFTDYYMKWVRQAPGQGLEWMGDIIPSNGATFYNQKFKGRVTLTRDTSTSTAYMELNSLTSEDTAVYYCARSHLLRASWFAYWGQGTLVTVSS',
    scheme='kabat',
    name='Example',
)
# Print the sequence together with its position numbering.
predicted.print(numbering=True)

0        1         2         3         4         5          6         7         8            9         10           11  
123456789012345678901234567890123456789012345678901223456789012345678901234567890122223456789012345678900001234567890123
                                                    A                              ABC                  ABC             
QVQLVQSGAEVKKPGASVKVSCKASGYTFTDYYMKWVRQAPGQGLEWMGDIIPSNGATFYNQKFKGRVTLTRDTSTSTAYMELNSLTSEDTAVYYCARSHLLRASWFAYWGQGTLVTVSS
 °                        °°°°^^^^^           °°°^^^^^^^^^^^^^^^^^ ° ° ° °    °                 °°^^^^^^^^^^^°          


# Forward mutations

In [11]:
# Align the three sequences position by position. Row order matches the
# legend printed below: experimental humanization (baseline), parental,
# predicted humanization.
aligned = baseline.align(parental, predicted)

print(
    'Humanized Experimental',
    '↑↑↑↑↑',
    'Parental',
    '↓↓↓↓↓',
    'Humanized Predicted',
    sep='\n',
)
print()

print(aligned)

# Build both marker rows in a single pass over the alignment.
changed_row = []
vernier_row = []
for position, (experimental_aa, parental_aa, predicted_aa) in aligned:
    # A position is flagged when either humanized sequence differs from parental.
    is_changed = not (experimental_aa == parental_aa == predicted_aa)
    changed_row.append('x' if is_changed else ' ')
    # Vernier membership is only queried for positions that changed.
    vernier_row.append('*' if is_changed and position.is_in_vernier() else ' ')

print(''.join(changed_row))
print(''.join(vernier_row))
print('x Mutated anywhere')
print('* Mutated Vernier')
print()

Humanized Experimental
↑↑↑↑↑
Parental
↓↓↓↓↓
Humanized Predicted

EVQLVQSGAEVKKPGESLKISCKGSGYSFTDYYMKWARQMPGKGLEWMGDIIPSNGATFYNQKFKGQVTISADKSISTTYLQWSSLKASDTAMYYCARSHLLRASWFAYWGQGTMVTVSS
||||.|||.|+.|||.|+|+|||.|||+||||||||.+|..||.|||+||||||||||||||||||+.|++.|+|.||.|+..+||.+.|+|+|||.|||||||||||||||||+||||+
EVQLQQSGPELVKPGASVKMSCKASGYTFTDYYMKWVKQSHGKSLEWIGDIIPSNGATFYNQKFKGKATLTVDRSSSTAYMHLNSLTSEDSAVYYCTRSHLLRASWFAYWGQGTLVTVSA
+|||.|||.|+.|||||||+|||||||||||||||||+|+.|+.|||+||||||||||||||||||+.|||.|.|+|||||.||||||||+|||||.||||||||||||||||||||||+
QVQLVQSGAEVKKPGASVKVSCKASGYTFTDYYMKWVRQAPGQGLEWMGDIIPSNGATFYNQKFKGRVTLTRDTSTSTAYMELNSLTSEDTAVYYCARSHLLRASWFAYWGQGTLVTVSS
 °                        °°°°^^^^^           °°°^^^^^^^^^^^^^^^^^ ° ° ° °    °                 °°^^^^^^^^^^^°          
x   x   x xx   x x x   x   x        xx xx xx   x                  xx xxx x x  x xxxx  xxx x x   x                 x    x
                           *                   *                   * * * *    *         

In [14]:
# Summarise forward-mutation overlap for the three chains defined above.
# The displayed result is an int64 Series with the entries shared_both,
# shared_position, predicted_only and baseline_only.
# NOTE(review): presumably these count positions mutated (relative to parental)
# in both humanizations / only one of them — confirm against bin.evaluation.
evaluate_forward_mutation_overlap(baseline, parental, predicted)

shared_both        12
shared_position     7
predicted_only      2
baseline_only      16
Name: Example, dtype: int64

In [15]:
# Vernier-specific variant of the forward-mutation overlap summary; the
# displayed result has the same four entries as the call above.
# NOTE(review): presumably restricted to Vernier-zone positions, as the
# function name suggests — confirm against bin.evaluation.
evaluate_vernier_forward_mutation_overlap(baseline, parental, predicted)

shared_both        3
shared_position    2
predicted_only     0
baseline_only      3
Name: Example, dtype: int64

# Back mutations

In [16]:
# Germline reference for the experimental humanization: the result of
# find_merged_human_germline() on `baseline`, renumbered with 'kabat'.
baseline_germline = baseline.find_merged_human_germline().renumber('kabat')

# Row order matches the legend printed below: parental, humanized (here the
# experimental baseline), germline.
aligned = parental.align(baseline, baseline_germline)

print(
    'Parental',
    '↓↓↓↓↓',
    'Humanized',
    '↓↓↓↓↓',
    'Germline',
    sep='\n',
)
print()

print(aligned)

# Build both marker rows in a single pass over the alignment.
backmutation_row = []
vernier_row = []
for position, (parental_aa, humanized_aa, germline_aa) in aligned:
    # Flag positions where the humanized residue differs from the germline
    # residue but equals the parental residue.
    # NOTE(review): this test has no CDR filter, unlike the
    # evaluate_back_mutation_overlap call later in this notebook, which passes
    # a pos_filter excluding CDR positions — confirm CDR positions are meant
    # to be marked here.
    differs_from_germline = humanized_aa != germline_aa
    matches_parental = humanized_aa == parental_aa
    is_backmutated = differs_from_germline and matches_parental
    backmutation_row.append('x' if is_backmutated else ' ')
    # Vernier membership is only queried for flagged positions.
    vernier_row.append('*' if is_backmutated and position.is_in_vernier() else ' ')

print(''.join(backmutation_row))
print(''.join(vernier_row))
print('x Back-mutated anywhere')
print('* Back-mutated Vernier')
print()

Parental
↓↓↓↓↓
Humanized
↓↓↓↓↓
Germline

EVQLQQSGPELVKPGASVKMSCKASGYTFTDYYMKWVKQSHGKSLEWIGDIIPSNGATFYNQKFKGKATLTVDRSSSTAYMHLNSLTSEDSAVYYCTRSHLLRASWFAYWGQGTLVTVSA
||||.|||.|+.|||.|+|+|||.|||+||||||||.+|..||.|||+||||||||||||||||||+.|++.|+|.||.|+..+||.+.|+|+|||.|||||||||||||||||+||||+
EVQLVQSGAEVKKPGESLKISCKGSGYSFTDYYMKWARQMPGKGLEWMGDIIPSNGATFYNQKFKGQVTISADKSISTTYLQWSSLKASDTAMYYCARSHLLRASWFAYWGQGTMVTVSS
||||||||||||||||||||||||||||||.|+..|.||||||||||||.|.|.+..|.|+..|+|||||||||||||.|||||||||||||||||||+.........+|||||+|||||
EVQLVQSGAEVKKPGESLKISCKGSGYSFTSYWTGWVRQMPGKGLEWMGIIYPGDSDTRYSPSFQGQVTISADKSISTAYLQWSSLKASDTAMYYCARAEYF-----QHWGQGTLVTVSS
 °                        °°°°^^^^^           °°°^^^^^^^^^^^^^^^^^ ° ° ° °    °                 °°^^^^^^^^^^^°          
                              x xxx              x x xxxx x xxx x                                 xxxxxxxxxxx           
                                                                                                                

In [21]:
# Germline reference for the predicted humanization: the result of
# find_merged_human_germline() on `predicted`, renumbered with 'kabat'.
predicted_germline = predicted.find_merged_human_germline().renumber('kabat')

# Row order matches the legend printed below: parental, humanized (here the
# predicted sequence), germline.
aligned = parental.align(predicted, predicted_germline)

print(
    'Parental',
    '↓↓↓↓↓',
    'Humanized',
    '↓↓↓↓↓',
    'Germline',
    sep='\n',
)
print()

print(aligned)

# Build both marker rows in a single pass over the alignment.
backmutation_row = []
vernier_row = []
for position, (parental_aa, humanized_aa, germline_aa) in aligned:
    # Flag positions where the humanized residue differs from the germline
    # residue but equals the parental residue.
    # NOTE(review): this test has no CDR filter, unlike the
    # evaluate_back_mutation_overlap call later in this notebook, which passes
    # a pos_filter excluding CDR positions — confirm CDR positions are meant
    # to be marked here.
    differs_from_germline = humanized_aa != germline_aa
    matches_parental = humanized_aa == parental_aa
    is_backmutated = differs_from_germline and matches_parental
    backmutation_row.append('x' if is_backmutated else ' ')
    # Vernier membership is only queried for flagged positions.
    vernier_row.append('*' if is_backmutated and position.is_in_vernier() else ' ')

print(''.join(backmutation_row))
print(''.join(vernier_row))
print('x Back-mutated anywhere')
print('* Back-mutated Vernier')
print()

Parental
↓↓↓↓↓
Humanized
↓↓↓↓↓
Germline

EVQLQQSGPELVKPGASVKMSCKASGYTFTDYYMKWVKQSHGKSLEWIGDIIPSNGATFYNQKFKGKATLTVDRSSSTAYMHLNSLTSEDSAVYYCTRSHLLRASWFAYWGQGTLVTVSA
+|||.|||.|+.|||||||+|||||||||||||||||+|+.|+.|||+||||||||||||||||||+.|||.|.|+|||||.||||||||+|||||.||||||||||||||||||||||+
QVQLVQSGAEVKKPGASVKVSCKASGYTFTDYYMKWVRQAPGQGLEWMGDIIPSNGATFYNQKFKGRVTLTRDTSTSTAYMELNSLTSEDTAVYYCARSHLLRASWFAYWGQGTLVTVSS
||||||||||||||||||||||||||||||.|||.||||||||||||||.|.||.|+|.|.|||+||||+||||||||.||||+||.|||||||||||+.........+|||||||||||
QVQLVQSGAEVKKPGASVKVSCKASGYTFTSYYMHWVRQAPGQGLEWMGIINPSGGSTSYAQKFQGRVTMTRDTSTSTVYMELSSLRSEDTAVYYCARAEYF-----QHWGQGTLVTVSS
 °                        °°°°^^^^^           °°°^^^^^^^^^^^^^^^^^ ° ° ° °    °                 °°^^^^^^^^^^^°          
                              x   x              x x  x x x x   x    x        x    x  x           xxxxxxxxxxx           
                                                                     *        *                                 

In [22]:
# Summarise back-mutation overlap for the three chains. The pos_filter
# callable returns True only for positions that are not in a CDR.
# NOTE(review): presumably the helper only counts positions accepted by
# pos_filter — confirm against bin.evaluation.
evaluate_back_mutation_overlap(
    baseline,
    parental,
    predicted,
    pos_filter=lambda position: not position.is_in_cdr(),
)

shared_both        0
shared_position    0
predicted_only     4
baseline_only      0
Name: Example, dtype: int64

In [23]:
# Vernier-specific variant of the back-mutation overlap summary; the displayed
# result is an int64 Series with the entries shared_both, shared_position,
# predicted_only and baseline_only.
# NOTE(review): presumably restricted to Vernier-zone positions, as the
# function name suggests — confirm against bin.evaluation.
evaluate_vernier_back_mutation_overlap(baseline, parental, predicted)

shared_both        0
shared_position    0
predicted_only     2
baseline_only      0
Name: Example, dtype: int64