In [1]:
import os
from abnumber import Chain, Position, Alignment
import pandas as pd

In [2]:
# Antibody numbering scheme expected in the OASis tables; read_chains() asserts
# on it and forwards it to Chain.from_dataframe.
SCHEME = 'kabat'
# CDR definition label.
# NOTE(review): not referenced by any cell visible in this notebook — confirm
# whether it should be passed on to abnumber or can be removed.
CDR_DEFINITION = 'kabat'
# Directory with the task's input and output files (relative path).
TASK_DIR = '../../data/tasks/therapeutic_rediscovery'

In [3]:
def read_chains(oasis_path):
    """Load heavy and light chains from an OASis Excel export.

    Reads the 'VH' and 'VL' sheets (first column used as the index), verifies
    that every row of *both* sheets is numbered in ``SCHEME`` and converts each
    sheet to chains with ``Chain.from_dataframe(..., as_series=True)``.

    :param oasis_path: path to the OASis .xlsx file
    :return: tuple ``(vh_chains, vl_chains)``
    :raises AssertionError: if a sheet is empty or contains a scheme other than ``SCHEME``
    """
    chains_by_sheet = {}
    for sheet_name in ('VH', 'VL'):
        table = pd.read_excel(oasis_path, sheet_name=sheet_name, index_col=0)
        # Check all rows of this sheet, not just the first VH row: chains numbered
        # in another scheme would otherwise be parsed with the wrong positions.
        found_schemes = table['scheme'].unique().tolist()
        assert found_schemes == [SCHEME], (
            f'Expected OASis in {SCHEME}, got {found_schemes} '
            f'in sheet {sheet_name} of: {oasis_path}'
        )
        chains_by_sheet[sheet_name] = Chain.from_dataframe(table, scheme=SCHEME, as_series=True)
    return chains_by_sheet['VH'], chains_by_sheet['VL']

## Parental (murine) chains

In [4]:
# Load the parental VH/VL chains from their OASis export.
# (Plain string literal: the path has no placeholders, so no f-prefix is needed.)
parental_heavy, parental_light = read_chains(os.path.join(TASK_DIR, 'oas_cdr_hits/parental_oasis.xlsx'))

In [5]:
# Use .iloc for positional access: the chains are held in a label-indexed
# pandas Series, where a bare [0] relies on the deprecated positional fallback.
first_parental_heavy = parental_heavy.iloc[0]
print(first_parental_heavy)
# Graft onto the automatically selected human germline and display the result.
first_parental_heavy.graft_cdrs_onto_human_germline()

QVQLQQSGAELAKPGASVKLSCKASGYTFTSFWMHWVKQRPGQGLEWIGYINPRSGYTEYNEIFRDKATLTADKSSSTAYMQLSSLTSEDSAVYYCARFLGRGAMDYWGQGTSVTVSS


QVQLVQSGAEVKKPGASVKVSCKASGYTFTSFWMHWVRQAPGQGLEWMGYINPRSGYTEYNEIFRDRVTMTRDTSTSTVYMELSSLRSEDTAVYYCARFLGRGAMDYWGQGTMVTVSS
 °                        °°°°^^^^^           °°°^^^^^^^^^^^^^^^^^ ° ° ° °    °                 °°^^^^^^^^^°          

In [6]:
# Use .iloc for positional access: the chains are held in a label-indexed
# pandas Series, where a bare [0] relies on the deprecated positional fallback.
first_parental_light = parental_light.iloc[0]
print(first_parental_light)
# Graft onto the automatically selected human germline and display the result.
first_parental_light.graft_cdrs_onto_human_germline()

DIQMTQTTSSLSASLGDRVTISCRASQDISNYLAWYQQKPDGTIKLLIYYTSKIHSGVPSRFSGSGSGTDYSLTISNLEQEDIATYFCQQGNTFPYTFGGGTKLEIK


DIQMTQSPSSLSASVGDRVTITCRASQDISNYLAWYQQKPGKVPKLLIYYTSKIHSGVPSRFSGSGSGTDFTLTISSLQPEDVATYYCQQGNTFPYTFGQGTKLEIK
 ° °                   ^^^^^^^^^^^°°         °°°°^^^^^^^       ° ° °° °                 ^^^^^^^^^°         

## Experimentally humanized sequences

In [7]:
# Load the experimentally humanized VH/VL chains from their OASis export.
# (Plain string literal: the path has no placeholders, so no f-prefix is needed.)
experimental_heavy, experimental_light = read_chains(os.path.join(TASK_DIR, 'thera/humanized_oasis.xlsx'))

In [44]:
def get_germline_names(chain):
    """Return the gene names of the chain's top human V and J germline hits.

    Asks the chain for a single best V and J germline and drops everything
    from the first ``*`` onwards in each hit's name (e.g. ``IGHV1-46*01``
    becomes ``IGHV1-46``).

    :param chain: object providing ``find_human_germlines(limit)``
    :return: tuple ``(v_gene_name, j_gene_name)``
    """
    top_v_hits, top_j_hits = chain.find_human_germlines(1)
    v_gene_name, _, _ = top_v_hits[0].name.partition('*')
    j_gene_name, _, _ = top_j_hits[0].name.partition('*')
    return v_gene_name, j_gene_name

In [49]:
# For every experimentally humanized heavy chain, record the (V gene, J gene)
# names returned by get_germline_names; the result keeps the chains' index.
genes_heavy = experimental_heavy.apply(get_germline_names)
# Show the first few entries as a sanity check.
genes_heavy.head()

Abituzumab           (IGHV1-46, IGHJ3)
Afutuzumab           (IGHV1-69, IGHJ4)
Alacizumab            (IGHV3-7, IGHJ3)
Alemtuzumab        (IGHV4-30-4, IGHJ4)
Anrukinzumab    (IGHV3/OR16-10, IGHJ4)
dtype: object

In [50]:
# For every experimentally humanized light chain, record the (V gene, J gene)
# names returned by get_germline_names; the result keeps the chains' index.
genes_light = experimental_light.apply(get_germline_names)
# Show the first few entries as a sanity check.
genes_light.head()

Abituzumab      (IGKV1-33, IGKJ1)
Afutuzumab      (IGKV2-28, IGKJ4)
Alacizumab      (IGKV1-16, IGKJ1)
Alemtuzumab     (IGKV1-33, IGKJ1)
Anrukinzumab    (IGKV1-39, IGKJ1)
dtype: object

In [51]:
# Relative frequency of each (V gene, J gene) pair among the heavy chains;
# display the ten most frequent pairs formatted as whole percentages.
heavy_pair_share = genes_heavy.value_counts(normalize=True)
heavy_top_pairs = heavy_pair_share.head(10)
heavy_top_pairs.apply('{:.0%}'.format)

(IGHV1-46, IGHJ4)     11%
(IGHV3-23, IGHJ4)     10%
(IGHV1-2, IGHJ4)       7%
(IGHV1-69, IGHJ4)      7%
(IGHV1-3, IGHJ4)       4%
(IGHV3-66, IGHJ4)      3%
(IGHV7-4-1, IGHJ4)     2%
(IGHV4-4, IGHJ4)       2%
(IGHV3-74, IGHJ4)      2%
(IGHV1-18, IGHJ4)      2%
dtype: object

In [52]:
# Relative frequency of each (V gene, J gene) pair among the light chains;
# display the ten most frequent pairs formatted as whole percentages.
light_pair_share = genes_light.value_counts(normalize=True)
light_top_pairs = light_pair_share.head(10)
light_top_pairs.apply('{:.0%}'.format)

(IGKV1-39, IGKJ1)    20%
(IGKV1-33, IGKJ1)    10%
(IGKV1-39, IGKJ4)     8%
(IGKV4-1, IGKJ2)      5%
(IGKV1-39, IGKJ2)     4%
(IGKV4-1, IGKJ1)      4%
(IGKV2-28, IGKJ1)     3%
(IGKV3-11, IGKJ4)     2%
(IGKV1-16, IGKJ1)     2%
(IGKV3-11, IGKJ2)     2%
dtype: object

## Straight CDR grafting

In [53]:
# Straight CDR grafts onto the germline genes looked up by chain name in
# genes_heavy / genes_light (derived from the experimentally humanized chains).
manual_straight_heavy = []
for heavy_chain in parental_heavy:
    v_gene, j_gene = genes_heavy.loc[heavy_chain.name]
    manual_straight_heavy.append(heavy_chain.graft_cdrs_onto_human_germline(v_gene, j_gene))

manual_straight_light = []
for light_chain in parental_light:
    v_gene, j_gene = genes_light.loc[light_chain.name]
    manual_straight_light.append(light_chain.graft_cdrs_onto_human_germline(v_gene, j_gene))

In [12]:
# Straight CDR grafts with no germline genes specified (abnumber picks them);
# heavy chains are processed first, then light chains.
automatic_straight_heavy, automatic_straight_light = (
    [chain.graft_cdrs_onto_human_germline() for chain in chains]
    for chains in (parental_heavy, parental_light)
)

## Vernier+CDR grafting

In [54]:
def _graft_with_vernier(chain, genes):
    """Graft the chain's CDRs onto the germline genes stored under its name, with Vernier back-mutation enabled."""
    v_gene, j_gene = genes.loc[chain.name]
    return chain.graft_cdrs_onto_human_germline(v_gene, j_gene, backmutate_vernier=True)


manual_vernier_heavy = [_graft_with_vernier(chain, genes_heavy) for chain in parental_heavy]
manual_vernier_light = [_graft_with_vernier(chain, genes_light) for chain in parental_light]

In [24]:
# CDR grafts with Vernier back-mutation and no germline genes specified.
automatic_vernier_heavy = []
for heavy_chain in parental_heavy:
    automatic_vernier_heavy.append(heavy_chain.graft_cdrs_onto_human_germline(backmutate_vernier=True))

automatic_vernier_light = []
for light_chain in parental_light:
    automatic_vernier_light.append(light_chain.graft_cdrs_onto_human_germline(backmutate_vernier=True))

## Alignments

In [55]:
# Number of leading chains taken from each collection in the alignment
# printouts below (used as the slice bound).
LIMIT = 10

In [56]:
# Preview the first LIMIT heavy chains: each parental chain is aligned with its
# vernier graft and then its straight graft, matching the order in the header.
heavy_preview = zip(
    parental_heavy[:LIMIT],
    manual_straight_heavy[:LIMIT],
    manual_vernier_heavy[:LIMIT],
)
for parental, straight, vernier in heavy_preview:
    print(f'{parental.name} (parental -> vernier -> straight):')
    heavy_alignment = parental.align(vernier, straight)
    print(heavy_alignment)

Abituzumab (parental -> vernier -> straight):
QVQLQQSGAELAKPGASVKLSCKASGYTFTSFWMHWVKQRPGQGLEWIGYINPRSGYTEYNEIFRDKATLTADKSSSTAYMQLSSLTSEDSAVYYCARFLGRGAMDYWGQGTSVTVSS
||||.|||||+.|||||||+|||||||||||||||||+|.||||||||||||||||||||||||||+||||||||+|||||+||||.|||+|||||||||||||||||||||.|||||
QVQLVQSGAEVKKPGASVKVSCKASGYTFTSFWMHWVRQAPGQGLEWIGYINPRSGYTEYNEIFRDRATLTADKSTSTAYMELSSLRSEDTAVYYCARFLGRGAMDYWGQGTMVTVSS
|||||||||||||||||||||||||||||||||||||||||||||||+|||||||||||||||||||.|+|.|.||||.|||||||||||||||||||||||||||||||||||||||
QVQLVQSGAEVKKPGASVKVSCKASGYTFTSFWMHWVRQAPGQGLEWMGYINPRSGYTEYNEIFRDRVTMTRDTSTSTVYMELSSLRSEDTAVYYCARFLGRGAMDYWGQGTMVTVSS
 °                        °°°°^^^^^           °°°^^^^^^^^^^^^^^^^^ ° ° ° °    °                 °°^^^^^^^^^°          
Afutuzumab (parental -> vernier -> straight):
QVQLQQSGPELVKPGASVKISCKASGYAFSYSWINWVKQRPGQGLEWIGRIFPGDGDTDYNGKFKGKATLTADKSSSTAYMHLSSLTSVDSAVYFCARNVFDGYWLVYWGQGTLVTVSA
||||.|||.|+.|||+|||+|||||||||||||||||+|.||||||||||||||||||||||||||+|||||||

In [57]:
# Preview the first LIMIT light chains: each parental chain is aligned with its
# vernier graft and then its straight graft, matching the order in the header.
light_preview = zip(
    parental_light[:LIMIT],
    manual_straight_light[:LIMIT],
    manual_vernier_light[:LIMIT],
)
for parental, straight, vernier in light_preview:
    print(f'{parental.name} (parental -> vernier -> straight):')
    light_alignment = parental.align(vernier, straight)
    print(light_alignment)

Abituzumab (parental -> vernier -> straight):
DIQMTQTTSSLSASLGDRVTISCRASQDISNYLAWYQQKPDGTIKLLIYYTSKIHSGVPSRFSGSGSGTDYSLTISNLEQEDIATYFCQQGNTFPYTFGGGTKLEIK
||||||+.||||||+||||||+||||||||||||||||||....|||||||||||||||||||||||||||+.|||+|+.||||||+||||||||||||.|||+|||
DIQMTQSPSSLSASVGDRVTITCRASQDISNYLAWYQQKPGKAPKLLIYYTSKIHSGVPSRFSGSGSGTDYTFTISSLQPEDIATYYCQQGNTFPYTFGQGTKVEIK
||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||+||||||||||||||||||||||||||||||||||||
DIQMTQSPSSLSASVGDRVTITCRASQDISNYLAWYQQKPGKAPKLLIYYTSKIHSGVPSRFSGSGSGTDFTFTISSLQPEDIATYYCQQGNTFPYTFGQGTKVEIK
 ° °                   ^^^^^^^^^^^°°         °°°°^^^^^^^       ° ° °° °                 ^^^^^^^^^°         
Afutuzumab (parental -> vernier -> straight):
DIVMTQAAFSNPVTLGTSASISCRSSKSLLHSNGITYLYWYLQKPGQSPQLLIYQMSNLVSGVPDRFSGSGSGTDFTLRISRVEAEDVGVYYCAQNLELPYTFGGGTKLEIK
||||||+..|.|||.|..||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||+|||||||||||||||||||||||||||||+|||
DIVMTQSPLSLPVTPGEPASISCRSSKSLLHSNG

## Save results

In [19]:
def save_pairs(heavy_chains, light_chains, path):
    """Write paired heavy/light chains to a single FASTA file.

    Records are interleaved pair by pair: the heavy chain (description 'VH')
    followed by its light chain (description 'VL').

    :param heavy_chains: sized collection of heavy chains
    :param light_chains: sized collection of light chains, paired with
        ``heavy_chains`` by position
    :param path: output FASTA path; missing parent directories are created
    :raises AssertionError: if the two collections differ in length
    """
    assert len(heavy_chains) == len(light_chains), (
        f'Cannot pair {len(heavy_chains)} heavy chains with {len(light_chains)} light chains'
    )
    # Create the output folder up front so that writing into a directory that
    # does not exist yet does not fail inside open().
    output_dir = os.path.dirname(path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    with open(path, 'w') as f:
        for heavy, light in zip(heavy_chains, light_chains):
            Chain.to_fasta(heavy, f, description='VH')
            Chain.to_fasta(light, f, description='VL')

### Manual (matched) pairs

In [58]:
# Write the manually matched straight grafts as VH/VL pairs.
manual_straight_path = os.path.join(TASK_DIR, 'cdr_grafts_manual/straight_grafts.fa')
save_pairs(manual_straight_heavy, manual_straight_light, manual_straight_path)

In [59]:
# Write the manually matched vernier grafts as VH/VL pairs.
manual_vernier_path = os.path.join(TASK_DIR, 'cdr_grafts_manual/vernier_grafts.fa')
save_pairs(manual_vernier_heavy, manual_vernier_light, manual_vernier_path)

In [60]:
!cd ../../; make data/tasks/therapeutic_rediscovery/cdr_grafts_manual/straight_grafts_oasis.xlsx 
!cd ../../; make data/tasks/therapeutic_rediscovery/cdr_grafts_manual/vernier_grafts_oasis.xlsx

source $(conda info --base)/bin/activate biophi && biophi oasis \
        data/tasks/therapeutic_rediscovery/cdr_grafts_matched/straight_grafts.fa \
        --output data/tasks/therapeutic_rediscovery/cdr_grafts_matched/straight_grafts_oasis.xlsx \
        --oas-db sqlite:///../biophi/work/oas_clean_human_subject_9mers_2019_11.db \
        --proteome-db sqlite:///../biophi/work/uniprot_human_proteome_9mers.db
      __     ____  _       ____  _     _      ___    _   ____  _
  /| /  \   | __ )(_) ___ |  _ \| |__ (_)    / _ \  / \ / ___|(_)___
 ( || [] )  |  _ \| |/ _ \| |_) | '_ \| |   | | | |/ _ \\___ \| / __|
  \_   _/   | |_) | | (_) |  __/| | | | |   | |_| / ___ \___| | \__ \
    | |     |____/|_|\___/|_|   |_| |_|_|    \___/_/   \_\___/|_|___/
    |_|                       version 1.0                  version 1.0
────────────────────────────────────────────────────────────────────────

Settings:
- OAS database: sqlite:///../biophi/work/oas_clean_human_subject_9mers_2019_11.db
- Prot

In [61]:
!cd ../../; make data/tasks/therapeutic_rediscovery/cdr_grafts_manual/straight_grafts_T20.tsv
!cd ../../; make data/tasks/therapeutic_rediscovery/cdr_grafts_manual/vernier_grafts_T20.tsv

bin/humanness_t20_score.py data/tasks/therapeutic_rediscovery/cdr_grafts_matched/straight_grafts.fa data/tasks/therapeutic_rediscovery/cdr_grafts_matched/straight_grafts_T20.tsv
Note: The sequences will be processed through lakepharma T20 service! Sleeping for 10s, press Ctrl+C to cancel...
Processing...
43it [02:23,  3.31s/it]HTTPSConnectionPool(host='dm.lakepharma.com', port=443): Max retries exceeded with url: /cgi-bin/blast.py?chain=vk&region=1&output=3&seqs=EIVLTQSPATLSLSPGERATLSCRASESVDSYGKSFMHWYQQKPGQAPRLLIYRASNLESGIPARFSGSGSGTDFTLTISSLEPEDFAVYYCQQSNEDPWTFGQGTKVEIK (Caused by SSLError(SSLEOFError(8, 'EOF occurred in violation of protocol (_ssl.c:1124)')))
Retry 1
304it [19:26,  3.84s/it]
Saved to: data/tasks/therapeutic_rediscovery/cdr_grafts_matched/straight_grafts_T20.tsv
bin/humanness_t20_score.py data/tasks/therapeutic_rediscovery/cdr_grafts_matched/vernier_grafts.fa data/tasks/therapeutic_rediscovery/cdr_grafts_matched/vernier_grafts_T20.tsv
Note: The sequences will be proc

### Automatic pairs

In [22]:
# Write the automatically matched straight grafts as VH/VL pairs.
automatic_straight_path = os.path.join(TASK_DIR, 'cdr_grafts_automatic/straight_grafts.fa')
save_pairs(automatic_straight_heavy, automatic_straight_light, automatic_straight_path)

In [25]:
# Write the automatically matched vernier grafts as VH/VL pairs.
automatic_vernier_path = os.path.join(TASK_DIR, 'cdr_grafts_automatic/vernier_grafts.fa')
save_pairs(automatic_vernier_heavy, automatic_vernier_light, automatic_vernier_path)

In [None]:
!cd ../../; make data/tasks/therapeutic_rediscovery/cdr_grafts_automatic/straight_grafts_oasis.xlsx 
!cd ../../; make data/tasks/therapeutic_rediscovery/cdr_grafts_automatic/vernier_grafts_oasis.xlsx

In [None]:
!cd ../../; make data/tasks/therapeutic_rediscovery/cdr_grafts_automatic/straight_grafts_T20.tsv
!cd ../../; make data/tasks/therapeutic_rediscovery/cdr_grafts_automatic/vernier_grafts_T20.tsv