In [1]:
import os
from abnumber import Chain, Position, Alignment
import pandas as pd

In [2]:
# Antibody numbering scheme that the OASis reports loaded below must use.
SCHEME = 'kabat'
# CDR definition, set to the same value as the numbering scheme.
CDR_DEFINITION = SCHEME
# Task data directory, given as a relative path.
TASK_DIR = '../../data/tasks/humab_25_pairs'

In [3]:
def read_chains(oasis_path):
    """Load heavy and light chains from an OASis Excel report.

    The report is read from its ``VH`` and ``VL`` sheets, using the first
    column of each sheet as the row index.

    :param oasis_path: path to the OASis XLSX file
    :return: tuple ``(vh_chains, vl_chains)`` as produced by
        ``Chain.from_dataframe(..., as_series=True)``
    :raises AssertionError: if the scheme in the first VH row differs from ``SCHEME``
    """
    sheets = ('VH', 'VL')
    tables = {
        sheet: pd.read_excel(oasis_path, sheet_name=sheet, index_col=0)
        for sheet in sheets
    }
    # Only the first row of the VH sheet is inspected for the numbering scheme.
    found_scheme = tables['VH']['scheme'].iloc[0]
    assert found_scheme == SCHEME, f'Expected OASis in {SCHEME}, got {found_scheme} in: {oasis_path}'
    vh_chains, vl_chains = (
        Chain.from_dataframe(tables[sheet], scheme=SCHEME, as_series=True)
        for sheet in sheets
    )
    return vh_chains, vl_chains

## Parental (murine) chains

In [4]:
# Parental chains are read from the OASis report stored under the task's "pairs" folder.
# (Plain string literal: the previous f-prefix had no placeholders to format.)
parental_heavy, parental_light = read_chains(os.path.join(TASK_DIR, 'pairs/parental_oasis.xlsx'))

In [5]:
# Print the first parental heavy chain, then show its straight CDR graft as the cell output.
# parental_heavy is built with as_series=True, so select by position explicitly with .iloc:
# a bare [0] relies on pandas' deprecated positional fallback when the index is not integer-based.
print(parental_heavy.iloc[0])
parental_heavy.iloc[0].graft_cdrs_onto_human_germline()

EVKLQQSGPGLVTPSQSLSITCTVSGFSLSDYGVHWVRQSPGQGLEWLGVIWAGGGTNYNSALMSRKSISKDNSKSQVFLKMNSLQADDTAVYYCARDKGYSYYYSMDYWGQGTSVTVSS


QVQLQESGPGLVKPSDTLSLTCTVSGGSISDYGVHWIRQPPGKGLEWIGVIWAGGGTNYNSALMSRVTISVDTSKNQFSLKLSSVTAADTAVYYCARDKGYSYYYSMDYWGQGTTVTVSS
 °                        °°°°^^^^^           °°°^^^^^^^^^^^^^^^^ ° ° ° °    °                 °°^^^^^^^^^^^^°          

In [6]:
# Print the first parental light chain, then show its straight CDR graft as the cell output.
# parental_light is built with as_series=True, so select by position explicitly with .iloc:
# a bare [0] relies on pandas' deprecated positional fallback when the index is not integer-based.
print(parental_light.iloc[0])
parental_light.iloc[0].graft_cdrs_onto_human_germline()

DIETLQSPASLAVSLGQRATISCRASESVEYYVTSLMQWYQQKPGQPPKLLIFAASNVESGVPARFSGSGSGTNFSLNIHPVDEDDVAMYFCQQSRKYVPYTFGGGTKLEIK


DIVLTQSPASLAVSPGQRATITCRASESVEYYVTSLMQWYQQKPGQPPKLLIYAASNVESGVPARFSGSGSGTDFTLTINPVEANDTANYYCQQSRKYVPYTFGQGTKLEIK
 ° °                   ^^^^^^^^^^^^^^^°°         °°°°^^^^^^^       ° ° °° °                 ^^^^^^^^^^°         

## Straight CDR grafting

In [7]:
# Graft the CDRs of every parental chain onto a human germline (default settings),
# heavy chains first, then light chains; each result is a plain list.
straight_heavy, straight_light = (
    [chain.graft_cdrs_onto_human_germline() for chain in chains]
    for chains in (parental_heavy, parental_light)
)

## Vernier+CDR grafting

In [8]:
# Same grafting as the straight variant, but with backmutate_vernier=True,
# heavy chains first, then light chains; each result is a plain list.
vernier_heavy, vernier_light = (
    [chain.graft_cdrs_onto_human_germline(backmutate_vernier=True) for chain in chains]
    for chains in (parental_heavy, parental_light)
)

## Alignments

In [9]:
# Heavy chains: for each antibody print a header line, then the three-way alignment
# in the order parental, Vernier graft, straight graft.
heavy_triples = zip(parental_heavy, straight_heavy, vernier_heavy)
for parental, straight, vernier in heavy_triples:
    print(parental.name + ' (parental -> vernier -> straight):')
    alignment = parental.align(vernier, straight)
    print(alignment)

AntiCD28 (parental -> vernier -> straight):
EVKLQQSGPGLVTPSQSLSITCTVSGFSLSDYGVHWVRQSPGQGLEWLGVIWAGGGTNYNSALMSRKSISKDNSKSQVFLKMNSLQADDTAVYYCARDKGYSYYYSMDYWGQGTSVTVSS
+|+||+||||||.||.+||+||||||||||||||||+||.||+||||||||||||||||||||||||+|||||||+||.||++|+.|.||||||||||||||||||||||||||+|||||
QVQLQESGPGLVKPSDTLSLTCTVSGFSLSDYGVHWIRQPPGKGLEWLGVIWAGGGTNYNSALMSRKTISKDNSKNQVSLKLSSVTAADTAVYYCARDKGYSYYYSMDYWGQGTTVTVSS
||||||||||||||||||||||||||.|+||||||||||||||||||+||||||||||||||||||.|||.|.||||.||||||||||||||||||||||||||||||||||||||||||
QVQLQESGPGLVKPSDTLSLTCTVSGGSISDYGVHWIRQPPGKGLEWIGVIWAGGGTNYNSALMSRVTISVDTSKNQFSLKLSSVTAADTAVYYCARDKGYSYYYSMDYWGQGTTVTVSS
 °                        °°°°^^^^^           °°°^^^^^^^^^^^^^^^^ ° ° ° °    °                 °°^^^^^^^^^^^^°          
Bevacizumab (parental -> vernier -> straight):
EIQLVQSGPELKQPGETVRISCKASGYTFTNYGMNWVKQAPGKGLKWMGWINTYTGEPTYAADFKRRFTFSLETSASTAYLQISNLKNDDTATYFCAKYPHYYGSSHWYFDVWGAGTTVTVSS
+|||||||.|||+||.+|++|||||||||||||||||+||||+||+|||||||||||||

In [10]:
# Light chains: for each antibody print a header line, then the three-way alignment
# in the order parental, Vernier graft, straight graft.
light_triples = zip(parental_light, straight_light, vernier_light)
for parental, straight, vernier in light_triples:
    print(parental.name + ' (parental -> vernier -> straight):')
    alignment = parental.align(vernier, straight)
    print(alignment)

AntiCD28 (parental -> vernier -> straight):
DIETLQSPASLAVSLGQRATISCRASESVEYYVTSLMQWYQQKPGQPPKLLIFAASNVESGVPARFSGSGSGTNFSLNIHPVDEDDVAMYFCQQSRKYVPYTFGGGTKLEIK
||.|.|||||||||.||||||+|||||||||||||||||||||||||||||||||||||||||||||||||||+|+|.|+||+.+|.|.|+|||||||||||||.|||||||
DIVTTQSPASLAVSPGQRATITCRASESVEYYVTSLMQWYQQKPGQPPKLLIFAASNVESGVPARFSGSGSGTDFTLTINPVEANDTANYYCQQSRKYVPYTFGQGTKLEIK
|||.||||||||||||||||||||||||||||||||||||||||||||||||+|||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
DIVLTQSPASLAVSPGQRATITCRASESVEYYVTSLMQWYQQKPGQPPKLLIYAASNVESGVPARFSGSGSGTDFTLTINPVEANDTANYYCQQSRKYVPYTFGQGTKLEIK
 ° °                   ^^^^^^^^^^^^^^^°°         °°°°^^^^^^^       ° ° °° °                 ^^^^^^^^^^°         
Bevacizumab (parental -> vernier -> straight):
DIQMTQTTSSLSASLGDRVIISCSASQDISNYLNWYQQKPDGTVKVLIYFTSSLHSGVPSRFSGSGSGTDYSLTISNLEPEDIATYYCQQYSTVPWTFGGGTKLEIK
||||||+.||||||+||||.|+||||||||||||||||||....|||||||||||||||||||||||||||+.|||+|+||||||||||||||||||||.|||+|||
DIQMTQSPSSLSASV

## Save results

In [31]:
def save_pairs(heavy_chains, light_chains, path):
    """Write paired heavy/light chains to one FASTA file, interleaved as VH, VL, VH, VL, ...

    :param heavy_chains: heavy chains, one per antibody
    :param light_chains: light chains, same length and order as ``heavy_chains``
    :param path: output FASTA path; missing parent directories are created
    :raises AssertionError: if the two collections differ in length
    """
    assert len(heavy_chains) == len(light_chains), \
        f'Got {len(heavy_chains)} heavy chains but {len(light_chains)} light chains'
    # open(..., 'w') does not create directories, so make sure the target folder
    # exists first (e.g. a not-yet-created cdr_grafts/custom/ on a fresh checkout).
    out_dir = os.path.dirname(path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    with open(path, 'w') as f:
        for heavy, light in zip(heavy_chains, light_chains):
            Chain.to_fasta(heavy, f, description='VH')
            Chain.to_fasta(light, f, description='VL')

In [32]:
# Straight grafts go to a single paired FASTA under the task's cdr_grafts folder.
straight_fasta_path = os.path.join(TASK_DIR, 'cdr_grafts/straight_grafts.fa')
save_pairs(straight_heavy, straight_light, straight_fasta_path)

In [33]:
# Vernier grafts go to a single paired FASTA under the task's cdr_grafts folder.
vernier_fasta_path = os.path.join(TASK_DIR, 'cdr_grafts/vernier_grafts.fa')
save_pairs(vernier_heavy, vernier_light, vernier_fasta_path)

In [34]:
%%bash
# Build the OASis XLSX reports for the straight and Vernier grafts via the project Makefile.
# Abort on the first failing command: the cell's status is that of the last command,
# so without this a failed `cd` or a failed first `make` would go unnoticed.
set -e

cd ../../
make data/tasks/humab_25_pairs/cdr_grafts/straight_grafts_oasis.xlsx
make data/tasks/humab_25_pairs/cdr_grafts/vernier_grafts_oasis.xlsx

source $(conda info --base)/bin/activate biophi && biophi oasis \
        data/tasks/humab_25_pairs/cdr_grafts/straight_grafts.fa \
        --output data/tasks/humab_25_pairs/cdr_grafts/straight_grafts_oasis.xlsx \
        --oas-db sqlite:////Users/prihodad/Documents/bioinformatics/projects/biophi/work/oas_human_subject_9mers_2019_11.db \
        --proteome-db sqlite:////Users/prihodad/Documents/bioinformatics/projects/biophi/work/uniprot_human_proteome_9mers.db
source $(conda info --base)/bin/activate biophi && biophi oasis \
        data/tasks/humab_25_pairs/cdr_grafts/vernier_grafts.fa \
        --output data/tasks/humab_25_pairs/cdr_grafts/vernier_grafts_oasis.xlsx \
        --oas-db sqlite:////Users/prihodad/Documents/bioinformatics/projects/biophi/work/oas_human_subject_9mers_2019_11.db \
        --proteome-db sqlite:////Users/prihodad/Documents/bioinformatics/projects/biophi/work/uniprot_human_proteome_9mers.db


      __     ____  _       ____  _     _      ___    _   ____  _
  /| /  \   | __ )(_) ___ |  _ \| |__ (_)    / _ \  / \ / ___|(_)___
 ( || [] )  |  _ \| |/ _ \| |_) | '_ \| |   | | | |/ _ \\___ \| / __|
  \_   _/   | |_) | | (_) |  __/| | | | |   | |_| / ___ \___| | \__ \
    | |     |____/|_|\___/|_|   |_| |_|_|    \___/_/   \_\___/|_|___/
    |_|                       version 1.0                  version 1.0
────────────────────────────────────────────────────────────────────────

Settings:
- OAS database: sqlite:////Users/prihodad/Documents/bioinformatics/projects/biophi/work/oas_human_subject_9mers_2019_11.db
- Proteome database: sqlite:////Users/prihodad/Documents/bioinformatics/projects/biophi/work/uniprot_human_proteome_9mers.db

Loading chains: data/tasks/humab_25_pairs/cdr_grafts/straight_grafts.fa
Running OASis on 25 antibodies...
100%|██████████| 25/25 [00:53<00:00,  2.13s/it]
Saved XLSX report to: data/tasks/humab_25_pairs/cdr_grafts/straight_grafts_oasis.xlsx
      __    

In [35]:
%%bash
# Build the T20 score tables for the straight and Vernier grafts via the project Makefile.
# NOTE: per the tool's own message in the output, sequences are sent to the lakepharma T20 service.
# Abort on the first failing command: the cell's status is that of the last command,
# so without this a failed `cd` or a failed first `make` would go unnoticed.
set -e

cd ../../
make data/tasks/humab_25_pairs/cdr_grafts/straight_grafts_T20.tsv
make data/tasks/humab_25_pairs/cdr_grafts/vernier_grafts_T20.tsv

bin/humanness_t20_score.py data/tasks/humab_25_pairs/cdr_grafts/straight_grafts.fa data/tasks/humab_25_pairs/cdr_grafts/straight_grafts_T20.tsv
Note: The sequences will be processed through lakepharma T20 service! Sleeping for 10s, press Ctrl+C to cancel...
Processing...
Saved to: data/tasks/humab_25_pairs/cdr_grafts/straight_grafts_T20.tsv
bin/humanness_t20_score.py data/tasks/humab_25_pairs/cdr_grafts/vernier_grafts.fa data/tasks/humab_25_pairs/cdr_grafts/vernier_grafts_T20.tsv
Note: The sequences will be processed through lakepharma T20 service! Sleeping for 10s, press Ctrl+C to cancel...
Processing...
Saved to: data/tasks/humab_25_pairs/cdr_grafts/vernier_grafts_T20.tsv


50it [02:22,  2.84s/it]
50it [02:24,  2.89s/it]


# Custom germlines

In [38]:
# Disabled cell: needs ../../work/TheraSAbDab_Germlines_Included.csv; the counts shown below come from an earlier run.
#pairs = pd.read_csv('../../work/TheraSAbDab_Germlines_Included.csv')
#pairs = pairs[['Heavy Germlines','Light Germlines']].apply(lambda col: col.apply(lambda v: v.split('D-')[0].split('-')[0].split('/')[0]))
#pairs.apply(tuple, axis=1).value_counts()[:15]

(IGHV3, IGKV1)    127
(IGHV1, IGKV1)    101
(IGHV3, IGKV3)     54
(IGHV1, IGKV3)     40
(IGHV4, IGKV1)     28
(IGHV3, IGKV2)     26
(IGHV1, IGKV4)     24
(IGHV1, IGKV2)     23
(IGHV4, IGKV3)     20
(IGHV3, IGLV1)     20
(IGHV3, IGLV3)     10
(IGHV7, IGKV1)      9
(IGHV2, IGKV1)      8
(IGHV3, IGKV4)      8
(IGHV1, IGLV1)      7
dtype: int64

In [28]:
#pairs.apply(tuple, axis=1).value_counts()[:10].index

Index([('IGHV3', 'IGKV1'), ('IGHV1', 'IGKV1'), ('IGHV3', 'IGKV3'),
       ('IGHV1', 'IGKV3'), ('IGHV4', 'IGKV1'), ('IGHV3', 'IGKV2'),
       ('IGHV1', 'IGKV4'), ('IGHV1', 'IGKV2'), ('IGHV4', 'IGKV3'),
       ('IGHV3', 'IGLV1')],
      dtype='object')

In [39]:
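# The subtracted 11 is presumably 20 - 9: swapping the top-10 pair (IGHV3, IGLV1) (count 20)
# for (IGHV7, IGKV1) (count 9), so the ratio is the share of antibodies covered by GENE_PAIRS.
#(pairs.apply(tuple, axis=1).value_counts()[:10].sum()-11) / len(pairs)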

0.7635135135135135

In [14]:
# First Vernier-grafted heavy chain, bound to a scratch name.
# NOTE(review): `c` is not read by any later cell visible here (the comprehensions
# further down bind their own local `c`) — looks like a leftover; confirm before removing.
c = vernier_heavy[0]

In [11]:
# Heavy-chain V gene family names IGHV1 through IGHV7.
VH_GERMLINES = [f'IGHV{family}' for family in range(1, 8)]

In [12]:
# Kappa light-chain V gene family names IGKV1 through IGKV7.
VK_GERMLINES = [f'IGKV{family}' for family in range(1, 8)]

In [13]:
# Lambda light-chain V gene family names IGLV1 through IGLV7.
VL_GERMLINES = [f'IGLV{family}' for family in range(1, 8)]

In [40]:
# Ten (heavy family, kappa family) combinations used for the custom-germline grafts.
# Each (h, k) number pair expands to ('IGHV<h>', 'IGKV<k>'); list order is preserved.
_FAMILY_NUMBERS = [
    (3, 1), (1, 1), (3, 3), (1, 3), (4, 1),
    (3, 2), (1, 4), (1, 2), (4, 3), (7, 1),
]
GENE_PAIRS = [(f'IGHV{heavy_no}', f'IGKV{kappa_no}') for heavy_no, kappa_no in _FAMILY_NUMBERS]

In [41]:
# For every germline-family combination, graft all parental chains onto the requested
# V gene (with Vernier backmutation) and write one paired FASTA per combination.
for heavy_gene, light_gene in GENE_PAIRS:
    custom_fasta_path = os.path.join(
        TASK_DIR, f'cdr_grafts/custom/{heavy_gene}_{light_gene}_vernier_grafts.fa'
    )
    custom_vernier_heavy = [
        chain.graft_cdrs_onto_human_germline(v_gene=heavy_gene, backmutate_vernier=True)
        for chain in parental_heavy
    ]
    custom_vernier_light = [
        chain.graft_cdrs_onto_human_germline(v_gene=light_gene, backmutate_vernier=True)
        for chain in parental_light
    ]
    save_pairs(custom_vernier_heavy, custom_vernier_light, custom_fasta_path)

In [43]:
%%bash
# Run the Makefile's OASis report target for every custom-germline graft FASTA.
# Abort on the first failing command instead of reporting only the last one's status.
set -e

cd ../../

for file in data/tasks/humab_25_pairs/cdr_grafts/custom/*.fa; do
    # An unmatched glob is left as the literal pattern; skip it rather than pass it to make.
    [ -e "$file" ] || continue
    # Strip only the trailing .fa; ${file/.fa/...} would replace the first ".fa" anywhere in the path.
    make "${file%.fa}_oasis.xlsx"
done

source $(conda info --base)/bin/activate biophi && biophi oasis \
        data/tasks/humab_25_pairs/cdr_grafts/custom/IGHV1_IGKV1_vernier_grafts.fa \
        --output data/tasks/humab_25_pairs/cdr_grafts/custom/IGHV1_IGKV1_vernier_grafts_oasis.xlsx \
        --oas-db sqlite:////Users/prihodad/Documents/bioinformatics/projects/biophi/work/oas_human_subject_9mers_2019_11.db \
        --proteome-db sqlite:////Users/prihodad/Documents/bioinformatics/projects/biophi/work/uniprot_human_proteome_9mers.db
source $(conda info --base)/bin/activate biophi && biophi oasis \
        data/tasks/humab_25_pairs/cdr_grafts/custom/IGHV1_IGKV2_vernier_grafts.fa \
        --output data/tasks/humab_25_pairs/cdr_grafts/custom/IGHV1_IGKV2_vernier_grafts_oasis.xlsx \
        --oas-db sqlite:////Users/prihodad/Documents/bioinformatics/projects/biophi/work/oas_human_subject_9mers_2019_11.db \
        --proteome-db sqlite:////Users/prihodad/Documents/bioinformatics/projects/biophi/work/uniprot_human_proteome_9me

      __     ____  _       ____  _     _      ___    _   ____  _
  /| /  \   | __ )(_) ___ |  _ \| |__ (_)    / _ \  / \ / ___|(_)___
 ( || [] )  |  _ \| |/ _ \| |_) | '_ \| |   | | | |/ _ \\___ \| / __|
  \_   _/   | |_) | | (_) |  __/| | | | |   | |_| / ___ \___| | \__ \
    | |     |____/|_|\___/|_|   |_| |_|_|    \___/_/   \_\___/|_|___/
    |_|                       version 1.0                  version 1.0
────────────────────────────────────────────────────────────────────────

Settings:
- OAS database: sqlite:////Users/prihodad/Documents/bioinformatics/projects/biophi/work/oas_human_subject_9mers_2019_11.db
- Proteome database: sqlite:////Users/prihodad/Documents/bioinformatics/projects/biophi/work/uniprot_human_proteome_9mers.db

Loading chains: data/tasks/humab_25_pairs/cdr_grafts/custom/IGHV1_IGKV1_vernier_grafts.fa
Running OASis on 25 antibodies...
100%|██████████| 25/25 [00:21<00:00,  1.17it/s]
Saved XLSX report to: data/tasks/humab_25_pairs/cdr_grafts/custom/IGHV1_IGKV1_ve