In [14]:
import pandas as pd
import numpy as np
import os
from abnumber import Chain
from Bio import SeqIO
import requests
from io import StringIO
import time
from tqdm import tqdm
import seaborn as sns
from bin.utils import iterate_fasta
import re
import time

# Set the inline plotting backend's figure format to 'retina' (high-DPI output).
%config InlineBackend.figure_format = 'retina'

In [15]:
# Numbering scheme that read_chains() requires of the OASis tables and passes to Chain.from_dataframe.
SCHEME = 'kabat'
# Root directory (relative path) of this task's input and output files.
TASK_DIR = '../../data/tasks/humab_25_pairs'

In [25]:
# Load the Hu-mAb results spreadsheet of this task; its first column becomes the index.
# The path is derived from TASK_DIR (like the other task files in this notebook)
# so that relocating the task directory only requires changing one constant.
results = pd.read_excel(os.path.join(TASK_DIR, 'pairs', 'humab_results.xlsx'), index_col=0)
results.head()

Unnamed: 0_level_0,V Gene,Initial Score,Target Score,# Exp. Mutations,# Hu-mAb Mutations,Mutation Ratio,Overlap Ratio,Adjusted OR
Therapeutic,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
AntiCD28,kv4,0.05,0.805,19,11,0.58,0.64,0.73
Campath,kv1,0.724,0.842,14,3,0.21,0.67,0.67
Bevacizumab,kv1,0.017,0.899,16,9,0.56,0.89,1.0
Herceptin,kv1,0.032,0.776,22,8,0.36,0.88,0.88
Omalizumab,kv1,0.081,0.874,25,19,0.76,0.89,0.95


In [16]:
def read_chains(oasis_path):
    """Read the heavy- and light-chain tables of an OASis Excel file as chains.

    The 'VH' and 'VL' sheets of ``oasis_path`` are loaded with their first
    column as index. The ``scheme`` value in the first row of the VH sheet
    must equal the notebook-level ``SCHEME``; otherwise an AssertionError is
    raised. Both tables are then converted with ``Chain.from_dataframe``.

    Args:
        oasis_path: Path to the Excel file containing 'VH' and 'VL' sheets.

    Returns:
        Tuple ``(vh_chains, vl_chains)`` holding whatever
        ``Chain.from_dataframe(..., as_series=True)`` returns for each sheet
        (presumably a pandas Series of Chain objects; confirm against abnumber).
    """
    sheet_names = ('VH', 'VL')
    tables = {
        sheet: pd.read_excel(oasis_path, sheet_name=sheet, index_col=0)
        for sheet in sheet_names
    }
    # Only the VH sheet is inspected for the numbering scheme.
    found_scheme = tables['VH']['scheme'].iloc[0]
    assert found_scheme == SCHEME, f'Expected OASis in {SCHEME}, got {found_scheme} in: {oasis_path}'
    heavy_chains, light_chains = (
        Chain.from_dataframe(tables[sheet], scheme=SCHEME, as_series=True)
        for sheet in sheet_names
    )
    return heavy_chains, light_chains

In [17]:
# Split the parental FASTA into two lookups keyed by record id:
# records whose description ends in ' VH' are heavy chains, ' VL' light chains.
# Any other description aborts the cell with a ValueError.
parental_heavy = {}
parental_light = {}
parental_fasta = os.path.join(TASK_DIR, f'pairs/parental.fa')
for record in iterate_fasta(parental_fasta):
    description = record.description
    if description.endswith(' VH'):
        destination = parental_heavy
    elif description.endswith(' VL'):
        destination = parental_light
    else:
        raise ValueError(description)
    destination[record.id] = str(record.seq)

In [18]:
# Spot check: display the parental heavy-chain sequence stored for Pembrolizumab.
parental_heavy['Pembrolizumab']

'QVQLQQPGAELVKPGTSVKLSCKASGYTFTNYYMYWVKQRPGQGLEWIGGINPSNGGTNFNEKFKNKATLTVDSSSSTTYMQLSSLTSEDSAVYYCTRRDYRFDMGFDYWGQGTTLTVSS'

In [19]:
# Spot check: display the parental light-chain sequence stored for Pembrolizumab.
parental_light['Pembrolizumab']

'DIVLTQSPASLAVSLGQRAAISCRASKGVSTSGYSYLHWYQQKPGQSPKLLIYLASYLESGVPARFSGSGSGTDFTLNIHPVEEEDAATYYCQHSRDLPLTFGTGTKLELK'

In [20]:
def submit_humanization(h_sequence, h_v_gene, h_threshold, l_sequence, l_v_gene, l_threshold, jobname='Test'):
    """Submit one heavy/light sequence pair to the Hu-mAb web form and return the job ID.

    The arguments are posted unchanged as form fields of the same names.
    The job ID is taken from the last path segment of the final response URL
    (presumably the server redirects to the job's result page; confirm).

    Args:
        h_sequence: Heavy-chain sequence.
        h_v_gene: Heavy-chain V gene form field.
        h_threshold: Heavy-chain threshold form field.
        l_sequence: Light-chain sequence.
        l_v_gene: Light-chain V gene form field.
        l_threshold: Light-chain threshold form field.
        jobname: Job name form field.

    Returns:
        The job ID string.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
    """
    form_fields = {
        'h_sequence': h_sequence,
        'h_v_gene': h_v_gene,
        'h_threshold': h_threshold,
        'l_sequence': l_sequence,
        'l_v_gene': l_v_gene,
        'l_threshold': l_threshold,
        'jobname': jobname
    }
    job_request = requests.post('http://opig.stats.ox.ac.uk/webapps/newsabdab/sabpred/humab', data=form_fields)
    # Fail loudly on an HTTP error: otherwise the URL tail parsed below is not a
    # job ID, yet callers would still cache it and poll it forever.
    job_request.raise_for_status()
    jobid = job_request.url.strip('/').split('/')[-1]
    return jobid

def _fetch_with_retry(url, max_retries=100, delay_step=5):
    """GET ``url``, retrying with linearly growing pauses while the request raises."""
    last_error = None
    for retry in range(max_retries):
        try:
            return requests.get(url)
        except Exception as error:
            # Deliberately not a bare ``except``: Ctrl-C (KeyboardInterrupt)
            # must still be able to interrupt a long polling session.
            last_error = error
            time.sleep(retry * delay_step)
    raise ConnectionError(f'Retry failed: {url}') from last_error


def _extract_humanised_sequence(text, chain_label):
    """Return the stripped text following 'humanised sequence:' in a result page."""
    match = re.search('humanised sequence: (.*)', text)
    if match is None:
        # Show the unexpected page so the failure can be diagnosed from the output.
        print(text)
        raise ValueError(f'No humanised {chain_label} sequence found in Hu-mAb output')
    return match.group(1).strip()


def get_humanization_results(jobid, jobname='Test', max_polls=100, max_retries=100, delay_step=5):
    """Poll the Hu-mAb server until a job has results and return the humanised sequences.

    Every polling round fetches the job status page, fails fast if that page
    reports an error, and then requests the heavy-chain result. As soon as the
    heavy-chain request succeeds, the light-chain result is fetched too and
    both sequences are parsed. Round ``k`` (0-based) is preceded by a sleep of
    ``k * delay_step`` seconds, so the first round starts immediately.

    Args:
        jobid: Job ID, e.g. as returned by ``submit_humanization``.
        jobname: Job name used at submission; it is part of the result URLs.
        max_polls: Maximum number of polling rounds.
        max_retries: Maximum attempts per round to fetch the status page when
            the request itself raises.
        delay_step: Seconds by which the pause grows per round / per retry.

    Returns:
        Tuple ``(heavy_seq, light_seq)`` of sequence strings.

    Raises:
        ValueError: If the status page reports a Hu-mAb error, or a result
            page contains no 'humanised sequence:' line.
        ConnectionError: If the status page could not be fetched at all.
        TimeoutError: If no heavy-chain result became available within
            ``max_polls`` rounds (previously this surfaced as a NameError).
    """
    status_url = f'http://opig.stats.ox.ac.uk/webapps/newsabdab/sabpred/humab_results/{jobid}'
    heavy_url = f'{status_url}/{jobname}/heavy'
    light_url = f'{status_url}/{jobname}/light'

    for wait in range(max_polls):
        # Sleep first so that the status page inspected below is fresh.
        time.sleep(wait * delay_step)
        status = _fetch_with_retry(status_url, max_retries=max_retries, delay_step=delay_step)
        if 'Error loading Hu-mAb results' in status.text or '>failed<' in status.text:
            raise ValueError('Hu-mAb error')
        heavy_request = requests.get(heavy_url)
        if heavy_request.ok:
            light_text = requests.get(light_url).text
            heavy_seq = _extract_humanised_sequence(heavy_request.text, 'heavy')
            light_seq = _extract_humanised_sequence(light_text, 'light')
            return heavy_seq, light_seq

    raise TimeoutError(f'Hu-mAb job {jobid} had no results after {max_polls} polling rounds')

In [21]:
# Create the working directory for job IDs and downloaded results.
# Pure Python instead of a shell escape: portable and safe for paths with spaces.
os.makedirs(os.path.join(TASK_DIR, 'humab', 'tmp'), exist_ok=True)

In [22]:
# Directory in which the submission loop stores per-antibody '.jobid' and '.fa' files.
out_dir = os.path.join(TASK_DIR, 'humab', 'tmp')

In [23]:
# Antibody names that the submission loop should skip (empty: process all).
SKIP = []

In [None]:
# For every parental antibody: reuse the downloaded result if present,
# otherwise reuse or create a Hu-mAb job, wait for it, and store the
# humanised VH/VL as a two-record FASTA file in out_dir.
# NOTE(review): a cached '.jobid' is reused even when that job has failed,
# so reruns keep polling the same failed job until the file is removed by hand.
for position, name in enumerate(parental_heavy, start=1):
    if name in SKIP:
        print('Skipping', name)
        continue
    outpath = os.path.join(out_dir, f'{name}.fa')
    print(position, name, outpath)
    if os.path.exists(outpath):
        # Result already fetched by an earlier run.
        continue
    jobname = f'{name}_25pairs'
    jobid_path = os.path.join(out_dir, f'{name}.jobid')
    if not os.path.exists(jobid_path):
        # No job submitted yet: submit and remember the job ID on disk.
        jobid = submit_humanization(
            h_sequence=parental_heavy[name],
            h_v_gene='',
            h_threshold=1,
            l_sequence=parental_light[name],
            l_v_gene='',
            l_threshold=1,
            jobname=jobname
        )
        with open(jobid_path, 'wt') as f:
            f.write(f'{jobid}\n')
    else:
        with open(jobid_path, 'rt') as f:
            jobid = f.read().strip()

    print('  Waiting for job:', f'http://opig.stats.ox.ac.uk/webapps/newsabdab/sabpred/humab_results/{jobid}')
    try:
        heavy_seq, light_seq = get_humanization_results(jobid, jobname=jobname)
        with open(outpath, 'wt') as f:
            for chain_label, sequence in (('VH', heavy_seq), ('VL', light_seq)):
                f.write(f'>{name} {chain_label} (threshold 1)\n')
                f.write(f'{sequence}\n')
    except Exception as e:
        # Best effort: report the failure and move on to the next antibody.
        print('  FAILED', str(e))


1 AntiCD28 ../../data/tasks/humab_25_pairs/humab/tmp/AntiCD28.fa
  Waiting for job: http://opig.stats.ox.ac.uk/webapps/newsabdab/sabpred/humab_results/20210311_0487374
  FAILED Hu-mAb error
2 Campath ../../data/tasks/humab_25_pairs/humab/tmp/Campath.fa
  Waiting for job: http://opig.stats.ox.ac.uk/webapps/newsabdab/sabpred/humab_results/20210312_0203231
  FAILED Hu-mAb error
3 Bevacizumab ../../data/tasks/humab_25_pairs/humab/tmp/Bevacizumab.fa
  Waiting for job: http://opig.stats.ox.ac.uk/webapps/newsabdab/sabpred/humab_results/20210318_0507171
