In [163]:
import pandas as pd
from Bio import SeqIO
import os

# Import Data

In [155]:
# Root folder of the data set and the metadata workbook stored inside it
data_dir = '/Users/s236922/code/data/Kreye-JEM-2021/'
metadata_workbook = f'{data_dir}/input/aBASE.xlsm'
# header=1: the column names are taken from the second row of the sheet
meta_data = pd.read_excel(metadata_workbook, header=1)

# RhABs of interest (from patient ID 113); every other event number found in
# the metadata is treated as negative.
pos_rhabs = [101, 115, 175, 198, 201]
neg_rhabs = [event for event in meta_data['Event No'] if event not in pos_rhabs]
all_rhabs = [*pos_rhabs, *neg_rhabs]

# Convert selected files

In [173]:
# Look up the sequencing file IDs of the positive RhABs in meta_data.
# Meaning of the ID columns:
#   Seq_ID   = Heavy
#   Seq_ID.1 = Kappa
#   Seq_ID.2 = Lambda
columns_of_interest = ['Event No', 'Seq', 'Seq_ID', 'Seq_ID.1', 'Seq_ID.2']
is_positive = meta_data['Event No'].isin(pos_rhabs)
selected = meta_data.loc[is_positive, columns_of_interest].reset_index(drop=True)
selected

Unnamed: 0,Event No,Seq,Seq_ID,Seq_ID.1,Seq_ID.2
0,101,H/L,88FJ02_C05,,88FJ01_H06
1,115,H/L,88FJ02_D01,,88FJ01_H11
2,175,H/K,88FJ00_D06,88FJ00_H04,
3,198,H/K,88FJ00_E05,88FI11,
4,201,H/K,88FJ00_E06,88FI12,


In [129]:
# Show the event numbers that made it into the selection
print(list(selected['Event No']))

[101, 115, 175, 198, 201]


In [None]:
# convert ab1 to fasta
#
# For every selected event, read the ABI trace (.ab1) of each chain listed in
# its 'Seq' entry and append the record to one combined FASTA file.

# Chain code used in the 'Seq' column -> (metadata column holding the trace
# file ID, label written into the FASTA header).
chain_info = {
    'H': ('Seq_ID', 'heavy'),
    'K': ('Seq_ID.1', 'kappa'),
    'L': ('Seq_ID.2', 'lambda'),
}
# Patient the selected RhABs come from; used as prefix of every FASTA header.
patient_id = 113

output_path = f"{data_dir}SeqData-fasta/all-pos.fasta"
# Records are appended below, so remove any file left over from a previous
# run; otherwise re-running the cell would duplicate every record.
if os.path.exists(output_path):
    os.remove(output_path)

# Iterate by row position instead of re-filtering on 'Event No', so a
# duplicated event number cannot cause the same row to be converted twice.
for row_pos in range(len(selected)):
    e = selected.iloc[[row_pos]].reset_index(drop=True)
    seq_code = e['Seq'][0]
    print(e)
    print(seq_code)

    # 'x' entries are skipped (presumably events without usable sequence
    # data -- TODO confirm against aBASE). A missing (NaN) entry cannot be
    # split into chains either, so it is skipped as well.
    if not isinstance(seq_code, str) or seq_code == 'x':
        continue

    for chain in seq_code.split('/'):
        chain = chain.strip()
        if chain not in chain_info:
            # Without this guard an unknown code would silently reuse the
            # file ID of the previously processed chain.
            print(f"Skipping unknown chain code {chain!r} for event {e['Event No'][0]}")
            continue

        seq_column, chain_type = chain_info[chain]
        seq = e[seq_column][0]
        if pd.isna(seq):
            # No trace file recorded for this chain; avoid opening 'nan.ab1'.
            print(f"Skipping {chain_type} chain of event {e['Event No'][0]}: no sequence ID")
            continue

        # The trace is only read, so open it read-only in binary mode.
        with open(f"{data_dir}SeqData/{seq}.ab1", 'rb') as input_handle, open(output_path, "a") as output_handle:
            sequences = []
            for record in SeqIO.parse(input_handle, "abi"):
                # Customize the FASTA header
                record.id = f"{patient_id}-{e['Event No'][0]}.{chain_type}.{seq}"
                sequences.append(record)

            count = SeqIO.write(sequences, output_handle, "fasta")
            print("Converted %i records" % count)

   Event No  Seq      Seq_ID Seq_ID.1    Seq_ID.2
0       101  H/L  88FJ02_C05      NaN  88FJ01_H06
H/L
Converted 1 records
Converted 1 records
   Event No  Seq      Seq_ID Seq_ID.1    Seq_ID.2
0       115  H/L  88FJ02_D01      NaN  88FJ01_H11
H/L
Converted 1 records
Converted 1 records
   Event No  Seq      Seq_ID    Seq_ID.1 Seq_ID.2
0       175  H/K  88FJ00_D06  88FJ00_H04      NaN
H/K
Converted 1 records
Converted 1 records
   Event No  Seq      Seq_ID Seq_ID.1 Seq_ID.2
0       198  H/K  88FJ00_E05   88FI11      NaN
H/K
Converted 1 records
Converted 1 records
   Event No  Seq      Seq_ID Seq_ID.1 Seq_ID.2
0       201  H/K  88FJ00_E06   88FI12      NaN
H/K
Converted 1 records
Converted 1 records
