In [1]:
from Bio.PDB import PDBParser
from Bio.SeqUtils import seq1

from Bio import PDB

# Read sequence (using BioPDB)

In [2]:
def extract_sequence(chain_id, pdb_file_path):
    parser = PDBParser()
    structure = parser.get_structure('structure', pdb_file_path)
    sequence = ''

    for model in structure:
        for chain in model:
            if chain.id == chain_id:
                for residue in chain:
                    if residue.id[0] == ' ':  # Only standard amino acids
                        sequence += seq1(residue.resname, undef_code='X')
                return sequence
    return None


In [3]:
# Replace 'your_pdb_file.pdb' with the path to your PDB file
pdb_file_path = 'trast_H_pos_1_HLC_83922_unrelaxed_rank_001_alphafold2_multimer_v3_model_2_seed_000.pdb'
chain_a_sequence = extract_sequence('A', pdb_file_path)
chain_b_sequence = extract_sequence('B', pdb_file_path)

In [4]:
if chain_a_sequence:
    print(f"Chain A Sequence: {chain_a_sequence}")
else:
    print("Chain A not found or no sequence extracted.")

Chain A Sequence: EVQLVESGGGLVQPGGSLRLSCAASGFNIKDTYIHWVRQAPGKGLEWVARIYPTNGYTRYADSVKGRFTISADTSKNTAYLQMNSLRAEDTAVYYCSRFSNVNYYAFAYWGQGTLVTVSS


In [5]:
if chain_b_sequence:
    print(f"Chain B Sequence: {chain_b_sequence}")
else:
    print("Chain B not found or no sequence extracted.")

Chain B Sequence: DIQMTQSPSSLSASVGDRVTITCRASQDVNTAVAWYQQKPGKAPKLLIYSASFLYSGVPSRFSGSRSGTDFTLTISSLQPEDFATYYCQQHYTTPPTFGQGTKVEIK


# Numbering using Anarci

In [14]:
from anarci import anarci

def number_sequence(chain_a_sequence,chain_id):
    # Make sure chain_a_sequence is not empty
    if chain_a_sequence:
        # Properly handling the output based on the inspected structure
        results = anarci([(chain_id, chain_a_sequence)], scheme="chothia")

        numbered, details = results[0][0], results[1][0]

        return(numbered, details)

        # for i in range( len( numbered ) ): # Iterate over the identified domains (e.g. for an scfv)
        #     numbering = [ (n, a) for n, a in numbered[i][0] if a != '-' ] # Remove gaps if made (imgt scheme)
        #     yield [ (numbering[ri][0], ri+numbered[i][1]) for ri in range( len( numbering ) ) ], details[i]['chain_type'], details[i]


number_chain_a, detail_chain_a = number_sequence(chain_a_sequence, 'A')
number_chain_b, detail_chain_b = number_sequence(chain_b_sequence, 'B')

# number_chain_a, detail_chain_a

In [16]:
from anarci import run_anarci

def number_sequence_anarci(chain_a_sequence,chain_id):
    # Make sure chain_a_sequence is not empty
    if chain_a_sequence:
        # Properly handling the output based on the inspected structure
        results = run_anarci([(chain_id, chain_a_sequence)], scheme="chothia")

        numbered, details = results[0][0], results[1][0]

        return(numbered, details)

        # for i in range( len( numbered ) ): # Iterate over the identified domains (e.g. for an scfv)
        #     numbering = [ (n, a) for n, a in numbered[i][0] if a != '-' ] # Remove gaps if made (imgt scheme)
        #     yield [ (numbering[ri][0], ri+numbered[i][1]) for ri in range( len( numbering ) ) ], details[i]['chain_type'], details[i]


number_chain_a_, detail_chain_a_ = number_sequence_anarci(chain_a_sequence, 'A')
number_chain_b_, detail_chain_b_ = number_sequence_anarci(chain_b_sequence, 'B')

# number_chain_a, detail_chain_a

In [29]:
import anarci

print(anarci.__version__)

1.b


In [14]:
# Install a pip package in the current Jupyter kernel
import sys
!{sys.executable} -m pip install ANARCI



In [17]:
print(number_chain_a_)

('A', 'EVQLVESGGGLVQPGGSLRLSCAASGFNIKDTYIHWVRQAPGKGLEWVARIYPTNGYTRYADSVKGRFTISADTSKNTAYLQMNSLRAEDTAVYYCSRFSNVNYYAFAYWGQGTLVTVSS')


In [26]:
print(detail_chain_a_)

[([((1, ' '), 'E'), ((2, ' '), 'V'), ((3, ' '), 'Q'), ((4, ' '), 'L'), ((5, ' '), 'V'), ((6, ' '), 'E'), ((7, ' '), 'S'), ((8, ' '), 'G'), ((9, ' '), 'G'), ((10, ' '), 'G'), ((11, ' '), 'L'), ((12, ' '), 'V'), ((13, ' '), 'Q'), ((14, ' '), 'P'), ((15, ' '), 'G'), ((16, ' '), 'G'), ((17, ' '), 'S'), ((18, ' '), 'L'), ((19, ' '), 'R'), ((20, ' '), 'L'), ((21, ' '), 'S'), ((22, ' '), 'C'), ((23, ' '), 'A'), ((24, ' '), 'A'), ((25, ' '), 'S'), ((26, ' '), 'G'), ((27, ' '), 'F'), ((28, ' '), 'N'), ((29, ' '), 'I'), ((30, ' '), 'K'), ((31, ' '), 'D'), ((32, ' '), 'T'), ((33, ' '), 'Y'), ((34, ' '), 'I'), ((35, ' '), 'H'), ((36, ' '), 'W'), ((37, ' '), 'V'), ((38, ' '), 'R'), ((39, ' '), 'Q'), ((40, ' '), 'A'), ((41, ' '), 'P'), ((42, ' '), 'G'), ((43, ' '), 'K'), ((44, ' '), 'G'), ((45, ' '), 'L'), ((46, ' '), 'E'), ((47, ' '), 'W'), ((48, ' '), 'V'), ((49, ' '), 'A'), ((50, ' '), 'R'), ((51, ' '), 'I'), ((52, ' '), 'Y'), ((52, 'A'), 'P'), ((53, ' '), 'T'), ((54, ' '), 'N'), ((55, ' '), 'G')

In [27]:
print(detail_chain_b_)

[([((1, ' '), 'D'), ((2, ' '), 'I'), ((3, ' '), 'Q'), ((4, ' '), 'M'), ((5, ' '), 'T'), ((6, ' '), 'Q'), ((7, ' '), 'S'), ((8, ' '), 'P'), ((9, ' '), 'S'), ((10, ' '), 'S'), ((11, ' '), 'L'), ((12, ' '), 'S'), ((13, ' '), 'A'), ((14, ' '), 'S'), ((15, ' '), 'V'), ((16, ' '), 'G'), ((17, ' '), 'D'), ((18, ' '), 'R'), ((19, ' '), 'V'), ((20, ' '), 'T'), ((21, ' '), '-'), ((22, ' '), 'I'), ((23, ' '), 'T'), ((24, ' '), 'C'), ((25, ' '), 'R'), ((26, ' '), 'A'), ((27, ' '), 'S'), ((28, ' '), 'Q'), ((29, ' '), 'D'), ((30, ' '), 'V'), ((31, ' '), 'N'), ((32, ' '), 'T'), ((33, ' '), 'A'), ((34, ' '), 'V'), ((35, ' '), 'A'), ((36, ' '), 'W'), ((37, ' '), 'Y'), ((38, ' '), 'Q'), ((39, ' '), 'Q'), ((40, ' '), 'K'), ((41, ' '), 'P'), ((42, ' '), 'G'), ((43, ' '), 'K'), ((44, ' '), 'A'), ((45, ' '), 'P'), ((46, ' '), 'K'), ((47, ' '), 'L'), ((48, ' '), 'L'), ((49, ' '), 'I'), ((50, ' '), 'Y'), ((51, ' '), 'S'), ((51, 'A'), 'A'), ((52, ' '), '-'), ((53, ' '), 'S'), ((54, ' '), 'F'), ((55, ' '), 'L')

In [18]:
print(number_chain_a)

[([((1, ' '), 'E'), ((2, ' '), 'V'), ((3, ' '), 'Q'), ((4, ' '), 'L'), ((5, ' '), 'V'), ((6, ' '), 'E'), ((7, ' '), 'S'), ((8, ' '), 'G'), ((9, ' '), 'G'), ((10, ' '), 'G'), ((11, ' '), 'L'), ((12, ' '), 'V'), ((13, ' '), 'Q'), ((14, ' '), 'P'), ((15, ' '), 'G'), ((16, ' '), 'G'), ((17, ' '), 'S'), ((18, ' '), 'L'), ((19, ' '), 'R'), ((20, ' '), 'L'), ((21, ' '), 'S'), ((22, ' '), 'C'), ((23, ' '), 'A'), ((24, ' '), 'A'), ((25, ' '), 'S'), ((26, ' '), 'G'), ((27, ' '), 'F'), ((28, ' '), 'N'), ((29, ' '), 'I'), ((30, ' '), 'K'), ((31, ' '), 'D'), ((32, ' '), 'T'), ((33, ' '), 'Y'), ((34, ' '), 'I'), ((35, ' '), 'H'), ((36, ' '), 'W'), ((37, ' '), 'V'), ((38, ' '), 'R'), ((39, ' '), 'Q'), ((40, ' '), 'A'), ((41, ' '), 'P'), ((42, ' '), 'G'), ((43, ' '), 'K'), ((44, ' '), 'G'), ((45, ' '), 'L'), ((46, ' '), 'E'), ((47, ' '), 'W'), ((48, ' '), 'V'), ((49, ' '), 'A'), ((50, ' '), 'R'), ((51, ' '), 'I'), ((52, ' '), 'Y'), ((52, 'A'), 'P'), ((53, ' '), 'T'), ((54, ' '), 'N'), ((55, ' '), 'G')

In [19]:
print(number_chain_b)

[([((1, ' '), 'D'), ((2, ' '), 'I'), ((3, ' '), 'Q'), ((4, ' '), 'M'), ((5, ' '), 'T'), ((6, ' '), 'Q'), ((7, ' '), 'S'), ((8, ' '), 'P'), ((9, ' '), 'S'), ((10, ' '), 'S'), ((11, ' '), 'L'), ((12, ' '), 'S'), ((13, ' '), 'A'), ((14, ' '), 'S'), ((15, ' '), 'V'), ((16, ' '), 'G'), ((17, ' '), 'D'), ((18, ' '), 'R'), ((19, ' '), 'V'), ((20, ' '), 'T'), ((21, ' '), 'I'), ((22, ' '), 'T'), ((23, ' '), 'C'), ((24, ' '), 'R'), ((25, ' '), 'A'), ((26, ' '), 'S'), ((27, ' '), 'Q'), ((28, ' '), 'D'), ((29, ' '), 'V'), ((30, ' '), 'N'), ((31, ' '), 'T'), ((32, ' '), 'A'), ((33, ' '), 'V'), ((34, ' '), 'A'), ((35, ' '), 'W'), ((36, ' '), 'Y'), ((37, ' '), 'Q'), ((38, ' '), 'Q'), ((39, ' '), 'K'), ((40, ' '), 'P'), ((41, ' '), 'G'), ((42, ' '), 'K'), ((43, ' '), 'A'), ((44, ' '), 'P'), ((45, ' '), 'K'), ((46, ' '), 'L'), ((47, ' '), 'L'), ((48, ' '), 'I'), ((49, ' '), 'Y'), ((50, ' '), 'S'), ((51, ' '), 'A'), ((52, ' '), 'S'), ((53, ' '), 'F'), ((54, ' '), 'L'), ((55, ' '), 'Y'), ((56, ' '), 'S')

Split number and residue to list

In [8]:
def number_to_list(number):

    numbers = []
    residues = []
    
    for list_item in number:
        for tuple_item in list_item:
            for item in tuple_item:
                # print(item[0])
                residues.append(item[1])
                num = item[0]
                if num[1] == ' ':
                    numbers.append(str(num[0]))
                else:
                    numbers.append(str(num[0])+num[1])
            break

    return numbers, residues

In [13]:
numbers_chain_a, residues_chain_a = number_to_list(number_chain_a)

# numbers_chain_a, residues_chain_a
print(numbers_chain_a)

['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19', '20', '21', '22', '23', '24', '25', '26', '27', '28', '29', '30', '31', '32', '33', '34', '35', '36', '37', '38', '39', '40', '41', '42', '43', '44', '45', '46', '47', '48', '49', '50', '51', '52', '52A', '53', '54', '55', '56', '57', '58', '59', '60', '61', '62', '63', '64', '65', '66', '67', '68', '69', '70', '71', '72', '73', '74', '75', '76', '77', '78', '79', '80', '81', '82', '82A', '82B', '82C', '83', '84', '85', '86', '87', '88', '89', '90', '91', '92', '93', '94', '95', '96', '97', '98', '99', '100', '100A', '100B', '100C', '101', '102', '103', '104', '105', '106', '107', '108', '109', '110', '111', '112', '113']


In [14]:
numbers_chain_b, residues_chain_b = number_to_list(number_chain_b)

# numbers_chain_b, residues_chain_b
print(numbers_chain_b)

['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19', '20', '21', '22', '23', '24', '25', '26', '27', '28', '29', '30', '31', '32', '33', '34', '35', '36', '37', '38', '39', '40', '41', '42', '43', '44', '45', '46', '47', '48', '49', '50', '51', '51A', '52', '53', '54', '55', '56', '57', '58', '59', '59A', '60', '61', '62', '63', '64', '65', '66', '66A', '67', '68', '69', '70', '71', '72', '73', '74', '75', '76', '77', '78', '79', '80', '81', '82', '83', '84', '85', '86', '87', '88', '89', '90', '91', '92', '93', '94', '95', '95A', '96', '97', '98', '99', '100', '101', '102', '103', '104', '105', '106', '107']


In [23]:
print(len(numbers_chain_b))

111


# BioPandas

In [None]:
from biopandas.pdb import PandasPdb
data = PandasPdb().read_pdb(pdb_file_path)
# print(data.df['ATOM']['residue_number'].head())
# print(data.df['ATOM']['chain_id'] == 'A')

# if data.df['ATOM']['chain_id'] == 'A':
# for numb in numbers:
#     # print(numb)
#     curr_res_num = 1
#     for i, residue in data.df['ATOM'].iterrows():
#         # print(residue['residue_number'])
#         # print(i)
#         curr_res_num += 1
#         break
        # if residue != curr_res_num:
        #     break
        # else:
        #     data.df['ATOM']['residue_number'][i] = numb

residue_number_mapping = {}

current_residue_number = 1

for row in data.df['ATOM']:
    if data.df['ATOM']['chain_id'] == 'A' and row['residue_number'] not in residue_number_mapping:
        residue_number_mapping[row['residue_number']] = new_residue_numbers[current_residue_number - 1]
        current_residue_number += 1

# Update the residue_number in the data
for row in data:
    if row['chain_id'] == 'A':
        row['residue_number'] = residue_number_mapping[row['residue_number']]

using csv to manipulating data

In [11]:
import csv

# The new residue numbers provided
new_residue_numbers = ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19', '20', '21', '22', '23', '24', '25', '26', '27', 
                       '28', '29', '30', '31', '32', '33', '34', '35', '36', '37', '38', '39', '40', '41', '42', '43', '44', '45', '46', '47', '48', '49', '50', '51', '52', 
                       '52A', '53', '54', '55', '56', '57', '58', '59', '60', '61', '62', '63', '64', '65', '66', '67', '68', '69', '70', '71', '72', '73', '74', '75', '76', 
                       '77', '78', '79', '80', '81', '82', '82A', '82B', '82C', '83', '84', '85', '86', '87', '88', '89', '90', '91', '92', '93', '94', '95', '96', '97', '98', 
                       '99', '100', '100A', '100B', '100C', '101', '102', '103', '104', '105', '106', '107', '108', '109', '110', '111', '112', '113']

# Mapping of original residue number to new residue number for chain_id A
residue_number_mapping = {}

# Read the CSV file
with open('data_trast.csv', mode='r') as infile:
    reader = csv.DictReader(infile)
    data = list(reader)

    # Generate the mapping for chain_id A
    current_residue_number = 1
    for row in data:
        if row['chain_id'] == 'A' and row['residue_number'] not in residue_number_mapping:
            residue_number_mapping[row['residue_number']] = new_residue_numbers[current_residue_number - 1]
            current_residue_number += 1

# Update the residue_number in the data
for row in data:
    if row['chain_id'] == 'A':
        row['residue_number'] = residue_number_mapping[row['residue_number']]

# # Write the updated data back to a new CSV file
# with open('updated_data_trast.csv', mode='w', newline='') as outfile:
#     writer = csv.DictWriter(outfile, fieldnames=reader.fieldnames)
#     writer.writeheader()
#     writer.writerows(data)

with tempfile.NamedTemporaryFile(mode='w+', delete=False, suffix='.csv') as tmpfile:
    df.to_csv(tmpfile.name, index=False)
with open('updated_data_trast.csv', mode='w', newline='') as outfile:
    writer = csv.DictWriter(outfile, fieldnames=reader.fieldnames)
    writer.writeheader()
    writer.writerows(data)

In [11]:
data.df['ATOM']['residue_number']

0         1
1         1
2         1
3         1
4         1
       ... 
2512    100
2513    100
2514    100
2515    100
2516    100
Name: residue_number, Length: 2517, dtype: object

In [14]:
res_num = data.df['ATOM']['residue_number'].copy()
res_num.to_csv('residue_number.csv', index=None)

In [9]:
data.df['ATOM'].to_csv('data_trast.csv', index=None)

Edit pdb file using biopandas

In [15]:
from biopandas.pdb import PandasPdb

# Load your PDB data
pdb_data = PandasPdb().read_pdb(pdb_file_path)

# The new residue numbers provided
new_residue_numbers_a = numbers_chain_a
new_residue_numbers_b = numbers_chain_b

def update_residue_numbers(chain_id, new_residue_numbers):
    # Filter the DataFrame for chain_id
    chain = pdb_data.df['ATOM'][pdb_data.df['ATOM']['chain_id'] == chain_id]
    
    # Update the residue_number for chain_id
    residue_seen = set()
    current_residue_number = 0
    for index, row in chain.iterrows():
        # Check if the residue number has already been updated
        if row['residue_number'] not in residue_seen:
            residue_seen.add(row['residue_number'])
            if current_residue_number < len(new_residue_numbers):
                # Update all atoms of the current residue
                pdb_data.df['ATOM'].loc[(pdb_data.df['ATOM']['chain_id'] == chain_id) & (pdb_data.df['ATOM']['residue_number'] == row['residue_number']), 'residue_number'] = new_residue_numbers[current_residue_number]
                current_residue_number += 1

update_residue_numbers('A', new_residue_numbers_a)
update_residue_numbers('B', new_residue_numbers_b)

# Save the updated DataFrame back to a new PDB file
pdb_data.to_pdb(path='updated_structure.pdb', records=None, gz=False, append_newline=True)

  pdb_data.df['ATOM'].loc[(pdb_data.df['ATOM']['chain_id'] == chain_id) & (pdb_data.df['ATOM']['residue_number'] == row['residue_number']), 'residue_number'] = new_residue_numbers[current_residue_number]


# Usiang ANARCI from github

In [10]:
!conda install -c conda-forge openmm pdbfixer biopython -y
!conda install -c bioconda hmmer=3.3.2 -y

Channels:
 - conda-forge
 - defaults
 - bioconda
Platform: linux-64
Collecting package metadata (repodata.json): done
Solving environment: done

## Package Plan ##

  environment location: /home/matyus/miniconda3/envs/haddock-antibody

  added / updated specs:
    - biopython
    - openmm
    - pdbfixer


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    biopython-1.83             |   py39hd1e30aa_0         2.6 MB  conda-forge
    openmm-8.1.1               |   py39hd38b9cb_1        10.7 MB  conda-forge
    scipy-1.12.0               |   py39h474f0d3_2        15.6 MB  conda-forge
    ------------------------------------------------------------
                                           Total:        29.0 MB

The following NEW packages will be INSTALLED:

  cudatoolkit        conda-forge/linux-64::cudatoolkit-11.8.0-h4ba93d1_13 
  ocl-icd            conda-forge/linux-64::ocl-icd-2.3.2-hd590

In [11]:
!git clone https://github.com/oxpig/ANARCI.git

Cloning into 'ANARCI'...
remote: Enumerating objects: 785, done.[K
remote: Counting objects: 100% (325/325), done.[K
remote: Compressing objects: 100% (220/220), done.[K
remote: Total 785 (delta 122), reused 269 (delta 89), pack-reused 460[K
Receiving objects: 100% (785/785), 10.50 MiB | 2.37 MiB/s, done.
Resolving deltas: 100% (421/421), done.


In [12]:
%cd ANARCI
!python setup.py install
%cd ..

/home/matyus/Downloads/HADDOCK-antibody-antigen/Test_data/ANARCI


  self.shell.db['dhist'] = compress_dhist(dhist)[-100:]


!!

        ********************************************************************************
        Please avoid running ``setup.py`` directly.
        Instead, use pypa/build, pypa/installer or other
        standards-based tools.

        See https://blog.ganssle.io/articles/2021/10/setup-py-deprecated.html for details.
        ********************************************************************************

!!
  self.initialize_options()
INFO: ANARCI lives in:  /home/matyus/miniconda3/envs/haddock-antibody/lib/python3.9/site-packages/anarci
INFO: Downloading germlines from IMGT and building HMMs...
INFO: running 'RUN_pipeline.sh', this will take a couple a minutes.
Parsed and saved Homo+sapiens HV
Parsed and saved Mus HV
Parsed and saved Rattus+norvegicus HV
Parsed and saved Oryctolagus+cuniculus HV
Parsed and saved Macaca+mulatta HV
Parsed and saved Sus+scrofa HV
Parsed and saved Vicugna+pacos HV
Parsed and saved Bos+taurus HV
Parsed and saved Homo+sapiens HJ
Parsed and saved Mus 

In [20]:
from anarci.anarci import anarci

results = anarci([chain_b_sequence], scheme='chothia')

# Process and display the results
for result in results:
    print(result)

ValueError: too many values to unpack (expected 2)

In [19]:
import sys
sys.path.append('/path/to/ANARCI')

from anarci.anarci import anarci

# Using ABNumber to numbering resiude

In [6]:
!conda config --add channels defaults
!conda config --add channels bioconda
!conda config --add channels conda-forge
!conda config --set channel_priority strict



In [7]:
!conda install abnumber -y

Channels:
 - conda-forge
 - bioconda
 - defaults
Platform: linux-64
Collecting package metadata (repodata.json): done
Solving environment: done

## Package Plan ##

  environment location: /home/matyus/miniconda3

  added / updated specs:
    - abnumber


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    abnumber-0.3.2             |     pyhdfd78af_0          35 KB  bioconda
    anarci-2020.04.23          |             py_3         1.1 MB  bioconda
    pandas-2.2.0               |  py310hcc13569_0        12.4 MB  conda-forge
    python-tzdata-2024.1       |     pyhd8ed1ab_0         141 KB  conda-forge
    pytz-2024.1                |     pyhd8ed1ab_0         184 KB  conda-forge
    ------------------------------------------------------------
                                           Total:        13.8 MB

The following NEW packages will be INSTALLED:

  abnumber           bioconda/noarch::

In [8]:
from abnumber import Chain
import pandas as pd


In [9]:
chain_a_number_abnum = Chain(chain_a_sequence, scheme='chothia')
chain_a_number_abnum

EVQLVESGGGLVQPGGSLRLSCAASGFNIKDTYIHWVRQAPGKGLEWVARIYPTNGYTRYADSVKGRFTISADTSKNTAYLQMNSLRAEDTAVYYCSRFSNVNYYAFAYWGQGTLVTVSS
                         ^^^^^^^                   ^^^^^^                                         ^^^^^^^^^^^           

In [10]:
chain_b_number_abnum = Chain(chain_b_sequence, scheme='chothia')
chain_b_number_abnum

DIQMTQSPSSLSASVGDRVTITCRASQDVNTAVAWYQQKPGKAPKLLIYSASFLYSGVPSRFSGSRSGTDFTLTISSLQPEDFATYYCQQHYTTPPTFGQGTKVEIK
                       ^^^^^^^^^^^               ^^^^^^^                                ^^^^^^^^^          

In [11]:
chain_b_number_abnum.print_tall()

       L1 D          L23 C          L45 K          L67 S     CDR3 L89 Q
       L2 I     CDR1 L24 R          L46 L          L68 G     CDR3 L90 Q
       L3 Q     CDR1 L25 A          L47 L          L69 T     CDR3 L91 H
       L4 M     CDR1 L26 S          L48 I          L70 D     CDR3 L92 Y
       L5 T     CDR1 L27 Q          L49 Y          L71 F     CDR3 L93 T
       L6 Q     CDR1 L28 D     CDR2 L50 S          L72 T     CDR3 L94 T
       L7 S     CDR1 L29 V     CDR2 L51 A          L73 L     CDR3 L95 P
       L8 P     CDR1 L30 N     CDR2 L52 S          L74 T     CDR3 L96 P
       L9 S     CDR1 L31 T     CDR2 L53 F          L75 I     CDR3 L97 T
      L10 S     CDR1 L32 A     CDR2 L54 L          L76 S          L98 F
      L11 L     CDR1 L33 V     CDR2 L55 Y          L77 S          L99 G
      L12 S     CDR1 L34 A     CDR2 L56 S          L78 L         L100 Q
      L13 A          L35 W          L57 G          L79 Q         L101 G
      L14 S          L36 Y          L58 V          L80 P        

In [26]:
for pos, aa in chain_b_number_abnum.raw[50:60]:
    print(pos)

L51
L52
L53
L54
L55
L56
L57
L58
L59
L60


In [13]:
chain_a_number_abnum.print_tall()

       H1 E          H25 S          H49 A          H72 D          H93 S
       H2 V     CDR1 H26 G          H50 R          H73 T          H94 R
       H3 Q     CDR1 H27 F          H51 I          H74 S     CDR3 H95 F
       H4 L     CDR1 H28 N     CDR2 H52 Y          H75 K     CDR3 H96 S
       H5 V     CDR1 H29 I    CDR2 H52A P          H76 N     CDR3 H97 N
       H6 E     CDR1 H30 K     CDR2 H53 T          H77 T     CDR3 H98 V
       H7 S     CDR1 H31 D     CDR2 H54 N          H78 A     CDR3 H99 N
       H8 G     CDR1 H32 T     CDR2 H55 G          H79 Y    CDR3 H100 Y
       H9 G          H33 Y     CDR2 H56 Y          H80 L    CDR3 H100A Y
      H10 G          H34 I          H57 T          H81 Q    CDR3 H100B A
      H11 L          H35 H          H58 R          H82 M    CDR3 H100C F
      H12 V          H36 W          H59 Y         H82A N    CDR3 H101 A
      H13 Q          H37 V          H60 A         H82B S    CDR3 H102 Y
      H14 P          H38 R          H61 D         H82C L     

In [28]:
for pos, aa in chain_a_number_abnum.raw[50:60]:
    print(pos)

H51
H52
H52A
H53
H54
H55
H56
H57
H58
H59


In [35]:
numbered_residues_b = [str(pos)[1:] for pos, aa in chain_b_number_abnum]
print(numbered_residues_b)

['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19', '20', '21', '22', '23', '24', '25', '26', '27', '28', '29', '30', '31', '32', '33', '34', '35', '36', '37', '38', '39', '40', '41', '42', '43', '44', '45', '46', '47', '48', '49', '50', '51', '52', '53', '54', '55', '56', '57', '58', '59', '60', '61', '62', '63', '64', '65', '66', '67', '68', '69', '70', '71', '72', '73', '74', '75', '76', '77', '78', '79', '80', '81', '82', '83', '84', '85', '86', '87', '88', '89', '90', '91', '92', '93', '94', '95', '96', '97', '98', '99', '100', '101', '102', '103', '104', '105', '106', '107']


In [34]:
# If using the string representation directly works for getting the full identifier including any letters
numbered_residues_a = [str(pos)[1:] for pos, aa in chain_a_number_abnum]
print(numbered_residues_a)

['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19', '20', '21', '22', '23', '24', '25', '26', '27', '28', '29', '30', '31', '32', '33', '34', '35', '36', '37', '38', '39', '40', '41', '42', '43', '44', '45', '46', '47', '48', '49', '50', '51', '52', '52A', '53', '54', '55', '56', '57', '58', '59', '60', '61', '62', '63', '64', '65', '66', '67', '68', '69', '70', '71', '72', '73', '74', '75', '76', '77', '78', '79', '80', '81', '82', '82A', '82B', '82C', '83', '84', '85', '86', '87', '88', '89', '90', '91', '92', '93', '94', '95', '96', '97', '98', '99', '100', '100A', '100B', '100C', '101', '102', '103', '104', '105', '106', '107', '108', '109', '110', '111', '112', '113']


In [37]:
print(len(numbered_residues_b))

107


using biopandas to edit the pdb file

In [39]:
from biopandas.pdb import PandasPdb

# Load your PDB data
pdb_data = PandasPdb().read_pdb(pdb_file_path)

# The new residue numbers provided
new_residue_numbers_a = numbered_residues_a
new_residue_numbers_b = numbered_residues_b

def update_residue_numbers(chain_id, new_residue_numbers):
    # Filter the DataFrame for chain_id
    chain = pdb_data.df['ATOM'][pdb_data.df['ATOM']['chain_id'] == chain_id]
    
    # Update the residue_number for chain_id
    residue_seen = set()
    current_residue_number = 0
    for index, row in chain.iterrows():
        # Check if the residue number has already been updated
        if row['residue_number'] not in residue_seen:
            residue_seen.add(row['residue_number'])
            if current_residue_number < len(new_residue_numbers):
                # Update all atoms of the current residue
                pdb_data.df['ATOM'].loc[(pdb_data.df['ATOM']['chain_id'] == chain_id) & (pdb_data.df['ATOM']['residue_number'] == row['residue_number']), 'residue_number'] = new_residue_numbers[current_residue_number]
                current_residue_number += 1

update_residue_numbers('A', new_residue_numbers_a)
update_residue_numbers('B', new_residue_numbers_b)

# Save the updated DataFrame back to a new PDB file
pdb_data.to_pdb(path='updated_structure_abnumber.pdb', records=None, gz=False, append_newline=True)

  pdb_data.df['ATOM'].loc[(pdb_data.df['ATOM']['chain_id'] == chain_id) & (pdb_data.df['ATOM']['residue_number'] == row['residue_number']), 'residue_number'] = new_residue_numbers[current_residue_number]
