In [1]:
from Bio.PDB import MMCIFParser, MMCIFIO


In [2]:
parser = MMCIFParser()
structure = parser.get_structure("4WPO", "4wpo.cif") 



In [4]:
from Bio.PDB import PDBList
from Bio.PDB.MMCIF2Dict import MMCIF2Dict

mmcif_dict = MMCIF2Dict("4wpo.cif")

# This will list all chain IDs and associated entity IDs
strand_ids = mmcif_dict.get("_entity_poly.pdbx_strand_id", [])
entity_ids = mmcif_dict.get("_entity_poly.entity_id", [])
polymer_names = mmcif_dict.get("_entity.pdbx_description", [])

for entity, strands, name in zip(entity_ids, strand_ids, polymer_names):
    print(f"Entity ID: {entity}, Chains: {strands}, Molecule: {name}")


Entity ID: 1, Chains: AA,CA, Molecule: 23S Ribosomal RNA
Entity ID: 2, Chains: AB,CB, Molecule: 5S Ribosomal RNA
Entity ID: 3, Chains: AC,CC, Molecule: 50S ribosomal protein L1
Entity ID: 4, Chains: AD,CD, Molecule: 50S ribosomal protein L2
Entity ID: 5, Chains: AE,CE, Molecule: 50S ribosomal protein L3
Entity ID: 6, Chains: AF,CF, Molecule: 50S ribosomal protein L4
Entity ID: 7, Chains: AG,CG, Molecule: 50S ribosomal protein L5
Entity ID: 8, Chains: AH,CH, Molecule: 50S ribosomal protein L6
Entity ID: 9, Chains: AK,CK, Molecule: 50S ribosomal protein L10
Entity ID: 10, Chains: AL,CL, Molecule: 50S ribosomal protein L11
Entity ID: 11, Chains: AN,CN, Molecule: 50S ribosomal protein L13
Entity ID: 12, Chains: AO,CO, Molecule: 50S ribosomal protein L14
Entity ID: 13, Chains: AP,CP, Molecule: 50S ribosomal protein L15
Entity ID: 14, Chains: AQ,CQ, Molecule: 50S ribosomal protein L16
Entity ID: 15, Chains: AR,CR, Molecule: 50S ribosomal protein L17
Entity ID: 16, Chains: AS,CS, Molecule: 50

In [5]:
from Bio.PDB import MMCIFParser, MMCIFIO, Select

# Load the structure
parser = MMCIFParser()
structure = parser.get_structure("4WPO", "4wpo.cif")

# Custom Select class to keep only EF-G chains
class EF_G_ChainSelect(Select):
    def __init__(self, chain_ids):
        self.chain_ids = chain_ids

    def accept_chain(self, chain):
        return chain.id in self.chain_ids

# Set up the writer
io = MMCIFIO()
io.set_structure(structure)

# Save only EF-G chains (GB and ND)
io.save("4WPO_EF-G.cif", EF_G_ChainSelect(["BZ"]))




In [7]:
import nglview as nv
import ipywidgets as widgets
from IPython.display import display
widgets.IntSlider()
view = nv.show_file("4WPO_EF-G.cif")
slider = widgets.IntSlider(min=0, max=100, step=1, value=50, description='My Slider')
view.clear_representations()
view.add_representation('cartoon', color='pink')  
view.add_representation('stick', selection='540 and PRO', color='black')
display(view)
display(slider)




NGLWidget()

IntSlider(value=50, description='My Slider')

In [8]:
from Bio.PDB import MMCIFParser, PPBuilder
from Bio.SeqRecord import SeqRecord
from Bio.Seq import Seq
from Bio import SeqIO

# Parse the structure
parser = MMCIFParser(QUIET=True)
structure = parser.get_structure("4WPO", "4WPO_EF-G.cif")

ppb = PPBuilder()
records = []

# Extract sequences for each chain
for model in structure:
    for chain in model:
        seq = ""
        for pp in ppb.build_peptides(chain):
            seq += str(pp.get_sequence())

        if seq:
            record = SeqRecord(Seq(seq), id=f"Chain_{chain.id}", description="")
            records.append(record)

# Print the sequences in FASTA format
for record in records:
    print(f">{record.id}\n{record.seq}")


>Chain_BZ
MKVILLEPLENLGDVGQVVDVKPGYARNYLLPRGLAVLATESNLKALEARIRAQAKRLAERKAEAERLKEILENDLKRLRNIGIAAHIDAGKTTTTERILYYTGRAAVTTCFWKDHRINIIDTPGHVDFTIEVERSMRVLDGAIVVFDSSQGVEPQSETVWRQAEKYKVPRIAFANKMDKTGADLWLVIRTMQERLGARPVVMQLPIGREDTFSGIIDVLRMKAYTYGNDLGTDIREIPIPEEYLDQAREYHEKLVEVAADFDENIMLKYLEGEEPTEEELVAAIRKGTIDLKITPVFLGSALKNKGVQLLLDAVVDYLPSPLDIPPIKGTTPEGEVVEIHPDPNGPLAALAFKIMADPYVGRLTFIRVYSGTLTSGSYVYNTTKGRKERVARLLRMHANHREEVEELKAGDLGAVVGLKETITGDTLVGEDAPRVILESIEVPEPVIDVAIEPKTKADQEKLSQALARLAEEDPTFRVSTHPETGQTIISGMGELHLEIIVDRLKREFKVDANVGKPQVAYRETITKPVDVEGKFIRQTGGRGQYGHVKIKVEPLPRGSGFEFVNAIVGGVIPKEYIPAVQKGIEEAMQSGPLIGFPVVDIKVTLYDGSYHEVDSSEMAFKIAGSMAIKEAVQKGDPVILEPIMRVEVTTPEEYMGDVIGDLNARRGQILGMEPRGNAQVIRAFVPLAEMFGYATDLRSKTQGRGSFVMFFDHYQEVPKQVQEKLIK


In [10]:
parser = MMCIFParser()
structure = parser.get_structure("4v5f", "4v5f.cif") 



In [11]:
from Bio.PDB.MMCIF2Dict import MMCIF2Dict

mmcif_dict = MMCIF2Dict("4v5f.cif")

# This will list all chain IDs and associated entity IDs
strand_ids = mmcif_dict.get("_entity_poly.pdbx_strand_id", [])
entity_ids = mmcif_dict.get("_entity_poly.entity_id", [])
polymer_names = mmcif_dict.get("_entity.pdbx_description", [])

for entity, strands, name in zip(entity_ids, strand_ids, polymer_names):
    print(f"Entity ID: {entity}, Chains: {strands}, Molecule: {name}")


Entity ID: 1, Chains: AA,CA, Molecule: 16S ribosomal RNA
Entity ID: 2, Chains: AB,CB, Molecule: 30S RIBOSOMAL PROTEIN S2
Entity ID: 3, Chains: AC,CC, Molecule: 30S RIBOSOMAL PROTEIN S3
Entity ID: 4, Chains: AD,CD, Molecule: 30S RIBOSOMAL PROTEIN S4
Entity ID: 5, Chains: AE,CE, Molecule: 30S RIBOSOMAL PROTEIN S5
Entity ID: 6, Chains: AF,CF, Molecule: 30S RIBOSOMAL PROTEIN S6
Entity ID: 7, Chains: AG,CG, Molecule: 30S RIBOSOMAL PROTEIN S7
Entity ID: 8, Chains: AH,CH, Molecule: 30S RIBOSOMAL PROTEIN S8
Entity ID: 9, Chains: AI,CI, Molecule: 30S RIBOSOMAL PROTEIN S9
Entity ID: 10, Chains: AJ,CJ, Molecule: 30S RIBOSOMAL PROTEIN S10
Entity ID: 11, Chains: AK,CK, Molecule: 30S RIBOSOMAL PROTEIN S11
Entity ID: 12, Chains: AL,CL, Molecule: 30S RIBOSOMAL PROTEIN S12
Entity ID: 13, Chains: AM,CM, Molecule: 30S RIBOSOMAL PROTEIN S13
Entity ID: 14, Chains: AN,CN, Molecule: 30S RIBOSOMAL PROTEIN S14 TYPE Z
Entity ID: 15, Chains: AO,CO, Molecule: 30S RIBOSOMAL PROTEIN S15
Entity ID: 16, Chains: AP,CP

In [12]:
from Bio.PDB import MMCIFParser, MMCIFIO, Select

# Load the structure
parser = MMCIFParser()
structure = parser.get_structure("4v5f", "4v5f.cif")

# Custom Select class to keep only EF-G chains
class EF_G_ChainSelect(Select):
    def __init__(self, chain_ids):
        self.chain_ids = chain_ids

    def accept_chain(self, chain):
        return chain.id in self.chain_ids

# Set up the writer
io = MMCIFIO()
io.set_structure(structure)

# Save only EF-G chains 
io.save("4v5f_EF-G.cif", EF_G_ChainSelect(["CY"]))




In [14]:
## for finding proline number in 4v5f structure:
from Bio.PDB import MMCIFParser

# Parse the structure
parser = MMCIFParser()
structure = parser.get_structure('4V5F', '4v5f_EF-G.cif')

# Iterate over models, chains, and residues to list amino acids and their residue numbers
for model in structure:
    for chain in model:
        print(f"Chain ID: {chain.id}")
        for residue in chain:
            # residue.id is a tuple like (' ', resseq, insertion_code)
            resseq = residue.id[1]  # residue sequence number
            resname = residue.get_resname()  # 3-letter amino acid code
            print(f"Residue {resseq}: {resname}")
        print()  # Blank line between chains


Chain ID: CY
Residue 4: LYS
Residue 5: VAL
Residue 6: GLU
Residue 7: TYR
Residue 8: ASP
Residue 9: LEU
Residue 10: LYS
Residue 11: ARG
Residue 12: LEU
Residue 13: ARG
Residue 14: ASN
Residue 15: ILE
Residue 16: GLY
Residue 17: ILE
Residue 18: ALA
Residue 19: ALA
Residue 20: HIS
Residue 21: ILE
Residue 22: ASP
Residue 23: ALA
Residue 24: GLY
Residue 25: LYS
Residue 26: THR
Residue 27: THR
Residue 28: THR
Residue 29: THR
Residue 30: GLU
Residue 31: ARG
Residue 32: ILE
Residue 33: LEU
Residue 34: TYR
Residue 35: TYR
Residue 36: THR
Residue 37: GLY
Residue 38: ARG
Residue 39: ILE
Residue 40: HIS
Residue 41: LYS
Residue 42: ILE
Residue 43: GLY
Residue 64: THR
Residue 65: ILE
Residue 66: THR
Residue 67: ALA
Residue 68: ALA
Residue 69: VAL
Residue 70: THR
Residue 71: THR
Residue 72: CYS
Residue 73: PHE
Residue 74: TRP
Residue 75: LYS
Residue 76: ASP
Residue 77: HIS
Residue 78: ARG
Residue 79: ILE
Residue 80: ASN
Residue 81: ILE
Residue 82: ILE
Residue 83: ASP
Residue 84: THR
Residue 85: PRO
R

In [13]:
import nglview as nv
import ipywidgets as widgets
from IPython.display import display
view = nv.show_file("4v5f_EF-G.cif")
slider = widgets.IntSlider(min=0, max=100, step=1, value=50, description='My Slider')
view.clear_representations()
view.add_representation('cartoon', color='silver')
selection_string = "648:CY"
view.add_representation('ball+stick', selection=selection_string)
view.add_representation('label', sele=selection_string, labelType='format', labelFormat='%(resname)s%(resno)s', color='red', xOffset=1, fixedSize=True)




display(view)
display(slider)

NGLWidget()

IntSlider(value=50, description='My Slider')

In [15]:
from Bio.PDB import MMCIFParser, Superimposer, MMCIFIO, Model, Structure

# Parse structures
parser = MMCIFParser()
structure1 = parser.get_structure('s1', '4wpo_EF-G.cif')
structure2 = parser.get_structure('s2', '4v5f_EF-G.cif')

model1 = structure1[0]
model2 = structure2[0]

chain1_id = 'BZ'
chain2_id = 'CY'

atoms1 = [res['CA'] for res in model1[chain1_id] if 'CA' in res]
atoms2 = [res['CA'] for res in model2[chain2_id] if 'CA' in res]

min_len = min(len(atoms1), len(atoms2))
atoms1 = atoms1[:min_len]
atoms2 = atoms2[:min_len]

# Superimpose
sup = Superimposer()
sup.set_atoms(atoms1, atoms2)
sup.apply(model2.get_atoms())
print("RMSD after superimposition:", sup.rms)

# Create new structure with 2 models
combined_structure = Structure.Structure("combined")

# Add first model as model 0
combined_structure.add(model1)

# Rename second model to 1 and add (Bio.PDB requires unique model ids)
model2.id = 1
combined_structure.add(model2)

# Save combined multi-model CIF
io = MMCIFIO()
io.set_structure(combined_structure)
io.save("combined_aligned.cif")


RMSD after superimposition: 33.220390795845105


In [17]:
import nglview as nv
import ipywidgets as widgets
from IPython.display import display

view = nv.show_file("combined_aligned.cif")

slider = widgets.IntSlider(min=0, max=100, step=1, value=50, description='My Slider')

view.clear_representations()

# Cartoon for chain CY in silver
view.add_representation('cartoon', selection=':CY', color='silver')

# Cartoon for chain BZ in pink
view.add_representation('cartoon', selection=':BZ', color='pink')

# Highlight residue 648 in chain CY as red ball+stick + label
selection_string = "648:CY"
view.add_representation('ball+stick', selection=selection_string, color='red')
view.add_representation('label', sele=selection_string, labelType='format', labelFormat='%(resname)s%(resno)s', color='red', xOffset=1, fixedSize=True)

display(view)
display(slider)



NGLWidget()

IntSlider(value=50, description='My Slider')

In [10]:
import nglview as nv
import ipywidgets as widgets
from IPython.display import display

# Load CIF file from AlphaFold 3 server
view = nv.show_file("e.coli36099ef-g.cif")
view.clear_representations()
view.add_representation('cartoon', color='pink')
selection_string = "659:A.CA"
view.add_representation('ball+stick', selection=selection_string, color='red')
view.add_representation('label', sele=selection_string, labelType='format', labelFormat='%(resname)s%(resno)s', color='red', xOffset=1, fixedSize=True)

display(view)


NGLWidget()

In [11]:
import nglview as nv
import ipywidgets as widgets
from IPython.display import display

# Load CIF file from AlphaFold 3 server
view = nv.show_file("e.coli3609910xmicef-g.cif")
view.clear_representations()
view.add_representation('cartoon', color='siver')
selection_string = "659:A.CA"
view.add_representation('ball+stick', selection=selection_string, color='red')
view.add_representation('label', sele=selection_string, labelType='format', labelFormat='%(resname)s%(resno)s', color='red', xOffset=1, fixedSize=True)

display(view)

NGLWidget()

In [12]:
from Bio.PDB import MMCIFParser, Superimposer, MMCIFIO, Model, Structure

# Parse structures
parser = MMCIFParser()
structure1 = parser.get_structure('s1', 'e.coli36099ef-g.cif')
structure2 = parser.get_structure('s2', 'e.coli3609910xmicef-g.cif')

model1 = structure1[0]
model2 = structure2[0]

chain1_id = 'A'
chain2_id = 'A'

atoms1 = [res['CA'] for res in model1[chain1_id] if 'CA' in res]
atoms2 = [res['CA'] for res in model2[chain2_id] if 'CA' in res]

min_len = min(len(atoms1), len(atoms2))
atoms1 = atoms1[:min_len]
atoms2 = atoms2[:min_len]

# Superimpose
sup = Superimposer()
sup.set_atoms(atoms1, atoms2)
sup.apply(model2.get_atoms())
print("RMSD after superimposition:", sup.rms)

# Create new structure with 2 models
combined_structure = Structure.Structure("combined")

# Add first model as model 0
combined_structure.add(model1)

# Rename second model to 1 and add (Bio.PDB requires unique model ids)
model2.id = 1
combined_structure.add(model2)

# Save combined multi-model CIF
io = MMCIFIO()
io.set_structure(combined_structure)
io.save("combined_alignedAF3model.cif")


RMSD after superimposition: 0.5198646287552445


In [13]:
import nglview as nv
import ipywidgets as widgets
from IPython.display import display

view = nv.show_file("combined_alignedAF3model.cif")
view.clear_representations()

# Try different model selection syntaxes
try:
    view.add_representation('cartoon', selection='/0', color='pink')      # Model 0 with slash
    view.add_representation('cartoon', selection='/1', color='silver')    # Model 1 with slash
    print("Using model /0 and /1 - SUCCESS")
except Exception as e:
    print(f"Model selection failed: {e}")
    
    # Fallback: try chain selection
    try:
        view.add_representation('cartoon', selection=':A', color='pink')
        view.add_representation('cartoon', selection=':B', color='silver')
        print("Using chain A and B - SUCCESS")
    except Exception as e2:
        print(f"Chain selection failed: {e2}")
        
        # Final fallback: show everything with automatic coloring
        view.add_representation('cartoon', color='chainid')  # Colors by chain automatically
        print("Using automatic chain coloring - SUCCESS")

# Highlight residue
selection_string = "659:A"
try:
    view.add_representation('ball+stick', selection=selection_string, color='red')
    view.add_representation('label', 
                           selection=selection_string, 
                           labelType='format', 
                           labelFormat='%(resname)s%(resno)s', 
                           color='red', 
                           xOffset=1, 
                           fixedSize=True)
except Exception as e:
    print(f"Residue highlighting failed: {e}")

# Force display
display(view)

# Slider
slider = widgets.IntSlider(min=0, max=100, step=1, value=50, description='My Slider')
display(slider)

Using model /0 and /1 - SUCCESS


NGLWidget()

IntSlider(value=50, description='My Slider')

In [4]:
import nglview as nv
import ipywidgets as widgets
from IPython.display import display

# Load CIF file from AlphaFold 3 server
view = nv.show_file("e.coli36099ef-g.cif")
view.clear_representations()
view.add_representation('cartoon', color='pink')
selection_string = "593:A.CA"
view.add_representation('ball+stick', selection=selection_string, color='red')
view.add_representation('label', sele=selection_string, labelType='format', labelFormat='%(resname)s%(resno)s', color='red', xOffset=1, fixedSize=True)

display(view)


NGLWidget()

In [3]:
import nglview as nv
import ipywidgets as widgets
from IPython.display import display

# Load CIF file from AlphaFold 3 server
view = nv.show_file("ef-ge.colimg165510xmic.cif")
view.clear_representations()
view.add_representation('cartoon', color='siver')
selection_string = "593:A.CA"
view.add_representation('ball+stick', selection=selection_string, color='red')
view.add_representation('label', sele=selection_string, labelType='format', labelFormat='%(resname)s%(resno)s', color='red', xOffset=1, fixedSize=True)

display(view)

NGLWidget()

In [8]:
from Bio.PDB import MMCIFParser, Superimposer, MMCIFIO, Model, Structure

# Parse structures
parser = MMCIFParser()
structure1 = parser.get_structure('s1', 'e.coli36099ef-g.cif')
structure2 = parser.get_structure('s2', 'ef-ge.colimg165510xmic.cif')

model1 = structure1[0]
model2 = structure2[0]

chain1_id = 'A'
chain2_id = 'A'

atoms1 = [res['CA'] for res in model1[chain1_id] if 'CA' in res]
atoms2 = [res['CA'] for res in model2[chain2_id] if 'CA' in res]

min_len = min(len(atoms1), len(atoms2))
atoms1 = atoms1[:min_len]
atoms2 = atoms2[:min_len]

# Superimpose
sup = Superimposer()
sup.set_atoms(atoms1, atoms2)
sup.apply(model2.get_atoms())
print("RMSD after superimposition:", sup.rms)

# Create new structure with 2 models
combined_structure = Structure.Structure("combined")

# Add first model as model 0
combined_structure.add(model1)

# Rename second model to 1 and add (Bio.PDB requires unique model ids)
model2.id = 1
combined_structure.add(model2)

# Save combined multi-model CIF
io = MMCIFIO()
io.set_structure(combined_structure)
io.save("combined_alignedAF3model2.cif")


RMSD after superimposition: 0.4638705762984491


In [14]:
import nglview as nv
import ipywidgets as widgets
from IPython.display import display

view = nv.show_file("combined_alignedAF3model2.cif")
view.clear_representations()

# Try different model selection syntaxes
try:
    view.add_representation('cartoon', selection='/0', color='pink')      # Model 0 with slash
    view.add_representation('cartoon', selection='/1', color='silver')    # Model 1 with slash
    print("Using model /0 and /1 - SUCCESS")
except Exception as e:
    print(f"Model selection failed: {e}")
    
    # Fallback: try chain selection
    try:
        view.add_representation('cartoon', selection=':A', color='pink')
        view.add_representation('cartoon', selection=':B', color='silver')
        print("Using chain A and B - SUCCESS")
    except Exception as e2:
        print(f"Chain selection failed: {e2}")
        
        # Final fallback: show everything with automatic coloring
        view.add_representation('cartoon', color='chainid')  # Colors by chain automatically
        print("Using automatic chain coloring - SUCCESS")

# Highlight residue
selection_string = "593:A"
try:
    view.add_representation('ball+stick', selection=selection_string, color='red')
    view.add_representation('label', 
                           selection=selection_string, 
                           labelType='format', 
                           labelFormat='%(resname)s%(resno)s', 
                           color='red', 
                           xOffset=1, 
                           fixedSize=True)
except Exception as e:
    print(f"Residue highlighting failed: {e}")

# Force display
display(view)
# Slider
slider = widgets.IntSlider(min=0, max=100, step=1, value=50, description='My Slider')
display(slider)

Using model /0 and /1 - SUCCESS


NGLWidget()

IntSlider(value=50, description='My Slider')

In [23]:
parser = MMCIFParser()
structure = parser.get_structure("4v5f", "4v5f.cif") 



In [29]:
from Bio.PDB import MMCIFParser, MMCIFIO, Select

# Load the structure
parser = MMCIFParser()
structure = parser.get_structure("4v5f", "4v5f.cif")

# Custom Select class to keep only EF-G_S12 chains
class EF_G_S12_ChainSelect(Select):
    def __init__(self, chain_ids):
        self.chain_ids = chain_ids

    def accept_chain(self, chain):
        return chain.id in self.chain_ids

# Set up the writer
io = MMCIFIO()
io.set_structure(structure)

# Save only EF-G_S12 chains 
io.save("4v5f_EF-G_S12.cif", EF_G_S12_ChainSelect(["CY", "CL"]))




In [75]:
## for finding proline number in 4v5f structure:
from Bio.PDB import MMCIFParser

# Parse the structure
parser = MMCIFParser()
structure = parser.get_structure('4V5F', '4v5f_EF-G_S12.cif')

# Iterate over models, chains, and residues to list amino acids and their residue numbers
for model in structure:
    for chain in model:
        print(f"Chain ID: {chain.id}")
        for residue in chain:
            # residue.id is a tuple like (' ', resseq, insertion_code)
            resseq = residue.id[1]  # residue sequence number
            resname = residue.get_resname()  # 3-letter amino acid code
            print(f"Residue {resseq}: {resname}")
        print()  # Blank line between chains


Chain ID: CL
Residue 5: PRO
Residue 6: THR
Residue 7: ILE
Residue 8: ASN
Residue 9: GLN
Residue 10: LEU
Residue 11: VAL
Residue 12: ARG
Residue 13: LYS
Residue 14: GLY
Residue 15: ARG
Residue 16: GLU
Residue 17: LYS
Residue 18: VAL
Residue 19: ARG
Residue 20: LYS
Residue 21: LYS
Residue 22: SER
Residue 23: LYS
Residue 24: VAL
Residue 25: PRO
Residue 26: ALA
Residue 27: LEU
Residue 28: LYS
Residue 29: GLY
Residue 30: ALA
Residue 31: PRO
Residue 32: PHE
Residue 33: ARG
Residue 34: ARG
Residue 35: GLY
Residue 36: VAL
Residue 37: CYS
Residue 38: THR
Residue 39: VAL
Residue 40: VAL
Residue 41: ARG
Residue 42: THR
Residue 43: VAL
Residue 44: THR
Residue 45: PRO
Residue 46: LYS
Residue 47: LYS
Residue 48: PRO
Residue 49: ASN
Residue 50: SER
Residue 51: ALA
Residue 52: LEU
Residue 53: ARG
Residue 54: LYS
Residue 55: VAL
Residue 56: ALA
Residue 57: LYS
Residue 58: VAL
Residue 59: ARG
Residue 60: LEU
Residue 61: THR
Residue 62: SER
Residue 63: GLY
Residue 64: TYR
Residue 65: GLU
Residue 66: VAL


In [19]:
parser = MMCIFParser()
structure = parser.get_structure("4v53", "4v53.cif") 



In [20]:
from Bio.PDB import PDBList
from Bio.PDB.MMCIF2Dict import MMCIF2Dict

mmcif_dict = MMCIF2Dict("4v53.cif")

# This will list all chain IDs and associated entity IDs
strand_ids = mmcif_dict.get("_entity_poly.pdbx_strand_id", [])
entity_ids = mmcif_dict.get("_entity_poly.entity_id", [])
polymer_names = mmcif_dict.get("_entity.pdbx_description", [])

for entity, strands, name in zip(entity_ids, strand_ids, polymer_names):
    print(f"Entity ID: {entity}, Chains: {strands}, Molecule: {name}")

Entity ID: 1, Chains: AA,CA, Molecule: 16S rRNA
Entity ID: 2, Chains: AC,CC, Molecule: 30S ribosomal protein S3
Entity ID: 3, Chains: AD,CD, Molecule: 30S ribosomal protein S4
Entity ID: 4, Chains: AE,CE, Molecule: 30S ribosomal protein S5
Entity ID: 5, Chains: AF,CF, Molecule: 30S ribosomal protein S6
Entity ID: 6, Chains: AG,CG, Molecule: 30S ribosomal protein S7
Entity ID: 7, Chains: AH,CH, Molecule: 30S ribosomal protein S8
Entity ID: 8, Chains: AI,CI, Molecule: 30S ribosomal protein S9
Entity ID: 9, Chains: AJ,CJ, Molecule: 30S ribosomal protein S10
Entity ID: 10, Chains: AK,CK, Molecule: 30S ribosomal protein S11
Entity ID: 11, Chains: AL,CL, Molecule: 30S ribosomal protein S12
Entity ID: 12, Chains: AM,CM, Molecule: 30S ribosomal protein S13
Entity ID: 13, Chains: AN,CN, Molecule: 30S ribosomal protein S14
Entity ID: 14, Chains: AO,CO, Molecule: 30S ribosomal protein S15
Entity ID: 15, Chains: AP,CP, Molecule: 30S ribosomal protein S16
Entity ID: 16, Chains: AQ,CQ, Molecule: 30S

In [25]:
from Bio.PDB import MMCIFParser, MMCIFIO, Select

# Load the structure
parser = MMCIFParser()
structure = parser.get_structure("4v53", "4v53.cif")

# Custom Select class to keep only S12 chains
class S12_ChainSelect(Select):
    def __init__(self, chain_ids):
        self.chain_ids = chain_ids

    def accept_chain(self, chain):
        return chain.id in self.chain_ids

# Set up the writer
io = MMCIFIO()
io.set_structure(structure)

# Save only S12 chains 
io.save("4v53_S12.cif", S12_ChainSelect(["CL"]))




In [31]:
from Bio.PDB import FastMMCIFParser

parser = FastMMCIFParser(QUIET=True)
structure1 = parser.get_structure('s1', '4v5f_EF-G_S12.cif')
structure2 = parser.get_structure('s2', '4v53_S12.cif')


model1 = structure1[0]
model2 = structure2[0]

chain1_id = ["CY", "CL"]  
chain2_id = "CL"

atoms1 = [res["CA"] 
          for cid in chain1_id 
          for res in model1[cid] 
          if "CA" in res]

atoms2 = [res['CA'] for res in model2[chain2_id] if 'CA' in res]

min_len = min(len(atoms1), len(atoms2))
atoms1 = atoms1[:min_len]
atoms2 = atoms2[:min_len]

# Superimpose
sup = Superimposer()
sup.set_atoms(atoms1, atoms2)
sup.apply(model2.get_atoms())
print("RMSD after superimposition:", sup.rms)

# Create new structure with 2 models
combined_structure = Structure.Structure("combined")

# Add first model as model 0
combined_structure.add(model1)

# Rename second model to 1 and add (Bio.PDB requires unique model ids)
model2.id = 1
combined_structure.add(model2)

# Save combined multi-model CIF
io = MMCIFIO()
io.set_structure(combined_structure)
io.save("alignedS12+EF-G.cif")

RMSD after superimposition: 20.623224232638357


In [34]:
# Rename overlapping chains in model2 to avoid coloring clashes
existing_chains = {chain.id for chain in model1}
for chain in model2:
    if chain.id in existing_chains:
        chain.id = chain.id + "_2"  # e.g., "CL" -> "CL_2"

# Create combined structure
combined_structure = Structure.Structure("combined")
combined_structure.add(model1)
model2.id = 1  # second model
combined_structure.add(model2)

# Save combined CIF
io = MMCIFIO()
io.set_structure(combined_structure)
io.save("alignedS12+EF-G_renamed.cif")

In [37]:
import nglview as nv
view = nv.show_file("alignedS12+EF-G_renamed.cif")
view.clear_representations()

#color by chain names
view.add_representation("cartoon", selection=":CY", color="pink")
view.add_representation("cartoon", selection=":CL", color="blue")
view.add_representation("cartoon", selection=":CL_2", color="silver")

from IPython.display import display
display(view)


NGLWidget()

In [60]:
from Bio.PDB import MMCIFParser, MMCIFIO, Superimposer, Structure

# Load the previously superimposed structure
parser = MMCIFParser(QUIET=True)
aligned_structure = parser.get_structure('aligned', 'alignedS12+EF-G_renamed.cif')
model2 = aligned_structure[0]  # first model (contains CL_2)

# Load the new structure
new_structure = parser.get_structure('4V53', '4V53.cif')
model_new = new_structure[0]  # first model of 4V53

# Select chains to use for superimposition
chain_old = "CL_2"  # chain from previously superimposed structure
chain_new = "CL"    # chain in 4V53

# Extract CA atoms for alignment
atoms_old = [res["CA"] for res in model2[chain_old] if "CA" in res]
atoms_new = [res["CA"] for res in model_new[chain_new] if "CA" in res]

# Match lengths
min_len = min(len(atoms_old), len(atoms_new))
atoms_old = atoms_old[:min_len]
atoms_new = atoms_new[:min_len]

# Superimpose
sup = Superimposer()
sup.set_atoms(atoms_old, atoms_new)
sup.apply(model_new.get_atoms())
print("RMSD after superimposition:", sup.rms)

# Rename chain in new model if needed to avoid duplicates
if chain_new in [c.id for c in model2]:
    for chain in model_new:
        if chain.id == chain_new:
            chain.id = chain_new + "_new"

# Assign unique model IDs before combining
model2.id = 0
model_new.id = 1

# Combine structures into one
combined_structure = Structure.Structure("super_aligned_2")
combined_structure.add(model2)
combined_structure.add(model_new)

# Save combined structure
io = MMCIFIO()
io.set_structure(combined_structure)
io.save("superimposed_combined.cif")


RMSD after superimposition: 0.0005234103095913419


In [71]:
import nglview as nv
from IPython.display import display
import matplotlib.pyplot as plt


# Load the CIF file directly
view = nv.show_file("superimposed_combined.cif")

# Clear default representations if you want custom colors
view.clear_representations()

# Add cartoon representations for chains
view.add_representation("cartoon", selection=":CY", color="pink")




# Parse the CIF file
parser = MMCIFParser()
structure = parser.get_structure("super", "superimposed_combined.cif")

# Extract chain IDs for model 1
model_index = 0  # Python index
chains_model1 = [chain.id for chain in structure[model_index]]

# Assign colors
colors = [plt.cm.tab20(i % 20) for i in range(len(chains_model1))]

# Load into NGLView directly
view = nv.show_file("superimposed_combined.cif")
view.clear_representations()

# Add cartoon representation for each chain with colors
for chain_id, color in zip(chains_model1, colors):
    view.add_representation("cartoon", selection=f":{chain_id}", color=color)

from IPython.display import display
display(view)


NGLWidget()

In [96]:
import nglview as nv
from Bio.PDB import MMCIFParser
from IPython.display import display

# Parse structure and identify all hetero atoms (potential ligands)
parser = MMCIFParser()
structure = parser.get_structure("super", "superimposed_combined.cif")

# Find all hetero atoms/ligands
ligands = []
for model in structure:
    for chain in model:
        for residue in chain:
            if residue.get_id()[0] != ' ':  # hetero atoms
                if residue.get_id()[0] not in ['W', 'H_']:  # exclude water and hydrogens
                    ligands.append((model.id, chain.id, residue.get_resname(), residue.get_id()))

print("Found ligands/hetero atoms:")
for model_id, chain_id, resname, res_id in ligands:
    print(f"Model {model_id}, Chain {chain_id}: {resname} {res_id}")



# Print detailed residue information for debugging
print("\nDetailed residue information:")
for model in structure:
    print(f"Model {model.id}:")
    for chain in model:
        print(f"  Chain {chain.id}:")
        hetero_residues = [res for res in chain if res.get_id()[0] != ' ']
        if hetero_residues:
            for res in hetero_residues:
                print(f"    {res.get_resname()} {res.get_id()}")
        else:
            print("    No hetero residues found")

Found ligands/hetero atoms:
Model 0, Chain CY: MG ('H_MG', 701, ' ')
Model 0, Chain CY: FUA ('H_FUA', 702, ' ')
Model 0, Chain CY: GDP ('H_GDP', 703, ' ')
Model 0, Chain AA: MG ('H_MG', 2001, ' ')
Model 0, Chain AA: MG ('H_MG', 2002, ' ')
Model 0, Chain AA: MG ('H_MG', 2003, ' ')
Model 0, Chain AA: MG ('H_MG', 2004, ' ')
Model 0, Chain AA: MG ('H_MG', 2005, ' ')
Model 0, Chain AA: MG ('H_MG', 2006, ' ')
Model 0, Chain AA: MG ('H_MG', 2007, ' ')
Model 0, Chain AA: MG ('H_MG', 2008, ' ')
Model 0, Chain AA: MG ('H_MG', 2009, ' ')
Model 0, Chain AA: MG ('H_MG', 2010, ' ')
Model 0, Chain AA: MG ('H_MG', 2011, ' ')
Model 0, Chain AA: MG ('H_MG', 2012, ' ')
Model 0, Chain AA: MG ('H_MG', 2013, ' ')
Model 0, Chain AA: MG ('H_MG', 2014, ' ')
Model 0, Chain AA: MG ('H_MG', 2015, ' ')
Model 0, Chain AA: MG ('H_MG', 2016, ' ')
Model 0, Chain AA: MG ('H_MG', 2017, ' ')
Model 0, Chain AA: MG ('H_MG', 2018, ' ')
Model 0, Chain AA: MG ('H_MG', 2019, ' ')
Model 0, Chain AA: MG ('H_MG', 2020, ' ')
Model

In [95]:
import nglview as nv
from Bio.PDB import MMCIFParser
from IPython.display import display

# Parse the CIF file to get chain information
parser = MMCIFParser()
structure = parser.get_structure("super", "superimposed_combined.cif")

# Get all chain IDs from both models
all_chains = []
for model in structure:
    for chain in model:
        all_chains.append(chain.id)

print("Available chains:", all_chains)

# Load the structure in NGLView
view = nv.show_file("superimposed_combined.cif")
view.clear_representations()

# Define colors for each chain (using NGLView compatible color names)
chain_colors = {
   'CY': 'pink', 'CL_new': 'green',
    # Add more chains with different colors as needed
    'AA': 'orange',
    'DB': 'green',
    'CA': 'purple',
    'BB': 'cyan',
    'BA': 'red',
    'BB': 'brown',
    'BL': 'gray',
    'DB': 'magenta'
}


# Add cartoon representations for protein chains
for chain_id in all_chains:
    if chain_id in chain_colors:
        color = chain_colors[chain_id]
    else:
        # Use a default color for unlisted chains
        color = 'green'
    
    view.add_representation("cartoon", 
                           selection=f":{chain_id} and polymer", 
                           color=color,
                           opacity=0.8)
# Highlight PHE:582 in chain CY as red stick
selection_string = "582:CY"
view.add_representation('ball+stick', selection=selection_string, color='red')
view.add_representation('label', sele=selection_string, labelType='format', labelFormat='%(resname)s%(resno)s', color='red', xOffset=1, fixedSize=True)

print("Added PHE:582 in chain CY as green stick")

# Highlight gentamicin (LLL) in red
# Try different possible selections for the ligand
gentamicin_selections = [
    "LLL",  # residue name
    "[LLL]",  # alternative syntax
    "hetero and not water",  # all hetero atoms except water
    "ligand"  # generic ligand selection
]

for selection in gentamicin_selections:
    try:
        view.add_representation("ball+stick", 
                               selection=selection, 
                               color="red",
                               radius=0.5)
        print(f"Added gentamicin representation with selection: {selection}")
        break
    except:
        continue

#Optional: Add surface representation for better visualization
#view.add_representation("surface", 
#                       selection="protein", 
#                        opacity=0.3,
#                        color="white")

# Set background and camera
view.background = "white"
view.camera = "perspective"

# Add labels (optional)
view.add_label(text="Gentamicin", selection="LLL", color="black", size=1)

#RNA visualisation
#view.add_representation("tube", selection="nucleic", color="orange", opacity=0.8)


display(view)

# Print information about the structure
print("\nStructure information:")
for i, model in enumerate(structure):
    print(f"Model {i}:")
    for chain in model:
        residue_count = len([res for res in chain if res.get_id()[0] == ' '])  # protein residues
        hetero_count = len([res for res in chain if res.get_id()[0] != ' '])   # hetero atoms
        print(f"  Chain {chain.id}: {residue_count} protein residues, {hetero_count} hetero atoms")

Available chains: ['CL', 'CY', 'CL_2', 'AA', 'AC', 'AD', 'AE', 'AF', 'AG', 'AH', 'AI', 'AJ', 'AK', 'AL', 'AM', 'AN', 'AO', 'AP', 'AQ', 'AR', 'AS', 'AT', 'AB', 'AU', 'BA', 'BB', 'BI', 'BC', 'BD', 'BK', 'BP', 'BE', 'BY', 'B0', 'B4', 'B1', 'B3', 'BV', 'B2', 'BL', 'BM', 'BX', 'BH', 'BJ', 'BN', 'BO', 'BQ', 'BS', 'BU', 'BF', 'BG', 'BR', 'BT', 'BZ', 'BW', 'CA', 'CC', 'CD', 'CE', 'CF', 'CG', 'CH', 'CI', 'CJ', 'CK', 'CL_new', 'CM', 'CN', 'CO', 'CP', 'CQ', 'CR', 'CS', 'CT', 'CB', 'CU', 'DA', 'DB', 'DI', 'DC', 'DD', 'DK', 'DP', 'DE', 'DY', 'D0', 'D4', 'D1', 'D3', 'DV', 'D2', 'DL', 'DM', 'DX', 'DH', 'DJ', 'DN', 'DO', 'DQ', 'DS', 'DU', 'DF', 'DG', 'DR', 'DT', 'DZ', 'DW']
Added PHE:582 in chain CY as green stick
Added gentamicin representation with selection: LLL


NGLWidget()


Structure information:
Model 0:
  Chain CL: 125 protein residues, 0 hetero atoms
  Chain CY: 667 protein residues, 3 hetero atoms
  Chain CL_2: 123 protein residues, 2 hetero atoms
  Chain AA: 1530 protein residues, 355 hetero atoms
  Chain AC: 206 protein residues, 0 hetero atoms
  Chain AD: 205 protein residues, 0 hetero atoms
  Chain AE: 150 protein residues, 1 hetero atoms
  Chain AF: 100 protein residues, 0 hetero atoms
  Chain AG: 150 protein residues, 0 hetero atoms
  Chain AH: 129 protein residues, 0 hetero atoms
  Chain AI: 127 protein residues, 0 hetero atoms
  Chain AJ: 98 protein residues, 0 hetero atoms
  Chain AK: 117 protein residues, 1 hetero atoms
  Chain AL: 123 protein residues, 2 hetero atoms
  Chain AM: 114 protein residues, 0 hetero atoms
  Chain AN: 96 protein residues, 2 hetero atoms
  Chain AO: 88 protein residues, 0 hetero atoms
  Chain AP: 82 protein residues, 0 hetero atoms
  Chain AQ: 80 protein residues, 0 hetero atoms
  Chain AR: 55 protein residues, 0 h