# Protein Structure Visualization and Analysis

This notebook demonstrates how to visualize protein structures using `py3Dmol`, highlight specific residues, and analyze sequence embeddings. It integrates data processing with pandas and interactive 3D molecular visualization, enabling exploration of structural features and residue-specific properties within protein datasets.

In [None]:
import py3Dmol

def view_custom_pdb(pdb_file, residue_num, chain='A'):
    """
    Visualize a PDB structure from a file, highlight a specific residue with thicker sticks,
    show the backbone for the selected residue, and add an arrow-like indicator.
    
    Parameters:
    - pdb_file: str, path to the local PDB file.
    - residue_num: int, residue number to highlight.
    - chain: str, the chain identifier (default 'A').
    """
    # Read the PDB file content
    with open(pdb_file, 'r') as f:
        pdb_content = f.read()

    # Initialize py3Dmol viewer
    viewer = py3Dmol.view(width=800, height=600)
    
    # Add the PDB structure to the viewer
    viewer.addModel(pdb_content, 'pdb')
    
    # Apply cartoon style for the overall structure
    viewer.setStyle({'cartoon': {'color': 'cyan'}})
    
    # Highlight the specified residue by number and chain with larger sticks and backbone
    viewer.setStyle({'chain': chain, 'resi': str(residue_num)}, 
                    {'stick': {'colorscheme': 'orangeCarbon', 'radius': 0.6},  # Larger sticks
                     'cartoon': {'color': 'orange', 'style': 'trace'}})  # Backbone trace
    
    # Add molecular surface for the rest of the structure
    viewer.addSurface(py3Dmol.VDW, {'opacity': 0.5})
    
    # Add an arrow-like indicator near the selected residue
    viewer.addLabel(f'Residue {residue_num}', 
                    {'position': {'resi': residue_num, 'chain': chain}, 
                     'backgroundColor': 'white', 'fontColor': 'black', 'fontSize': 16,
                     'alignment': 'bottomCenter'})
    
    # Set zoom and background color
    viewer.zoomTo()
    viewer.setBackgroundColor('white')
    
    # Show the structure
    return viewer.show()



In [33]:
print( df.pdbfile.values)

['/home/dmoi/datasets/afdbclusters/structfams/A0A011N458//structs/G6F3X8.pdb'
 '/home/dmoi/datasets/afdbclusters/structfams/A0A011N458//structs/A0A0H3KU55.pdb'
 '/home/dmoi/datasets/afdbclusters/structfams/A0A011N458//structs/A9HPZ2.pdb'
 ...
 '/home/dmoi/datasets/afdbclusters/structfams/A0A010R7U6//structs/A0A2G5F175.pdb'
 '/home/dmoi/datasets/afdbclusters/structfams/A0A010R7U6//structs/A0A453NZA8.pdb'
 '/home/dmoi/datasets/afdbclusters/structfams/A0A010R7U6//structs/A0A1U8J7P2.pdb']


In [38]:
#select a few chars from embedding and visualize them

inputchars = """0 1 2 3 4 5 6 7 8 9 A B C D E F G H I J K L M N O P Q R S T U V W X Y Z ! " # $ % & ' ( ) * + , / : ; < = > @ [ \ ] ^ _ { | } ~""".split()
selected = inputchars[10]
print(selected)

#find selected residues in sequence
df['selected'] = df['encoded_aln_foldmason'].apply(lambda x: x.find(selected))
maxcount = 7
found = df[df['selected'] != -1].sample(maxcount)
count = 0
for i, row in found.iterrows():
    count += 1
    view_custom_pdb(row['pdbfile'], row['selected'], chain='A')

A


  inputchars = """0 1 2 3 4 5 6 7 8 9 A B C D E F G H I J K L M N O P Q R S T U V W X Y Z ! " # $ % & ' ( ) * + , / : ; < = > @ [ \ ] ^ _ { | } ~""".split()
