## Define visualization function

In [6]:
import pandas as pd
import py3Dmol
import os


# Function to visualize a PDB file
def visualize_pdb(pdb_file, n_term_coord, c_term_coord):
    """Render a PDB structure inline and mark two positions with spheres.

    Args:
        pdb_file: Path to the PDB file to display.
        n_term_coord: Indexable ``(x, y, z)`` position marked with a red
            sphere (the N-terminal marker).
        c_term_coord: Indexable ``(x, y, z)`` position marked with a blue
            sphere (the C-terminal marker).

    Returns:
        None. The viewer is displayed as a side effect of ``view.show()``.

    Raises:
        OSError: If ``pdb_file`` cannot be opened for reading.
    """
    view = py3Dmol.view(js='https://3dmol.org/build/3Dmol.js', width=800, height=400)

    # Read via a context manager so the file handle is closed deterministically;
    # a bare open(...).read() leaves the handle open until garbage collection.
    with open(pdb_file, 'r') as handle:
        view.addModel(handle.read(), 'pdb')

    # Terminal markers: red sphere first (N-term), then blue (C-term).
    for coord, color in ((n_term_coord, 'red'), (c_term_coord, 'blue')):
        view.addSphere({
            'center': {'x': coord[0], 'y': coord[1], 'z': coord[2]},
            'radius': 1.0,
            'color': color,
        })

    # Model index -1 addresses the most recently added model.
    view.setStyle({'model': -1}, {"cartoon": {'color': 'spectrum'}})
    view.zoomTo()
    view.show()


## Show 5 shortest distance models with Long Linkers

In [7]:
import re

# Load the long-linker distance table from CSV
df = pd.read_csv('sameEnds/long_Linker_distances.csv')

# Keep the 5 rows with the smallest 'Distance' values
df = df.sort_values('Distance').head(5)

# Report and visualize each of the 5 shortest-distance models
for _, row in df.iterrows():
    file_name = row['File Name']
    # Remove the '.pdb' extension as a suffix. str.rstrip('.pdb') would strip
    # any trailing run of the characters '.', 'p', 'd', 'b' instead.
    sequence = file_name[:-len('.pdb')] if file_name.endswith('.pdb') else file_name

    print("Sequence", sequence)

    # The sequence is split on the long linker motif; empty pieces produced
    # by a leading/trailing linker are dropped.
    fragments = re.split('GSGTGSG', sequence)
    print("Epitopes used: ", [item for item in fragments if item])

    print("Distance: " + str(row['Distance']))

    pdb_file = os.path.join("sameEnds/longLinkers/", file_name)  # directory holding the long-linker PDB files
    # NOTE(review): eval() executes whatever expression the CSV cell contains.
    # Kept as-is because the stored format is not visible here; if the columns
    # hold plain list/tuple literals, prefer ast.literal_eval.
    n_term_coord = eval(row['N-Term Coords'])
    c_term_coord = eval(row['C-Term Coords'])
    visualize_pdb(pdb_file, n_term_coord, c_term_coord)


Sequence GSGTGSGTEIYQAGSTPCNGVEGFGSGTGSGPSKPSKRSFIEDLLFNKVGSGTGSGDKYFKNHTSPDVDLGSGTGSG
Epitopes used:  ['TEIYQAGSTPCNGVEGF', 'PSKPSKRSFIEDLLFNKV', 'DKYFKNHTSPDVDL']
Distance: 3.5468106


Sequence GSGTGSGLFRKSNLKPFERDISTEGSGTGSGKRSFIEDLLFNKGSGTGSGDKYFKNHTSPDVDLGSGTGSG
Epitopes used:  ['LFRKSNLKPFERDISTE', 'KRSFIEDLLFNK', 'DKYFKNHTSPDVDL']
Distance: 3.980982


Sequence GSGTGSGTEIYQAGSTPCNGVEGFNCYFGSGTGSGDKYFKNHTSPDVDLGSGTGSGPSKPSKRSFIEDLLFNKVTLADAGFGSGTGSG
Epitopes used:  ['TEIYQAGSTPCNGVEGFNCYF', 'DKYFKNHTSPDVDL', 'PSKPSKRSFIEDLLFNKVTLADAGF']
Distance: 4.1293316


Sequence GSGTGSGLFRKSNLKPFERDISTEGSGTGSGDKYFKNHTSPDVDLGSGTGSGPSKRSFIEDLLFNKVGSGTGSG
Epitopes used:  ['LFRKSNLKPFERDISTE', 'DKYFKNHTSPDVDL', 'PSKRSFIEDLLFNKV']
Distance: 4.5969195


Sequence GSGTGSGTEIYQAGSTPCNGVEGFGSGTGSGDKYFKNHTSPDVDLGSGTGSGPSKPSKRSFIEDLLFGSGTGSG
Epitopes used:  ['TEIYQAGSTPCNGVEGF', 'DKYFKNHTSPDVDL', 'PSKPSKRSFIEDLLF']
Distance: 4.616605


## Show 5 shortest distance models with Short Linkers

In [8]:
# Load the short-linker distance table from CSV
df = pd.read_csv('sameEnds/short_Linker_distances.csv')

# Keep the 5 rows with the smallest 'Distance' values
df = df.sort_values('Distance').head(5)

# Report and visualize each of the 5 shortest-distance models
for _, row in df.iterrows():
    file_name = row['File Name']
    # Remove the '.pdb' extension as a suffix. str.rstrip('.pdb') would strip
    # any trailing run of the characters '.', 'p', 'd', 'b' instead.
    sequence = file_name[:-len('.pdb')] if file_name.endswith('.pdb') else file_name

    print("Sequence", sequence)

    # The sequence is split on the short linker motif; empty pieces produced
    # by a leading/trailing linker are dropped.
    fragments = re.split('GSG', sequence)
    print("Epitopes used: ", [item for item in fragments if item])

    print("Distance: " + str(row['Distance']))

    pdb_file = os.path.join("sameEnds/shortLinkers", file_name)  # directory holding the short-linker PDB files
    # NOTE(review): eval() executes whatever expression the CSV cell contains.
    # Kept as-is because the stored format is not visible here; if the columns
    # hold plain list/tuple literals, prefer ast.literal_eval.
    n_term_coord = eval(row['N-Term Coords'])
    c_term_coord = eval(row['C-Term Coords'])
    visualize_pdb(pdb_file, n_term_coord, c_term_coord)

Sequence GSGTEIYQAGSTPCNGVEGFGSGPSKPSKRSFIEDLLFNKVGSGDKYFKNHTSPDVDLGSG
Epitopes used:  ['TEIYQAGSTPCNGVEGF', 'PSKPSKRSFIEDLLFNKV', 'DKYFKNHTSPDVDL']
Distance: 3.5468106


Sequence GSGLFRKSNLKPFERDISTEGSGKRSFIEDLLFNKGSGDKYFKNHTSPDVDLGSG
Epitopes used:  ['LFRKSNLKPFERDISTE', 'KRSFIEDLLFNK', 'DKYFKNHTSPDVDL']
Distance: 3.980982


Sequence GSGTEIYQAGSTPCNGVEGFNCYFGSGDKYFKNHTSPDVDLGSGPSKPSKRSFIEDLLFNKVTLADAGFGSG
Epitopes used:  ['TEIYQAGSTPCNGVEGFNCYF', 'DKYFKNHTSPDVDL', 'PSKPSKRSFIEDLLFNKVTLADAGF']
Distance: 4.1293316


Sequence GSGLFRKSNLKPFERDISTEGSGDKYFKNHTSPDVDLGSGPSKRSFIEDLLFNKVGSG
Epitopes used:  ['LFRKSNLKPFERDISTE', 'DKYFKNHTSPDVDL', 'PSKRSFIEDLLFNKV']
Distance: 4.5969195


Sequence GSGTEIYQAGSTPCNGVEGFGSGDKYFKNHTSPDVDLGSGPSKPSKRSFIEDLLFGSG
Epitopes used:  ['TEIYQAGSTPCNGVEGF', 'DKYFKNHTSPDVDL', 'PSKPSKRSFIEDLLF']
Distance: 4.616605


## Show 5 **longest** distance models with Long Linkers

In [9]:
import re

# Load the long-linker distance table from CSV
df = pd.read_csv('sameEnds/long_Linker_distances.csv')

# Keep the 5 rows with the largest 'Distance' values (tail of the ascending sort)
df = df.sort_values('Distance').tail(5)

# Report each of the 5 longest-distance models, in ascending order of distance
for _, row in df.iterrows():
    file_name = row['File Name']
    # Remove the '.pdb' extension as a suffix. str.rstrip('.pdb') would strip
    # any trailing run of the characters '.', 'p', 'd', 'b' instead.
    sequence = file_name[:-len('.pdb')] if file_name.endswith('.pdb') else file_name

    print("Sequence", sequence)

    # The sequence is split on the long linker motif; empty pieces produced
    # by a leading/trailing linker are dropped.
    fragments = re.split('GSGTGSG', sequence)
    print("Epitopes used: ", [item for item in fragments if item])

    print("Distance: " + str(row['Distance']))

    pdb_file = os.path.join("sameEnds/longLinkers", file_name)  # directory holding the long-linker PDB files
    # NOTE(review): eval() executes whatever expression the CSV cell contains.
    # Kept as-is because the stored format is not visible here; if the columns
    # hold plain list/tuple literals, prefer ast.literal_eval.
    n_term_coord = eval(row['N-Term Coords'])
    c_term_coord = eval(row['C-Term Coords'])
    # 3D rendering is disabled in this cell (call left commented out);
    # uncomment the next line to display the structures.
    # visualize_pdb(pdb_file, n_term_coord, c_term_coord)


Sequence GSGTGSGLDSFKEELDKYFGSGTGSGPSKPSKRSFIEDLLFGSGTGSGLFRKSNLKPFERDISTEGSGTGSG
Epitopes used:  ['LDSFKEELDKYF', 'PSKPSKRSFIEDLLF', 'LFRKSNLKPFERDISTE']
Distance: 53.178673
Sequence GSGTGSGPSKPSKRSFIEDLLFNKVTLADAGFGSGTGSGKEELDKYFKNHTSPDVDGSGTGSGIYQAGSTPCNGVEGFNCYFPLQSYGSGTGSG
Epitopes used:  ['PSKPSKRSFIEDLLFNKVTLADAGF', 'KEELDKYFKNHTSPDVD', 'IYQAGSTPCNGVEGFNCYFPLQSY']
Distance: 53.861996
Sequence GSGTGSGPSKPSKRSFIEDLLFNKVGSGTGSGKEELDKYFKNHTSPDVDGSGTGSGIYQAGSTPCNGVEGFNCYFPLQSYGSGTGSG
Epitopes used:  ['PSKPSKRSFIEDLLFNKV', 'KEELDKYFKNHTSPDVD', 'IYQAGSTPCNGVEGFNCYFPLQSY']
Distance: 54.07787
Sequence GSGTGSGIYQAGSTPCNGVEGFNCYFPLQSYGSGTGSGPSKPSKRSFIEDLLFNKVGSGTGSGSFKEELDKYFGSGTGSG
Epitopes used:  ['IYQAGSTPCNGVEGFNCYFPLQSY', 'PSKPSKRSFIEDLLFNKV', 'SFKEELDKYF']
Distance: 54.164852
Sequence GSGTGSGDISTEIYQAGSTPCNGVEGFNCYFPLQSYGFQPTNGVGYQPYRVVVLGSGTGSGKRSFIEDLLFNKGSGTGSGQPELDSFKEELDKYFKNHTSPGSGTGSG
Epitopes used:  ['DISTEIYQAGSTPCNGVEGFNCYFPLQSYGFQPTNGVGYQPYRVVVL', 'KRSFIEDLLFNK', 'QPELDSFK

## Show 5 **longest** distance models with Short Linkers

In [10]:
# Load the short-linker distance table from CSV
df = pd.read_csv('sameEnds/short_Linker_distances.csv')

# Keep the 5 rows with the largest 'Distance' values (tail of the ascending sort)
df = df.sort_values('Distance').tail(5)

# Report each of the 5 longest-distance models, in ascending order of distance
for _, row in df.iterrows():
    file_name = row['File Name']
    # Remove the '.pdb' extension as a suffix. str.rstrip('.pdb') would strip
    # any trailing run of the characters '.', 'p', 'd', 'b' instead.
    sequence = file_name[:-len('.pdb')] if file_name.endswith('.pdb') else file_name

    print("Sequence", sequence)

    # The sequence is split on the short linker motif; empty pieces produced
    # by a leading/trailing linker are dropped.
    fragments = re.split('GSG', sequence)
    print("Epitopes used: ", [item for item in fragments if item])

    print("Distance: " + str(row['Distance']))

    pdb_file = os.path.join("sameEnds/shortLinkers", file_name)  # directory holding the short-linker PDB files
    # NOTE(review): eval() executes whatever expression the CSV cell contains.
    # Kept as-is because the stored format is not visible here; if the columns
    # hold plain list/tuple literals, prefer ast.literal_eval.
    n_term_coord = eval(row['N-Term Coords'])
    c_term_coord = eval(row['C-Term Coords'])
    # 3D rendering is disabled in this cell (call left commented out);
    # uncomment the next line to display the structures.
    # visualize_pdb(pdb_file, n_term_coord, c_term_coord)

Sequence GSGLDSFKEELDKYFGSGPSKPSKRSFIEDLLFGSGLFRKSNLKPFERDISTEGSG
Epitopes used:  ['LDSFKEELDKYF', 'PSKPSKRSFIEDLLF', 'LFRKSNLKPFERDISTE']
Distance: 53.178673
Sequence GSGPSKPSKRSFIEDLLFNKVTLADAGFGSGKEELDKYFKNHTSPDVDGSGIYQAGSTPCNGVEGFNCYFPLQSYGSG
Epitopes used:  ['PSKPSKRSFIEDLLFNKVTLADAGF', 'KEELDKYFKNHTSPDVD', 'IYQAGSTPCNGVEGFNCYFPLQSY']
Distance: 53.861996
Sequence GSGPSKPSKRSFIEDLLFNKVGSGKEELDKYFKNHTSPDVDGSGIYQAGSTPCNGVEGFNCYFPLQSYGSG
Epitopes used:  ['PSKPSKRSFIEDLLFNKV', 'KEELDKYFKNHTSPDVD', 'IYQAGSTPCNGVEGFNCYFPLQSY']
Distance: 54.07787
Sequence GSGIYQAGSTPCNGVEGFNCYFPLQSYGSGPSKPSKRSFIEDLLFNKVGSGSFKEELDKYFGSG
Epitopes used:  ['IYQAGSTPCNGVEGFNCYFPLQSY', 'PSKPSKRSFIEDLLFNKV', 'SFKEELDKYF']
Distance: 54.164852
Sequence GSGDISTEIYQAGSTPCNGVEGFNCYFPLQSYGFQPTNGVGYQPYRVVVLGSGKRSFIEDLLFNKGSGQPELDSFKEELDKYFKNHTSPGSG
Epitopes used:  ['DISTEIYQAGSTPCNGVEGFNCYFPLQSYGFQPTNGVGYQPYRVVVL', 'KRSFIEDLLFNK', 'QPELDSFKEELDKYFKNHTSP']
Distance: 56.092518
