##

# Structure Vizualization


In [2]:
!pip install py3Dmol

Collecting py3Dmol
  Downloading py3Dmol-2.0.4-py2.py3-none-any.whl (12 kB)
Installing collected packages: py3Dmol
Successfully installed py3Dmol-2.0.4


In [3]:
import py3Dmol
import pandas as pd # for reading csvs
import ast # for parsing strings to literals
import unittest

## Enter File Name, PDB ID, and AbAg index of Protein of Interest Below
Enter PDB ID, and filename of csv, formatted as exported by process_AbAg.py. If the protein of interest has multiple associated Ab-like and Ag-like pairs, enter the index of the pair you'd like to visulize, starting with zero.

In [7]:
pdb_id = '5kbs'
AbAg_pair_index = 0 # if multiple pairs present for PDB_id
csv_file_path = 'regions_for_vis.csv' # ensure that this represents the correct location of the csv
# IF RUNNING FROM SRC DIRECTORY, uncomment the following lines to access csvs in the data direcotry
#import os
#current_dir = os.getcwd()
# Navigate to the 'data' directory relative to the current directory
#data_dir = os.path.join(current_dir, '..', 'data')  # '..' refers to the parent directory
# Construct the path to your CSV file
#csv_file_path = os.path.join(data_dir, 'regions_for_vis.csv')

## Run Below Code Block to Define Functions

In [5]:
def read_csv(filename, pdb_id, AbAg_index=0):
    try:
        # Read CSV file into a DataFrame
        AbAg_df = pd.read_csv(filename)

        # Check if 'pdb_id' column exists in the DataFrame
        if 'pdb_id' not in AbAg_df.columns:
            raise ValueError("CSV file does not contain 'pdb_id' column.")

        # Filter rows based on the provided pdb_id
        AbAg_filtered_df = AbAg_df[AbAg_df['pdb_id'] == pdb_id]

        # Check if the filtered DataFrame is empty
        if AbAg_filtered_df.empty:
            return None, None, None, None

        # Check for necessary columns in the filtered DataFrame
        necessary_columns = ['ab-like integers', 'ab-like chains', 'ag-like integers', 'ag-like chains']
        missing_columns = [col for col in necessary_columns if col not in AbAg_filtered_df.columns]

        if missing_columns:
            missing_cols_str = ', '.join(missing_columns)
            raise ValueError(f"CSV file missing necessary columns: {missing_cols_str}")

        # Check if AbAg_index is within bounds
        if AbAg_index >= len(AbAg_filtered_df):
            raise ValueError("AbAg_index out of bounds.")

        # Extract Ab-like and Ag-like indices and chain letters based on the AbAg_index
        ab_indices = AbAg_filtered_df.iloc[AbAg_index]['ab-like integers']
        ab_chains = AbAg_filtered_df.iloc[AbAg_index]['ab-like chains']

        ag_indices = AbAg_filtered_df.iloc[AbAg_index]['ag-like integers']
        ag_chains = AbAg_filtered_df.iloc[AbAg_index]['ag-like chains']

        return ab_indices, ab_chains, ag_indices, ag_chains

    except FileNotFoundError as fnf_err:
        raise FileNotFoundError(f"File '{filename}' not found.") from fnf_err
    except ValueError as ve:
        raise ve  # Re-raise the caught ValueError
    except Exception as e:
        raise e  # Re-raise other exceptions for debugging



def string_literal_to_list(string_literal):
    """
    Converts a string literal representing a list to a list of elements. Turns the string "['A','A','A','A','A']"
    into the list ['A','A','A','A','A']. Turns "[1,2,3]" to list containing the elements 1, 2 and 3.

    Parameters:
    - string_literal (str): String literal representing a list.

    Returns:
    - elements_list (list): List of elements.

    """
    try:
        # Parse the string literal into a Python object
        elements_list = ast.literal_eval(string_literal)
        return elements_list
    except (SyntaxError, ValueError):
        # Handle the case where the string is not a valid literal
        return None

def visualize_AbAg_regions(pdb_id, ab_chains, ab_indices, ag_chains, ag_indices):
    """
    Vizualize the protein
    ab_chains - (list of chars) representing the chains of each index of each residue in the Ab-like region
    ab_indices - (list of integers) representing the indicies of each residue in the Ab-like region
    ag_chains - (list of chars) representing the chains of each index of each residue in the Ag-like region
    ag_indices - (list of integers) representing the indicies of each residue in the Ag-like region
    """
    if not len(ab_chains) == len(ab_indices) or not len(ag_indices) == len(ag_chains):
        raise ValueError("Invalid input: Lengths of chains and indices do not match.")

    try:
        view = py3Dmol.view(query='pdb:'+ pdb_id)
    except Exception as e:
        print("Error retrieving PDB data:", e)
        return

    try:
        view.setStyle({'model': -1},{'cartoon': {'color': '#C7FFEE'}})
        view.addStyle({'chain': ab_chains, 'resi': ab_indices},{'cartoon': {'color': 'magenta'}})
        view.addStyle({'chain': ag_chains, 'resi': ag_indices},{'cartoon': {'color': 'blue'}})
    except Exception as e:
        print("Error setting style:", e)
        return

    view.show()

## Run Below Function Calls to Produce the Visualization

In [8]:
ab_indices_string, ab_chains_string, ag_indices_string, ag_chains_string = read_csv(csv_file_path, pdb_id, AbAg_pair_index)

ab_indices = string_literal_to_list(ab_indices_string)
ag_indices = string_literal_to_list(ag_indices_string)
ab_chains = string_literal_to_list(ab_chains_string)
ag_chains = string_literal_to_list(ag_chains_string)

visualize_AbAg_regions(pdb_id, ab_chains, ab_indices, ag_chains, ag_indices)

## Run below to test code
Ensure that the test data files: regions_for_vis.csv and vis_test_bad.csv are present in the data subdirectory.

In [11]:
def main():
    class TestReadCSVFunction(unittest.TestCase):
        def test_read_csv_with_valid_input(self):
            # Provide the test data file
            pdb_id = '5kbs'
            ab_indices, ab_chains, ag_indices, ag_chains = read_csv(csv_file_path, pdb_id)
            expected_ab_indices = '[305, 306, 307, 308]'
            expected_ab_chains = "['C', 'C', 'C', 'C']"
            expected_ag_indices = '[303, 304, 325, 71, 299, 323, 328, 329, 309]'
            expected_ag_chains = "['C', 'C', 'C', 'C', 'C', 'C', 'C', 'C', 'C']"
            self.assertEqual(ab_indices, expected_ab_indices)
            self.assertEqual(ab_chains, expected_ab_chains)
            self.assertEqual(ag_indices, expected_ag_indices)
            self.assertEqual(ag_chains, expected_ag_chains)

        def test_read_csv_with_missing_pdb_id_column(self):
            # Test when the CSV file doesn't contain 'pdb_id' column
            valid_pdb_id = '5kbs'
            with self.assertRaises(ValueError):
                read_csv('vis_test_bad.csv', valid_pdb_id)

        def test_read_csv_with_missing_columns(self):
            # Test when the CSV file is missing necessary columns
            valid_pdb_id = '5kbs'
            with self.assertRaises(ValueError):
                read_csv('vis_test_bad.csv', valid_pdb_id)

        def test_read_csv_with_invalid_index(self):
            # Test when AbAg_index is out of bounds
            valid_pdb_id = '5kbs' # only one match for this pdb_id
            # Use try-except to capture the exception and print additional information
            try:
                read_csv('csv_file_path.csv', valid_pdb_id, AbAg_index=1)
            except ValueError as e:
                print("Caught ValueError:", e)
                raise e  # Re-raise the exception to see the traceback in the test output

        def test_read_csv_with_file_not_found(self):
            # Test when the CSV file is not found
            valid_pdb_id = '5kbs'
            with self.assertRaises(FileNotFoundError):
                read_csv('doesnt_exist.csv', valid_pdb_id)

    class TestStringLiteralToListFunction(unittest.TestCase):
        def test_string_literal_to_list_with_valid_input(self):
            valid_string_literal_1 = "['a', 'b', 'c']"
            expected_output_1 = ['a', 'b', 'c']
            valid_string_literal_2 = "[1, 2, 3]"
            expected_output_2 = [1, 2, 3]

            output_1 = string_literal_to_list(valid_string_literal_1)
            output_2 = string_literal_to_list(valid_string_literal_2)

            self.assertEqual(output_1, expected_output_1)
            self.assertEqual(output_2, expected_output_2)

        def test_string_literal_to_list_with_invalid_input(self):
            # Test with invalid string literals
            invalid_string_literal = "'a', 'b']"
            output = string_literal_to_list(invalid_string_literal)
            self.assertIsNone(output)


if __name__ == '__main__':
    unittest.main(argv=['first-arg-is-ignored'], exit=False) # Necessary to execute in ipynb


----------------------------------------------------------------------
Ran 0 tests in 0.000s

OK
