In [None]:
# Step 1: Install Necessary Libraries
!pip install py3Dmol biopython scikit-learn

# Step 2: Download Multiple PDB Files
pdb_ids = ['1A2B', '4HHB', '1C1D']  # Add more PDB IDs as needed

# Download PDB files
for pdb_id in pdb_ids:
    !wget https://files.rcsb.org/download/{pdb_id}.pdb

# Step 3: Visualize Multiple PDB Files
import py3Dmol

def visualize_multiple_pdb(pdb_filenames):
    """Render each PDB file in its own py3Dmol viewer.

    Args:
        pdb_filenames: Iterable of paths to PDB-format text files.

    Each file is read in full, loaded into a fresh 800x600 viewer, drawn as a
    spectrum-coloured cartoon, zoomed to fit, and shown. Nothing is returned.
    """
    cartoon_style = {'cartoon': {'color': 'spectrum'}}

    for path in pdb_filenames:
        with open(path, 'r') as handle:
            structure_text = handle.read()

        # One independent viewer per structure.
        viewer = py3Dmol.view(width=800, height=600)
        viewer.addModel(structure_text, 'pdb')
        viewer.setStyle(cartoon_style)
        viewer.zoomTo()
        viewer.show()

# Show every structure listed in pdb_ids, one viewer per downloaded file
downloaded_files = [f'{pdb_id}.pdb' for pdb_id in pdb_ids]
visualize_multiple_pdb(downloaded_files)

# Step 4: Extract Features from PDB Files
from Bio import PDB

def extract_features(pdb_file, max_ss_distance=2.5):
    """Compute simple structural features for one PDB file.

    Args:
        pdb_file: Path (or handle) of the PDB file, passed to
            ``PDBParser.get_structure``.
        max_ss_distance: Largest SG-SG distance, in the coordinate units of
            the file (Angstrom for PDB), that is still counted as a
            disulfide bond.

    Returns:
        dict with two keys:
            'Molecular Weight': sum of ``atom.mass`` over every atom of the
                first model (all residues present in the file are included).
            'Disulfide Bonds': number of cysteine SG-SG pairs in the first
                model that lie within ``max_ss_distance`` of each other.
    """
    import math  # local import: only needed for the NaN-mass guard below

    parser = PDB.PDBParser()
    structure = parser.get_structure('protein', pdb_file)

    # Only the first model is used. Summing over every model would count the
    # same molecule once per model for multi-model entries.
    first_model = next(iter(structure), None)

    molecular_weight = 0.0
    sulfur_atoms = []

    if first_model is not None:
        for chain in first_model:
            for residue in chain:
                is_cysteine = residue.get_resname() == 'CYS'
                for atom in residue:
                    mass = atom.mass
                    # Biopython appears to report NaN when it cannot resolve an
                    # atom's element; skip those so one atom does not turn the
                    # whole total into NaN.
                    if not math.isnan(mass):
                        molecular_weight += mass
                    if is_cysteine and atom.get_id() == 'SG':
                        sulfur_atoms.append(atom)

    # A disulfide bond is a pair of cysteine sulfurs in bonding distance.
    # Merely halving the cysteine count reports bonds for proteins that have
    # free cysteines only. `a - b` on Bio.PDB atoms yields their distance.
    disulfide_bonds = sum(
        1
        for index, first in enumerate(sulfur_atoms)
        for second in sulfur_atoms[index + 1:]
        if first - second <= max_ss_distance
    )

    return {
        'Molecular Weight': molecular_weight,
        'Disulfide Bonds': disulfide_bonds,
    }

# Build one feature record per downloaded structure, tagged with its PDB ID
all_features = []
for pdb_id in pdb_ids:
    all_features.append({**extract_features(f'{pdb_id}.pdb'), 'PDB ID': pdb_id})

# Create a DataFrame from the features
import pandas as pd

# One row per structure; print a heading followed by the table itself
df = pd.DataFrame(all_features)
print("Extracted Features:", df, sep="\n")

# Step 5: Train and Evaluate the Model
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# Simulate labels for demonstration (replace with actual labels).
# A seeded generator keeps the placeholder labels, and therefore the reported
# accuracy, identical on every run; the unseeded draw changed them each time.
label_rng = np.random.default_rng(42)
df['Label'] = label_rng.choice([0, 1], size=len(df))

# Split the dataset into features and labels
X = df[['Molecular Weight', 'Disulfide Bonds']]
y = df['Label']

# Split into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize and train the model
clf = RandomForestClassifier(n_estimators=100, random_state=42)
clf.fit(X_train, y_train)

# Make predictions
y_pred = clf.predict(X_test)

# Evaluate the model.
# NOTE: the labels are random placeholders and only a handful of structures are
# loaded, so this score demonstrates the workflow and says nothing about
# predictive quality.
accuracy = accuracy_score(y_test, y_pred)
print(f"Model Accuracy: {accuracy:.2f}")


Collecting py3Dmol
  Downloading py3Dmol-2.4.0-py2.py3-none-any.whl.metadata (1.9 kB)
Downloading py3Dmol-2.4.0-py2.py3-none-any.whl (7.0 kB)
Installing collected packages: py3Dmol
Successfully installed py3Dmol-2.4.0
--2024-10-01 08:05:05--  https://files.rcsb.org/download/1A2B.pdb
Resolving files.rcsb.org (files.rcsb.org)... 128.6.159.157
Connecting to files.rcsb.org (files.rcsb.org)|128.6.159.157|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: unspecified [application/octet-stream]
Saving to: ‘1A2B.pdb.1’

1A2B.pdb.1              [   <=>              ] 150.13K   264KB/s    in 0.6s    

2024-10-01 08:05:06 (264 KB/s) - ‘1A2B.pdb.1’ saved [153738]

--2024-10-01 08:05:06--  https://files.rcsb.org/download/4HHB.pdb
Resolving files.rcsb.org (files.rcsb.org)... 128.6.159.157
Connecting to files.rcsb.org (files.rcsb.org)|128.6.159.157|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: unspecified [application/octet-stream]
Saving to: ‘4HHB.p



Extracted Features:
   Molecular Weight  Disulfide Bonds PDB ID
0      19933.087783                2   1A2B
1      63506.325122                3   4HHB
2      83347.481345                3   1C1D
Model Accuracy: 1.00


In [None]:
# Step 1: Install Necessary Libraries
!pip install py3Dmol biopython scikit-learn




In [None]:
# Step 2: Download Multiple PDB Files
pdb_ids = ['1A2B', '4HHB', '1C1D']  # Add more PDB IDs as needed

# Download PDB files
for pdb_id in pdb_ids:
    !wget https://files.rcsb.org/download/{pdb_id}.pdb


--2024-10-01 08:06:31--  https://files.rcsb.org/download/1A2B.pdb
Resolving files.rcsb.org (files.rcsb.org)... 128.6.159.245
Connecting to files.rcsb.org (files.rcsb.org)|128.6.159.245|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: unspecified [application/octet-stream]
Saving to: ‘1A2B.pdb.2’

1A2B.pdb.2              [  <=>               ] 150.13K   282KB/s    in 0.5s    

2024-10-01 08:06:32 (282 KB/s) - ‘1A2B.pdb.2’ saved [153738]

--2024-10-01 08:06:32--  https://files.rcsb.org/download/4HHB.pdb
Resolving files.rcsb.org (files.rcsb.org)... 128.6.159.245
Connecting to files.rcsb.org (files.rcsb.org)|128.6.159.245|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: unspecified [application/octet-stream]
Saving to: ‘4HHB.pdb.2’

4HHB.pdb.2              [   <=>              ] 462.67K   521KB/s    in 0.9s    

2024-10-01 08:06:34 (521 KB/s) - ‘4HHB.pdb.2’ saved [473769]

--2024-10-01 08:06:34--  https://files.rcsb.org/download/1C1D.pdb
Re

In [None]:
# Step 3: Visualize Multiple PDB Files
import py3Dmol

def visualize_multiple_pdb(pdb_filenames):
    """Render each PDB file in its own py3Dmol viewer.

    Args:
        pdb_filenames: Iterable of paths to PDB-format text files.

    Each file is read in full, loaded into a fresh 800x600 viewer, drawn as a
    spectrum-coloured cartoon, zoomed to fit, and shown. Nothing is returned.
    """
    cartoon_style = {'cartoon': {'color': 'spectrum'}}

    for path in pdb_filenames:
        with open(path, 'r') as handle:
            structure_text = handle.read()

        # One independent viewer per structure.
        viewer = py3Dmol.view(width=800, height=600)
        viewer.addModel(structure_text, 'pdb')
        viewer.setStyle(cartoon_style)
        viewer.zoomTo()
        viewer.show()

# Show every structure listed in pdb_ids, one viewer per downloaded file
downloaded_files = [f'{pdb_id}.pdb' for pdb_id in pdb_ids]
visualize_multiple_pdb(downloaded_files)


In [None]:
# Step 4: Extract Features from PDB Files
from Bio import PDB

def extract_features(pdb_file, max_ss_distance=2.5):
    """Compute simple structural features for one PDB file.

    Args:
        pdb_file: Path (or handle) of the PDB file, passed to
            ``PDBParser.get_structure``.
        max_ss_distance: Largest SG-SG distance, in the coordinate units of
            the file (Angstrom for PDB), that is still counted as a
            disulfide bond.

    Returns:
        dict with two keys:
            'Molecular Weight': sum of ``atom.mass`` over every atom of the
                first model (all residues present in the file are included).
            'Disulfide Bonds': number of cysteine SG-SG pairs in the first
                model that lie within ``max_ss_distance`` of each other.
    """
    import math  # local import: only needed for the NaN-mass guard below

    parser = PDB.PDBParser()
    structure = parser.get_structure('protein', pdb_file)

    # Only the first model is used. Summing over every model would count the
    # same molecule once per model for multi-model entries.
    first_model = next(iter(structure), None)

    molecular_weight = 0.0
    sulfur_atoms = []

    if first_model is not None:
        for chain in first_model:
            for residue in chain:
                is_cysteine = residue.get_resname() == 'CYS'
                for atom in residue:
                    mass = atom.mass
                    # Biopython appears to report NaN when it cannot resolve an
                    # atom's element; skip those so one atom does not turn the
                    # whole total into NaN.
                    if not math.isnan(mass):
                        molecular_weight += mass
                    if is_cysteine and atom.get_id() == 'SG':
                        sulfur_atoms.append(atom)

    # A disulfide bond is a pair of cysteine sulfurs in bonding distance.
    # Merely halving the cysteine count reports bonds for proteins that have
    # free cysteines only. `a - b` on Bio.PDB atoms yields their distance.
    disulfide_bonds = sum(
        1
        for index, first in enumerate(sulfur_atoms)
        for second in sulfur_atoms[index + 1:]
        if first - second <= max_ss_distance
    )

    return {
        'Molecular Weight': molecular_weight,
        'Disulfide Bonds': disulfide_bonds,
    }

# Build one feature record per downloaded structure, tagged with its PDB ID
all_features = []
for pdb_id in pdb_ids:
    all_features.append({**extract_features(f'{pdb_id}.pdb'), 'PDB ID': pdb_id})

# Create a DataFrame from the features
import pandas as pd

# One row per structure; print a heading followed by the table itself
df = pd.DataFrame(all_features)
print("Extracted Features:", df, sep="\n")


Extracted Features:
   Molecular Weight  Disulfide Bonds PDB ID
0      19933.087783                2   1A2B
1      63506.325122                3   4HHB
2      83347.481345                3   1C1D




In [None]:
# Step 5: Train and Evaluate the Model
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# Simulate labels for demonstration (replace with actual labels).
# A seeded generator keeps the placeholder labels, and therefore the reported
# accuracy, identical on every run; the unseeded draw changed them each time.
label_rng = np.random.default_rng(42)
df['Label'] = label_rng.choice([0, 1], size=len(df))

# Split the dataset into features and labels
X = df[['Molecular Weight', 'Disulfide Bonds']]
y = df['Label']

# Split into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize and train the model
clf = RandomForestClassifier(n_estimators=100, random_state=42)
clf.fit(X_train, y_train)

# Make predictions
y_pred = clf.predict(X_test)

# Evaluate the model.
# NOTE: the labels are random placeholders and only a handful of structures are
# loaded, so this score demonstrates the workflow and says nothing about
# predictive quality.
accuracy = accuracy_score(y_test, y_pred)
print(f"Model Accuracy: {accuracy:.2f}")


Model Accuracy: 1.00
