In [None]:
# Install necessary libraries
!pip install py3Dmol biopython scikit-learn


Collecting py3Dmol
  Downloading py3Dmol-2.4.0-py2.py3-none-any.whl.metadata (1.9 kB)
Collecting biopython
  Downloading biopython-1.84-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Downloading py3Dmol-2.4.0-py2.py3-none-any.whl (7.0 kB)
Downloading biopython-1.84-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.2/3.2 MB[0m [31m28.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: py3Dmol, biopython
Successfully installed biopython-1.84 py3Dmol-2.4.0


In [None]:
# Download a PDB file from the RCSB Protein Data Bank
!wget https://files.rcsb.org/download/1A2B.pdb


--2024-10-01 08:10:14--  https://files.rcsb.org/download/1A2B.pdb
Resolving files.rcsb.org (files.rcsb.org)... 128.6.159.100
Connecting to files.rcsb.org (files.rcsb.org)|128.6.159.100|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: unspecified [application/octet-stream]
Saving to: ‘1A2B.pdb’

1A2B.pdb                [ <=>                ] 150.13K  --.-KB/s    in 0.07s   

2024-10-01 08:10:14 (2.03 MB/s) - ‘1A2B.pdb’ saved [153738]



In [None]:
import py3Dmol

def visualize_pdb(pdb_filename, width=800, height=600, style=None):
    """Render a PDB file as an interactive py3Dmol view in the notebook.

    Args:
        pdb_filename: Path of the PDB file to read.
        width: Viewer width in pixels.
        height: Viewer height in pixels.
        style: py3Dmol style specification passed to ``setStyle``. When
            omitted, a cartoon representation with spectrum colouring is used.

    Returns:
        None. The view is displayed as a side effect of ``view.show()``.
    """
    if style is None:
        # Built per call so callers never share (and mutate) one default dict.
        style = {'cartoon': {'color': 'spectrum'}}

    with open(pdb_filename, 'r') as pdb_file:
        pdb_data = pdb_file.read()

    view = py3Dmol.view(width=width, height=height)
    view.addModel(pdb_data, 'pdb')  # Load PDB data into viewer
    view.setStyle(style)
    view.zoomTo()  # Fit the whole structure in the viewport
    view.show()  # Display the 3D view


# Visualize the downloaded PDB file
visualize_pdb('1A2B.pdb')


In [None]:
import py3Dmol

# visualize_pdb is already defined in the previous cell. Defining it a second
# time here only shadowed that definition with an identical copy, so this cell
# simply reuses the existing function.

# Visualize the downloaded PDB file
visualize_pdb('1A2B.pdb')


In [None]:
from Bio import PDB

def extract_features(pdb_file, ss_bond_cutoff=2.5):
    """Compute simple structure-derived features from a PDB file.

    Only the first model of the parsed structure is used, so files that
    contain several models are not counted once per model.

    Args:
        pdb_file: Path of the PDB file, as accepted by
            ``PDBParser.get_structure``.
        ss_bond_cutoff: Maximum distance in angstroms between the SG atoms of
            two CYS residues for the pair to be counted as a disulfide bond.

    Returns:
        dict with two entries:
            'Molecular Weight': sum of ``atom.mass`` over every atom of every
                residue in the first model (atoms whose mass is NaN are
                skipped).
            'Disulfide Bonds': number of CYS SG-SG pairs closer than
                ``ss_bond_cutoff``; each SG atom is used in at most one pair.
    """
    import math  # local import so this cell does not rely on another cell

    parser = PDB.PDBParser()
    structure = parser.get_structure('protein', pdb_file)

    molecular_weight = 0.0
    sulfur_atoms = []

    # An empty structure has no first model; the features then stay at zero.
    first_model = next(iter(structure), None)
    if first_model is not None:
        for chain in first_model:
            for residue in chain:
                for atom in residue:
                    mass = atom.mass
                    # Biopython reports NaN for atoms whose element it cannot
                    # resolve; adding it would turn the whole sum into NaN.
                    if not math.isnan(mass):
                        molecular_weight += mass

                # Remember the sulfur of each cysteine for the bond search.
                if residue.get_resname() == 'CYS' and 'SG' in residue:
                    sulfur_atoms.append(residue['SG'])

    # A cysteine only forms a disulfide bond when its SG atom sits next to
    # another cysteine's SG atom, so pair them up by distance instead of
    # assuming every two cysteines are bonded.
    disulfide_bonds = 0
    paired = set()
    for i, sulfur in enumerate(sulfur_atoms):
        if i in paired:
            continue
        for j in range(i + 1, len(sulfur_atoms)):
            if j in paired:
                continue
            # Subtracting two Biopython atoms yields their distance.
            if sulfur - sulfur_atoms[j] <= ss_bond_cutoff:
                disulfide_bonds += 1
                paired.update((i, j))
                break

    features = {}
    features['Molecular Weight'] = molecular_weight
    features['Disulfide Bonds'] = disulfide_bonds
    return features


In [None]:
import pandas as pd
import numpy as np

# Toy training table for the classification problem: one row per protein with
# placeholder feature values and a binary target (1 = enzyme, 0 = non-enzyme).
_columns = ('Molecular Weight', 'Disulfide Bonds', 'Label')
_rows = [
    (100000, 2, 1),
    (65000, 1, 0),
]

# Transpose the row-wise records into the column-wise mapping pandas expects.
data = {column: list(values) for column, values in zip(_columns, zip(*_rows))}

df = pd.DataFrame(data)
print(df)


   Molecular Weight  Disulfide Bonds  Label
0            100000                2      1
1             65000                1      0


In [None]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# Separate the predictor columns from the target column.
feature_columns = ['Molecular Weight', 'Disulfide Bonds']
X = df[feature_columns]
y = df['Label']

# Hold out 20% of the rows for evaluation (fixed seed for a repeatable split).
# NOTE(review): with the two-row example frame this leaves a single training
# row and a single test row, so the accuracy printed below is not meaningful.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Build the random forest and fit it on the training rows; fit() returns the
# estimator itself, so clf is the trained model.
clf = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)

# Score the held-out rows against their true labels.
y_pred = clf.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Model Accuracy: {accuracy:.2f}")


Model Accuracy: 0.00


In [None]:
def predict_enzyme(pdb_file):
    """Classify the protein in a PDB file with the trained classifier.

    Args:
        pdb_file: Path of the PDB file to classify; it is passed on to
            ``extract_features``.

    Returns:
        "Enzyme" when the classifier predicts label 1, otherwise "Non-Enzyme".
    """
    # Extract features from the new PDB file
    new_features = extract_features(pdb_file)

    # clf was fitted on a DataFrame, so hand it a one-row DataFrame with the
    # same column names and order. A bare NumPy array makes scikit-learn warn
    # about missing feature names and relies on positional order alone.
    feature_frame = pd.DataFrame(
        [new_features],
        columns=['Molecular Weight', 'Disulfide Bonds'],
    )

    # Make prediction
    prediction = clf.predict(feature_frame)

    # Interpret the result
    return "Enzyme" if prediction[0] == 1 else "Non-Enzyme"


# Example prediction on the downloaded PDB file
result = predict_enzyme('1A2B.pdb')
print(f"The protein is predicted to be: {result}")


The protein is predicted to be: Enzyme


