# 🧬 Automated Molecular Docking Pipeline on Colab (GPU)
**Author:** Dip Kumar Ghosh

This notebook provides a complete pipeline for molecular docking using **AutoDock Vina-GPU 2.1**. It leverages the free GPU resources (Tesla T4) on Colab for accelerated docking.

## Features:
- **Setup**: Automatically installs OpenBabel and the Boost development libraries, then compiles Vina-GPU 2.1.
- **Preparation**: Converts PDB receptors and SDF/MOL2 ligands to PDBQT.
- **Auto-Box**: Calculates the docking box center and size automatically.
- **Docking**: Runs parallel GPU docking.
- **Analysis**: Summarizes docking scores and generates plots.

## 1. Environment Setup
Installs the system dependencies (OpenBabel, Boost, SWIG) and builds Vina-GPU 2.1 from source.

In [None]:
# @title Install Dependencies & Build Vina-GPU
import os
import sys
import shutil

# 1. Install System Tools
# OpenBabel provides the `obabel` converter that the preparation and docking
# cells call. libboost-all-dev and swig3.0 are presumably build dependencies
# of Vina-GPU -- TODO confirm swig3.0 is actually required.
!apt-get update -qq
!apt-get install -y -qq openbabel libboost-all-dev swig3.0

# 2. Clone Vina-GPU 2.1
# The clone target is relative to the current working directory, while step 3
# changes into an absolute /content/... path, so this cell assumes it is run
# with /content as the working directory.
if not os.path.exists("Vina-GPU-2.1"):
    !git clone https://github.com/DeltaGroupNJUPT/Vina-GPU-2.1.git

# 3. Compile Vina-GPU 2.1 (AutoDock-Vina-GPU-2.1)
%cd /content/Vina-GPU-2.1/AutoDock-Vina-GPU-2.1
# NOTE(review): the sed expression below replaces the OPENCL_VERSION line with
# the identical text, so it leaves the Makefile unchanged. If the upstream
# Makefile hard-codes any paths (Boost, OpenCL, working directory), they are
# NOT adjusted here -- verify against the repository's Makefile.
!sed -i 's|OPENCL_VERSION = -DOPENCL_3_0|OPENCL_VERSION = -DOPENCL_3_0|g' Makefile
# Build
# `make source` is the target name used by this notebook; what it builds is
# defined by the upstream Makefile -- confirm against the project's README.
!make clean
!make source

# Path of the binary the build is expected to produce; the docking cell
# invokes it through this variable.
VINA_GPU_BIN = "/content/Vina-GPU-2.1/AutoDock-Vina-GPU-2.1/AutoDock-Vina-GPU-2-1"
# The build result is only reported, not enforced: a failed build prints a
# message and execution continues.
if os.path.exists(VINA_GPU_BIN):
    print("‚úÖ Vina-GPU 2.1 built successfully!")
else:
    print("‚ùå Build failed. Please check logs.")

# Return to the notebook root so the relative input/ and output/ paths used by
# the following cells resolve under /content.
%cd /content

## 2. Input Data
Upload your **Receptor (PDB)** and **Ligands (SDF/MOL2)**.

In [None]:
import os
import shutil  # imported here too so the cell still works after a runtime restart

from google.colab import files

# Working directories used by every later cell.
for directory in ("input/receptor", "input/ligands", "output"):
    os.makedirs(directory, exist_ok=True)

# files.upload() returns a mapping keyed by uploaded file name; each file is
# moved from the working directory into the matching input/ sub-directory.
print("‚¨áÔ∏è Please upload your Receptor file (.pdb):")
uploaded_rec = files.upload()
for fn in uploaded_rec:
    shutil.move(fn, f"input/receptor/{fn}")
    # RECEPTOR_FILE is the notebook-level variable the preparation cell reads.
    RECEPTOR_FILE = f"input/receptor/{fn}"
    print(f"Receptor saved: {RECEPTOR_FILE}")

# Only one receptor is docked against; make the choice explicit instead of
# silently keeping whichever file happened to be iterated last.
if len(uploaded_rec) > 1:
    print(f"Warning: {len(uploaded_rec)} receptor files uploaded; using {RECEPTOR_FILE}")

print("\n‚¨áÔ∏è Please upload your Ligand files (.sdf or .mol2):")
uploaded_ligs = files.upload()
for fn in uploaded_ligs:
    shutil.move(fn, f"input/ligands/{fn}")
    print(f"Ligand saved: input/ligands/{fn}")

## 3. Preparation & Auto-Box
Prepare the receptor and calculate the docking box.

In [None]:
# @title Prepare Receptor & Calculate Box
import os
import subprocess


def calculate_box(pdb_file, padding=10):
    """Compute an axis-aligned docking box enclosing every atom of a PDB file.

    Coordinates are read from the fixed-width columns of ATOM and HETATM
    records (characters 30-38, 38-46 and 46-54 of each line).

    Args:
        pdb_file: Path to the PDB file to scan.
        padding: Total length added to each box edge (not per side).

    Returns:
        A ``(center, size)`` tuple of two 3-element lists ``[x, y, z]``.

    Raises:
        ValueError: If the file contains no ATOM/HETATM records, or if a
            coordinate field cannot be parsed as a float.
    """
    # Infinite sentinels instead of +/-9999 so no real coordinate can be
    # clipped and an empty structure is detectable.
    lows = [float("inf")] * 3
    highs = [float("-inf")] * 3
    atom_count = 0

    with open(pdb_file, "r") as f:
        for line in f:
            if not line.startswith(("ATOM", "HETATM")):
                continue
            coords = (float(line[30:38]), float(line[38:46]), float(line[46:54]))
            for axis, value in enumerate(coords):
                lows[axis] = min(lows[axis], value)
                highs[axis] = max(highs[axis], value)
            atom_count += 1

    if atom_count == 0:
        # Previously this fell through and produced a negative box size that
        # was written straight into the docking config.
        raise ValueError(f"No ATOM/HETATM records found in {pdb_file}")

    center = [(lo + hi) / 2 for lo, hi in zip(lows, highs)]
    size = [(hi - lo) + padding for lo, hi in zip(lows, highs)]
    return center, size


# RECEPTOR_FILE is set by the upload cell; without it there is nothing to do.
if 'RECEPTOR_FILE' not in globals():
    print("‚ùå No receptor file uploaded!")
else:
    # 1. Prepare Receptor (PDB -> PDBQT)
    # splitext swaps only the final extension; str.replace would rewrite every
    # ".pdb" in the path and leave names such as "x.PDB" unchanged, making
    # obabel overwrite its own input.
    rec_pdbqt = os.path.splitext(RECEPTOR_FILE)[0] + ".pdbqt"
    prep = subprocess.run(
        ["obabel", "-ipdb", RECEPTOR_FILE, "-opdbqt", "-O", rec_pdbqt, "-xr", "-h"],
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        text=True,
    )
    # Echo the converter output so it stays visible in the cell.
    print(prep.stdout, end="")
    if prep.returncode != 0 or not os.path.isfile(rec_pdbqt):
        # Stop here: the config written below would point at a missing file.
        raise RuntimeError(f"OpenBabel failed to convert {RECEPTOR_FILE} to PDBQT")
    print(f"‚úÖ Receptor prepared: {rec_pdbqt}")

    # 2. Auto-Calculate Box
    # The box is derived from the original PDB and spans the whole structure
    # plus the default padding.
    center, size = calculate_box(RECEPTOR_FILE)

    print("‚úÖ Box Calculated:")
    print(f"   Center: {center}")
    print(f"   Size:   {size}")

    # Config for Vina-GPU
    config_lines = [f"receptor = {rec_pdbqt}\n"]
    config_lines += [f"center_{axis} = {value:.2f}\n" for axis, value in zip("xyz", center)]
    config_lines += [f"size_{axis} = {value:.2f}\n" for axis, value in zip("xyz", size)]
    # "thread = 8000" was chosen by the notebook author as the T4 setting.
    config_lines += ["thread = 8000\n", "num_modes = 9\n"]
    with open("docking_config.txt", "w") as f:
        f.writelines(config_lines)

    print("‚úÖ Configuration saved to docking_config.txt")

## 4. Run Docking
Iterates through all ligands, prepares them, and docks them using Vina-GPU.

In [None]:
# @title Start Docking
import glob
import os
import subprocess
import time

import pandas as pd


def parse_best_affinity(pdbqt_path):
    """Return the affinity (kcal/mol) of the first pose in a Vina output file.

    The first ``REMARK VINA RESULT`` line is used, since the first pose is the
    best one. Returns ``None`` when the file does not exist or holds no such
    line, so a missing result is never mistaken for a score of 0.0.
    """
    try:
        with open(pdbqt_path, "r") as f:
            for line in f:
                if line.startswith("REMARK VINA RESULT"):
                    # Format: REMARK VINA RESULT:    -8.2      0.000      0.000
                    return float(line.split()[3])
    except FileNotFoundError:
        return None
    return None


def report_failure(lig_name, reason):
    """Print the per-ligand failure line used throughout this cell."""
    print(f"  ‚ùå {lig_name} Failed: {reason}")


# The summary CSV is written even when nothing was docked, so the directory
# must exist regardless of what the upload cell did.
os.makedirs("output", exist_ok=True)

ligand_files = glob.glob("input/ligands/*.sdf") + glob.glob("input/ligands/*.mol2")
results = []

print(f"üöÄ Starting docking for {len(ligand_files)} ligands...")
start_time = time.time()

for lig_file in ligand_files:
    # splitext keeps dots inside the name ("lig.v2.sdf" -> "lig.v2"), so two
    # ligands no longer collapse onto the same output files.
    lig_name, lig_ext = os.path.splitext(os.path.basename(lig_file))
    lig_pdbqt = f"output/{lig_name}.pdbqt"
    out_pdbqt = f"output/{lig_name}_out.pdbqt"
    log_file = f"output/{lig_name}_log.txt"

    # 1. Prepare Ligand
    # obabel's -i takes the format ID ("-isdf" / "-imol2"); a bare "-i" followed
    # by the path makes it treat the path as the format. Arguments are passed
    # as a list so uploaded file names are never interpreted by a shell.
    prep = subprocess.run(
        ["obabel", f"-i{lig_ext[1:]}", lig_file, "-opdbqt", "-O", lig_pdbqt, "-h"],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True,
    )
    if prep.returncode != 0 or not os.path.isfile(lig_pdbqt) or os.path.getsize(lig_pdbqt) == 0:
        report_failure(lig_name, f"ligand preparation failed: {prep.stderr.strip()}")
        continue

    # 2. Run Vina-GPU
    # AutoDock-Vina-GPU-2.1 syntax:
    # ./AutoDock-Vina-GPU-2-1 --config ... --ligand ... --out ...
    # VINA_GPU_BIN is defined by the setup cell.
    cmd = [
        VINA_GPU_BIN,
        "--config", "docking_config.txt",
        "--ligand", lig_pdbqt,
        "--out", out_pdbqt,
    ]

    try:
        result = subprocess.run(
            cmd,
            check=True,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
        )
    except subprocess.CalledProcessError as e:
        report_failure(lig_name, e.stderr)
        continue
    except OSError as e:
        # Binary missing or not executable (e.g. the build step failed).
        report_failure(lig_name, e)
        continue

    with open(log_file, "w") as f:
        f.write(result.stdout)

    # Extract best score; a run that produced no pose is a failure, not 0.0.
    best_score = parse_best_affinity(out_pdbqt)
    if best_score is None:
        report_failure(lig_name, f"no 'REMARK VINA RESULT' line found in {out_pdbqt}")
        continue

    results.append({"Ligand": lig_name, "Affinity": best_score})
    print(f"  ‚úÖ {lig_name}: {best_score} kcal/mol")

end_time = time.time()
print(f"\nüèÅ Docking Completed in {end_time - start_time:.2f} seconds.")

# Save to CSV
# Explicit columns guarantee a header row even with zero results; a
# header-less file would make pd.read_csv raise in the plotting cell.
df = pd.DataFrame(results, columns=["Ligand", "Affinity"])
df.to_csv("output/summary_results.csv", index=False)
print("üìÑ Summary saved to output/summary_results.csv")

## 5. Visualization
Generate plots for the results.

In [None]:
import matplotlib.pyplot as plt
import pandas as pd  # imported here so the cell does not depend on the docking cell's namespace
import seaborn as sns

# Load results
# A missing or header-less summary file means nothing was docked; treat it as
# an empty table so the "No results" branch below is actually reachable.
try:
    df = pd.read_csv("output/summary_results.csv")
except (FileNotFoundError, pd.errors.EmptyDataError):
    df = pd.DataFrame(columns=["Ligand", "Affinity"])

if df.empty:
    print("No results to plot.")
else:
    # 1. Histogram of all affinities with the mean marked
    mean_affinity = df['Affinity'].mean()
    plt.figure(figsize=(10, 6))
    sns.histplot(df['Affinity'], bins=10, kde=True, color='skyblue')
    plt.axvline(mean_affinity, color='red', linestyle='--', label=f"Mean: {mean_affinity:.2f}")
    plt.title("Distribution of Docking Scores")
    plt.xlabel("Affinity (kcal/mol)")
    plt.legend()
    plt.savefig("output/histogram.png")
    plt.show()

    # 2. Bar Chart (Sorted)
    # Ascending sort puts the lowest (most negative) affinity first.
    plt.figure(figsize=(12, 8))
    df_sorted = df.sort_values("Affinity")
    sns.barplot(x="Ligand", y="Affinity", data=df_sorted, palette="viridis")
    plt.xticks(rotation=45, ha='right')
    plt.title("Docking Scores by Ligand")
    plt.tight_layout()
    plt.savefig("output/barchart.png")
    plt.show()

In [None]:
# @title Download Results
# Recursively archive everything under output/ (ligand PDBQT files, docked
# poses, per-ligand logs, the summary CSV and the saved plots) into one zip.
!zip -r docking_results.zip output/
# `files` is google.colab.files, imported in the upload cell; that cell must
# have been run in the current session before this call.
files.download("docking_results.zip")