# In [None]:
import os
import zipfile
import glob
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
import subprocess
import psutil  
import time

# IMPORTANT: Replace every YOUR_PATH_HERE placeholder below with a real
# directory on your machine before running the script.

# Directory that receives the docked poses, logs, plots, CSV tables and archive.
results_dir = r'YOUR_PATH_HERE\Docking results'
if not os.path.exists(results_dir):
    os.makedirs(results_dir)
    print(f"Created results directory: {results_dir}")
print(f"Results will be saved to: {results_dir}")

# Directory that contains the Vina executable (expected to be named vina.exe).
vina_path = r'YOUR_PATH_HERE'
vina_file = os.path.join(vina_path, 'vina.exe')

# Input directories: receptors and ligands as .pdbqt files, docking
# parameter files as .txt files.
receptor_dir = r'YOUR_PATH_HERE\receptors'
ligand_dir = r'YOUR_PATH_HERE\ligands'
conf_dir = r'YOUR_PATH_HERE\conf'

# glob returns matches in arbitrary, filesystem-dependent order.  Sort them so
# the docking order, the choice of parameter file and the row order of the
# (unlabeled) score tables are reproducible between runs and machines.
receptor_files = sorted(glob.glob(os.path.join(receptor_dir, '*.pdbqt')))
ligand_files = sorted(glob.glob(os.path.join(ligand_dir, '*.pdbqt')))
param_files = sorted(glob.glob(os.path.join(conf_dir, '*.txt')))

# Function to read docking parameters from a file
def read_docking_params(param_file):
    """Read numeric ``key = value`` docking parameters from a text file.

    Anything after a ``#`` is treated as a comment and ignored, as are lines
    without ``=`` or with an empty key.  Only the first ``=`` on a line
    separates key from value.  Entries whose value is not a number (for
    example a file path) are skipped instead of aborting the whole parse.

    Args:
        param_file: Path of the parameter file to read.

    Returns:
        dict mapping each parameter name to its value as a ``float``.
    """
    params = {}
    with open(param_file, 'r') as f:
        for raw_line in f:
            # Drop trailing comments so "size_x = 20  # note" still parses.
            line = raw_line.partition('#')[0]
            # partition (not split) tolerates additional '=' in the value.
            key, separator, value = line.partition('=')
            key = key.strip()
            if not separator or not key:
                continue
            try:
                params[key] = float(value.strip())
            except ValueError:
                # Non-numeric setting: not usable as a numeric parameter.
                continue
    return params

# Prompt user for the number of interconnection modes (passed to Vina as
# --num_modes).  An empty, non-numeric or non-positive answer falls back to
# the default of 10 instead of aborting the script with a ValueError.
_num_modes_answer = input("Please enter the number of interconnection modes (default 10): ").strip()
try:
    num_modes = int(_num_modes_answer) if _num_modes_answer else 10
except ValueError:
    print(f"Invalid number of modes {_num_modes_answer!r}; using the default of 10.")
    num_modes = 10
if num_modes < 1:
    print(f"Number of modes must be positive (got {num_modes}); using the default of 10.")
    num_modes = 10

# Function to run docking
def run_docking(receptor, ligand, params):
    """Dock one ligand against one receptor with the Vina executable.

    The pose file is written to ``results_dir`` as
    ``<receptor stem>_<ligand stem>_docked.pdbqt``.  Vina's combined
    stdout/stderr is echoed to the console line by line and stored in
    ``<pose file>.log``.  On a non-zero exit code (or if the executable
    cannot be started) a failure message and the log content are printed.

    Args:
        receptor: Path of the receptor ``.pdbqt`` file.
        ligand: Path of the ligand ``.pdbqt`` file.
        params: Mapping providing ``center_x``, ``center_y``, ``center_z``,
            ``size_x``, ``size_y`` and ``size_z``.

    Raises:
        KeyError: If ``params`` lacks one of the six entries listed above.
    """
    box_keys = ("center_x", "center_y", "center_z", "size_x", "size_y", "size_z")
    missing = [key for key in box_keys if key not in params]
    if missing:
        raise KeyError(f"Missing docking parameters: {', '.join(missing)}")

    receptor_stem = os.path.splitext(os.path.basename(receptor))[0]
    ligand_stem = os.path.splitext(os.path.basename(ligand))[0]
    output_file = os.path.join(results_dir, f"{receptor_stem}_{ligand_stem}_docked.pdbqt")
    log_file = f"{output_file}.log"

    option_args = []
    for key in box_keys:
        option_args += [f"--{key}", str(params[key])]
    option_args += ["--num_modes", str(num_modes)]

    # Pass an argument list (no shell) so that spaces, quotes or shell
    # metacharacters in file names are handed to Vina verbatim.
    args = [vina_file, "--receptor", receptor, "--ligand", ligand,
            *option_args, "--out", output_file]

    # Human-readable rendering of the same command, for messages only.
    cmd = " ".join([f'"{vina_file}"', "--receptor", f'"{receptor}"',
                    "--ligand", f'"{ligand}"', *option_args,
                    "--out", f'"{output_file}"'])

    print(f"Executing command: {cmd}")

    exit_code = None
    with open(log_file, 'w') as f:
        try:
            process = subprocess.Popen(args, stdout=subprocess.PIPE,
                                       stderr=subprocess.STDOUT,
                                       universal_newlines=True)
        except OSError as exc:
            # Executable missing or not runnable: record it in the log and
            # report it below as a failed docking instead of propagating.
            f.write(f"Could not start Vina executable: {exc}\n")
        else:
            # The context manager closes the stdout pipe and waits for exit.
            with process:
                # Read output in real-time and write to both log file and console
                for line in process.stdout:
                    f.write(line)
                    print(line.strip())  # Display progress in console
            exit_code = process.returncode

    if exit_code != 0:
        print(f"Docking failure: {cmd}")
        if os.path.exists(log_file):
            with open(log_file, 'r') as log:
                log_content = log.read()
                print("Log file content:")
                print(log_content)
        return

    print(f"Docking completed: {output_file}")
    print(f"Log file: {log_file}")

# Docking cycle: dock every ligand against every receptor for which a
# parameter file can be found.
total_pairs = len(receptor_files) * len(ligand_files)
current_pair = 0
completed_runs = 0        # pairs for which run_docking was actually called
total_docking_time = 0.0  # seconds spent inside run_docking so far

print("Starting the docking process...")
print(f"A total of {total_pairs} receptor-ligand pairs will be docked.")

# The first cpu_percent(interval=None) call has no earlier sample to compare
# against and returns a meaningless 0.0, so take a throw-away reading first.
psutil.cpu_percent(interval=None)

for receptor_file in receptor_files:
    receptor_stem = os.path.splitext(os.path.basename(receptor_file))[0]

    # Prefer a parameter file whose name equals the receptor's stem; otherwise
    # fall back to the looser rule "parameter stem occurs in the receptor path".
    param_file = [pf for pf in param_files
                  if os.path.splitext(os.path.basename(pf))[0] == receptor_stem]
    if not param_file:
        param_file = [pf for pf in param_files
                      if os.path.splitext(os.path.basename(pf))[0] in receptor_file]
    if not param_file:
        print(f"Warning: no parameter file found for {receptor_file}; "
              f"skipping its {len(ligand_files)} ligand(s).")
        current_pair += len(ligand_files)
        continue

    # The parameters depend only on the receptor, so read them once per receptor.
    docking_params = read_docking_params(param_file[0])

    for ligand_file in ligand_files:
        current_pair += 1
        print(f"\nParameters used: {docking_params}")

        # Get CPU resource information
        cpu_usage = psutil.cpu_percent(interval=None)
        print(f"Current CPU usage: {cpu_usage}%")

        start_time = time.time()  # Record start time

        run_docking(receptor_file, ligand_file, docking_params)

        elapsed_time = time.time() - start_time  # Duration of this docking run
        completed_runs += 1
        total_docking_time += elapsed_time

        # Remaining time = remaining pairs x average duration of the runs so far.
        average_time = total_docking_time / completed_runs
        estimated_time = (total_pairs - current_pair) * average_time

        print(f"Elapsed time: {elapsed_time:.2f} seconds, Estimated remaining time: {estimated_time:.2f} seconds")
        print(f"Current docking progress: {current_pair/total_pairs:.2%}")

# Function to parse results and clustering
def parse_results(output_files):
    """Collect scores and extra remark fields from docked pose files.

    Only the base name of each entry in *output_files* is used; the file is
    looked up inside ``results_dir``.  Missing files, files without any score
    and malformed result lines produce a warning and are skipped.

    Args:
        output_files: Iterable of pose-file names (or paths).

    Returns:
        Tuple ``(docking_results, docking_details)``:

        * ``docking_results`` - list of floats, the fourth whitespace-separated
          token of every ``REMARK VINA RESULT`` line, in file order.
        * ``docking_details`` - list of ``(position, interaction)`` tuples
          built from other ``REMARK VINA`` lines that have at least nine
          tokens: tokens 5-7 as a list and token 8 as a string (zero-based).
          NOTE(review): which Vina remark lines carry these fields is not
          visible here - confirm against real output files.
    """
    docking_results = []
    docking_details = []

    for output_file in output_files:
        full_path = os.path.join(results_dir, os.path.basename(output_file))
        if not os.path.exists(full_path):
            # Typically a docking run that failed and produced no pose file.
            print(f"Warning: Output file not found: {full_path}")
            continue

        with open(full_path, 'r') as f:
            found_score = False
            for line in f:
                if line.startswith('REMARK VINA RESULT'):
                    try:
                        score = float(line.split()[3])
                    except (IndexError, ValueError):
                        print(f"Warning: Malformed result line in {full_path}: {line.strip()}")
                        continue
                    docking_results.append(score)
                    found_score = True
                elif line.startswith('REMARK VINA'):
                    details = line.split()
                    # Index 8 is read below, so at least nine tokens are needed.
                    if len(details) > 8:
                        position = details[5:8]
                        interaction = details[8]
                        docking_details.append((position, interaction))
            if not found_score:
                print(f"Warning: No docking score found in {full_path}")

    return docking_results, docking_details

# Build the expected pose-file name for every receptor/ligand combination
# (receptors vary slowest), then parse whatever of those files exist.
receptor_stems = [os.path.splitext(os.path.basename(path))[0] for path in receptor_files]
ligand_stems = [os.path.splitext(os.path.basename(path))[0] for path in ligand_files]

docking_output_files = [
    f"{receptor_name}_{ligand_name}_docked.pdbqt"
    for receptor_name in receptor_stems
    for ligand_name in ligand_stems
]

parsed = parse_results(docking_output_files)
docking_scores, docking_details = parsed

if docking_scores:
    # Summary statistics over all parsed scores.
    mean_score, median_score, std_score = (
        np.mean(docking_scores),
        np.median(docking_scores),
        np.std(docking_scores),
    )

    print(f"Statistical indicators - mean value: {mean_score:.2f}, median: {median_score:.2f}, std deviation: {std_score:.2f}")

    # Histogram with KDE, marking mean and median with dashed vertical lines.
    plt.figure(figsize=(10, 6))
    sns.histplot(docking_scores, bins=30, kde=True)
    for marker_value, marker_color, marker_label in (
        (mean_score, 'r', 'Mean'),
        (median_score, 'g', 'Median'),
    ):
        plt.axvline(marker_value, color=marker_color, linestyle='--', label=marker_label)
    plt.title("Docking Affinity Distribution")
    plt.xlabel("Binding Affinity (kcal/mol)")
    plt.ylabel("Frequency")
    plt.legend()
    plt.grid()
    distribution_plot = os.path.join(results_dir, 'docking_affinity_distribution.png')
    plt.savefig(distribution_plot, dpi=300)
    plt.show()

    # Box plot of the same scores.
    plt.figure(figsize=(10, 6))
    sns.boxplot(x=docking_scores)
    plt.title("Docking Scores Boxplot")
    plt.xlabel("Docking Scores")
    plt.grid()
    boxplot = os.path.join(results_dir, 'boxplot_docking_scores.png')
    plt.savefig(boxplot, dpi=300)
    plt.show()

    # Two-cluster k-means on the scores; needs at least two samples.
    if len(docking_scores) >= 2:
        scores_array = np.array(docking_scores).reshape(-1, 1)
        kmeans = KMeans(n_clusters=2, random_state=0).fit(scores_array)

        # Scores on the x axis, all points at y = 0, coloured by cluster label.
        plt.figure(figsize=(10, 6))
        plt.scatter(
            scores_array,
            np.zeros_like(scores_array),
            c=kmeans.labels_,
            cmap='viridis',
            s=100,
        )
        plt.title("Cluster Analysis of Docking Scores")
        plt.xlabel("Docking Scores")
        plt.yticks([])
        plt.grid()
        cluster_plot = os.path.join(results_dir, 'cluster_analysis.png')
        plt.savefig(cluster_plot, dpi=300)
        plt.show()
else:
    print("No docking scores were obtained. Please check the output files.")

# Save results: export the parsed details and scores as CSV tables
# (without an index column) inside results_dir.
docking_details_csv = os.path.join(results_dir, 'docking_details.csv')
scores_csv = os.path.join(results_dir, 'docking_scores.csv')

docking_details_df = pd.DataFrame(
    docking_details,
    columns=["Binding Position", "Interaction Type"],
)
results_df = pd.DataFrame(docking_scores, columns=["Docking Score"])

for table, destination in (
    (docking_details_df, docking_details_csv),
    (results_df, scores_csv),
):
    table.to_csv(destination, index=False)

# Package results: bundle every file below results_dir into a single archive,
# stored under its path relative to results_dir.
zip_path = os.path.join(results_dir, 'docking_results.zip')
with zipfile.ZipFile(zip_path, 'w') as zipf:
    for root, dirs, files in os.walk(results_dir):
        # Files named like the archive are left out so the zip being written
        # is not added to itself.
        for file in (name for name in files if name != 'docking_results.zip'):
            file_path = os.path.join(root, file)
            zipf.write(file_path, os.path.relpath(file_path, results_dir))

print(f"Docking complete! All results have been saved to: {results_dir}")