<a href="https://colab.research.google.com/github/Mohaammed-Fouad/Ligand-Based-Virtual-Screening/blob/main/score_calculator_using_docking_and_similarity_scores.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# 1. INSTALLATION
!pip install rdkit pandas matplotlib seaborn openpyxl

import pandas as pd
import io
import matplotlib.pyplot as plt
import seaborn as sns
from google.colab import files
from rdkit import Chem

# ==========================================================
# SMART-MATCHING CONSENSUS SCORER
# ==========================================================

def canonicalize(smi):
    """Return the canonical isomeric SMILES for *smi*, or None if unusable.

    The result is used as a merge key so that the same molecule written in
    two different ways compares equal.

    Args:
        smi: A SMILES value taken from a spreadsheet cell. Non-string values
            are converted with ``str`` before parsing.

    Returns:
        The canonical SMILES string produced by RDKit, or ``None`` when the
        input is missing (None/NaN), blank, cannot be parsed into a
        molecule, or RDKit raises while processing it.
    """
    # Missing cells show up as None or as a float NaN (NaN is the only float
    # that is not equal to itself); neither identifies a molecule.
    if smi is None or (isinstance(smi, float) and smi != smi):
        return None

    # Ignore padding around the cell text; a blank cell is treated as
    # missing rather than being handed to the parser.
    text = str(smi).strip()
    if not text:
        return None

    try:
        mol = Chem.MolFromSmiles(text)
        if mol is None:
            return None
        return Chem.MolToSmiles(mol, isomericSmiles=True, canonical=True)
    except Exception:
        # Best-effort by design: one bad row must not abort the whole run.
        # Catching Exception (not a bare except) keeps Ctrl-C/SystemExit working.
        return None

def find_column(df, keywords):
    """Return the first column of *df* whose name contains any keyword.

    Matching is a case-insensitive substring test against ``str(column)``.
    Columns are scanned in their existing order, so the left-most matching
    column wins regardless of keyword order. Returns ``None`` when no
    column matches.
    """
    for column in df.columns:
        lowered_name = str(column).lower()
        for keyword in keywords:
            if keyword.lower() in lowered_name:
                return column
    return None

def load_excel_safely(prompt):
    """Ask the user to upload a workbook and read it into a DataFrame.

    Prints *prompt* as a banner, opens the Colab upload dialog, and reads
    the first uploaded file with ``pd.read_excel``.

    Returns:
        ``(DataFrame, filename)`` for the first uploaded file, or
        ``(None, None)`` when the upload result is empty.
    """
    print(f"\n--- {prompt} ---")
    uploaded = files.upload()
    if uploaded:
        # Only the first uploaded file is used; any others are ignored.
        first_name = next(iter(uploaded))
        workbook_bytes = io.BytesIO(uploaded[first_name])
        return pd.read_excel(workbook_bytes), first_name
    return None, None

# --- STEP 1: UPLOAD FILES ---
# Each call prompts for one workbook and yields (DataFrame, filename), or
# (None, None) when nothing was uploaded.
sim_df, sim_name = load_excel_safely("UPLOAD SIMILARITY RESULTS (.xlsx)")
dock_df, dock_name = load_excel_safely("UPLOAD LOCAL DOCKING RESULTS (.xlsx)")

# How many best-ranked compounds are highlighted in the plot and exported.
TOP_N = 100

if sim_df is None or dock_df is None:
    # Report the reason instead of finishing without any output.
    print("❌ ERROR: Both a similarity file and a docking file must be uploaded.")
else:
    # --- STEP 2: DYNAMIC COLUMN DETECTION ---
    sim_smi_col = find_column(sim_df, ['smile'])
    dock_smi_col = find_column(dock_df, ['smile'])
    sim_score_col = find_column(sim_df, ['similarity', 'sim'])
    dock_score_col = find_column(dock_df, ['score', 'energy', 'affinity', 'vina'])

    # find_column signals "not found" with None, so test for exactly that.
    detected_cols = [sim_smi_col, dock_smi_col, sim_score_col, dock_score_col]
    if any(col is None for col in detected_cols):
        print("❌ ERROR: Could not find necessary columns.")
        print(f"Columns in Similarity File: {list(sim_df.columns)}")
        print(f"Columns in Docking File: {list(dock_df.columns)}")
    else:
        print(f"✅ Found SMILES: '{sim_smi_col}' & '{dock_smi_col}'")
        print(f"✅ Found Scores: '{sim_score_col}' & '{dock_score_col}'")

        # --- STEP 3: STANDARDIZE & MERGE ---
        print("\nStandardizing SMILES for accurate merging...")
        sim_df['key'] = sim_df[sim_smi_col].apply(canonicalize)
        dock_df['key'] = dock_df[dock_smi_col].apply(canonicalize)

        # canonicalize() yields None for SMILES it cannot process. pandas
        # merge matches null keys to each other, which would pair every
        # unparseable similarity row with every unparseable docking row,
        # so those rows are removed before joining.
        sim_valid = sim_df.dropna(subset=['key'])
        dock_valid = dock_df.dropna(subset=['key'])
        skipped = (len(sim_df) - len(sim_valid)) + (len(dock_df) - len(dock_valid))
        if skipped:
            print(f"Skipped {skipped} row(s) whose SMILES could not be standardized.")

        # Only the key and the docking score are carried over from the
        # docking sheet. If the similarity sheet already has a column with
        # the same name, merge would add _x/_y suffixes and the lookups
        # below would fail, so the docking column is renamed first.
        dock_scores = dock_valid[['key', dock_score_col]]
        if dock_score_col in sim_valid.columns:
            unique_dock_col = f"{dock_score_col}_docking"
            dock_scores = dock_scores.rename(columns={dock_score_col: unique_dock_col})
            dock_score_col = unique_dock_col

        merged_df = pd.merge(sim_valid, dock_scores, on='key', how='inner')

        if merged_df.empty:
            print("❌ ERROR: No compounds are shared between the two files after SMILES standardization.")
        else:
            # --- STEP 4: RANKING ---
            # Rank 1 is best in both lists: highest similarity value and
            # lowest docking value. The consensus score is the rank sum,
            # so a smaller consensus score is better.
            merged_df['Rank_Sim'] = merged_df[sim_score_col].rank(ascending=False)
            merged_df['Rank_Dock'] = merged_df[dock_score_col].rank(ascending=True)
            merged_df['Consensus_Score'] = merged_df['Rank_Sim'] + merged_df['Rank_Dock']

            merged_df = merged_df.sort_values(by='Consensus_Score')
            rest_label = 'Remaining Candidates'
            top_label = f'Top {TOP_N} Consensus Hits'
            merged_df['Selection'] = rest_label
            # Positional slice: the frame is already sorted best-first.
            merged_df.iloc[:TOP_N, merged_df.columns.get_loc('Selection')] = top_label

            # --- STEP 5: VISUALIZATION ---
            plt.figure(figsize=(10, 7))
            sns.set_style("whitegrid")

            plot = sns.scatterplot(
                data=merged_df,
                x=sim_score_col,
                y=dock_score_col,
                hue='Selection',
                palette={rest_label: 'grey', top_label: 'red'},
                alpha=0.7, s=80
            )

            plt.title('Consensus Selection Strategy', fontsize=16, fontweight='bold')
            # Flip the y axis so the lowest docking values are drawn at the top.
            plt.gca().invert_yaxis()
            plt.tight_layout()
            # Save before show(): the figure may be cleared once displayed.
            plt.savefig("Consensus_Plot.png", dpi=300)
            plt.show()

            # --- STEP 6: EXPORT ---
            # Helper columns used only for merging/plotting are not exported.
            export_name = f"FINAL_SUBMISSION_{TOP_N}_CONSENSUS.xlsx"
            final_100 = merged_df.head(TOP_N).drop(columns=['key', 'Selection'])
            final_100.to_excel(export_name, index=False)

            files.download(export_name)
            files.download("Consensus_Plot.png")
            print(f"\n🏆 SUCCESS: Final {TOP_N} list generated.")