# Topological Index Calculator
This notebook calculates different topological indices (Edge Density, Wiener Index, Petitjean Index) from molecular structures.

In [1]:
!pip install rdkit-pypi
import os
import sys
import requests
from urllib.parse import urlparse
from rdkit import Chem
from rdkit.Chem import AllChem
import networkx as nx

### Function 1: get_index_choice
It asks the user: **"Hello *USER*! Kindly give me a VALID index to calculate."** The menu is shown again until one of the options 1–4 is entered.

In [2]:
def get_index_choice():
    """
    Show the index menu until a valid entry is typed and return its key.

    Returns:
        str: 'edge', 'wiener', 'petitjean' or 'all' for menu entries
        1, 2, 3 and 4 respectively.
    """
    menu_keys = {
        '1': 'edge',
        '2': 'wiener',
        '3': 'petitjean',
        '4': 'all'
    }
    menu_lines = (
        'Which topological index would you like to calculate?',
        '1 - Edge Density',
        '2 - Wiener Index',
        '3 - Petitjean Index',
        '4 - All of the above',
    )

    while True:
        for menu_line in menu_lines:
            print(menu_line)

        answer = input('Enter your choice (1-4): ').strip()

        # Surrounding whitespace is ignored; anything else must match a key exactly.
        if answer in menu_keys:
            return menu_keys[answer]
        print('Invalid choice. Please select 1, 2, 3, or 4.')


### Function 2: get_input_path
It asks the user: **"*USER!* Give me a path to a .mol file, an .sdf file, a folder of .mol files, or a URL to take the data from and calculate your desired index."**

In [3]:
def get_input_path():
    """
    Ask where the molecule data lives and return a local path to it.

    The user may type a path to a .mol file, a folder of .mol files, an
    .sdf file, or an http(s) URL. A URL is handed to download_from_url and
    whatever that function returns is passed back to the caller.

    Only entries that begin with 'http://' or 'https://' (any letter case)
    count as URLs, so a local file or folder whose name merely starts with
    'http' is still looked up on disk.

    Returns:
        str: the entered path (whitespace-trimmed) when it exists locally,
        or the value returned by download_from_url for a URL.

    Raises:
        SystemExit: with code 1 when the entry is neither a URL nor an
        existing local path.
    """
    input_path = input('Enter the path to a .mol file, a folder of .mol files, or a .sdf file, or a URL: ').strip()

    if input_path.lower().startswith(('http://', 'https://')):
        return download_from_url(input_path)
    if os.path.exists(input_path):
        return input_path

    print('The specified path does not exist.')
    sys.exit(1)


### Function 3: download_from_url
Suppose the ***USER*** enters a URL. Easy! This function handles that scenario: it fetches the URL, saves its content to a file in the current working directory, and returns that file's name.

In [4]:
def download_from_url(url, timeout=30):
    """
    Download the file at `url` into the current working directory.

    The local file name is the last component of the URL's path (the query
    string is not part of it). An existing file with that name is
    overwritten.

    Args:
        url: address of the file to fetch.
        timeout: seconds passed to requests.get as its `timeout`, so an
            unresponsive server cannot block the notebook indefinitely.

    Returns:
        str: the name of the file that was written.

    Raises:
        SystemExit: with code 1 when the URL has no file name component,
        the response status is not 200, or any exception occurs while
        downloading or writing. A message is printed first in every case.
    """
    try:
        filename = os.path.basename(urlparse(url).path)
        if not filename:
            # e.g. 'http://host/' -- there is nothing to name the local file after,
            # so stop before making a request whose result could not be saved.
            print(f'Error downloading the file: no file name found in URL {url!r}')
            sys.exit(1)

        response = requests.get(url, timeout=timeout)
        if response.status_code != 200:
            print(f'Failed to download file. Status code: {response.status_code}')
            sys.exit(1)

        with open(filename, 'wb') as file:
            file.write(response.content)
        print(f'File downloaded: {filename}')
        return filename
    except Exception as e:
        # SystemExit is not an Exception subclass, so the exits above pass through.
        print(f'Error downloading the file: {e}')
        sys.exit(1)


### Function 4: detect_input_type
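The **USER** has the freedom to enter different kinds of input. This function does the following:
  - Identifies the type of the input (a folder of .mol files, a single .mol file, or an .sdf file; a URL has already been downloaded to a local file by `get_input_path`),
  - Stops the program with a message when the input is none of these, so that the later steps only receive a supported type.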

In [5]:
def detect_input_type(input_path):
    """
    Classify `input_path` as a folder, a .mol file or an .sdf file.

    The extension check ignores letter case and requires the leading dot,
    so 'x.mol', 'x.MOL', 'x.sdf' and 'x.SDF' are all recognised.

    Args:
        input_path: local path to inspect.

    Returns:
        str: 'mol_folder' for an existing directory, 'mol_file' for a path
        ending in '.mol', 'sdf_file' for a path ending in '.sdf'.

    Raises:
        SystemExit: with code 1 (after printing a message) for anything else.
    """
    if os.path.isdir(input_path):
        return 'mol_folder'

    lowered = input_path.lower()
    if lowered.endswith('.mol'):
        return 'mol_file'
    if lowered.endswith('.sdf'):
        return 'sdf_file'

    print('Unsupported file type. Please provide a .mol file, folder, or .sdf file.')
    sys.exit(1)


### Example Usage
The following cells define the remaining pieces (reading molecules, converting them to graphs, and computing each index) and then show how to combine all functions in `main()`: select an index, input a file or folder, determine the file type, and compute the selected topological index.

In [6]:
def _molecule_label(mol, fallback):
    """Return the molecule's '_Name' property, or `fallback` when it is absent or blank."""
    label = mol.GetProp('_Name').strip() if mol.HasProp('_Name') else ''
    return label or fallback


def read_molecules(input_path, input_type):
    """
    Load molecules from a .mol file, a folder of .mol files, or an .sdf file.

    Entries for which the reader yields a falsy value (e.g. None) are
    skipped. Each molecule is labelled with its '_Name' property; when that
    is missing or blank the label falls back to the file name (.mol inputs)
    or to 'molecule_<position>' (.sdf inputs, 1-based position in the file,
    counting skipped records).

    Args:
        input_path: path to the file or folder.
        input_type: 'mol_file', 'mol_folder' or 'sdf_file'; any other value
            yields an empty list.

    Returns:
        list: (name, mol) tuples. Folder contents are returned in sorted
        file-name order.
    """
    molecules = []

    if input_type == 'mol_file':
        mol = Chem.MolFromMolFile(input_path)
        if mol:
            molecules.append((_molecule_label(mol, os.path.basename(input_path)), mol))

    elif input_type == 'mol_folder':
        # os.listdir returns entries in arbitrary order; sorting keeps the
        # report order stable from run to run.
        for filename in sorted(os.listdir(input_path)):
            if not filename.lower().endswith('.mol'):
                continue
            mol = Chem.MolFromMolFile(os.path.join(input_path, filename))
            if mol:
                molecules.append((_molecule_label(mol, filename), mol))

    elif input_type == 'sdf_file':
        for position, mol in enumerate(Chem.SDMolSupplier(input_path), start=1):
            if mol:
                molecules.append((_molecule_label(mol, f'molecule_{position}'), mol))

    return molecules

In [7]:
def mol_to_graph(mol):
    """
    Build an undirected networkx graph mirroring the molecule's connectivity.

    Each atom becomes a node keyed by its atom index, and each bond becomes
    an edge between the indices of its begin and end atoms. No atom or bond
    attributes are stored on the graph.
    """
    graph = nx.Graph()
    graph.add_nodes_from(atom.GetIdx() for atom in mol.GetAtoms())
    graph.add_edges_from(
        (bond.GetBeginAtomIdx(), bond.GetEndAtomIdx())
        for bond in mol.GetBonds()
    )
    return graph

In [8]:
def edge_density(G):
    """
    Ratio of edges present to the number of possible node pairs.

    Computed as 2m / (n(n - 1)) for n nodes and m edges. Graphs with at
    most one node have no possible pairs and give 0.
    """
    node_count = G.number_of_nodes()
    if node_count > 1:
        ordered_pairs = node_count * (node_count - 1)
        return (2 * G.number_of_edges()) / ordered_pairs
    return 0

In [9]:
def wiener_index(G):
    """
    Sum of shortest-path lengths over all unordered pairs of nodes.

    networkx reports each pair's distance once per direction, so the grand
    total is halved with integer division. Only distances that networkx
    reports are added; a pair with no reported path length contributes
    nothing to the sum.
    """
    doubled_total = 0
    for _source, distances in nx.all_pairs_shortest_path_length(G):
        doubled_total += sum(distances.values())
    return doubled_total // 2

In [10]:
def petitjean_index(G):
    """
    Compute (diameter - radius) / diameter from the graph's eccentricities.

    The diameter is the largest node eccentricity and the radius the
    smallest.

    Returns:
        None when the graph has no nodes or is not connected (the value
        cannot be computed), 0 when the diameter is 0, otherwise the
        float ratio (diameter - radius) / diameter.
    """
    # nx.is_connected raises on a graph without nodes, so that case is
    # answered here; callers already treat None as "cannot be computed".
    if G.number_of_nodes() == 0 or not nx.is_connected(G):
        return None

    eccentricities = nx.eccentricity(G)
    diameter = max(eccentricities.values())
    radius = min(eccentricities.values())
    if diameter == 0:
        # Avoids dividing by zero when every eccentricity is 0.
        return 0
    return (diameter - radius) / diameter

In [11]:
def calculate_indices(molecules, index_choice):
    """
    Print the requested indices for every molecule.

    Args:
        molecules: iterable of (name, mol) pairs.
        index_choice: 'edge', 'wiener', 'petitjean', or 'all' to print
            all three. Any other value prints only the molecule names.
    """
    def wanted(key):
        # An index is reported when chosen explicitly or via 'all'.
        return index_choice == key or index_choice == 'all'

    for name, mol in molecules:
        graph = mol_to_graph(mol)
        print(f'\n{name}:')

        if wanted('edge'):
            density = edge_density(graph)
            print(f'  Edge Density: {density:.4f}')

        if wanted('wiener'):
            print(f'  Wiener Index: {wiener_index(graph)}')

        if wanted('petitjean'):
            pj = petitjean_index(graph)
            if pj is None:
                print('  Petitjean Index: Cannot be computed (molecule may be disconnected)')
            else:
                print(f'  Petitjean Index: {pj:.4f}')

In [12]:
def main():
    """
    Run the interactive workflow from start to finish.

    Steps: ask which index to compute, ask for the input location, classify
    it, read the molecules, and print the chosen indices. When no molecule
    could be read, a message is printed instead of ending with no output.
    """
    index_choice = get_index_choice()
    input_path = get_input_path()
    input_type = detect_input_type(input_path)
    molecules = read_molecules(input_path, input_type)

    if not molecules:
        # Without this, an empty folder or an unparseable file would finish
        # silently and leave the user with no clue about what went wrong.
        print('No valid molecules could be read from the given input.')
        return

    calculate_indices(molecules, index_choice)

# Run the main program. Executing this cell starts the interactive prompts
# right away (index choice first, then the input path).
main()