In [None]:
!pip install Flask
!pip install pyngrok
!pip install rdkit
!pip install torch_geometric
!pip install ogb
!pip install datasets

[ngrok authtoken](https://dashboard.ngrok.com/get-started/your-authtoken)
If you are running this notebook on Colab, you need an ngrok authtoken.

In [None]:
import os
from getpass import getpass

from pyngrok import ngrok

# SECURITY: never commit a real authtoken to the notebook. The token is taken
# from the NGROK_AUTHTOKEN environment variable when set; otherwise it is
# requested interactively (getpass does not echo the value into the output).
# Any token that was previously hardcoded here should be revoked in the ngrok
# dashboard.
ngrok_authtoken = os.environ.get("NGROK_AUTHTOKEN") or getpass("ngrok authtoken: ")

if ngrok_authtoken:
    ngrok.set_auth_token(ngrok_authtoken)
else:
    # Nothing was supplied: leave the pyngrok configuration untouched.
    print("No ngrok authtoken provided; the ngrok tunnel will not be authenticated.")

MAIN

In [None]:
from flask import Flask, request, render_template, jsonify
from werkzeug.utils import secure_filename
import os
import pandas as pd
import sys
import torch
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import networkx as nx
from rdkit import Chem
from rdkit.Chem import Draw
import torch.nn.functional as F
import io
import base64

# Use the Agg backend for Matplotlib: a non-interactive backend that renders
# figures off-screen instead of opening GUI windows.
matplotlib.use('Agg')

# Detect Google Colab: COLAB is True only when both the Colab runtime module
# and pyngrok can be imported, otherwise False.
try:
    from google.colab import drive
    from pyngrok import ngrok
except ImportError:
    COLAB = False
else:
    COLAB = True

# Put the directory holding the project's helper scripts on the import path.
# The Colab location is fixed; the local one is a placeholder to be edited.
sys.path.append(
    '/content/Platform/scriptSAR'
    if COLAB
    else '/path/to/your/local/Platform/scriptSAR'
)

# Import the functions from the script
from smiles2mol_script import process_SMILES_df, save_pickle
from SAR_functions_scripts import load_pickle_file, extract_best_model, extract_metrics, plot_graph_feature_importance, visualize_molecular_graph
from SAR_functions_scripts import k_fold_balanced, k_fold_no_balanced, no_balanced_scatterfold, balanced_scatterfold

# Create the Flask application. Template and upload locations depend on
# whether the notebook runs on Colab or locally (adjust the local paths if
# necessary).
app = Flask(
    __name__,
    template_folder='/content/Platform/templatesSARFlaskApp' if COLAB else './templatesSARFlaskApp',
)
app.config['UPLOAD_FOLDER'] = '/content/uploads' if COLAB else './uploads'

# Make sure the upload directory exists before any request tries to save into it.
os.makedirs(app.config['UPLOAD_FOLDER'], exist_ok=True)

# Module-level state shared by the request handlers.
# progress_status is served as JSON by the /progress route.
progress_status = {
    "status": "idle",
    "progress": 0,
    "message": ""
}
data = None  # To store processed data
smiles_list = None  # To store SMILES strings
target_list = None  # To store target values
# To store the best trained model. Initialised here (not only under the
# __main__ guard) so /visualize never hits a NameError when this module is
# imported rather than executed directly.
best_model = None

# Define the processing function using the imported function
def process_smiles_dataset(file_path):
    """Load a CSV of molecules and run it through ``process_SMILES_df``.

    Args:
        file_path: Path of the CSV file to read. It must contain a ``smiles``
            and a ``target`` column.

    Returns:
        A 3-tuple ``(processed, smiles, targets)`` where ``processed`` is
        whatever ``process_SMILES_df`` returns for the loaded frame, and
        ``smiles`` / ``targets`` are the ``smiles`` and ``target`` columns as
        plain Python lists.

    Raises:
        ValueError: If a required column is missing from the CSV. The check
            runs before any processing so the caller gets a clear message
            instead of an opaque ``KeyError``.
    """
    df = pd.read_csv(file_path)

    missing = [column for column in ('smiles', 'target') if column not in df.columns]
    if missing:
        raise ValueError(
            "Uploaded CSV is missing required column(s): {}".format(", ".join(missing))
        )

    processed_df = process_SMILES_df(df)
    return processed_df, df['smiles'].tolist(), df['target'].tolist()

@app.route('/')
def home():
    """Render the ``upload.html`` template as the landing page."""
    landing_page = render_template('upload.html')
    return landing_page

@app.route('/upload', methods=['POST'])
def upload_file():
    """Receive an uploaded CSV, save it, and process it into the global state.

    Expects a multipart form field named ``file``. On success the processed
    data, the SMILES strings and the target values are stored in the
    module-level ``data``, ``smiles_list`` and ``target_list`` variables.

    Returns:
        A plain-text success message, or a ``(message, 400)`` tuple when the
        upload is missing, has an unusable file name, or cannot be processed.
    """
    global data  # Use a global variable to store the processed data
    global smiles_list  # Use a global variable to store the SMILES strings
    global target_list  # Use a global variable to store the target values

    if 'file' not in request.files:
        return "No file part", 400
    file = request.files['file']
    if not file.filename:
        return "No selected file", 400

    # secure_filename() can reduce a name to '' (e.g. a name made only of path
    # separators or non-ASCII characters). Saving to the bare upload directory
    # would then fail, so reject such names explicitly.
    filename = secure_filename(file.filename)
    if not filename:
        return "Invalid file name", 400

    file_path = os.path.join(app.config['UPLOAD_FOLDER'], filename)
    file.save(file_path)

    try:
        processed_df, smiles, targets = process_smiles_dataset(file_path)
    except (KeyError, ValueError) as exc:
        # Malformed CSV or missing columns are client errors, not server faults.
        return "Could not process the uploaded file: {}".format(exc), 400

    # Instead of saving to a pickle, keep the processed result in memory.
    data = processed_df
    smiles_list = smiles
    target_list = targets

    return "File processed successfully. You can now select a method and model variable for further processing."

@app.route('/process', methods=['POST'])
def process_data():
    """Train models on the uploaded dataset with the selected method.

    Form fields: ``method`` (one of ``balanced_scatterfold``,
    ``no_balanced_scatterfold``, ``k_fold_balanced``, ``k_fold_no_balanced``),
    and the integers ``model_var``, ``k_value`` and ``iteration``.

    Side effects:
        Updates the module-level ``progress_status`` dictionary and stores the
        selected model in the module-level ``best_model``.

    Returns:
        A JSON response with a message and the ``max_auc`` / ``mean_auc`` /
        ``std_auc`` values returned by ``extract_metrics``, or a
        ``(message, 400)`` tuple for invalid input or missing data.
    """
    global progress_status
    global best_model  # Use a global variable to store the best model

    method = request.form['method']
    try:
        model_var = int(request.form['model_var'])
        k_value = int(request.form['k_value'])
        iteration = int(request.form['iteration'])
    except ValueError:
        # Non-numeric form input is a client error, not a server fault.
        return "model_var, k_value and iteration must be integers.", 400

    # Use the globally stored data
    if data is None:
        return "No data available. Please upload and process a SMILES dataset first.", 400

    # Assuming the functions and their required parameters are correctly set.
    # The same num_features list is used by the /visualize route.
    num_features = [119, 5, 12, 12, 9, 6, 6, 2, 2]
    hidden_channels = 70

    # Map each supported method name to a zero-argument training call so the
    # method can be validated before any progress state is touched.
    trainers = {
        "balanced_scatterfold": lambda: balanced_scatterfold(
            data, num_features, hidden_channels, model_var),
        "no_balanced_scatterfold": lambda: no_balanced_scatterfold(
            data, num_features, hidden_channels, model_var),
        "k_fold_balanced": lambda: k_fold_balanced(
            data, num_features, hidden_channels, iteration, k_value, model_var),
        "k_fold_no_balanced": lambda: k_fold_no_balanced(
            data, num_features, hidden_channels, iteration, k_value, model_var),
    }
    if method not in trainers:
        return "Invalid method selected.", 400

    # Update progress status
    progress_status["status"] = "processing"
    progress_status["progress"] = 0
    progress_status["message"] = "Training started"

    try:
        all_best_models = trainers[method]()
        best_model = extract_best_model(all_best_models)
        max_auc, mean_auc, std_auc = extract_metrics(all_best_models)
    except Exception:
        # Do not leave /progress reporting "processing" forever after a failure.
        progress_status["status"] = "idle"
        progress_status["progress"] = 0
        progress_status["message"] = "Training failed"
        raise

    # Update progress status to complete
    progress_status["status"] = "complete"
    progress_status["progress"] = 100
    progress_status["message"] = "Training complete"

    return jsonify({
        "message": "Data processed successfully using {}. You can now plot the graph using the best model.".format(method),
        "max_auc": max_auc,
        "mean_auc": mean_auc,
        "std_auc": std_auc
    })

@app.route('/progress', methods=['GET'])
def get_progress():
    """Return the current ``progress_status`` dictionary as a JSON response."""
    # The dictionary is only read here, so no ``global`` declaration is needed.
    current_status = progress_status
    return jsonify(current_status)

@app.route('/visualize', methods=['POST'])
def visualize_molecular_graph_route():
    """Render the importance-annotated molecular graph for one SMILES string.

    Form fields: ``smiles`` - a SMILES string that must be present in the
    uploaded dataset.

    Returns:
        A JSON response with ``image`` (base64-encoded PNG) and ``target``
        (the dataset's target value for that molecule), or a
        ``(message, 400)`` tuple when no dataset/model is available or the
        SMILES string is unknown.
    """
    smiles = request.form['smiles']

    # Use the globally stored data. This check comes first: before an upload
    # smiles_list is None and a membership test on it would raise TypeError.
    if data is None or smiles_list is None or target_list is None:
        return "No data available. Please upload and process a SMILES dataset first.", 400

    # A single scan finds the position and detects a missing SMILES string.
    try:
        data_index = smiles_list.index(smiles)
    except ValueError:
        return "SMILES string not found in the dataset", 400

    # Use the globally stored best model. globals().get() also covers the case
    # where the name was never assigned, instead of raising NameError.
    model = globals().get('best_model')
    if model is None:
        return "No best model available. Please process the data first.", 400

    # Same num_features list as the one used for training in /process.
    num_features = [119, 5, 12, 12, 9, 6, 6, 2, 2]

    # Compute feature importance and elements
    elements, node_importance_dict = plot_graph_feature_importance(data[data_index], num_features, model)

    # Get the target value
    target_value = target_list[data_index]

    # Visualize the molecular graph with scaled node importance
    # NOTE(review): img is presumably a PIL image (it exposes .save) - confirm.
    img = visualize_molecular_graph(smiles, elements, node_importance_dict)

    # Serialise the image to PNG in memory and base64-encode it for JSON.
    with io.BytesIO() as buf:
        img.save(buf, format='PNG')
        mol_base64 = base64.b64encode(buf.getvalue()).decode('utf-8')

    return jsonify({'image': mol_base64, 'target': target_value})

if __name__ == "__main__":
    # Reset the shared state (processed data, SMILES strings, target values
    # and best model) before the server starts.
    data = smiles_list = target_list = best_model = None

    if COLAB:
        # On Colab, open an ngrok tunnel to port 5000 and report it.
        public_url = ngrok.connect(5000)
        print(" * ngrok tunnel \"{}\" -> \"http://127.0.0.1:5000\"".format(public_url))

    # Listen on all interfaces on Colab, on loopback only when run locally.
    app.run(host="0.0.0.0" if COLAB else "127.0.0.1", port=5000)
