In [1]:
!pip install Flask
!pip install pyngrok

Collecting pyngrok
  Downloading pyngrok-7.1.6-py3-none-any.whl (22 kB)
Installing collected packages: pyngrok
Successfully installed pyngrok-7.1.6


In [2]:
!pip install rdkit
!pip install torch_geometric
!pip install ogb
!pip install datasets

Collecting rdkit
  Downloading rdkit-2023.9.6-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (34.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m34.9/34.9 MB[0m [31m8.5 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: rdkit
Successfully installed rdkit-2023.9.6
Collecting torch_geometric
  Downloading torch_geometric-2.5.3-py3-none-any.whl (1.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m14.0 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: torch_geometric
Successfully installed torch_geometric-2.5.3
Collecting ogb
  Downloading ogb-1.3.6-py3-none-any.whl (78 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m78.8/78.8 kB[0m [31m2.4 MB/s[0m eta [36m0:00:00[0m
Collecting outdated>=0.2.0 (from ogb)
  Downloading outdated-0.2.2-py2.py3-none-any.whl (7.5 kB)
Collecting littleutils (from outdated>=0.2.0->ogb)
  Downloading littleutils-0.2.2.tar.gz (6.6 kB)
  Preparin

Get your ngrok authtoken here: [ngrok authtoken](https://dashboard.ngrok.com/get-started/your-authtoken)

In [8]:
import os
from getpass import getpass

from pyngrok import ngrok

# Do not paste the token into the notebook (it would be saved and shared with
# the file). Read it from the NGROK_AUTHTOKEN environment variable, or prompt
# for it interactively; getpass does not echo the value into the cell output.
ngrok_auth_token = os.environ.get("NGROK_AUTHTOKEN") or getpass("ngrok auth token: ")
ngrok.set_auth_token(ngrok_auth_token)

In [6]:
# Attach Google Drive to the Colab filesystem (mount() reports if it is
# already mounted instead of failing).
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [11]:
from flask import Flask, request, render_template, jsonify
from werkzeug.utils import secure_filename
from pyngrok import ngrok
import os
import pandas as pd
import sys
import torch
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import networkx as nx
from rdkit import Chem
import torch.nn.functional as F
import io
import base64

# Use the non-interactive Agg backend for Matplotlib: figures in this app are
# only ever written to in-memory PNG bytes, never shown in a window.
matplotlib.use('Agg')

# Add the directory containing the project's helper scripts to the module
# search path so the project imports that follow can be resolved.
# NOTE: the path is relative, so it depends on the notebook's working directory.
sys.path.append('./SARflaskAPP/scriptSAR')

# Import the functions from the script
from smiles2mol_script import process_SMILES_df, save_pickle
from SAR_functions_scripts import load_pickle_file, extract_best_model, extract_metrics, plot_graph_feature_importance
from SAR_functions_scripts import k_fold_balanced, k_fold_no_balanced, no_balanced_scatterfold, balanced_scatterfold

# Initialize Flask app; HTML templates are loaded from the project folder.
app = Flask(__name__, template_folder='./SARflaskAPP/templatesSARFlaskApp')
app.config['UPLOAD_FOLDER'] = './uploads'
# Create the upload directory up front so saving an uploaded file cannot fail
# on a missing folder (exist_ok makes re-running this cell harmless).
os.makedirs(app.config['UPLOAD_FOLDER'], exist_ok=True)

# Shared state used by the request handlers through `global`.
# It is defined at module level so that the handlers' `is None` checks work
# no matter how the app is started, instead of raising NameError when the
# `__main__` block has not run.
data = None        # processed dataset, set by the /upload handler
best_model = None  # best trained model, set by the /process handler

# Global variables to track progress (served as JSON by the /progress route)
progress_status = {
    "status": "idle",
    "progress": 0,
    "message": ""
}

def process_smiles_dataset(file_path):
    """Load the CSV at ``file_path`` and run it through ``process_SMILES_df``.

    Args:
        file_path: Path of the CSV file to read with ``pandas.read_csv``.

    Returns:
        Whatever ``process_SMILES_df`` returns for the loaded DataFrame.
    """
    return process_SMILES_df(pd.read_csv(file_path))

@app.route('/')
def home():
    """Serve the upload form (``upload.html``) at the site root."""
    landing_page = render_template('upload.html')
    return landing_page

@app.route('/upload', methods=['POST'])
def upload_file():
    """Receive an uploaded CSV, process it and keep the result for later steps.

    The file is saved under ``app.config['UPLOAD_FOLDER']``, passed through
    ``process_smiles_dataset`` and the result is stored in the module-level
    ``data`` variable that the other handlers read.

    Returns:
        A plain-text success message, or ``(message, 400)`` when the request
        has no usable file or the file cannot be processed.
    """
    global data  # Use a global variable to store the processed data

    if 'file' not in request.files:
        return "No file part", 400
    file = request.files['file']
    if file.filename == '':
        return "No selected file", 400

    # secure_filename() strips unsafe characters and can leave nothing at all
    # (e.g. for a name made only of path separators); saving would then
    # target the upload directory itself.
    filename = secure_filename(file.filename)
    if not filename:
        return "Invalid file name", 400

    file_path = os.path.join(app.config['UPLOAD_FOLDER'], filename)
    file.save(file_path)

    try:
        processed_df = process_smiles_dataset(file_path)
    except Exception as exc:
        # An unreadable or malformed upload is reported to the client instead
        # of surfacing as an opaque 500; the traceback is kept in the log.
        app.logger.exception("Failed to process uploaded file %s", file_path)
        return "Could not process the uploaded file: {}".format(exc), 400

    # Instead of saving to a pickle, keep the processed data in memory
    data = processed_df

    return "File processed successfully. You can now select a method and model variable for further processing."

@app.route('/process', methods=['POST'])
def process_data():
    """Train with the selected method on the uploaded data and report AUC metrics.

    Form fields:
        method: one of ``balanced_scatterfold``, ``no_balanced_scatterfold``,
            ``k_fold_balanced``, ``k_fold_no_balanced``.
        model_var: integer forwarded to the training function.
        k_value, iteration: integers, only read for the two ``k_fold_*``
            methods (the scatterfold functions do not take them).

    Side effects:
        Updates ``progress_status`` and stores the selected model in the
        module-level ``best_model``.

    Returns:
        JSON with a message and ``max_auc`` / ``mean_auc`` / ``std_auc``, or
        ``(message, 400)`` for an invalid request. If training itself fails
        the exception propagates (HTTP 500) after ``progress_status`` has been
        switched to ``"error"``.
    """
    global best_model  # Use a global variable to store the best model

    scatterfold_methods = {
        "balanced_scatterfold": balanced_scatterfold,
        "no_balanced_scatterfold": no_balanced_scatterfold,
    }
    k_fold_methods = {
        "k_fold_balanced": k_fold_balanced,
        "k_fold_no_balanced": k_fold_no_balanced,
    }

    method = request.form['method']
    uses_k_fold = method in k_fold_methods
    if not uses_k_fold and method not in scatterfold_methods:
        return "Invalid method selected.", 400

    # A missing field still yields Flask's own 400; a non-numeric value is
    # turned into a 400 here instead of an unhandled ValueError.
    try:
        model_var = int(request.form['model_var'])
        if uses_k_fold:
            k_value = int(request.form['k_value'])
            iteration = int(request.form['iteration'])
    except ValueError:
        return "model_var, k_value and iteration must be integers.", 400

    # Use the globally stored data
    if data is None:
        return "No data available. Please upload and process a SMILES dataset first.", 400

    # Model hyper-parameters forwarded unchanged to the training functions.
    # NOTE(review): num_classes presumably lists the category count of each
    # atom feature -- confirm against SAR_functions_scripts.
    num_classes = [119, 5, 12, 12, 9, 6, 6, 2, 2]
    hidden_channels = 70

    progress_status.update(status="processing", progress=0, message="Training started")
    try:
        if uses_k_fold:
            all_best_models = k_fold_methods[method](
                data, num_classes, hidden_channels, iteration, k_value, model_var)
        else:
            all_best_models = scatterfold_methods[method](
                data, num_classes, hidden_channels, model_var)

        best_model = extract_best_model(all_best_models)
        max_auc, mean_auc, std_auc = extract_metrics(all_best_models)
    except Exception as exc:
        # Without this, a failed run leaves the status at "processing" forever.
        progress_status.update(status="error", message="Training failed: {}".format(exc))
        raise

    # Update progress status to complete
    progress_status.update(status="complete", progress=100, message="Training complete")

    return jsonify({
        "message": "Data processed successfully using {}. You can now plot the graph using the best model.".format(method),
        "max_auc": max_auc,
        "mean_auc": mean_auc,
        "std_auc": std_auc
    })


@app.route('/progress', methods=['GET'])
def get_progress():
    """Return the current ``progress_status`` dictionary as JSON."""
    # The dictionary is only read here, so no `global` declaration is needed.
    status_response = jsonify(progress_status)
    return status_response

@app.route('/plot', methods=['POST'])
def plot_graph():
    """Render the feature-importance plot for one data item as a base64 PNG.

    Form fields:
        data_index: integer index into the uploaded, processed ``data``.

    Returns:
        JSON ``{"image": <base64-encoded PNG>}``, or ``(message, 400)`` when
        the index is invalid or the data / best model are not available yet.
    """
    try:
        data_index = int(request.form['data_index'])
    except ValueError:
        return "data_index must be an integer.", 400

    # Use the globally stored data
    if data is None:
        return "No data available. Please upload and process a SMILES dataset first.", 400

    # Use the globally stored best model
    if best_model is None:
        return "No best model available. Please process the data first.", 400

    # Resolve the item before any figure is created so that a bad index
    # neither leaks a figure nor ends in a 500.
    try:
        selected_item = data[data_index]
    except (IndexError, KeyError):
        return "data_index {} is out of range.".format(data_index), 400

    # NOTE(review): presumably the per-feature category counts expected by the
    # model -- confirm against SAR_functions_scripts.
    num_classes = [119, 5, 12, 12, 9, 6, 6, 2, 2]

    fig, _ax = plt.subplots()
    drawn_fig = fig
    try:
        plot_graph_feature_importance(selected_item, num_classes, best_model)

        # Save whatever is the current figure, exactly as plt.savefig would;
        # the helper may have drawn on `fig` or opened a figure of its own.
        drawn_fig = plt.gcf()
        with io.BytesIO() as buf:
            drawn_fig.savefig(buf, format='png')
            # Encode plot to base64
            graph_base64 = base64.b64encode(buf.getvalue()).decode('utf-8')
    finally:
        # Always release figures, even when plotting or saving fails;
        # pyplot keeps every open figure alive until it is closed.
        plt.close(fig)
        if drawn_fig is not fig:
            plt.close(drawn_fig)

    return jsonify({'image': graph_base64})

if __name__ == "__main__":
    # Reset the shared state before anything is reachable from outside.
    data = None  # Initialize the global variable to store processed data
    best_model = None  # Initialize the global variable to store the best model

    # ngrok.connect() returns an NgrokTunnel object; formatting the object
    # itself prints its whole repr, so only its URL is used for the banner.
    tunnel = ngrok.connect(5000)
    public_url = tunnel.public_url
    print(" * ngrok tunnel \"{}\" -> \"http://127.0.0.1:5000\"".format(public_url))

    try:
        # Blocks until the server is stopped (e.g. by interrupting the cell).
        app.run(host="0.0.0.0", port=5000)
    finally:
        # Close the tunnel so that re-running this cell does not accumulate
        # open tunnels pointing at the same port.
        ngrok.disconnect(public_url)


 * ngrok tunnel "NgrokTunnel: "https://b440-35-245-191-241.ngrok-free.app" -> "http://localhost:5000"" -> "http://127.0.0.1:5000"
 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on all addresses (0.0.0.0)
 * Running on http://127.0.0.1:5000
 * Running on http://172.28.0.12:5000
INFO:werkzeug:[33mPress CTRL+C to quit[0m
INFO:werkzeug:127.0.0.1 - - [18/Jun/2024 08:59:17] "GET / HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [18/Jun/2024 08:59:17] "[33mGET /favicon.ico HTTP/1.1[0m" 404 -
INFO:werkzeug:127.0.0.1 - - [18/Jun/2024 08:59:23] "POST /upload HTTP/1.1" 200 -


iter: 0
9
size 0 data: 112
size 1 data: 112
size total dataset: 224
224
Epoch: 1, Train Acc: 0.4925, Val Acc: 0.5111
Epoch: 2, Train Acc: 0.5075, Val Acc: 0.5111
Epoch: 3, Train Acc: 0.5075, Val Acc: 0.5111
Epoch: 4, Train Acc: 0.5075, Val Acc: 0.5111
Epoch: 5, Train Acc: 0.5075, Val Acc: 0.5111
Epoch: 6, Train Acc: 0.5299, Val Acc: 0.6444
Epoch: 7, Train Acc: 0.5522, Val Acc: 0.5111
Epoch: 8, Train Acc: 0.6418, Val Acc: 0.6222
Epoch: 9, Train Acc: 0.5522, Val Acc: 0.7556
Epoch: 10, Train Acc: 0.7537, Val Acc: 0.6889
Epoch: 11, Train Acc: 0.7015, Val Acc: 0.7333
Epoch: 12, Train Acc: 0.7687, Val Acc: 0.6889
Epoch: 13, Train Acc: 0.7015, Val Acc: 0.6889
Epoch: 14, Train Acc: 0.6567, Val Acc: 0.6889
Epoch: 15, Train Acc: 0.7463, Val Acc: 0.8000
Epoch: 16, Train Acc: 0.7313, Val Acc: 0.7556
Epoch: 17, Train Acc: 0.7612, Val Acc: 0.8444
Epoch: 18, Train Acc: 0.8060, Val Acc: 0.8444
Epoch: 19, Train Acc: 0.7910, Val Acc: 0.8222
Epoch: 20, Train Acc: 0.8060, Val Acc: 0.7778
Epoch: 21, Train 

INFO:werkzeug:127.0.0.1 - - [18/Jun/2024 08:59:31] "POST /process HTTP/1.1" 200 -


Epoch: 46, Train Acc: 0.6716, Val Acc: 0.7556
Epoch: 47, Train Acc: 0.8060, Val Acc: 0.7556
Early stopping at epoch 47
Final Test Accuracy: 0.7778
Accuracy: 0.7777777777777778
Precision: 0.8181818181818182
Recall: 0.75
F1-Score: 0.7826086956521738
AUC: 0.8438735177865613


INFO:werkzeug:127.0.0.1 - - [18/Jun/2024 08:59:33] "GET /progress HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [18/Jun/2024 08:59:39] "POST /plot HTTP/1.1" 200 -
