In [1]:
from flask import Flask, request, render_template
from threading import Thread
import joblib 
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler

app = Flask(__name__)

@app.route('/')
def index():
    return render_template('testnow.html') # html file to be rendered

@app.route('/process', methods=['POST'])

def process():
       
     # store data from the form to a variable

    ## PERSONAL INFORMATION
    Age = request.form['age']
    Gender  = request.form['gender']
    MaritalStatus = request.form['marital-status']
    Ethnicity = request.form['ethnicity']

    ## MACROSCOPIC
    Clarity  = request.form['clarity']
    M_Color = request.form['color'] # Color is a constant
    Transparency  = request.form['transparency']

    ## CHEMICAL
    SpecificGravity = request.form['specific-gravity']
    PHLevel = request.form['ph-level']
    Leukocytes = request.form['leukocytes']
    Nitrite = request.form['nitrite']
    Protein = request.form['protein']
    Glucose = request.form['glucose']
    Ketones = request.form['ketones']
    Urobilinogen = request.form['urobilinogen']
    Bilirubin = request.form['bilirubin']
    Blood = request.form['blood']

    ## MICROSCOPIC
    Bacteria = request.form['bacteria']
    EpithilialCells = request.form['epithelial-cells']
    MucusThreads = request.form['mucus-threads']
    AmorphousUrate = request.form['amorphous-urate']
    RedBC = request.form['rbc']
    WhiteBC = request.form['wbc']

    ## USE DATA ABOVE TO ACCESS THEM FOR THE MODEL

    # Run Model Here
    trained_xgb = joblib.load("trained_xgb_model.pkl")
    trained_lgbm = joblib.load("trained_lgbm_model.pkl")
    trained_svm = joblib.load("trained_svm_model.pkl")


    ensemble_weights = joblib.load("ensemble_weights.pkl")
    weight_xgb = ensemble_weights['weight_xgb']
    weight_lgbm = ensemble_weights['weight_lgbm']
    weight_svc = ensemble_weights['weight_svc']

    scaler = StandardScaler()

    sample_data = {
        'Age': [int(Age)],
        'Sex': [Gender.upper()],
        'Ethnicity': [Ethnicity.upper()],
        'Marital Status': [MaritalStatus.upper()],
        'Clarity': [Clarity.upper()],
        'Color': [M_Color.upper()],
        'Transparency': [Transparency.upper()],
        'Spec Gravity': [float(SpecificGravity)],
        'pH Level': [float(PHLevel)],
        'Leukocytes': [Leukocytes.upper()],
        'Nitrite': [Nitrite.upper()],
        'Protein': [Protein.upper()],
        'Glucose': [Glucose.upper()],
        'Ketones': [Ketones.upper()],
        'Urobilinogen': [Urobilinogen.upper()],
        'Bilirubin': [Bilirubin.upper()],
        'Blood': [Blood.upper()],
        'Bacteria': [Bacteria.upper()],
        'Epithelial Cells': [EpithilialCells.upper()],
        'RBC': [RedBC.upper()],
        'WBC': [WhiteBC.upper()],
        'Mucus Threads': [MucusThreads.upper()],
        'Amorphous Urate/Phosphate': [AmorphousUrate.upper()],
    }

    sample_df = pd.DataFrame(sample_data)

    # START OF PRE-PROCESSING DATA ===========================================================================================
    sample_df['Urobilinogen'] = sample_df['Urobilinogen'].replace('NORMAL', 1)
    sample_df['Urobilinogen'] = sample_df['Urobilinogen'].apply(
        lambda x: int(float(str(x).split()[0])) if isinstance(x, str) and any(char.isdigit() for char in x) else x
    )

    binary_columns = {
        'Clarity': {'INCREASED': 1, 'DECREASED': 0}
    }

    for col, mapping in binary_columns.items():
        sample_df[col] = sample_df[col].map(mapping)

    ordinal_mappings = {
        'Color': ['CLEAR', 'PALE YELLOW', 'STRAW', 'LIGHT YELLOW', 'YELLOW', 'DARK YELLOW', 'AMBER', 'BROWN', 'RED'],
        'Transparency': ['CLEAR',  'HAZY', 'CLOUDY', 'MILKY', 'TURBID', 'RED'],
        'Nitrite': ['NEGATIVE', 'TRACE', 'POSITIVE'],
        'Protein': ['NEGATIVE', 'TRACE', '1+', '2+', '3+', '4+'],
        'Leukocytes': ['NEGATIVE', 'TRACE', '1+', '2+', '3+', '4+'],
        'Glucose': ['NEGATIVE', 'TRACE', '1+', '2+', '3+', '4+'],
        'Ketones': ['NEGATIVE', 'TRACE', '1+', '2+', '3+', '4+'],
        'Bilirubin': ['NEGATIVE', 'TRACE', '1+', '2+', '3+', '4+'],
        'Blood': ['NEGATIVE', 'TRACE', '1+', '2+', '3+', '4+'],
        'Bacteria': ['RARE', 'FEW', 'MODERATE', 'MANY', 'TNTC'],
        'Epithelial Cells': ['RARE', 'FEW', 'MODERATE', 'MANY', 'TNTC'],
        'RBC': ['0-2', ' 5-10', ' 10-20', '20-30', 'TNTC'],
        'WBC': ['0-2', ' 5-10', ' 10-20', '20-30', 'TNTC'],
        'Mucus Threads': ['RARE', 'FEW', 'MODERATE', 'MANY', 'TNTC'],
        'Amorphous Urate/Phosphate': ['RARE', 'FEW', 'MODERATE', 'MANY', 'TNTC']
    }

    for col, order in ordinal_mappings.items():
        sample_df[col] = pd.Categorical(sample_df[col], categories=order, ordered=True).codes

    sample_df = pd.get_dummies(sample_df, columns=['Marital Status'], drop_first=False)

    expected_columns = ['Marital Status_MARRIED', 'Marital Status_SINGLE', 'Marital Status_WIDOWED']
    for col in expected_columns:
        if col not in sample_df.columns:
            sample_df[col] = 0 

    sample_df['FEMALE'] = sample_df['Sex'].apply(lambda x: 1 if x == 'FEMALE' else 0)
    sample_df = sample_df.drop(columns=['Sex'])

    sample_df['FILIPINO'] = sample_df['Ethnicity'].apply(lambda x: 1 if x == 'FILIPINO' else 0)
    sample_df = sample_df.drop(columns=['Ethnicity'])

    # END OF PRE-PROCESSING DATA ===========================================================================================
    # print(f"Pre-processed Data: {sample_df}")
    print(sample_df)

    def ensemble_predict(X_test):
        xgb_predicted_proba = trained_xgb.predict_proba(X_test)
        lgbm_predicted_proba = trained_lgbm.predict_proba(X_test)
        svm_predicted_proba = trained_svm.predict_proba(X_test)

        print("XGB Probabilities:", xgb_predicted_proba[:5])
        print("LGBM Probabilities:", lgbm_predicted_proba[:5])
        print("SVM Probabilities:", svm_predicted_proba[:5])
        
        top3_waverage_predictions = (
            weight_lgbm * lgbm_predicted_proba +
            weight_xgb * xgb_predicted_proba +
            weight_svc * svm_predicted_proba
        ) / (weight_lgbm + weight_xgb + weight_svc)

        
        top3_labeled_predictions = np.argmax(top3_waverage_predictions, axis=1)

        print("Ensemble Prediction Probabilities:", top3_waverage_predictions)
        return top3_labeled_predictions
    
    predictions = ensemble_predict(sample_df)

    # Edit response to show the diagnosis
    response = f'Data Succesfully Collected! Going to the model now... {predictions}'
    return render_template('testnow.html', response=response)

# Run Flask in a separate thread to keep the notebook responsive
def run_app():
    app.run(port=5000)

thread = Thread(target=run_app)
thread.start()


 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on http://127.0.0.1:5000
Press CTRL+C to quit
127.0.0.1 - - [18/Nov/2024 00:19:32] "GET / HTTP/1.1" 200 -
127.0.0.1 - - [18/Nov/2024 00:19:32] "GET /static/styles.css HTTP/1.1" 304 -
  sample_df['Urobilinogen'] = sample_df['Urobilinogen'].replace('NORMAL', 1)
127.0.0.1 - - [18/Nov/2024 00:20:13] "POST /process HTTP/1.1" 200 -
127.0.0.1 - - [18/Nov/2024 00:20:13] "GET /static/styles.css HTTP/1.1" 304 -


   Age  Clarity  Color  Transparency  Spec Gravity  pH Level  Leukocytes  \
0   21        0      4             1         1.025       5.0           0   

   Nitrite  Protein  Glucose  ...  Epithelial Cells  RBC  WBC  Mucus Threads  \
0        0        1        0  ...                 1    0    0              1   

   Amorphous Urate/Phosphate  Marital Status_SINGLE  Marital Status_MARRIED  \
0                          0                   True                       0   

   Marital Status_WIDOWED  FEMALE  FILIPINO  
0                       0       0         1  

[1 rows x 25 columns]
XGB Probabilities: [[0.27242994 0.72757006]]
LGBM Probabilities: [[0.09012589 0.90987411]]
SVM Probabilities: [[0.48211239 0.51788761]]
Ensemble Prediction Probabilities: [[0.28155608 0.71844394]]
