# In [3]:  (Jupyter notebook cell prompt; not part of the program)
from flask import Flask, request, render_template
import joblib
import numpy as np
import pandas as pd
import traceback
from sklearn.preprocessing import StandardScaler

# Flask application object; the route handlers in this file are registered on it.
app = Flask(__name__)

# Column list loaded from disk. preprocess_data() uses it to add any missing
# columns (filled with 0) and to order the full-model feature frame.
column_order = joblib.load("column_order.pkl")

# Load the trained models from the working directory at import time.
# NOTE(review): the file names suggest one full-feature XGBoost model
# (trained_xgb) plus three reduced-feature models (XGBoost, LightGBM,
# logistic regression) -- confirm against the training notebook.
trained_xgb = joblib.load("trained_xgb_model.pkl")
trained_xgbr = joblib.load("trained_xgb_r_model.pkl")
trained_lgbmr = joblib.load("trained_lgbm_r_model.pkl")
trained_logregr = joblib.load("trained_logreg_r_model.pkl")

# Ensemble weights
# Placeholder values. process() combines three models per ensemble, so with
# 0.33 each the blended "probabilities" sum to 0.99 rather than 1.
weight_xgbr = 0.33  # Replace with ensemble_weights['weight_xgb_r']
weight_lgbmr = 0.33  # Replace with ensemble_weights['weight_lgbm_r']
weight_logregr = 0.33  # Replace with ensemble_weights['weight_logreg_r']
weight_xgb = 0.33  # Replace with ensemble_weights['weight_xgb']

# Define top 10 features for each reduced model
# The reduced preprocessors return exactly these columns, in this order.
top_features = {
    'xgbr': [
        'Amorphous Urate/Phosphate', 'WBC', 'Leukocytes', 'Bacteria', 'Protein',
        'Blood', 'Age', 'FEMALE', 'RBC', 'Spec Gravity'
    ],
    'lgbmr': [
        'Amorphous Urate/Phosphate', 'WBC', 'Leukocytes', 'Bacteria', 'RBC',
        'Mucus Threads', 'Epithelial Cells', 'Blood', 'Spec Gravity', 'Age'
    ],
    'logregr': [
        'Bacteria', 'Nitrite', 'Amorphous Urate/Phosphate', 'WBC', 'Ketones',
        'Blood', 'Color', 'pH Level', 'Age', 'Glucose'
    ]
}

# NOTE(review): this scaler is created unfitted; process() calls
# fit_transform() on it for every request instead of reusing statistics from
# training. Presumably a fitted scaler should be loaded here -- TODO confirm.
scaler = StandardScaler()

@app.route('/')
def index():
    """Render the ``testnow.html`` template for the site root."""
    landing_page = render_template('testnow.html')
    return landing_page

def _print_class_probabilities(title, probabilities):
    """Print *title*, then one ``Class i: p%`` line per entry of *probabilities*."""
    print(f"\n{title}")
    for i, prob in enumerate(probabilities):
        print(f"Class {i}: {prob * 100:.2f}%")


def _read_sample_from_form(form):
    """Build the one-row raw-input DataFrame from the submitted form fields.

    Text fields default to ``''`` and are upper-cased; ``age``,
    ``specific-gravity`` and ``ph-level`` are parsed as numbers and raise
    ``ValueError`` when present but not numeric.
    """
    def upper(field):
        return form.get(field, '').upper()

    return pd.DataFrame({
        'Age': [int(form.get('age', 0))],
        'Sex': [upper('gender')],
        'Ethnicity': [upper('ethnicity')],
        'Marital Status': [upper('marital-status')],
        'Clarity': [upper('clarity')],
        'Color': [upper('color')],
        'Transparency': [upper('transparency')],
        'Spec Gravity': [float(form.get('specific-gravity', 1.0))],
        'pH Level': [float(form.get('ph-level', 7.0))],
        'Leukocytes': [upper('leukocytes')],
        'Nitrite': [upper('nitrite')],
        'Protein': [upper('protein')],
        'Glucose': [upper('glucose')],
        'Ketones': [upper('ketones')],
        'Urobilinogen': [upper('urobilinogen')],
        'Bilirubin': [upper('bilirubin')],
        'Blood': [upper('blood')],
        'Bacteria': [upper('bacteria')],
        'Epithelial Cells': [upper('epithelial-cells')],
        'RBC': [upper('rbc')],
        'WBC': [upper('wbc')],
        'Mucus Threads': [upper('mucus-threads')],
        'Amorphous Urate/Phosphate': [upper('amorphous-urate')],
    })


@app.route('/process', methods=['POST'])
def process():
    """Handle the submitted form: preprocess, predict, and blend the models.

    The ``model`` form field selects the ensemble:

    * ``fullreduced`` -- reduced XGBoost + reduced LightGBM + reduced LogReg
    * ``top3``        -- reduced XGBoost + full XGBoost + reduced LogReg

    Returns:
        The rendered ``testnow.html`` template with ``response`` set to
        ``"Predicted Class: <index>"`` on success; a plain error string when
        the model choice is unknown or any exception is raised.
    """
    try:
        model_choice = request.form.get('model', '').lower()
        # Reject an unknown ensemble before doing any parsing or inference.
        if model_choice not in ('fullreduced', 'top3'):
            print("AHWAWHHAWHWAHAW" + model_choice + "AHWAWHHAWHWAHAW")
            return "Invalid model selection. Please choose a valid model."

        sample_df = _read_sample_from_form(request.form)

        # Each preprocessor receives its own copy so none of them can see
        # another's column drops or re-encodings.
        xgbr_prepped_df = preprocess_data_xgbr(sample_df.copy())
        lgbm_prepped_df = preprocess_data_lgbm(sample_df.copy())
        logreg_prepped_df = preprocess_data_logreg(sample_df.copy())
        xgb_prepped_df = preprocess_data(sample_df.copy())

        # Debug: print the preprocessed data (the full XGB frame is printed
        # by preprocess_data itself).
        print("Preprocessed DataFrame for prediction:")
        print("XGBR DataFrame:")
        print(xgbr_prepped_df)
        print("LGBMR DataFrame:")
        print(lgbm_prepped_df)
        print("LogReg DataFrame:")
        print(logreg_prepped_df)

        # All four models are evaluated regardless of the selected ensemble.
        xgbr_prediction = trained_xgbr.predict_proba(xgbr_prepped_df)
        lgbm_prediction = trained_lgbmr.predict_proba(lgbm_prepped_df)

        # NOTE(review): fitting StandardScaler on this single request row
        # makes every scaled feature 0 (value minus its own mean), so the
        # logistic regression receives the same all-zero input for every
        # request. TODO: persist the scaler fitted during training and call
        # transform() here instead; no such artifact is visible in this file.
        scaled_logreg_features = scaler.fit_transform(logreg_prepped_df)
        logreg_prediction = trained_logregr.predict_proba(scaled_logreg_features)

        xgb_prediction = trained_xgb.predict_proba(xgb_prepped_df)

        if model_choice == 'fullreduced':
            _print_class_probabilities("XGBR Prediction Probabilities:", xgbr_prediction[0])
            _print_class_probabilities("LGBMR Prediction Probabilities:", lgbm_prediction[0])
            _print_class_probabilities("LogReg Reduced Prediction Probabilities:", logreg_prediction[0])
            # Weighted average of the three reduced models. The reduced
            # XGBoost output belongs with weight_xgbr (the full-model output
            # was used here before).
            prediction = (
                weight_xgbr * xgbr_prediction +
                weight_lgbmr * lgbm_prediction +
                weight_logregr * logreg_prediction
            )
        else:  # model_choice == 'top3'
            _print_class_probabilities("XGBR Prediction Probabilities:", xgbr_prediction[0])
            _print_class_probabilities("XGB Prediction Probabilities:", xgb_prediction[0])
            _print_class_probabilities("LogReg Reduced Prediction Probabilities:", logreg_prediction[0])
            # Weighted average of the top 3 overall models.
            prediction = (
                weight_xgbr * xgbr_prediction +
                weight_xgb * xgb_prediction +
                weight_logregr * logreg_prediction
            )

        _print_class_probabilities("Ensemble Prediction Probabilities:", prediction[0])

        # Report the class with the highest blended score for the single row.
        predicted_class = int(np.argmax(prediction[0]))
        response = f"Predicted Class: {predicted_class}"
        print("Prediction made successfully.")
        return render_template('testnow.html', response=response)

    except Exception as e:
        # Route-level boundary: log the full traceback, answer generically.
        print(f"Error: {e}")
        print(traceback.format_exc())
        return "An error occurred during processing."
    
def preprocess_data(df, columns=None):
    """Encode a raw sample frame into the feature frame for the full model.

    The input is not modified; all work happens on a copy.

    Args:
        df: DataFrame holding the raw columns ('Sex', 'Ethnicity',
            'Marital Status', 'Clarity', 'Urobilinogen' and every column
            listed in the ordinal mappings below).
        columns: Output column names, in order. Defaults to the module-level
            ``column_order``. Names not produced by the encoding are added
            and filled with 0.

    Returns:
        A new DataFrame containing exactly ``columns``, in that order.

    Raises:
        KeyError: if a required raw column is missing from ``df``.
    """
    if columns is None:
        columns = column_order

    # Work on a copy so the caller's frame is left untouched.
    df = df.copy()

    # Binary encoding; any value outside the mapping becomes NaN.
    df['Clarity'] = df['Clarity'].map({'INCREASED': 1, 'DECREASED': 0, 'CLEAR': 0})

    # 'NORMAL' -> 1, numeric text -> its value, anything else -> 0.
    is_normal = df['Urobilinogen'].eq('NORMAL')
    urobilinogen = pd.to_numeric(df['Urobilinogen'], errors='coerce').fillna(0)
    df['Urobilinogen'] = urobilinogen.mask(is_normal, 1)

    # Ordinal columns become the position of the value in its category list;
    # values not in the list are encoded as -1.
    ordinal_mappings = {
        'Color': ['CLEAR', 'PALE YELLOW', 'STRAW', 'LIGHT YELLOW', 'YELLOW', 'DARK YELLOW', 'AMBER', 'BROWN', 'RED'],
        'Transparency': ['CLEAR', 'HAZY', 'CLOUDY', 'MILKY', 'TURBID', 'RED'],
        'Nitrite': ['NEGATIVE', 'TRACE', 'POSITIVE'],
        'Protein': ['NEGATIVE', 'TRACE', '1+', '2+', '3+', '4+'],
        'Leukocytes': ['NEGATIVE', 'TRACE', '1+', '2+', '3+', '4+'],
        'Glucose': ['NEGATIVE', 'TRACE', '1+', '2+', '3+', '4+'],
        'Ketones': ['NEGATIVE', 'TRACE', '1+', '2+', '3+', '4+'],
        'Bilirubin': ['NEGATIVE', 'TRACE', '1+', '2+', '3+', '4+'],
        'Blood': ['NEGATIVE', 'TRACE', '1+', '2+', '3+', '4+'],
        'Bacteria': ['RARE', 'FEW', 'MODERATE', 'MANY', 'TNTC'],
        'Epithelial Cells': ['RARE', 'FEW', 'MODERATE', 'MANY', 'TNTC'],
        'RBC': ['0-2', '5-10', '10-20', '20-30', 'TNTC'],
        'WBC': ['0-2', '5-10', '10-20', '20-30', 'TNTC'],
        'Mucus Threads': ['RARE', 'FEW', 'MODERATE', 'MANY', 'TNTC'],
        'Amorphous Urate/Phosphate': ['RARE', 'FEW', 'MODERATE', 'MANY', 'TNTC'],
    }
    for col, order in ordinal_mappings.items():
        df[col] = pd.Categorical(df[col], categories=order, ordered=True).codes

    # Indicator columns replace the raw 'Sex' and 'Ethnicity' text.
    df['FEMALE'] = df['Sex'].eq('FEMALE').astype('int64')
    df['FILIPINO'] = df['Ethnicity'].eq('FILIPINO').astype('int64')
    df = df.drop(columns=['Sex', 'Ethnicity'])

    # One-hot encode 'Marital Status'; only values present in df get a column.
    df = pd.get_dummies(df, columns=['Marital Status'], drop_first=False)

    # The output schema uses 'WIDOW' where the raw value is 'WIDOWED'
    # (rename is a no-op when the column is absent).
    df = df.rename(columns={'Marital Status_WIDOWED': 'Marital Status_WIDOW'})

    # Add every missing output column as 0 and put all columns in the
    # requested order, in a single step.
    df = df.reindex(columns=list(columns), fill_value=0)
    print("XGBoost Dataframe:")
    print(df)
    return df

def preprocess_data_xgbr(df, features=None):
    """Encode a raw sample frame for the reduced XGBoost model.

    The input is not modified; all work happens on a copy.

    Args:
        df: DataFrame with a 'Sex' column plus the raw feature columns.
        features: Columns to return, in order. Defaults to
            ``top_features['xgbr']``.

    Returns:
        A new DataFrame containing exactly ``features``.

    Raises:
        KeyError: if 'Sex' or a requested feature is missing.
    """
    if features is None:
        features = top_features['xgbr']

    # Work on a copy so the caller's frame is left untouched.
    encoded = df.copy()

    # Indicator column replaces the raw 'Sex' text.
    encoded['FEMALE'] = encoded['Sex'].eq('FEMALE').astype('int64')
    encoded = encoded.drop(columns=['Sex'])

    # Ordinal columns become the position of the value in its category list;
    # values not in the list are encoded as -1.
    ordinal_mappings = {
        'Leukocytes': ['NEGATIVE', 'TRACE', '1+', '2+', '3+', '4+'],
        'Protein': ['NEGATIVE', 'TRACE', '1+', '2+', '3+', '4+'],
        'Blood': ['NEGATIVE', 'TRACE', '1+', '2+', '3+', '4+'],
        'Bacteria': ['RARE', 'FEW', 'MODERATE', 'MANY', 'TNTC'],
        'RBC': ['0-2', '5-10', '10-20', '20-30', 'TNTC'],
        'WBC': ['0-2', '5-10', '10-20', '20-30', 'TNTC'],
        'Amorphous Urate/Phosphate': ['RARE', 'FEW', 'MODERATE', 'MANY', 'TNTC'],
    }
    for col, order in ordinal_mappings.items():
        if col in encoded.columns:
            encoded[col] = pd.Categorical(encoded[col], categories=order, ordered=True).codes

    return encoded[list(features)]

def preprocess_data_lgbm(df, features=None):
    """Encode a raw sample frame for the reduced LightGBM model.

    The input is not modified; all work happens on a copy.

    Args:
        df: DataFrame with a 'Sex' column plus the raw feature columns.
        features: Columns to return, in order. Defaults to
            ``top_features['lgbmr']``.

    Returns:
        A new DataFrame containing exactly ``features``.

    Raises:
        KeyError: if 'Sex' or a requested feature is missing.
    """
    if features is None:
        features = top_features['lgbmr']

    # Work on a copy so the caller's frame is left untouched.
    encoded = df.copy()

    # Indicator column replaces the raw 'Sex' text. The default feature list
    # does not include it, but it stays available to explicit `features`.
    encoded['FEMALE'] = encoded['Sex'].eq('FEMALE').astype('int64')
    encoded = encoded.drop(columns=['Sex'])

    # Ordinal columns become the position of the value in its category list;
    # values not in the list are encoded as -1.
    ordinal_mappings = {
        'Leukocytes': ['NEGATIVE', 'TRACE', '1+', '2+', '3+', '4+'],
        'Blood': ['NEGATIVE', 'TRACE', '1+', '2+', '3+', '4+'],
        'Bacteria': ['RARE', 'FEW', 'MODERATE', 'MANY', 'TNTC'],
        'RBC': ['0-2', '5-10', '10-20', '20-30', 'TNTC'],
        'WBC': ['0-2', '5-10', '10-20', '20-30', 'TNTC'],
        'Amorphous Urate/Phosphate': ['RARE', 'FEW', 'MODERATE', 'MANY', 'TNTC'],
        'Mucus Threads': ['RARE', 'FEW', 'MODERATE', 'MANY', 'TNTC'],
        'Epithelial Cells': ['RARE', 'FEW', 'MODERATE', 'MANY', 'TNTC'],
    }
    for col, order in ordinal_mappings.items():
        if col in encoded.columns:
            encoded[col] = pd.Categorical(encoded[col], categories=order, ordered=True).codes

    return encoded[list(features)]

def preprocess_data_logreg(df, features=None):
    """Encode a raw sample frame for the reduced logistic-regression model.

    The input is not modified; all work happens on a copy.

    Args:
        df: DataFrame with a 'Sex' column plus the raw feature columns.
        features: Columns to return, in order. Defaults to
            ``top_features['logregr']``.

    Returns:
        A new DataFrame containing exactly ``features``.

    Raises:
        KeyError: if 'Sex' or a requested feature is missing.
    """
    if features is None:
        features = top_features['logregr']

    # Work on a copy so the caller's frame is left untouched.
    encoded = df.copy()

    # Indicator column replaces the raw 'Sex' text. The default feature list
    # does not include it, but it stays available to explicit `features`.
    encoded['FEMALE'] = encoded['Sex'].eq('FEMALE').astype('int64')
    encoded = encoded.drop(columns=['Sex'])

    # Ordinal columns become the position of the value in its category list;
    # values not in the list are encoded as -1.
    # NOTE(review): 'Nitrite' and 'Color' use different category lists here
    # than in preprocess_data (no 'TRACE'; a shorter, differently ordered
    # colour list), so e.g. 'AMBER' or 'PALE YELLOW' encode as -1.
    # Presumably this mirrors how the model was trained -- TODO confirm.
    ordinal_mappings = {
        'Bacteria': ['RARE', 'FEW', 'MODERATE', 'MANY', 'TNTC'],
        'Leukocytes': ['NEGATIVE', 'TRACE', '1+', '2+', '3+', '4+'],
        'Blood': ['NEGATIVE', 'TRACE', '1+', '2+', '3+', '4+'],
        'WBC': ['0-2', '5-10', '10-20', '20-30', 'TNTC'],
        'Amorphous Urate/Phosphate': ['RARE', 'FEW', 'MODERATE', 'MANY', 'TNTC'],
        'Nitrite': ['NEGATIVE', 'POSITIVE'],
        'Ketones': ['NEGATIVE', 'TRACE', '1+', '2+', '3+', '4+'],
        'Glucose': ['NEGATIVE', 'TRACE', '1+', '2+', '3+', '4+'],
        'Color': ['YELLOW', 'STRAW', 'RED', 'BROWN', 'GREEN'],
    }
    for col, order in ordinal_mappings.items():
        if col in encoded.columns:
            encoded[col] = pd.Categorical(encoded[col], categories=order, ordered=True).codes

    return encoded[list(features)]

# Script entry point: start Flask's built-in server on port 5000 when this
# module is executed directly (not when it is imported).
if __name__ == '__main__':
    app.run(port=5000)

# Captured notebook output from running the cell above:
#  * Serving Flask app '__main__'
#  * Debug mode: off
#
#
#  * Running on http://127.0.0.1:5000
# Press CTRL+C to quit
