In [1]:
from flask import Flask, request, render_template
import joblib
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler

app = Flask(__name__)

# Load models and ensemble weights
trained_xgb = joblib.load("trained_xgb_model.pkl")
trained_xgbr = joblib.load("trained_xgb_r_model.pkl")
trained_lgbmr = joblib.load("trained_lgbm_r_model.pkl")
trained_logregr = joblib.load("trained_logreg_r_model.pkl")

ensemble_weights = joblib.load("ensemble_weights.pkl")
weight_xgb = ensemble_weights['weight_xgb']
weight_xgbr = ensemble_weights['weight_xgb_r']
weight_lgbmr = ensemble_weights['weight_lgbm_r']
weight_logregr = ensemble_weights['weight_logreg_r']

# Load the pre-fitted scaler
scaler = joblib.load("scaler.pkl")

column_order = joblib.load("column_order.pkl")

# Define top 10 features for each model
top_features = {
    'logregr': [
        'Bacteria', 'Nitrite', 'Amorphous Urate/Phosphate', 'WBC', 'Ketones',
        'Blood', 'Color', 'pH Level', 'Age', 'Glucose'
    ],
    'xgbr': [
        'Amorphous Urate/Phosphate', 'WBC', 'Leukocytes', 'Bacteria', 'Protein',
        'Blood', 'Age', 'FEMALE', 'RBC', 'Spec Gravity'
    ],
    'lgbmr': [
        'Amorphous Urate/Phosphate', 'WBC', 'Leukocytes', 'Bacteria', 'RBC',
        'Mucus Threads', 'Epithelial Cells', 'Blood', 'Spec Gravity', 'Age'
    ]
}

@app.route('/')
def index():
    return render_template('testnow.html')

@app.route('/process', methods=['POST'])
def process():
    try:
        # Collect form data
        Age = int(request.form.get('age', 0))
        Sex = request.form.get('sex', '').upper()
        MaritalStatus = request.form.get('marital-status', '').upper()
        Ethnicity = request.form.get('ethnicity', '').upper()
        Clarity = request.form.get('clarity', '').upper()
        M_Color = request.form.get('color', '').upper()
        Transparency = request.form.get('transparency', '').upper()
        SpecificGravity = float(request.form.get('specific-gravity', 1.0))
        PHLevel = float(request.form.get('ph-level', 7.0))
        Leukocytes = request.form.get('leukocytes', '').upper()
        Nitrite = request.form.get('nitrite', '').upper()
        Protein = request.form.get('protein', '').upper()
        Glucose = request.form.get('glucose', '').upper()
        Ketones = request.form.get('ketones', '').upper()
        Urobilinogen = request.form.get('urobilinogen', '').upper()
        Bilirubin = request.form.get('bilirubin', '').upper()
        Blood = request.form.get('blood', '').upper()
        Bacteria = request.form.get('bacteria', '').upper()
        EpithilialCells = request.form.get('epithelial-cells', '').upper()
        MucusThreads = request.form.get('mucus-threads', '').upper()
        AmorphousUrate = request.form.get('amorphous-urate', '').upper()
        RedBC = request.form.get('rbc', '').upper()
        WhiteBC = request.form.get('wbc', '').upper()

        # Prepare data for prediction
        sample_data = {
            'Age': [Age],
            'Sex': [Sex],
            'Ethnicity': [Ethnicity],
            'Marital Status': [MaritalStatus],
            'Clarity': [Clarity],
            'Color': [M_Color],
            'Transparency': [Transparency],
            'Spec Gravity': [SpecificGravity],
            'pH Level': [PHLevel],
            'Leukocytes': [Leukocytes],
            'Nitrite': [Nitrite],
            'Protein': [Protein],
            'Glucose': [Glucose],
            'Ketones': [Ketones],
            'Urobilinogen': [Urobilinogen],
            'Bilirubin': [Bilirubin],
            'Blood': [Blood],
            'Bacteria': [Bacteria],
            'Epithelial Cells': [EpithilialCells],
            'RBC': [RedBC],
            'WBC': [WhiteBC],
            'Mucus Threads': [MucusThreads],
            'Amorphous Urate/Phosphate': [AmorphousUrate]
        }
        sample_df = pd.DataFrame(sample_data)

        # Preprocess data for each model (create a copy of sample_df for each model)
        xgbr_data = preprocess_data(sample_df.copy(), 'xgbr')
        lgbmr_data = preprocess_data(sample_df.copy(), 'lgbmr')
        logregr_data = preprocess_data(sample_df.copy(), 'logregr')

        # Check DataFrame before prediction
        print("DataFrame for XGBR Model:\n", xgbr_data.head())
        print("DataFrame for LGBMR Model:\n", lgbmr_data.head())
        print("DataFrame for LOGREG Model:\n", logregr_data.head())

        # Make predictions
        print("Making predictions...")
        xgbr_pred = trained_xgbr.predict_proba(xgbr_data)
        lgbmr_pred = trained_lgbmr.predict_proba(lgbmr_data)
        logregr_pred = trained_logregr.predict_proba(logregr_data)

        # Calculate the ensemble prediction
        weighted_avg = (weight_xgbr * xgbr_pred + weight_lgbmr * lgbmr_pred + weight_logregr * logregr_pred) / (
            weight_xgb + weight_lgbmr + weight_logregr)

        # Return the predicted class
        predictions = np.argmax(weighted_avg, axis=1)
        response = f"Predicted Class: {predictions}"
        return render_template('testnow.html', response=response)

    except Exception as e:
        print(f"Error: {e}")
        return "An error occurred during processing."

def preprocess_data(df, model_name):
    # Work on a copy of the DataFrame to avoid altering the original
    df = df.copy()

    # Convert binary categorical columns
    binary_columns = {'Clarity': {'INCREASED': 1, 'DECREASED': 0, 'CLEAR': 0}}
    for col, mapping in binary_columns.items():
        if col in df.columns:
            df[col] = df[col].map(mapping)

    # Handle 'Urobilinogen' column specifically
    if 'Urobilinogen' in df.columns:
        df['Urobilinogen'] = df['Urobilinogen'].replace('NORMAL', 1).infer_objects(copy=False)
        df['Urobilinogen'] = pd.to_numeric(df['Urobilinogen'], errors='coerce').fillna(0)

    # Convert ordinal categorical columns to numerical codes
    ordinal_mappings = {
        'Color': ['CLEAR', 'PALE YELLOW', 'STRAW', 'LIGHT YELLOW', 'YELLOW', 'DARK YELLOW', 'AMBER', 'BROWN', 'RED'],
        'Transparency': ['CLEAR', 'HAZY', 'CLOUDY', 'MILKY', 'TURBID', 'RED'],
        'Nitrite': ['NEGATIVE', 'TRACE', 'POSITIVE'],
        'Protein': ['NEGATIVE', 'TRACE', '1+', '2+', '3+', '4+'],
        'Leukocytes': ['NEGATIVE', 'TRACE', '1+', '2+', '3+', '4+'],
        'Glucose': ['NEGATIVE', 'TRACE', '1+', '2+', '3+', '4+'],
        'Ketones': ['NEGATIVE', 'TRACE', '1+', '2+', '3+', '4+'],
        'Bilirubin': ['NEGATIVE', 'TRACE', '1+', '2+', '3+', '4+'],
        'Blood': ['NEGATIVE', 'TRACE', '1+', '2+', '3+', '4+'],
        'Bacteria': ['RARE', 'FEW', 'MODERATE', 'MANY', 'TNTC'],
        'Epithelial Cells': ['RARE', 'FEW', 'MODERATE', 'MANY', 'TNTC'],
        'RBC': ['0-2', '5-10', '10-20', '20-30', 'TNTC'],
        'WBC': ['0-2', '5-10', '10-20', '20-30', 'TNTC'],
        'Mucus Threads': ['RARE', 'FEW', 'MODERATE', 'MANY', 'TNTC'],
        'Amorphous Urate/Phosphate': ['RARE', 'FEW', 'MODERATE', 'MANY', 'TNTC']
    }
    for col, order in ordinal_mappings.items():
        if col in df.columns:
            df[col] = pd.Categorical(df[col], categories=order, ordered=True).codes

    # Label encode binary categorical columns
    if 'Sex' in df.columns:
        df['FEMALE'] = df['Sex'].apply(lambda x: 1 if x == 'FEMALE' else 0)
        df.drop(columns=['Sex'], inplace=True)

    if 'Ethnicity' in df.columns:
        df['FILIPINO'] = df['Ethnicity'].apply(lambda x: 1 if x == 'FILIPINO' else 0)
        df.drop(columns=['Ethnicity'], inplace=True)

    # One-hot encode the 'Marital Status' column
    df = pd.get_dummies(df, columns=['Marital Status'], drop_first=False)

    # Rename the columns if necessary
    if 'Marital Status_WIDOWED' in df.columns:
        df.rename(columns={'Marital Status_WIDOWED': 'Marital Status_WIDOW'}, inplace=True)

    # Ensure all required columns are present by adding missing columns with zeros
    for col in column_order:
        if col not in df.columns:
            df[col] = 0  # Add missing columns with default value 0

    # Reorder the columns to match the training set
    df = df[column_order]

    # Select only the top features for the specified model
    df_model = df[top_features[model_name]].copy()  # Make a copy to avoid modifying the original DataFrame
    
    print(f"Preprocessed data for model {model_name}:\n", df_model)
    
    return df_model


if __name__ == '__main__':
    app.run(port=5000)


 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on http://127.0.0.1:5000
Press CTRL+C to quit
  df['Urobilinogen'] = df['Urobilinogen'].replace('NORMAL', 1).infer_objects(copy=False)
  df['Urobilinogen'] = df['Urobilinogen'].replace('NORMAL', 1).infer_objects(copy=False)
  df['Urobilinogen'] = df['Urobilinogen'].replace('NORMAL', 1).infer_objects(copy=False)


Preprocessed data for model xgbr:
    Amorphous Urate/Phosphate  WBC  Leukocytes  Bacteria  Protein  Blood  Age  \
0                          1    0           0         0        0      0   21   

   FEMALE  RBC  Spec Gravity  
0       0    0         1.025  
Preprocessed data for model lgbmr:
    Amorphous Urate/Phosphate  WBC  Leukocytes  Bacteria  RBC  Mucus Threads  \
0                          1    0           0         0    0              0   

   Epithelial Cells  Blood  Spec Gravity  Age  
0                 1      0         1.025   21  
Preprocessed data for model logregr:
    Bacteria  Nitrite  Amorphous Urate/Phosphate  WBC  Ketones  Blood  Color  \
0         0        0                          1    0        0      0      4   

   pH Level  Age  Glucose  
0       5.0   21        0  
DataFrame for XGBR Model:
    Amorphous Urate/Phosphate  WBC  Leukocytes  Bacteria  Protein  Blood  Age  \
0                          1    0           0         0        0      0   21   

   FEMALE 

127.0.0.1 - - [24/Nov/2024 22:19:01] "POST /process HTTP/1.1" 200 -
127.0.0.1 - - [24/Nov/2024 22:19:01] "GET /static/styles.css HTTP/1.1" 304 -
