In [96]:
import joblib 
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler

In [97]:
trained_xgb = joblib.load("trained_xgb_model.pkl")
trained_lgbm = joblib.load("trained_lgbm_model.pkl")
trained_svm = joblib.load("trained_svm_model.pkl")

In [98]:
ensemble_weights = joblib.load("ensemble_weights.pkl")
weight_xgb = ensemble_weights['weight_xgb']
weight_lgbm = ensemble_weights['weight_lgbm']
weight_svc = ensemble_weights['weight_svc']

In [99]:
def ensemble_predict(X_test):
    xgb_predicted_proba = trained_xgb.predict_proba(X_test)
    lgbm_predicted_proba = trained_lgbm.predict_proba(X_test)
    svm_predicted_proba = trained_svm.predict_proba(X_test)

    print("XGB Probabilities:", xgb_predicted_proba[:5])
    print("LGBM Probabilities:", lgbm_predicted_proba[:5])
    print("SVM Probabilities:", svm_predicted_proba[:5])
    
    top3_waverage_predictions = (
        weight_lgbm * lgbm_predicted_proba +
        weight_xgb * xgb_predicted_proba +
        weight_svc * svm_predicted_proba
    ) / (weight_lgbm + weight_xgb + weight_svc)

    
    top3_labeled_predictions = np.argmax(top3_waverage_predictions, axis=1)

    print("Ensemble Prediction Probabilities:", top3_waverage_predictions)
    return top3_labeled_predictions

In [100]:
scaler = StandardScaler()

In [101]:
sample_data = {
    'Age': [20],
    'Sex': ['MALE'],
    'Ethnicity': ['FILIPINO'],
    'Marital Status': ['SINGLE'],
    'Clarity': ['DECREASED'],
    'Color': ['LIGHT YELLOW'],
    'Transparency': ['CLEAR'],
    'Spec Gravity': [1.025],
    'pH Level': [5.0],
    'Leukocytes': ['NEGATIVE'],
    'Nitrite': ['NEGATIVE'],
    'Protein': ['NEGATIVE'],
    'Glucose': ['NEGATIVE'],
    'Ketones': ['NEGATIVE'],
    'Urobilinogen': ['NORMAL'],
    'Bilirubin': ['NEGATIVE'],
    'Blood': ['NEGATIVE'],
    'Bacteria': ['RARE'],
    'Epithelial Cells': ['RARE'],
    'RBC': ['0-2'],
    'WBC': ['0-2'],
    'Mucus Threads': ['RARE'],
    'Amorphous Urate/Phosphate': ['RARE'],
}

sample_df = pd.DataFrame(sample_data)


In [102]:


sample_df['Urobilinogen'] = sample_df['Urobilinogen'].replace('NORMAL', 1)
sample_df['Urobilinogen'] = sample_df['Urobilinogen'].apply(
    lambda x: int(float(str(x).split()[0])) if isinstance(x, str) and any(char.isdigit() for char in x) else x
)

binary_columns = {
    'Clarity': {'INCREASED': 1, 'DECREASED': 0}
}

for col, mapping in binary_columns.items():
    sample_df[col] = sample_df[col].map(mapping)

ordinal_mappings = {
    'Color': ['CLEAR', 'PALE YELLOW', 'STRAW', 'LIGHT YELLOW', 'YELLOW', 'DARK YELLOW', 'AMBER', 'BROWN', 'RED'],
    'Transparency': ['CLEAR',  'HAZY', 'CLOUDY', 'MILKY', 'TURBID', 'RED'],
    'Nitrite': ['NEGATIVE', 'TRACE', 'POSITIVE'],
    'Protein': ['NEGATIVE', 'TRACE', '1+', '2+', '3+', '4+'],
    'Leukocytes': ['NEGATIVE', 'TRACE', '1+', '2+', '3+', '4+'],
    'Glucose': ['NEGATIVE', 'TRACE', '1+', '2+', '3+', '4+'],
    'Ketones': ['NEGATIVE', 'TRACE', '1+', '2+', '3+', '4+'],
    'Bilirubin': ['NEGATIVE', 'TRACE', '1+', '2+', '3+', '4+'],
    'Blood': ['NEGATIVE', 'TRACE', '1+', '2+', '3+', '4+'],
    'Bacteria': ['RARE', 'FEW', 'MODERATE', 'MANY', 'TNTC'],
    'Epithelial Cells': ['RARE', 'FEW', 'MODERATE', 'MANY', 'TNTC'],
    'RBC': ['0-2', ' 5-10', ' 10-20', '20-30', 'TNTC'],
    'WBC': ['0-2', ' 5-10', ' 10-20', '20-30', 'TNTC'],
    'Mucus Threads': ['RARE', 'FEW', 'MODERATE', 'MANY', 'TNTC'],
    'Amorphous Urate/Phosphate': ['RARE', 'FEW', 'MODERATE', 'MANY', 'TNTC']
}

for col, order in ordinal_mappings.items():
    sample_df[col] = pd.Categorical(sample_df[col], categories=order, ordered=True).codes

sample_df = pd.get_dummies(sample_df, columns=['Marital Status'], drop_first=False)

expected_columns = ['Marital Status_MARRIED', 'Marital Status_SINGLE', 'Marital Status_WIDOWED']
for col in expected_columns:
    if col not in sample_df.columns:
        sample_df[col] = 0 

sample_df['FEMALE'] = sample_df['Sex'].apply(lambda x: 1 if x == 'FEMALE' else 0)
sample_df = sample_df.drop(columns=['Sex'])

sample_df['FILIPINO'] = sample_df['Ethnicity'].apply(lambda x: 1 if x == 'FILIPINO' else 0)
sample_df = sample_df.drop(columns=['Ethnicity'])

print(sample_df)

   Age  Clarity  Color  Transparency  Spec Gravity  pH Level  Leukocytes  \
0   20        0      3             0         1.025       5.0           0   

   Nitrite  Protein  Glucose  ...  Epithelial Cells  RBC  WBC  Mucus Threads  \
0        0        0        0  ...                 0    0    0              0   

   Amorphous Urate/Phosphate  Marital Status_SINGLE  Marital Status_MARRIED  \
0                          0                   True                       0   

   Marital Status_WIDOWED  FEMALE  FILIPINO  
0                       0       0         1  

[1 rows x 25 columns]


In [103]:
predictions = ensemble_predict(sample_df)
print("Sample Predictions:", predictions)

XGB Probabilities: [[0.27631485 0.72368515]]
LGBM Probabilities: [[0.10183562 0.89816438]]
SVM Probabilities: [[0.30585291 0.69414709]]
Ensemble Prediction Probabilities: [[0.22800113 0.77199889]]
Sample Predictions: [1]


