In [1]:
import sys

# Put the parent directory on the import path so the `scripts` and `modeling`
# packages imported in the next cell resolve (presumably they live one level
# above this notebook -- confirm against the repository layout).
# Guarded so that re-running this cell does not keep appending duplicates.
if "../" not in sys.path:
    sys.path.append("../")

In [2]:
from scripts.configuration import FINAL_EXPORT_PATH, MODEL_PATH
from scripts.saving import save_model
from modeling.base import EvaluationMetrics
from modeling.rf import RandomForest

In [3]:
import pandas as pd

In [4]:
# Load the exported dataset from the location configured in scripts.configuration.
df = pd.read_csv(filepath_or_buffer=FINAL_EXPORT_PATH)

In [5]:
# Independent (deep) copy of the loaded frame, used below for previewing.
df_copy = df.copy(deep=True)

In [6]:
# Preview five randomly chosen rows (unseeded, so the rows change on every run).
df_copy.sample(n=5)

Unnamed: 0,Age,Gender,Blood Type,Medical Condition,Insurance Provider,Billing Amount,Room Number,Admission Type,Medication,Test Results,Length Of Stay,Age Group,Admission Season,Cost Per Day
47771,24,0,3,2,0,6325.695,237,1,2,2,10,4,3,632.57
11174,38,0,6,5,2,24192.134,165,0,4,2,29,0,0,834.212
722,44,1,5,0,1,19691.421,264,0,0,1,25,0,1,787.657
34880,85,0,6,5,3,13024.522,474,1,3,2,25,3,3,520.981
31299,48,1,6,0,3,12756.307,456,0,0,1,16,0,2,797.269


In [7]:
# Split the frame into the target column and everything else.
# Note: both are taken from `df`, not from the `df_copy` previewed above.
y = df["Test Results"]
X = df.drop(columns=["Test Results"])

In [8]:
# Instantiate the project's RandomForest wrapper with no arguments (its defaults).
model = RandomForest()

In [9]:
# Fit the wrapper on the full feature frame and target.
# NOTE(review): no train/test split is done in this notebook; the later
# prediction()/evaluate() calls take no data, so the wrapper presumably holds
# out a test set internally -- confirm in modeling.rf.
model.train(X, y)

In [10]:
# Display the predictions produced by the trained wrapper.
# NOTE(review): called without inputs, so it presumably predicts on data the
# wrapper retained during train() -- confirm.
model.prediction()

array([0, 0, 0, ..., 0, 0, 0])

In [11]:
# Compute the evaluation result; the next cell reads accuracy, recall,
# precision, f1 and confusion_matrix from it.
metrics = model.evaluate()

In [12]:
# Pull each score off the evaluation result into its own notebook variable.
accuracy, recall, precision, f1, confusion_matrix = (
    metrics.accuracy,
    metrics.recall,
    metrics.precision,
    metrics.f1,
    metrics.confusion_matrix,
)

In [13]:
# Re-wrap the unpacked scores and print the formatted performance report.
EvaluationMetrics(
    accuracy,
    recall,
    precision,
    f1,
    confusion_matrix,
).display()

Model Performance Metrics:
-------------------------
Accuracy: 0.44
Recall: 0.44
Precision: 0.44
F1-Score: 0.44

Confusion Matrix:
[[1690 1047 1017]
 [1058 1575  984]
 [1088  999 1642]]


In [14]:
# Persist the trained wrapper with the project's save helper; the destination
# comes from MODEL_PATH in scripts.configuration.
save_model(model, MODEL_PATH)

Model is saved at E:\project4\model\model.pkl


In [11]:
from flask import Flask, request, jsonify, render_template
from scripts.configuration import MODEL_PATH, ENCODER_PATH
from scripts.features import new_features
from typing import List
import pandas as pd
import joblib


class ModelPredictor:
    """Serve single-record predictions from a persisted model and encoder.

    One raw record is supplied as a flat list ordered like ``FEATURES``. It is
    turned into a one-row DataFrame, enriched by the project's
    ``new_features`` helper, stripped of the two date columns, and has its
    categorical columns replaced by the encoder's output before being handed
    to the model. The predicted class code is mapped to a label through
    ``REVERSE_MAP``.
    """

    # Categorical columns passed through the persisted encoder.
    # Kept as a list on purpose: pandas treats a tuple key as one column
    # label, so ``df[self.COLUMNS]`` needs a list to select several columns.
    COLUMNS = [
        "Gender",
        "Blood Type",
        "Medical Condition",
        "Insurance Provider",
        "Admission Type",
        "Medication",
    ]

    # Order in which the raw values must appear in ``input_data``.
    FEATURES = [
        "Age",
        "Gender",
        "Blood Type",
        "Medical Condition",
        "Date of Admission",
        "Insurance Provider",
        "Billing Amount",
        "Room Number",
        "Admission Type",
        "Discharge Date",
        "Medication",
    ]

    # Class code -> human-readable label.
    # NOTE(review): must mirror how "Test Results" was encoded for training;
    # that encoding is not visible here -- confirm.
    REVERSE_MAP = {0: "Normal", 1: "Inconclusive", 2: "Abnormal"}

    def __init__(self, model_path: str, encoder_path: str):
        """Load the persisted model and encoder.

        Args:
            model_path: Path of the serialized model artifact.
            encoder_path: Path of the serialized categorical encoder.
        """
        # NOTE(review): joblib artifacts are pickle-based; loading a file from
        # an untrusted source can execute arbitrary code. Only point these
        # paths at artifacts produced by this project.
        self.model = joblib.load(model_path)
        self.encoder = joblib.load(encoder_path)

    def _prepare_features(self, input_data: List) -> pd.DataFrame:
        """Turn one raw record into the encoded one-row feature frame.

        Args:
            input_data: Raw values, one per entry of ``FEATURES`` and in the
                same order.

        Returns:
            A one-row DataFrame without the two date columns and with the
            ``COLUMNS`` entries replaced by the encoder's output.

        Raises:
            ValueError: If the number of values differs from ``FEATURES``.
        """
        values = list(input_data)

        # zip() stops at the shorter sequence, so a malformed record would
        # otherwise be truncated silently and fail later with an unrelated
        # KeyError (or, with extra values, not fail at all).
        if len(values) != len(self.FEATURES):
            raise ValueError(
                f"Expected {len(self.FEATURES)} values ordered as "
                f"{self.FEATURES}, got {len(values)}."
            )

        df = pd.DataFrame([dict(zip(self.FEATURES, values))])

        # Project helper that adds derived columns; it runs before the date
        # columns are dropped.
        df = new_features(df)

        df = df.drop(["Date of Admission", "Discharge Date"], axis=1)

        df[self.COLUMNS] = self.encoder.transform(df[self.COLUMNS])

        return df

    def predict(self, input_data: List) -> str:
        """Predict the test-result label for one raw record.

        Args:
            input_data: Raw values ordered like ``FEATURES``.

        Returns:
            The ``REVERSE_MAP`` label for the first prediction returned by the
            model's ``predict_client``.

        Raises:
            ValueError: If ``input_data`` has the wrong number of values.
            KeyError: If the model returns a code missing from ``REVERSE_MAP``.
        """
        features_df = self._prepare_features(input_data)

        prediction = self.model.predict_client(features_df)[0]

        return self.REVERSE_MAP[prediction]


# Build the inference helper from the persisted model and encoder artifacts.
# NOTE(review): this rebinds `model`, which an earlier cell bound to the
# trained RandomForest wrapper; re-running the training cells after this one
# will hit the wrong object. Consider a distinct name such as `predictor`.
model = ModelPredictor(model_path=MODEL_PATH, encoder_path=ENCODER_PATH)

In [12]:
# One raw sample record, ordered like ModelPredictor.FEATURES.
# Despite the name, this is a plain list rather than a DataFrame.
in_df = [
    30,
    "Male",
    "B-",
    "Cancer",
    "2024-01-31",
    "Blue Cross",
    18856.281305978155,
    328,
    "Urgent",
    "2024-02-02",
    "Paracetamol",
]

In [13]:
# Smoke-check preprocessing: show the encoded one-row frame built from the
# sample record (calls the private helper directly, for inspection only).
model._prepare_features(in_df)

Unnamed: 0,Age,Gender,Blood Type,Medical Condition,Insurance Provider,Billing Amount,Room Number,Admission Type,Medication,Length Of Stay,Age Group,Admission Season,Cost Per Day
0,30,1,5,2,1,18856.281306,328,2,3,2,1,0,9428.141
