In [2]:
import pandas as pd
import os
import mlflow
import mlflow.sklearn
from sklearn.preprocessing import OrdinalEncoder,StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.feature_selection import f_classif,SelectKBest
from sklearn.pipeline import Pipeline

In [3]:
def load_data(path):
    data=pd.read_csv(path)
    return data

def save_data(data,path):
    dirname="TEST"
    dirpath=os.path.join(path,dirname)
    os.makedirs(dirpath)
    data.to_csv(os.path.join(dirpath,"TRAIN.csv"),index=False)

In [4]:
data=load_data(r"C:\Users\Avijit\Desktop\TEST\data\healthcare_dataset.csv")

In [5]:
del data['Doctor']

In [13]:
data.columns.tolist()

['Name',
 'Age',
 'Gender',
 'Blood Type',
 'Medical Condition',
 'Date of Admission',
 'Doctor',
 'Hospital',
 'Insurance Provider',
 'Billing Amount',
 'Room Number',
 'Admission Type',
 'Discharge Date',
 'Medication',
 'Test Results']

In [14]:
data=data.drop(columns=['Name',
 'Date of Admission',
 'Hospital',
 'Insurance Provider',
 'Admission Type',
 'Discharge Date'])

In [6]:
data.to_csv(r"C:\Users\Avijit\Desktop\TEST\data\healthcare_dataset.csv",index=False)

In [75]:
def processing_data(data):
    numcol=data.select_dtypes(include=['number']).columns.tolist()
    catcol=data.select_dtypes(include=['object']).columns.tolist()
    catpipe=Pipeline(steps=[("LABELENCODING",OrdinalEncoder())])
    numpipe=Pipeline(steps=[("SCALING",StandardScaler())])
    mergepipe=ColumnTransformer(transformers=[("CATEGORICAL_DATA",catpipe,catcol),("NUMERICAL_DATA",numpipe,numcol)],remainder="passthrough")
    finalpipe=Pipeline(steps=[('DATA_PROCESSING',mergepipe)])
    df=finalpipe.fit_transform(data)
    df1=pd.DataFrame(df,columns=catcol+numcol)
    temp=df1.pop("Test Results")
    df1.insert(len(df1.columns.tolist()),"Test Results",temp)
    return df1

In [76]:
df=processing_data(data)

In [78]:
x=df.iloc[:,:-1]
y=df.iloc[:,-1:]

In [83]:
fc=SelectKBest(score_func=f_classif,k=4)

In [84]:
f1=fc.fit_transform(x,y)

  y = column_or_1d(y, warn=True)


In [86]:
dff=pd.DataFrame(f1,columns=x.columns[fc.get_support()])

In [88]:
dff=pd.concat([dff,y],axis=1)

In [89]:
dff

Unnamed: 0,Gender,Medication,Age,Billing Amount,Test Results
0,1.0,3.0,-1.098824,-0.470261,2.0
1,1.0,1.0,0.533639,0.570250,1.0
2,0.0,0.0,1.247842,0.169990,2.0
3,0.0,1.0,-1.200853,0.870465,0.0
4,0.0,4.0,-0.435636,-0.795211,0.0
...,...,...,...,...,...
55495,0.0,4.0,-0.486651,-1.610589,0.0
55496,0.0,0.0,0.482625,0.416462,2.0
55497,0.0,1.0,-0.690708,0.146464,0.0
55498,1.0,1.0,-0.435636,0.486357,0.0


In [90]:
from xgboost import XGBClassifier

xg=XGBClassifier()

In [None]:
Age,Gender,Blood Type,Medical Condition,Billing Amount,Room Number,Medication,Test Results
57,Male,O+,Diabetes,3616.898449997269,339,Aspirin,Inconclusive

In [19]:
data = {
    "Age": [57],
    "Gender": ["Male"],
    "Blood Type": ["O+"],
    "Medical Condition": ["Diabetes"],
    "Billing Amount": [3616.898449997269],
    "Room Number": [339],
    "Medication": ["Aspirin"]
    # Excluding "Test Results"
}

# Create DataFrame
df = pd.DataFrame(data)

In [20]:
import joblib as jb
pipe=jb.load("../processpipe/trainpipe.pkl")

In [21]:
data=processing_data(df)

In [22]:
data

Unnamed: 0,Age,Billing Amount,Room Number,Gender,Blood Type,Medical Condition,Medication
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [1]:
import pandas as pd
import joblib
import os

# Example new input
data = {
    "Age": [57],
    "Gender": ["Male"],
    "Blood Type": ["O+"],
    "Medical Condition": ["Diabetes"],
    "Billing Amount": [3616.898449997269],
    "Room Number": [339],
    "Medication": ["Aspirin"]
}

df = pd.DataFrame(data)
col=df.columns.tolist()
pipe_path = "../processpipe/feature_pipe.pkl"
trainpipe = joblib.load(pipe_path)
processed_df = trainpipe.transform(df)


In [36]:
processed_df=pd.DataFrame(processed_df,columns=col)

In [88]:
feature=pd.read_csv("../process/featured/train.csv").columns.tolist()
if "Test Results" in feature:
    feature.remove("Test Results")

In [92]:
print(model.feature_names_in_)


['Gender' 'Blood Type' 'Age']


In [63]:
processed_df

Unnamed: 0,Age,Gender,Blood Type,Medical Condition,Billing Amount,Room Number,Medication
0,1.0,6.0,3.0,0.0,0.276298,-1.537734,0.327633


In [65]:
df=processed_df[feature]

In [66]:
df

Unnamed: 0,Gender,Blood Type,Age,Billing Amount,Room Number
0,6.0,3.0,1.0,0.276298,-1.537734


In [90]:
model=joblib.load("../models/model.pkl")

In [70]:
output=model.predict(df)

In [71]:
label=joblib.load("../processpipe/target_pipe.pkl")

In [75]:
output=label.inverse_transform([output])

In [77]:
output

array([['Normal']], dtype=object)

In [79]:
data=pd.read_csv("../data/healthcare_dataset.csv")

In [87]:
data['Medication'].unique().tolist()

['Paracetamol', 'Ibuprofen', 'Aspirin', 'Penicillin', 'Lipitor']

In [85]:
data['Room Number'].unique().tolist()

[328,
 265,
 205,
 450,
 458,
 389,
 277,
 316,
 249,
 394,
 288,
 134,
 309,
 182,
 465,
 114,
 449,
 260,
 115,
 295,
 327,
 119,
 109,
 162,
 401,
 157,
 223,
 293,
 371,
 108,
 245,
 494,
 285,
 228,
 481,
 212,
 113,
 272,
 478,
 196,
 418,
 410,
 300,
 211,
 413,
 138,
 456,
 234,
 492,
 180,
 250,
 296,
 330,
 405,
 306,
 333,
 244,
 325,
 378,
 468,
 368,
 263,
 489,
 241,
 231,
 377,
 407,
 135,
 131,
 102,
 255,
 422,
 320,
 273,
 395,
 152,
 321,
 428,
 482,
 268,
 120,
 318,
 144,
 226,
 459,
 208,
 227,
 402,
 442,
 425,
 373,
 290,
 361,
 251,
 440,
 414,
 424,
 307,
 476,
 388,
 326,
 178,
 177,
 302,
 130,
 430,
 133,
 104,
 408,
 376,
 331,
 275,
 480,
 233,
 384,
 380,
 310,
 406,
 213,
 427,
 500,
 451,
 485,
 267,
 154,
 466,
 453,
 261,
 167,
 179,
 490,
 258,
 483,
 202,
 198,
 308,
 278,
 103,
 400,
 192,
 128,
 238,
 136,
 218,
 348,
 486,
 147,
 126,
 314,
 271,
 341,
 498,
 168,
 189,
 438,
 286,
 266,
 392,
 156,
 315,
 322,
 184,
 472,
 398,
 435,
 174,
 137

In [3]:
tp=joblib.load(r"C:\Users\Avijit\Desktop\TEST\processpipe\target_pipe.pkl")

In [8]:
tp.named_steps['LABELENCODING'].categories_[0].tolist()

['Abnormal', 'Inconclusive', 'Normal']