In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix
import joblib

# Step 1: Read the raw healthcare records from disk
data = pd.read_csv('Project_healthcare_dataset.csv')

# Step 2: Preprocessing
# Text-valued columns that get replaced by integer codes. Only the first four
# are used as model inputs further down; the remaining ones are still encoded
# and their encoders are stored alongside the others.
categorical_features = ['Gender', 'Blood Type', 'Medical Condition', 'Medication',
                        'Admission Type', 'Doctor', 'Hospital', 'Insurance Provider']
label_encoders = {}

# One encoder per column: register it first, then overwrite the column with
# its integer codes so the same mapping can be replayed at prediction time
for feature in categorical_features:
    label_encoders[feature] = le = LabelEncoder()
    data[feature] = le.fit_transform(data[feature])

# Model inputs (column order here is the order the scaler and model are fit on)
features = ['Age', 'Gender', 'Blood Type', 'Medical Condition', 'Medication']
X = data[features]

# The target gets its own encoder, stored under the target's column name so
# predictions can be decoded back into their original labels
target_encoder = LabelEncoder()
label_encoders['Test Results'] = target_encoder
y = target_encoder.fit_transform(data['Test Results'])

# Hold out 20% of the rows for evaluation; the seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Learn the scaling statistics from the training rows only, then apply the
# same transformation to both partitions
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Step 3: Fit a 100-tree random forest on the scaled training rows
rf_model = RandomForestClassifier(n_estimators=100, random_state=42).fit(
    X_train_scaled, y_train
)

# Persist everything the prediction helpers need to reproduce this pipeline
for artifact, filename in (
    (rf_model, 'random_forest_model.pkl'),
    (scaler, 'scaler.pkl'),
    (label_encoders, 'label_encoders.pkl'),
):
    joblib.dump(artifact, filename)

# Step 4: Score the model on the held-out rows
# NOTE(review): the recorded run of this cell shows 1.00 for every metric.
# That is unusual enough to be worth confirming that no identical rows ended
# up on both sides of the split before trusting the number.
y_pred = rf_model.predict(X_test_scaled)

print("Random Forest Classification Report:")
report = classification_report(
    y_test, y_pred, target_names=target_encoder.classes_
)
print(report)
print("Accuracy:", accuracy_score(y_test, y_pred))

# Rows are true classes, columns are predicted classes
print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred))



  from pandas.core.computation.check import NUMEXPR_INSTALLED
  from pandas.core import (


Random Forest Classification Report:
              precision    recall  f1-score   support

    Abnormal       1.00      1.00      1.00      1747
Inconclusive       1.00      1.00      1.00       887
      Normal       1.00      1.00      1.00      7566

    accuracy                           1.00     10200
   macro avg       1.00      1.00      1.00     10200
weighted avg       1.00      1.00      1.00     10200

Accuracy: 1.0
Confusion Matrix:
[[1747    0    0]
 [   0  887    0]
 [   0    0 7566]]


In [3]:
def predict_new_patient(model, scaler, encoders, patient_data):
    """Predict the 'Test Results' label for one patient record.

    Args:
        model: Fitted classifier exposing ``predict``.
        scaler: Fitted scaler exposing ``transform``. When it carries a
            ``feature_names_in_`` attribute, the record's columns are put in
            that order before scaling.
        encoders: Mapping from column name to a fitted encoder exposing
            ``classes_``, ``transform`` and ``inverse_transform``. Must hold
            an entry for each categorical feature and for 'Test Results'.
        patient_data: Mapping of feature name to raw (un-encoded) value for a
            single patient. It is read but never modified.

    Returns:
        The decoded predicted label, or a string of the form
        ``"Error: Unseen label '<value>' in '<feature>'"`` when a categorical
        value is not among the encoder's known classes.

    Raises:
        KeyError: If a categorical feature is missing from ``encoders`` or
            from ``patient_data``.
    """
    categorical_features = ['Gender', 'Blood Type', 'Medical Condition', 'Medication']

    # Encode into a copy: overwriting the caller's dict would make a second
    # call on the same record see integer codes and report them as unseen.
    encoded = dict(patient_data)

    for feature in categorical_features:
        le = encoders[feature]
        raw_value = encoded[feature]
        if raw_value not in le.classes_:
            # `st` (Streamlit) is not imported by this notebook, so calling it
            # unconditionally raised NameError. Surface the message in the UI
            # only when such a module has actually been bound to `st`.
            ui = globals().get('st')
            if ui is not None and hasattr(ui, 'error'):
                ui.error(f"Unseen category '{raw_value}' in feature '{feature}'.")
            return f"Error: Unseen label '{raw_value}' in '{feature}'"
        encoded[feature] = le.transform([raw_value])[0]

    patient_df = pd.DataFrame([encoded])

    # Do not depend on the key order of the incoming dict: when the scaler
    # remembers the columns it was fit on, present them in that order. If a
    # column is missing, leave the frame as is and let the scaler report it.
    expected = getattr(scaler, 'feature_names_in_', None)
    if expected is not None and set(expected) <= set(patient_df.columns):
        patient_df = patient_df[list(expected)]

    patient_scaled = scaler.transform(patient_df)

    prediction = model.predict(patient_scaled)
    result = encoders['Test Results'].inverse_transform(prediction)[0]
    return result


In [None]:
def predict_test_result(model, scaler, encoders, patient_data):
    """Predict the 'Test Results' label for one patient record.

    Args:
        model: Fitted classifier exposing ``predict``.
        scaler: Fitted scaler exposing ``transform``. When it carries a
            ``feature_names_in_`` attribute, the record's columns are put in
            that order before scaling.
        encoders: Mapping from column name to a fitted encoder exposing
            ``classes_``, ``transform`` and ``inverse_transform``. Must hold
            an entry for each categorical feature and for 'Test Results'.
        patient_data: Mapping of feature name to raw (un-encoded) value for a
            single patient. It is read but never modified.

    Returns:
        The predicted label, decoded back through the 'Test Results' encoder.

    Raises:
        ValueError: If a categorical value is not among the classes its
            encoder was fit on.
        KeyError: If a categorical feature is missing from ``encoders`` or
            from ``patient_data``.
    """
    # Categorical features to encode
    categorical_features = ['Gender', 'Blood Type', 'Medical Condition', 'Medication']

    # Encode into a copy: overwriting the caller's dict would make a second
    # call on the same record fail, because the integer codes written back
    # are not members of the encoder's classes.
    encoded = dict(patient_data)

    for feature in categorical_features:
        encoder = encoders[feature]
        raw_value = encoded[feature]
        if raw_value not in encoder.classes_:
            raise ValueError(f"Unseen category '{raw_value}' in feature '{feature}'")
        encoded[feature] = encoder.transform([raw_value])[0]

    # Create a single-row DataFrame for the patient
    patient_df = pd.DataFrame([encoded])

    # Do not depend on the key order of the incoming dict: when the scaler
    # remembers the columns it was fit on, present them in that order. If a
    # column is missing, leave the frame as is and let the scaler report it.
    expected = getattr(scaler, 'feature_names_in_', None)
    if expected is not None and set(expected) <= set(patient_df.columns):
        patient_df = patient_df[list(expected)]

    # Apply the same scaling that was used during training
    patient_scaled = scaler.transform(patient_df)

    # Predict, then map the class index back to its original label
    prediction = model.predict(patient_scaled)
    result = encoders['Test Results'].inverse_transform(prediction)[0]

    return result


In [5]:
# Raw attributes for one hypothetical patient. 'Test Results' is left out on
# purpose: it is the value the model is asked to produce.
new_patient = {
    'Age': 27,
    'Gender': 'Female',
    'Blood Type': 'B+',
    'Medical Condition': 'Diabetes',
    'Medication': 'Paracetamol',
}

# Restore the three artifacts written by the training cell, in the order
# model, scaler, encoders
loaded_model, loaded_scaler, loaded_encoders = (
    joblib.load(artifact_path)
    for artifact_path in ('random_forest_model.pkl', 'scaler.pkl', 'label_encoders.pkl')
)

# Run the prediction; a ValueError (raised by the helper for a category the
# encoders have never seen) is reported instead of aborting the cell
try:
    result = predict_test_result(loaded_model, loaded_scaler, loaded_encoders, new_patient)
    print("Predicted Test Result:", result)
except ValueError as err:
    print("Error:", err)

Predicted Test Result: Normal
