In [3]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns

In [4]:
# Load the raw healthcare dataset from CSV into a DataFrame.
# NOTE(review): this is an absolute, machine-specific path; the notebook will
# only run as-is on a machine that has the file at this exact location.
DATA_PATH = "C:/Shalinii/ml/archive (5)/healthcare_dataset.csv"
df = pd.read_csv(DATA_PATH)

In [5]:
# Columns kept for modelling. The last entry ('Test Results') is the
# prediction target; every entry before it is used as an input feature.
columns_to_use = [
    'Gender',
    'Blood Type',
    'Medical Condition',
    'Doctor',
    'Hospital',
    'Admission Type',
    'Medication',
    'Insurance Provider',
    'Test Results',
]

# Restrict the frame to just those columns (in the order listed above).
df = df[columns_to_use]

In [6]:
# Discard every row that has a missing value in any of the selected columns.
df = df.dropna(axis=0, how="any")

In [7]:
# Preview the first rows. Leaving the frame as the cell's last expression lets
# Jupyter render it with its rich (HTML table) display instead of wrapped text.
df.head()

   Gender Blood Type Medical Condition            Doctor  \
0    Male         B-            Cancer     Matthew Smith   
1    Male         A+           Obesity   Samantha Davies   
2  Female         A-           Obesity  Tiffany Mitchell   
3  Female         O+          Diabetes       Kevin Wells   
4  Female        AB+            Cancer    Kathleen Hanna   

                     Hospital Admission Type   Medication Insurance Provider  \
0             Sons and Miller         Urgent  Paracetamol         Blue Cross   
1                     Kim Inc      Emergency    Ibuprofen           Medicare   
2                    Cook PLC      Emergency      Aspirin              Aetna   
3  Hernandez Rogers and Vang,       Elective    Ibuprofen           Medicare   
4                 White-White         Urgent   Penicillin              Aetna   

   Test Results  
0        Normal  
1  Inconclusive  
2        Normal  
3      Abnormal  
4      Abnormal  


In [8]:
# Integer-encode every feature column (all of `columns_to_use` except the
# final target entry), keeping one fitted encoder per column so that new
# samples can be encoded the same way later.
# NOTE(review): this overwrites the columns of `df` in place, so running the
# cell a second time would re-fit the encoders on the already-encoded integers.
feature_columns = columns_to_use[:-1]
label_encoders = {name: LabelEncoder() for name in feature_columns}
for name, encoder in label_encoders.items():
    df[name] = encoder.fit_transform(df[name])

In [9]:
# Fit a dedicated encoder on the target labels, then replace the target column
# with its integer codes. The encoder is kept so predictions can be mapped
# back to the original label strings.
target_encoder = LabelEncoder().fit(df['Test Results'])
df['Test Results'] = target_encoder.transform(df['Test Results'])

In [10]:
# Separate the encoded target (y) from the encoded feature matrix (X).
y = df['Test Results']
X = df.drop(columns=['Test Results'])

In [11]:
# Hold out 20% of the rows for evaluation; the fixed seed makes the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [12]:
# Train a 100-tree random forest on the training split. The seed is fixed so
# the fitted model is reproducible.
model = RandomForestClassifier(
    n_estimators=100,
    random_state=42,
)
model.fit(X_train, y_train)

RandomForestClassifier(random_state=42)

In [13]:
# Score the held-out split and print per-class precision / recall / F1,
# labelling each class with its original (pre-encoding) name.
y_pred = model.predict(X_test)
report = classification_report(
    y_test,
    y_pred,
    target_names=target_encoder.classes_,
)
print("Classification Report:\n")
print(report)

Classification Report:

              precision    recall  f1-score   support

    Abnormal       0.44      0.46      0.45      3754
Inconclusive       0.43      0.42      0.43      3617
      Normal       0.44      0.43      0.44      3729

    accuracy                           0.44     11100
   macro avg       0.44      0.44      0.44     11100
weighted avg       0.44      0.44      0.44     11100



In [27]:
# A single, hand-written patient record to run through the trained model.
# Keys are the feature column names; values are the raw (un-encoded) strings.
new_data = {
    "Gender": "Female",
    "Blood Type": "A+",
    "Medical Condition": "Obesity",
    "Doctor": "Samantha Davies",
    "Hospital": "Kim Inc",
    "Admission Type": "Emergency",
    "Medication": "Ibuprofen",
    "Insurance Provider": "Medicare",
}

In [28]:
# Encode the raw sample in `new_data` into the integer codes the model expects.
#
# The encoders are walked in `label_encoders` order, which is the order the
# feature columns were encoded for training. Driving the loop from the
# encoders (rather than from `new_data`) keeps the features aligned with the
# model even if the keys of `new_data` are written in a different order, and
# prevents stray keys from being appended as raw, un-encoded values.
missing_columns = [name for name in label_encoders if name not in new_data]
if missing_columns:
    # Fail early with a clear message instead of producing a short feature
    # vector that only errors later inside `model.predict`.
    raise KeyError(f"new_data is missing required feature(s): {missing_columns}")

encoded_data = []
for column, encoder in label_encoders.items():
    value = new_data[column]
    if value in encoder.classes_:
        encoded_value = encoder.transform([value])[0]
    else:
        # Category never seen when the encoder was fitted: keep the -1
        # sentinel as a best-effort fallback, but say so explicitly because
        # the model has never been trained on this code.
        print(f"Warning: unseen value {value!r} for column {column!r}; encoding as -1")
        encoded_value = -1
    encoded_data.append(encoded_value)


In [29]:
# Shape the encoded sample as a single-row table for prediction.
# The model was fitted on a DataFrame (`X`), so the sample is wrapped in a
# one-row DataFrame carrying the same column names; passing a bare NumPy array
# instead makes scikit-learn warn that the input lacks valid feature names.
input_array = pd.DataFrame([encoded_data], columns=X.columns)

In [30]:
# Predict the encoded class for the sample, then map the integer code back to
# its original label string using the target encoder.
prediction = model.predict(input_array)
predicted_class = target_encoder.inverse_transform(prediction)

# Only one sample was submitted, so report the first (and only) label.
predicted_label = predicted_class[0]
print("Predicted Test Result:", predicted_label)

Predicted Test Result: Normal


