# **HEALTHCARE PREDICTION**



# ***Importing required libraries***

In [8]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, OneHotEncoder, StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report

**Load the dataset**

In [3]:
# Read the raw healthcare records from the CSV file in the working directory.
df = pd.read_csv(filepath_or_buffer="healthcare_dataset.csv")

**Drop irrelevant columns**


In [5]:
# Build the modelling frame by removing the columns that are not used as
# features; the raw `df` is left untouched.
df_model = df.drop(
    columns=[
        "Name",
        "Doctor",
        "Hospital",
        "Room Number",
        "Date of Admission",
        "Discharge Date",
    ]
)


# Separate features and target

In [6]:
# Everything except the target column is a feature.
X = df_model.drop("Medical Condition", axis=1)
# The target is the "Medical Condition" column on its own.
y = df_model.loc[:, "Medical Condition"]

# Encode target

In [9]:
# Learn the class -> integer mapping from the full target column, then apply it.
# `fit` returns the encoder itself, so the fitted instance is what gets stored.
label_encoder = LabelEncoder().fit(y)
y_encoded = label_encoder.transform(y)

# Sample smaller data for faster training

In [10]:
# Upper bound on the number of rows used for the quick training run.
SAMPLE_SIZE = 5000

# DataFrame.sample draws without replacement by default and raises ValueError
# when `n` exceeds the number of rows, so clamp the request to what is available.
n_sample_rows = min(SAMPLE_SIZE, len(df_model))
df_sample = df_model.sample(n=n_sample_rows, random_state=42)

# Split the sampled rows into features and target.
X_sample = df_sample.drop(columns=["Medical Condition"])
# Encode the sampled target with the already-fitted `label_encoder` so the
# integer codes line up with `label_encoder.classes_`.
y_sample = label_encoder.transform(df_sample["Medical Condition"])

# Identify column types


In [11]:
# Text-like (object dtype) feature columns; these are one-hot encoded later.
categorical_cols = X_sample.select_dtypes(include="object").columns.tolist()

# All numeric feature columns. Selecting by the generic "number" family rather
# than only int64/float64 keeps columns of other numeric widths (int32,
# float32, ...) from ending up in neither list, where ColumnTransformer's
# default remainder="drop" would silently discard them.
numerical_cols = X_sample.select_dtypes(include="number").columns.tolist()


# Preprocessing pipeline

In [12]:
# Column-wise preprocessing:
#   - "num": standardise the numeric columns
#   - "cat": one-hot encode the categorical columns; categories not seen
#            during fit are ignored at transform time instead of raising
preprocessor = ColumnTransformer(
    [
        ("num", StandardScaler(), numerical_cols),
        ("cat", OneHotEncoder(handle_unknown="ignore"), categorical_cols),
    ]
)


# Complete pipeline

In [13]:
# Chain preprocessing and the classifier so both are fitted together on the
# training data and applied consistently at prediction time.
clf_pipeline = Pipeline(
    [
        ("preprocessor", preprocessor),
        ("classifier", LogisticRegression(max_iter=500)),
    ]
)


# Train-test split

In [14]:
# Hold out 20% of the sampled rows for evaluation; the fixed seed keeps the
# split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X_sample,
    y_sample,
    test_size=0.2,
    random_state=45,
)


# Fit model

In [15]:
# Fit the preprocessing steps and the classifier on the training split.
clf_pipeline.fit(X=X_train, y=y_train)

# Predict

In [16]:
# Predict encoded class labels for the held-out rows.
y_pred = clf_pipeline.predict(X=X_test)

# Evaluate

In [17]:
# Fraction of held-out rows whose predicted class matches the true class.
accuracy = accuracy_score(y_test, y_pred)

# LabelEncoder maps classes to the integers 0..n_classes-1. Passing that full
# label list explicitly keeps `target_names` aligned with the labels even if
# some class happens to be absent from both y_test and y_pred; without it the
# number of names would not match the labels the report infers from the data.
class_labels = list(range(len(label_encoder.classes_)))
report = classification_report(
    y_test,
    y_pred,
    labels=class_labels,
    target_names=label_encoder.classes_,
)

In [18]:

# Headline metric: overall accuracy on the held-out split.
print("Accuracy:", accuracy)
# Per-class precision / recall / F1 table produced by classification_report.
print("\nClassification Report:\n", report)

Accuracy: 0.164

Classification Report:
               precision    recall  f1-score   support

   Arthritis       0.13      0.12      0.12       176
      Asthma       0.17      0.12      0.14       154
      Cancer       0.17      0.21      0.19       179
    Diabetes       0.16      0.19      0.17       158
Hypertension       0.18      0.15      0.16       161
     Obesity       0.19      0.19      0.19       172

    accuracy                           0.16      1000
   macro avg       0.16      0.16      0.16      1000
weighted avg       0.16      0.16      0.16      1000

