✅ Step 1: Install Required Libraries

In [1]:
!pip install pandas numpy scikit-learn joblib streamlit



✅ Step 2: Load and Preprocess Healthcare Data
📌 (e.g., Diabetes Prediction Dataset)

In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# Read the diabetes dataset from a CSV file; the relative path is resolved
# against the notebook's current working directory.
df = pd.read_csv(filepath_or_buffer="diabetes.csv")

# Preview the first five rows (left as the cell's last expression so the
# notebook renders it as a table).
df.head(n=5)

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


✅ Step 3: Prepare Data for AI Model Training

In [3]:
# Separate predictors from the label: every column except "Outcome" is a
# feature, and "Outcome" itself is the target to predict.
X = df.drop(columns=["Outcome"])  # Features
y = df["Outcome"]  # Target

# Hold out 20% of the rows for testing; the fixed random_state keeps the split
# the same on every run.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit the scaler on the training rows only, then reuse those fitted statistics
# for the test rows so that nothing about the test set influences preprocessing.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

print("✅ Data is preprocessed and ready for AI training!")

âœ… Data is preprocessed and ready for AI training!


✅ Step 4: Train the AI Model

In [4]:
# Fit a 100-tree random forest on the scaled training features. random_state
# pins the forest's internal randomness so reruns on the same data are repeatable.
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train_scaled, y_train)

# Evaluate on the held-out rows: accuracy is the fraction of test labels that
# the model predicts correctly.
y_pred = model.predict(X_test_scaled)
accuracy = accuracy_score(y_test, y_pred)

print(f"🎯 Model Accuracy: {accuracy:.2f}")

ðŸŽ¯ Model Accuracy: 0.72


✅ Step 5: Save Model & Scaler for Deployment

In [5]:
import joblib

# Persist both the fitted model and the fitted scaler. They are saved as a
# pair because new inputs must be scaled with this same scaler before they
# are passed to the model.
joblib.dump(model, "healthcare_model.pkl")
joblib.dump(scaler, "scaler.pkl")

print("✅ Model and Scaler Saved Successfully!")

âœ… Model and Scaler Saved Successfully!


✅ Step 6: Test the Model with Sample Patient Data

In [6]:
import numpy as np
import pandas as pd

# Example patient data: a single row whose values are listed in the same
# order as feature_names below.
sample_patient = np.array([[2, 120, 70, 20, 85, 28.5, 0.45, 30]])

# Wrap the row in a DataFrame that carries the feature column names, so the
# scaler receives named columns just as it did when it was fitted.
feature_names = ["Pregnancies", "Glucose", "BloodPressure", "SkinThickness",
                 "Insulin", "BMI", "DiabetesPedigreeFunction", "Age"]

sample_patient_df = pd.DataFrame(sample_patient, columns=feature_names)

# Reload the persisted artifacts, as a deployed app would.
# NOTE: joblib files are pickle-based and can execute arbitrary code when
# loaded; only load files that you created yourself or otherwise trust.
model = joblib.load("healthcare_model.pkl")
scaler = joblib.load("scaler.pkl")

# Apply the scaling that was fitted on the training data before predicting.
sample_patient_scaled = scaler.transform(sample_patient_df)

# Predict outcome; the result is an array with one label per input row.
prediction = model.predict(sample_patient_scaled)

# Display result: label 1 is reported as high risk, anything else as low risk.
if prediction[0] == 1:
    print("⚠ High Risk Detected! Consult a Doctor.")
else:
    print("✅ Low Risk - Stay Healthy!")

âœ… Low Risk - Stay Healthy!



✅ Step 7: Run the Streamlit App