In [None]:
# prompt: transforming healthcare with AI-powered disease prediction based on patient data

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

# Train and evaluate a disease-prediction classifier from a CSV of patient
# records: read 'patient_data.csv', apply light preprocessing, fit a random
# forest on 80% of the rows, and print accuracy plus a per-class report for
# the held-out 20%.

# Load the patient data (replace 'patient_data.csv' with your actual file)
try:
    data = pd.read_csv('patient_data.csv')
except FileNotFoundError:
    print("Error: 'patient_data.csv' not found. Please upload your data file.")
    # Raise SystemExit directly: the bare exit() helper is injected by the
    # `site` module for interactive use and is not guaranteed to exist, and
    # called without an argument it reports success (status 0) on failure.
    raise SystemExit(1)


# Preprocess the data (handle missing values, convert categorical features, etc.)
# Example: fill missing age values with the mean age.
# The filled column is assigned back rather than calling
# fillna(inplace=True) on the selection: the in-place form operates on an
# intermediate object, which pandas warns about and which leaves `data`
# unchanged once Copy-on-Write is enabled.
if 'age' in data.columns and data['age'].isnull().any():
    data['age'] = data['age'].fillna(data['age'].mean())

# Assuming 'disease' is the target variable
if 'disease' not in data.columns:
    print("Error: 'disease' column not found in the dataset.")
    raise SystemExit(1)

# One-hot encode categorical features (example: 'gender')
# NOTE(review): every other column is passed to the model unchanged, so any
# additional non-numeric column needs to be encoded here as well.
if 'gender' in data.columns:
    data = pd.get_dummies(data, columns=['gender'], prefix=['gender'])


# Select features (X) and target variable (y): every column except the
# target is used as a feature.
features = [col for col in data.columns if col != 'disease']
X = data[features]
y = data['disease']

# Split data into training and testing sets (80% train / 20% test); the fixed
# random_state keeps the split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train a RandomForestClassifier (you can experiment with other models)
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

# Make predictions on the test set
y_pred = model.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy}")
print(classification_report(y_test, y_pred))


# Example prediction for a new patient
# (the new frame must contain the same columns as `features`, including the
# one-hot encoded gender columns, in the same order)
# new_patient_data = pd.DataFrame({'feature1': [value1], 'feature2': [value2], ...})
# predicted_disease = model.predict(new_patient_data)
# print(f"Predicted Disease: {predicted_disease[0]}")
