In [3]:
# Import necessary libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Load the dataset
df = pd.read_csv('/content/diabetes.csv')

# Handling zeros in features
for col in ['Glucose', 'BloodPressure', 'SkinThickness', 'Insulin', 'BMI']:
    df[col] = df[col].replace(0, np.nan)
    df[col] = df[col].fillna(df[col].mean())

# Split the data into features and target variable
X = df.drop('Outcome', axis=1)
y = df['Outcome']

# Standardize the features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# Hyperparameter tuning with GridSearchCV
param_grid = {
    'n_estimators': [200, 300, 400],
    'max_depth': [10, 15, 20, 25],
    'min_samples_split': [2, 5],
    'min_samples_leaf': [1, 2]
}

# Initialize Random Forest Classifier
rf = RandomForestClassifier(random_state=42)
grid_search = GridSearchCV(estimator=rf, param_grid=param_grid, cv=5, scoring='accuracy', n_jobs=-1)
grid_search.fit(X_train, y_train)

# Best model from GridSearch
best_rf = grid_search.best_estimator_

# Make predictions and evaluate the model
y_pred = best_rf.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)

# Output results
print("Best Parameters:", grid_search.best_params_)
print("Accuracy:", accuracy)
print("\nClassification Report:\n", classification_report(y_test, y_pred))
print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred))
# Personalized Health Recommendations
def health_recommendations(patient_data):
    recommendations = []

    # Example recommendations based on threshold values
    if patient_data['BMI'] > 30:
        recommendations.append("Consider weight management programs.")
    if patient_data['BloodPressure'] > 120:
        recommendations.append("Monitor blood pressure and reduce salt intake.")
    if patient_data['Glucose'] > 140:
        recommendations.append("Maintain a balanced diet low in sugar and carbs.")
    if patient_data['Insulin'] > 100:
        recommendations.append("Consult a healthcare provider about insulin levels.")

    return recommendations


#print("\nHealth Recommendations:", health_recommendations(sample_patient))
def get_patient_data():
    """Collect patient data from user input."""
    patient_data = {}
    patient_data['Pregnancies'] = int(input("Enter the number of pregnancies: "))
    patient_data['Glucose'] = float(input("Enter glucose level: "))
    patient_data['BloodPressure'] = float(input("Enter blood pressure: "))
    patient_data['SkinThickness'] = float(input("Enter skin thickness: "))
    patient_data['Insulin'] = float(input("Enter insulin level: "))
    patient_data['BMI'] = float(input("Enter BMI: "))
    patient_data['DiabetesPedigreeFunction'] = float(input("Enter diabetes pedigree function: "))
    patient_data['Age'] = int(input("Enter age: "))

    return pd.Series(patient_data)

# Collect patient data from user
sample_patient = get_patient_data()


# Scale the user input data using the same scaler fitted on training data
# Convert sample_patient to a DataFrame with the same column names as X
sample_patient_df = pd.DataFrame([sample_patient], columns=X.columns)
sample_patient_scaled = scaler.transform(sample_patient_df)

# Predict outcome for the sample patient
sample_prediction = best_rf.predict(sample_patient_scaled)
print("\nDiabetes Prediction (1=Diabetic, 0=Non-Diabetic):", sample_prediction[0])

# Generate health recommendations based on the patient data
print("\nHealth Recommendations:", health_recommendations(sample_patient))

Best Parameters: {'max_depth': 15, 'min_samples_leaf': 1, 'min_samples_split': 5, 'n_estimators': 200}
Accuracy: 0.7662337662337663

Classification Report:
               precision    recall  f1-score   support

           0       0.83      0.80      0.81        99
           1       0.66      0.71      0.68        55

    accuracy                           0.77       154
   macro avg       0.75      0.75      0.75       154
weighted avg       0.77      0.77      0.77       154


Confusion Matrix:
 [[79 20]
 [16 39]]
Enter the number of pregnancies: 2
Enter glucose level: 95
Enter blood pressure: 130
Enter skin thickness: 70
Enter insulin level: 110
Enter BMI: 32
Enter diabetes pedigree function: 0.755
Enter age: 48

Diabetes Prediction (1=Diabetic, 0=Non-Diabetic): 0

Health Recommendations: ['Consider weight management programs.', 'Monitor blood pressure and reduce salt intake.', 'Consult a healthcare provider about insulin levels.']
