<a href="https://colab.research.google.com/github/SnehaRS10/diabetes_prediction/blob/main/Daibetes_prediction1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import KFold, cross_val_predict
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

In [None]:
# Load the raw dataset. NOTE(review): the path looks like a Colab upload
# location — adjust it when running elsewhere.
df = pd.read_csv(filepath_or_buffer='/content/diabetesdataset.csv')

In [None]:
# Separate the regression target from the features. "ID" and "Name" are
# dropped together with the target, so they are never used as features.
y = df["Diabetes Percentage"]
X = df.drop(["ID", "Name", "Diabetes Percentage"], axis=1)

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)

In [None]:
# Step 2: Base Models
# Three untuned base regressors; the two ensembles are seeded so their fits are
# repeatable. The list order must line up with `param_grids`, which is zipped
# against it during tuning.
random_forest = RandomForestRegressor(random_state=42)
gradient_boosting = GradientBoostingRegressor(random_state=42)
support_vector = SVR()

models = [random_forest, gradient_boosting, support_vector]

In [None]:
# Hyperparameter grids, one per entry of `models` and in the same order
# (random forest, gradient boosting, SVR). Keys carry the `model__` prefix
# because each estimator is tuned as the 'model' step of a Pipeline.
param_grids = [
    {
        'model__n_estimators': [100, 200, 300],
        'model__max_depth': [None, 5, 10],
        'model__min_samples_split': [2, 5, 10],
        'model__min_samples_leaf': [1, 2, 4],
        # 'auto' is intentionally absent: it was deprecated in scikit-learn 1.1
        # and removed in 1.3. For regressors it meant "all features", which the
        # explicit 1.0 below already covers, so no candidate is lost.
        'model__max_features': ['sqrt', 'log2', 1.0]
    },
    {
        'model__n_estimators': [100, 200, 300],
        'model__learning_rate': [0.1, 0.01, 0.001],
        'model__max_depth': [3, 5, 7],
        'model__min_samples_split': [2, 5, 10],
        'model__min_samples_leaf': [1, 2, 4]
    },
    {
        'model__C': [0.1, 1, 10, 100],
        'model__gamma': ['scale', 'auto']
    }
]

# Filled by the tuning cell with the best fitted pipeline for each base model.
best_models = []

In [None]:
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
from sklearn.pipeline import Pipeline

# Preprocessing step to handle categorical variables using one-hot encoding.
# handle_unknown='ignore' encodes a category that was not seen during fit as an
# all-zero block instead of raising, so a rare value that appears only in a
# validation fold, in the test split, or in an answer typed by a user cannot
# abort transform(). Columns not listed here are passed through unchanged.
preprocessor = ColumnTransformer(
    transformers=[
        ('cat', OneHotEncoder(handle_unknown='ignore'),
         ['Gender', 'Occupation', 'City', 'Smoking', 'Drinking',
          'Unhealthy Diet', 'Lack of Exercise', 'Stress',
          'Irregular Sleep', 'Excessive Sugar Consumption',
          'Family History', 'Obesity', 'Sedentary Lifestyle'])
    ],
    remainder='passthrough'
)

In [None]:
# Train base models with hyperparameter tuning
# Start from an empty list so that re-running this cell does not append a
# second copy of every tuned model (the meta-model sizes its input from
# len(best_models), so duplicates would silently change the ensemble).
best_models = []
for model, param_grid in zip(models, param_grids):
    # Preprocessing lives inside the pipeline so the encoder is re-fitted on
    # each CV training fold rather than on the full training set.
    pipeline = Pipeline([
        ('preprocessor', preprocessor),
        ('model', model)
    ])
    grid_search = GridSearchCV(
        pipeline,
        param_grid,
        cv=5,
        scoring='neg_mean_squared_error',
        n_jobs=-1,  # candidate fits are independent, so use every available core
    )
    grid_search.fit(X_train, y_train)
    # With the default refit=True, best_estimator_ is the winning pipeline
    # refitted on all of X_train.
    best_models.append(grid_search.best_estimator_)

  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


In [None]:
# Step 3: Meta-Model
# The meta-model has to learn from predictions the base models made on rows
# they were NOT trained on. Asking models already fitted on X_train to predict
# X_train gives over-optimistic inputs, and the meta-model then over-weights
# whichever base model overfits the most. Out-of-fold predictions avoid that.
oof_cv = KFold(n_splits=5, shuffle=True, random_state=42)
base_model_predictions = np.zeros((len(X_train), len(best_models)))

# Generating out-of-fold predictions for each base model
for i, model in enumerate(best_models):
    # cross_val_predict fits clones of `model`, so the tuned pipelines stored in
    # best_models stay fitted on the full training set for use at predict time.
    base_model_predictions[:, i] = cross_val_predict(model, X_train, y_train, cv=oof_cv)

# Fitting the meta-model
meta_model = LinearRegression()
meta_model.fit(base_model_predictions, y_train)

In [None]:
# Step 4: Stacking
def stacking_predictions(base_models, meta_model, X):
    """Predict with the stacked ensemble.

    Every estimator in ``base_models`` predicts on ``X``. The predictions are
    stacked and transposed (one column per base model when each ``predict``
    returns a 1-D array) and passed to ``meta_model.predict``, whose result is
    returned.
    """
    per_model = [estimator.predict(X) for estimator in base_models]
    meta_features = np.transpose(np.array(per_model))
    return meta_model.predict(meta_features)

In [None]:
# Step 5: Evaluation
# Score the stacked ensemble on the held-out test split.
stacked_predictions = stacking_predictions(
    base_models=best_models,
    meta_model=meta_model,
    X=X_test,
)
mse_stacked = mean_squared_error(y_true=y_test, y_pred=stacked_predictions)
print("MSE of Stacked Model:", mse_stacked)

MSE of Stacked Model: 25.043707333601134


In [None]:
# Interactive prediction for one person. Requires the earlier cells to have run:
# it uses `X` (the training feature frame), `best_models`, `meta_model` and
# `stacking_predictions`.
import pandas as pd
import matplotlib.pyplot as plt


def _ask(prompt):
    """Prompt the user and return the answer without surrounding whitespace.

    Stray leading/trailing spaces would otherwise turn a valid answer such as
    "Yes " into a category the encoder has never seen.
    """
    return input(prompt).strip()


# Take input from the user
name = _ask("Enter name: ")
age = int(_ask("Enter age: "))  # raises ValueError if the answer is not an integer
gender = _ask("Enter gender (Male/Female): ")
occupation = _ask("Enter occupation (Engineer/Doctor/Teacher/Business/Student): ")
city = _ask("Enter city (Mumbai/Delhi/Bangalore/Kolkata/Chennai): ")
smoking = _ask("Smoking? (Yes/No): ")
drinking = _ask("Drinking? (Yes/No): ")
unhealthy_diet = _ask("Unhealthy Diet? (High/Medium/Low): ")
lack_of_exercise = _ask("Lack of Exercise? (High/Medium/Low): ")
stress = _ask("Stress level? (High/Medium/Low): ")
irregular_sleep = _ask("Irregular Sleep? (High/Medium/Low): ")
excessive_sugar_consumption = _ask("Excessive Sugar Consumption? (High/Medium/Low): ")
family_history = _ask("Family History of diabetes? (Yes/No): ")
obesity = _ask("Obesity? (Yes/No): ")
sedentary_lifestyle = _ask("Sedentary Lifestyle? (High/Medium/Low): ")

# Create a DataFrame with user input
user_data = pd.DataFrame({
    "Name": [name],
    "Age": [age],
    "Gender": [gender],
    "Occupation": [occupation],
    "City": [city],
    "Smoking": [smoking],
    "Drinking": [drinking],
    "Unhealthy Diet": [unhealthy_diet],
    "Lack of Exercise": [lack_of_exercise],
    "Stress": [stress],
    "Irregular Sleep": [irregular_sleep],
    "Excessive Sugar Consumption": [excessive_sugar_consumption],
    "Family History": [family_history],
    "Obesity": [obesity],
    "Sedentary Lifestyle": [sedentary_lifestyle]
})

# Give the models exactly the training feature columns, in training order.
# "Name" is not a feature (it is dropped when X is built), so it is left out.
# NOTE(review): assumes every remaining training column is collected above; a
# KeyError here means the dataset has a feature this form does not ask for.
user_features = user_data[list(X.columns)]

stacked_predictions_user = stacking_predictions(best_models, meta_model, user_features)
predicted_percentage = stacked_predictions_user[0]

# Print the predicted diabetes percentage for the user
print("Predicted Diabetes Percentage:", predicted_percentage)

# Print recommendations based on predicted diabetes percentage.
# This is one if/elif chain over non-overlapping bands [5, 10), [10, 15) and
# [15, 20], so exactly one set of advice is printed. Predictions below 5 or
# above 20 print no diet advice.
if 5 <= predicted_percentage < 10:
    print("Recommended Diet:")
    print("Carbohydrates: Emphasize complex carbohydrates like whole grains, legumes, and vegetables. "
          "Opt for high-fiber options to help regulate blood sugar levels.")
    print("Proteins: Include lean sources of protein such as poultry, fish, tofu, and legumes. "
          "These provide essential nutrients without adding excess saturated fats.")
    print("Fats: Choose healthy fats from sources like avocados, nuts, seeds, and olive oil. "
          "These fats support heart health and overall well-being.")
    print("Fruits and Vegetables: Aim to fill half your plate with colorful fruits and vegetables. "
          "They provide essential vitamins, minerals, and antioxidants while being low in calories.")
    print("Portion Control: Be mindful of portion sizes to prevent overeating. Use smaller plates and bowls, "
          "and pay attention to hunger cues to avoid unnecessary snacking.")
elif 10 <= predicted_percentage < 15:
    print("Recommended Diet:")
    print("Carbohydrates: Moderate carbohydrate intake, focusing on whole grains like quinoa, brown rice, and oats. "
          "Limit refined carbohydrates like white bread and sugary snacks.")
    print("Proteins: Continue to include lean protein sources, but monitor portion sizes to avoid excessive intake.")
    print("Monitoring: Consider monitoring blood sugar levels regularly, especially after meals, "
          "to understand how different foods affect your body.")
    print("Physical Activity: Incorporate regular physical activity into your routine, aiming for at least 30 minutes "
          "most days of the week. Exercise helps improve insulin sensitivity and manage blood sugar levels.")
elif 15 <= predicted_percentage <= 20:
    print("Recommended Diet:")
    print("Carbohydrates: Limit carbohydrate intake, especially from sources with a high glycemic index. "
          "Choose non-starchy vegetables, berries, and small portions of whole grains.")
    print("Proteins: Opt for lean protein sources and incorporate plant-based protein options like tofu, tempeh, "
          "and legumes.")
    print("Meal Timing: Consider spacing out meals evenly throughout the day to prevent large spikes in blood sugar levels.")
    print("Portion Monitoring: Pay close attention to portion sizes and the timing of meals and snacks "
          "to help manage blood sugar levels more effectively.")

# Lifestyle warning, printed once and independently of the diet band so it can
# neither suppress the diet advice nor be repeated.
if drinking.lower() == 'yes' or smoking.lower() == 'yes':
    print("Avoid drinking or smoking.")

