<a href="https://colab.research.google.com/github/Dukkipati-Likithasree/-CognoRise-Infotech/blob/main/DIABETES_PREDICTION.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import AdaBoostClassifier
from sklearn.metrics import accuracy_score, classification_report, precision_score, recall_score, f1_score, roc_auc_score
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.impute import SimpleImputer

# Load the dataset
data = pd.read_csv("diabetes_prediction_dataset.csv")

# Data Preprocessing: separate the feature columns from the 'diabetes' target
X = data.drop(columns=['diabetes'])
y = data['diabetes']

# Decide which rows are train and which are test *before* fitting any
# preprocessing step, so that fitted statistics come from training rows only.
train_idx, test_idx = train_test_split(
    np.arange(len(X)), test_size=0.2, random_state=42
)

# Handling missing values: learn the column means from the training rows only
# (fitting on every row would leak test-set statistics into preprocessing),
# then apply those means to the full numeric table.
X_numeric = X.select_dtypes(include=['float64', 'int64'])
imputer = SimpleImputer(strategy='mean')
imputer.fit(X_numeric.iloc[train_idx])
X_imputed = imputer.transform(X_numeric)

# Encoding categorical features (one-hot)
categorical_features = X.select_dtypes(include=['object']).columns.tolist()
X_categorical = pd.get_dummies(X[categorical_features])

# Combine imputed numeric data with encoded categorical data
X_processed = np.hstack((X_imputed, X_categorical))

# Split the dataset into training and testing sets using the row positions
# chosen above
X_train, X_test = X_processed[train_idx], X_processed[test_idx]
y_train, y_test = y.iloc[train_idx], y.iloc[test_idx]

# Feature Scaling: the scaler is fitted on the training split only and then
# reused, unchanged, on the test split
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Model Training: AdaBoost with Grid Search for Hyperparameter Tuning
param_grid = {
    'n_estimators': [50, 100, 200],
    'learning_rate': [0.01, 0.1, 1.0]
}

print("**AdaBoost Classifier**")
# Fixed seed so repeated runs of the search yield the same fitted model
ada_boost = AdaBoostClassifier(random_state=42)
grid_search = GridSearchCV(ada_boost, param_grid, cv=5, scoring='accuracy')
grid_search.fit(X_train_scaled, y_train)
best_ada_boost = grid_search.best_estimator_

# Evaluation
def evaluate_model(model, X_test, y_test):
    """Print classification metrics for a fitted binary classifier.

    Accuracy, precision, recall and F1 are computed from the hard class
    predictions returned by ``model.predict``. ROC-AUC is computed from
    continuous scores when the model exposes them, because scoring it on
    0/1 predictions reduces the ROC curve to a single operating point.

    Args:
        model: Fitted estimator exposing ``predict`` and, optionally,
            ``predict_proba`` or ``decision_function``.
        X_test: Feature matrix to evaluate on.
        y_test: True binary labels for ``X_test``.

    Returns:
        None. The metrics are printed to standard output.
    """
    y_pred = model.predict(X_test)

    # Pick the best available continuous score for ROC-AUC: the probability
    # of the second (greater-label) class, else the decision function, and
    # only as a last resort the hard predictions.
    if hasattr(model, "predict_proba"):
        y_score = model.predict_proba(X_test)[:, 1]
    elif hasattr(model, "decision_function"):
        y_score = model.decision_function(X_test)
    else:
        y_score = y_pred

    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)
    roc_auc = roc_auc_score(y_test, y_score)

    print("Model Results:")
    print(f"Accuracy: {accuracy:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"F1-Score: {f1:.4f}")
    print(f"ROC-AUC: {roc_auc:.4f}")

# Report the selected model's metrics on the scaled held-out test split
print("**Evaluation on Test Data**")
evaluate_model(model=best_ada_boost, X_test=X_test_scaled, y_test=y_test)


**AdaBoost Classifier**
**Evaluation on Test Data**
Model Results:
Accuracy: 0.9723
Precision: 0.9730
Recall: 0.6950
F1-Score: 0.8108
ROC-AUC: 0.8466
