<a href="https://colab.research.google.com/github/MangalaPriyadharshini/MangalaPriyadharshini/blob/main/MLAssignmentGradientBoostingRFEFUNCTION.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [12]:
# ========================================
# Gradient Boosting + RFE (Diabetes Dataset)
# ========================================

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import RFE
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report

# Step 1: Load Dataset
dataset = pd.read_csv("diabetes.csv")
X = dataset.drop(columns=["Outcome"])
y = dataset["Outcome"]

# Step 2: Feature Scaling (GBM doesn’t require scaling, but keep for consistency)
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Step 3: Apply RFE with Gradient Boosting
gb_clf = GradientBoostingClassifier(random_state=42)
rfe = RFE(gb_clf, n_features_to_select=5)
rfe.fit(X_scaled, y)

# Get selected features
selected_features = X.columns[rfe.support_]
print("\n✅ Selected Features by RFE:", list(selected_features))

# Use only selected features
X_selected = X_scaled[:, rfe.support_]

# Step 4: Train/Test Split
X_train, X_test, y_train, y_test = train_test_split(
    X_selected, y, test_size=0.2, random_state=42, stratify=y
)

# Step 5: Train Gradient Boosting Model
gb_clf.fit(X_train, y_train)
y_pred = gb_clf.predict(X_test)

# Step 6: Evaluation
print("\n📊 Gradient Boosting Performance Metrics:")
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Precision:", precision_score(y_test, y_pred, average="weighted"))
print("Recall:", recall_score(y_test, y_pred, average="weighted"))
print("F1 Score:", f1_score(y_test, y_pred, average="weighted"))

print("\nClassification Report:\n", classification_report(y_test, y_pred, digits=4))



✅ Selected Features by RFE: ['Glucose', 'Insulin', 'BMI', 'DiabetesPedigreeFunction', 'Age']

📊 Gradient Boosting Performance Metrics:
Accuracy: 0.7467532467532467
Precision: 0.7410499621674805
Recall: 0.7467532467532467
F1 Score: 0.7422593190996118

Classification Report:
               precision    recall  f1-score   support

           0     0.7850    0.8400    0.8116       100
           1     0.6596    0.5741    0.6139        54

    accuracy                         0.7468       154
   macro avg     0.7223    0.7070    0.7127       154
weighted avg     0.7410    0.7468    0.7423       154



In [8]:
import pickle
filename = "GradientBoostingClassifier.sav"
pickle.dump(gb_clf,open(filename,"wb"))
load_model=pickle.load(open(filename,"rb"))

In [9]:
result=load_model.predict([[3,4.5,27,4.9,33	]])

In [10]:
result

array([0])