<a href="https://colab.research.google.com/github/MangalaPriyadharshini/MangalaPriyadharshini/blob/main/MLAssignmentExtraTreeClassifierRFEFUNCTION.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [16]:
# ========================================
# Extra Trees Classifier + RFE (Diabetes Dataset)
# ========================================

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.feature_selection import RFE
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report
import pickle

# Step 1: Load Dataset
dataset = pd.read_csv("diabetes.csv")
X = dataset.drop(columns=["Outcome"])
y = dataset["Outcome"]

# Step 2: Train/Test Split
# Split BEFORE fitting anything, so the held-out rows (and their labels)
# cannot influence the scaler statistics or the RFE feature ranking.
# Fitting those on the full dataset leaks test information into training.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Step 3: Feature Scaling (statistics learned from the training rows only)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train_raw)
X_test_scaled = scaler.transform(X_test_raw)

# Step 4: Feature Selection using RFE with Extra Trees (training rows only)
et_model = ExtraTreesClassifier(random_state=42)
rfe = RFE(estimator=et_model, n_features_to_select=5)
X_train = rfe.fit_transform(X_train_scaled, y_train)
X_test = rfe.transform(X_test_scaled)

selected_features = X.columns[rfe.get_support()]
print("\n✅ Selected Features using RFE:", list(selected_features))

# Full-dataset views produced by the train-fitted transformers. They are kept
# under their original names for any later cell that still refers to them.
X_scaled = scaler.transform(X)
X_selected = rfe.transform(X_scaled)

# Step 5: Train Extra Trees Classifier
# RFE works on a clone of the estimator, so et_model itself is still unfitted
# here and is trained only on the scaled, selected training features.
et_model.fit(X_train, y_train)

# Step 6: Predictions
y_pred = et_model.predict(X_test)

# Step 7: Evaluation
print("\n📊 Extra Trees Classifier Performance Metrics:")
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Precision:", precision_score(y_test, y_pred, average="weighted"))
print("Recall:", recall_score(y_test, y_pred, average="weighted"))
print("F1 Score:", f1_score(y_test, y_pred, average="weighted"))

print("\nClassification Report:\n", classification_report(y_test, y_pred, digits=4))


✅ Selected Features using RFE: ['Pregnancies', 'Glucose', 'BMI', 'DiabetesPedigreeFunction', 'Age']

📊 Extra Trees Classifier Performance Metrics:
Accuracy: 0.7207792207792207
Precision: 0.7156109196925524
Recall: 0.7207792207792207
F1 Score: 0.7174071488539876

Classification Report:
               precision    recall  f1-score   support

           0     0.7714    0.8100    0.7902       100
           1     0.6122    0.5556    0.5825        54

    accuracy                         0.7208       154
   macro avg     0.6918    0.6828    0.6864       154
weighted avg     0.7156    0.7208    0.7174       154



In [17]:
import pickle

filename = "ExtraTreesClassifier.sav"

# Persist the trained classifier. The context manager guarantees the file is
# flushed and closed before it is read back below.
# NOTE: only the classifier is stored here; the fitted scaler and RFE selector
# are not part of this file, so inputs must be prepared the same way as the
# training features before calling predict on the reloaded model.
with open(filename, "wb") as model_file:
    pickle.dump(et_model, model_file)

# Reload the classifier from disk.
# Only unpickle files you created or trust: pickle.load can execute arbitrary
# code embedded in the file.
with open(filename, "rb") as model_file:
    loaded_model = pickle.load(model_file)



In [18]:
# The classifier was trained on standardized values of the RFE-selected
# columns, so a sample given in raw units must be scaled with the same
# statistics before prediction; feeding raw values puts it far outside the
# feature space the trees were fitted on.
# Values are ordered like `selected_features`.
# NOTE(review): the sample is assumed to be in raw (unscaled) units - confirm.
raw_sample = [[3, 4.5, 27, 4.9, 33]]

# The scaler was fitted on every column; keep only the statistics of the
# columns RFE retained so they line up with the sample.
support = rfe.get_support()
sample_scaled = (raw_sample - scaler.mean_[support]) / scaler.scale_[support]

result = loaded_model.predict(sample_scaled)

In [19]:
# Display the array returned by loaded_model.predict for the sample above.
result


array([1])