<a href="https://colab.research.google.com/github/SARIKELLA-MADHU/Machine-Learning/blob/main/week_8.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Decision Tree vs Random Forest Comparison
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Synthetic classification data: 1000 samples, 10 features (6 informative)
X, y = make_classification(
    n_samples=1000, n_features=10, n_informative=6, random_state=42
)
# Hold out 20% of the samples for evaluation
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Single classifier: one decision tree
dt = DecisionTreeClassifier(random_state=42).fit(X_train, y_train)
y_pred_dt = dt.predict(X_test)

# Ensemble classifier: random forest of 100 trees
rf = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)
y_pred_rf = rf.predict(X_test)


def metrics(y_true, y_pred):
    """Return the tuple (accuracy, precision, recall, F1) for the given predictions."""
    scorers = (accuracy_score, precision_score, recall_score, f1_score)
    return tuple(scorer(y_true, y_pred) for scorer in scorers)


# Score both models on the same held-out split so the tuples are comparable
dt_results = metrics(y_test, y_pred_dt)
rf_results = metrics(y_test, y_pred_rf)

print("Decision Tree:", dt_results)
print("Random Forest:", rf_results)


Decision Tree: (0.82, 0.7428571428571429, 0.896551724137931, 0.8125)
Random Forest: (0.855, 0.7843137254901961, 0.9195402298850575, 0.8465608465608465)


In [None]:
# Max, Average, and Weighted Voting
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.metrics import accuracy_score
from sklearn.ensemble import VotingClassifier

X, y = load_iris(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Base models. The decision tree and the SVC probability estimates are
# randomized, so both are seeded to keep the cell reproducible across runs.
model1 = LogisticRegression(max_iter=200)
model2 = DecisionTreeClassifier(random_state=42)
model3 = SVC(probability=True, random_state=42)
estimators = [('lr', model1), ('dt', model2), ('svc', model3)]

# Simple Ensemble Techniques
# 'hard' = majority vote on predicted labels, 'soft' = average of predicted probabilities
max_voting = VotingClassifier(estimators=estimators, voting='hard')
avg_voting = VotingClassifier(estimators=estimators, voting='soft')

max_voting.fit(X_train, y_train)
avg_voting.fit(X_train, y_train)

# Weighted Voting (weights assigned by model accuracy)
# The weights are estimated with 5-fold cross-validation on the TRAINING split
# only. Deriving them from X_test would leak the evaluation data into the
# ensemble and make the accuracy reported below optimistic.
wts = [
    cross_val_score(model, X_train, y_train, cv=5).mean()
    for _, model in estimators
]
weighted_voting = VotingClassifier(estimators=estimators, voting='soft', weights=wts)
weighted_voting.fit(X_train, y_train)

# Accuracy Comparison (all three ensembles scored on the untouched test split)
print("Max Voting Accuracy:", accuracy_score(y_test, max_voting.predict(X_test)))
print("Average Voting Accuracy:", accuracy_score(y_test, avg_voting.predict(X_test)))
print("Weighted Voting Accuracy:", accuracy_score(y_test, weighted_voting.predict(X_test)))


Max Voting Accuracy: 1.0
Average Voting Accuracy: 1.0
Weighted Voting Accuracy: 1.0


In [None]:
# Hard vs Soft Voting Comparison
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import VotingClassifier
from sklearn.metrics import accuracy_score

# Iris data with 20% of the samples held out for testing
iris = load_iris()
X_train, X_test, y_train, y_test = train_test_split(
    iris.data, iris.target, test_size=0.2, random_state=42)

# Base models. SVC(probability=True) and the decision tree both use random
# numbers internally; seeding them makes the comparison repeatable between runs.
estimator = [
    ('LR', LogisticRegression(max_iter=200)),
    ('SVC', SVC(probability=True, random_state=42)),
    ('DTC', DecisionTreeClassifier(random_state=42))
]

# Same base models, two combination rules:
# 'hard' takes the majority label, 'soft' averages the predicted probabilities.
hard_voting = VotingClassifier(estimators=estimator, voting='hard')
soft_voting = VotingClassifier(estimators=estimator, voting='soft')

hard_voting.fit(X_train, y_train)
soft_voting.fit(X_train, y_train)

print("Hard Voting Accuracy:", accuracy_score(y_test, hard_voting.predict(X_test)))
print("Soft Voting Accuracy:", accuracy_score(y_test, soft_voting.predict(X_test)))


Hard Voting Accuracy: 1.0
Soft Voting Accuracy: 1.0


In [None]:
# Exploring Bagging using Random Forest
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Synthetic classification data: 1000 samples, 10 features (6 informative)
X, y = make_classification(
    n_samples=1000, n_features=10, n_informative=6, random_state=42
)
# Hold out 20% of the samples for evaluation
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Grow forests of increasing size and report test accuracy for each
for n in (10, 50, 100):
    rf = RandomForestClassifier(n_estimators=n, max_depth=None, random_state=42)
    y_pred = rf.fit(X_train, y_train).predict(X_test)
    acc = accuracy_score(y_test, y_pred)
    print(f"Estimators={n}, Accuracy={acc:.3f}")


Estimators=10, Accuracy=0.825
Estimators=50, Accuracy=0.850
Estimators=100, Accuracy=0.855


In [None]:
# RandomForestRegressor with Out-of-Bag (OOB) Score
import pandas as pd
from sklearn.datasets import load_diabetes
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score

# Dataset
data = load_diabetes()
X, y = data.data, data.target

# Model: oob_score=True makes the forest score each sample using only the
# trees that did not see it during bootstrapping (out-of-bag estimate).
rf = RandomForestRegressor(n_estimators=100, oob_score=True, random_state=42)
rf.fit(X, y)

# Evaluation
# `pred` is computed on the same X the forest was fitted on, so the metrics
# derived from it are in-sample (training) numbers and are labelled as such.
# The out-of-bag figures are the ones that estimate performance on unseen data.
pred = rf.predict(X)
oob_pred = rf.oob_prediction_
print("OOB Score:", rf.oob_score_)
print("OOB MSE:", mean_squared_error(y, oob_pred))
print("Train MSE:", mean_squared_error(y, pred))
print("Train R2 Score:", r2_score(y, pred))


OOB Score: 0.4205759837575107
MSE: 476.8155608597285
R2 Score: 0.9195910933940452


In [None]:
# AdaBoost, Gradient Boosting, and XGBoost (CatBoost is listed in the title but not run in this cell)
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.ensemble import AdaBoostClassifier, GradientBoostingClassifier

# Synthetic classification data: 1000 samples, 20 features (10 informative)
X, y = make_classification(
    n_samples=1000, n_features=20, n_informative=10, random_state=42
)
# Hold out 20% of the samples for evaluation
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# 1. AdaBoost
ada = AdaBoostClassifier(n_estimators=50, learning_rate=1.0, random_state=42)
ada.fit(X_train, y_train)
ada_pred = ada.predict(X_test)
print("AdaBoost Accuracy:", accuracy_score(y_test, ada_pred))

# 2. Gradient Boosting
gb = GradientBoostingClassifier(n_estimators=100, learning_rate=0.1, random_state=42)
gb.fit(X_train, y_train)
gb_pred = gb.predict(X_test)
print("Gradient Boosting Accuracy:", accuracy_score(y_test, gb_pred))

# 3. XGBoost (imported here, after the two scikit-learn models have reported)
from xgboost import XGBClassifier
xgb = XGBClassifier(n_estimators=100, learning_rate=0.1, random_state=42)
xgb.fit(X_train, y_train)
xgb_pred = xgb.predict(X_test)
print("XGBoost Accuracy:", accuracy_score(y_test, xgb_pred))


AdaBoost Accuracy: 0.85
Gradient Boosting Accuracy: 0.93
XGBoost Accuracy: 0.925
