1.

In [11]:
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score

# Load the breast-cancer dataset and hold out 30% of the rows for testing.
data = load_breast_cancer()
X, y = data.data, data.target

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Two trees that differ only in depth: one unrestricted, one capped at 3 levels.
dt_full = DecisionTreeClassifier(random_state=42)
dt_pruned = DecisionTreeClassifier(max_depth=3, random_state=42)

# Fit each tree on the same split and report train/test accuracy.
# The second heading carries a leading newline so the two reports are
# separated by a blank line in the output.
for heading, tree in (
    ("Full Decision Tree:", dt_full),
    ("\nPruned Decision Tree (max_depth=3):", dt_pruned),
):
    tree.fit(X_train, y_train)

    y_train_pred = tree.predict(X_train)
    y_test_pred = tree.predict(X_test)

    print(heading)
    print("Train Accuracy:", accuracy_score(y_train, y_train_pred))
    print("Test Accuracy: ", accuracy_score(y_test, y_test_pred))


Full Decision Tree:
Train Accuracy: 1.0
Test Accuracy:  0.9415204678362573

Pruned Decision Tree (max_depth=3):
Train Accuracy: 0.9698492462311558
Test Accuracy:  0.9649122807017544


2.

In [12]:
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

def _fit_and_score(model, X_fit, y_fit, X_eval, y_eval):
    """Fit ``model`` on the training split and score it on both splits.

    Parameters
    ----------
    model : estimator exposing ``fit`` and ``predict``.
    X_fit, y_fit : training features and labels (also used for the train score).
    X_eval, y_eval : held-out features and labels.

    Returns
    -------
    tuple
        ``(train_accuracy, test_accuracy)`` as computed by ``accuracy_score``.
    """
    model.fit(X_fit, y_fit)
    train_acc = accuracy_score(y_fit, model.predict(X_fit))
    test_acc = accuracy_score(y_eval, model.predict(X_eval))
    return train_acc, test_acc


# Same dataset and 70/30 split as used for the other models in this cell.
data = load_breast_cancer()
X, y = data.data, data.target

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Unrestricted decision tree
dt_full = DecisionTreeClassifier(random_state=42)
dt_full_train_acc, dt_full_test_acc = _fit_and_score(
    dt_full, X_train, y_train, X_test, y_test
)

# Decision tree limited to depth 3
dt_pruned = DecisionTreeClassifier(max_depth=3, random_state=42)
dt_pruned_train_acc, dt_pruned_test_acc = _fit_and_score(
    dt_pruned, X_train, y_train, X_test, y_test
)

# Random forest with 100 trees
rf = RandomForestClassifier(n_estimators=100, random_state=42)
rf_train_acc, rf_test_acc = _fit_and_score(rf, X_train, y_train, X_test, y_test)

# Side-by-side report, accuracies rounded to four decimals.
print("Model Comparison (Train/Test Accuracy):\n")
print(f"Full Decision Tree  -> Train: {dt_full_train_acc:.4f}, Test: {dt_full_test_acc:.4f}")
print(f"Pruned Decision Tree-> Train: {dt_pruned_train_acc:.4f}, Test: {dt_pruned_test_acc:.4f}")
print(f"Random Forest (100) -> Train: {rf_train_acc:.4f}, Test: {rf_test_acc:.4f}")


Model Comparison (Train/Test Accuracy):

Full Decision Tree  -> Train: 1.0000, Test: 0.9415
Pruned Decision Tree-> Train: 0.9698, Test: 0.9649
Random Forest (100) -> Train: 1.0000, Test: 0.9708


3.

In [13]:
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import accuracy_score

# Load the dataset and create the 70/30 train/test split.
data = load_breast_cancer()
X, y = data.data, data.target

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Hyper-parameter values to sweep.
learning_rates = [0.01, 0.1]
n_estimators_list = [50, 100, 200]

# Every (learning_rate, n_estimators) combination, learning rate varying slowest.
grid = [(lr, n_est) for lr in learning_rates for n_est in n_estimators_list]

for lr, n_est in grid:
    # Train one gradient-boosting model per combination on the same split.
    gb = GradientBoostingClassifier(
        learning_rate=lr, n_estimators=n_est, random_state=42
    )
    gb.fit(X_train, y_train)

    y_train_pred = gb.predict(X_train)
    y_test_pred = gb.predict(X_test)

    print(f"\nGradient Boosting - learning_rate={lr}, n_estimators={n_est}")
    print("Train Accuracy:", accuracy_score(y_train, y_train_pred))
    print("Test Accuracy: ", accuracy_score(y_test, y_test_pred))



Gradient Boosting - learning_rate=0.01, n_estimators=50
Train Accuracy: 0.9773869346733668
Test Accuracy:  0.9590643274853801

Gradient Boosting - learning_rate=0.01, n_estimators=100
Train Accuracy: 0.9874371859296482
Test Accuracy:  0.9707602339181286

Gradient Boosting - learning_rate=0.01, n_estimators=200
Train Accuracy: 0.992462311557789
Test Accuracy:  0.9590643274853801

Gradient Boosting - learning_rate=0.1, n_estimators=50
Train Accuracy: 1.0
Test Accuracy:  0.9590643274853801

Gradient Boosting - learning_rate=0.1, n_estimators=100
Train Accuracy: 1.0
Test Accuracy:  0.9590643274853801

Gradient Boosting - learning_rate=0.1, n_estimators=200
Train Accuracy: 1.0
Test Accuracy:  0.9532163742690059


4.

In [15]:
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
import numpy as np

def _top_features(model, names, k=5):
    """Return the ``k`` feature names with the largest importances, highest first.

    Parameters
    ----------
    model : fitted estimator exposing ``feature_importances_``.
    names : sequence of feature names aligned with the importances.
    k : number of names to return.
    """
    # argsort is ascending, so reverse it and keep the leading k indices.
    ranked = np.argsort(model.feature_importances_)[::-1]
    return np.array(names)[ranked[:k]]


# Load the dataset together with its feature names.
data = load_breast_cancer()
X = data.data
y = data.target
features = data.feature_names

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Random forest: fit, then list its five most important features.
rf = RandomForestClassifier(n_estimators=100, random_state=42)
rf.fit(X_train, y_train)
rf_top5 = _top_features(rf, features)
print("Random Forest Top 5 Features:", rf_top5)

# Blank lines between the two reports.
print("\n")

# Gradient boosting: fit, then list its five most important features.
gb = GradientBoostingClassifier(n_estimators=100, learning_rate=0.1, random_state=42)
gb.fit(X_train, y_train)
gb_top5 = _top_features(gb, features)
print("Gradient Boosting Top 5 Features:", gb_top5)


Random Forest Top 5 Features: ['mean concave points' 'worst concave points' 'worst area'
 'mean concavity' 'worst radius']


Gradient Boosting Top 5 Features: ['mean concave points' 'worst concave points' 'worst perimeter'
 'worst area' 'worst texture']
