# **Exercise - Ensemble Methods and Hyperparameter Tuning**

**1. Load Required Libraries (Very Basic)**

In [2]:
# Import numpy for numerical operations
import numpy as np

# Import datasets, models, and model-selection utilities from scikit-learn
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.tree import DecisionTreeClassifier

# Import evaluation metrics
from sklearn.metrics import accuracy_score, f1_score


**2. Load Dataset (Iris Dataset)**

In [3]:
# Fetch the bundled Iris dataset object
iris = load_iris()

# Unpack the feature matrix (model inputs) and the label vector (model targets)
X, y = iris.data, iris.target


**3. Split Dataset into Training and Testing**

In [4]:
# Hold out 20% of the samples for testing; the fixed seed makes the split repeatable
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


**4. Decision Tree Classifier**

In [5]:
# Build a depth-3 decision tree and train it on the training split in one step
# (fit() returns the estimator itself, so the two calls can be chained)
dt_model = DecisionTreeClassifier(max_depth=3, random_state=42).fit(X_train, y_train)

# Class predictions for the held-out test samples
dt_predictions = dt_model.predict(X_test)

# Score the predictions: plain accuracy, then F1 averaged with the 'weighted' scheme
dt_accuracy = accuracy_score(y_true=y_test, y_pred=dt_predictions)
dt_f1 = f1_score(y_true=y_test, y_pred=dt_predictions, average='weighted')

print("Decision Tree Accuracy:", dt_accuracy)
print("Decision Tree F1 Score:", dt_f1)


Decision Tree Accuracy: 1.0
Decision Tree F1 Score: 1.0


**5. Random Forest Classifier**

In [6]:
# Build a forest of 100 trees, each limited to depth 3, and train it on the
# training split in one step (fit() returns the estimator itself)
rf_model = RandomForestClassifier(n_estimators=100, max_depth=3, random_state=42).fit(
    X_train, y_train
)

# Class predictions for the held-out test samples
rf_predictions = rf_model.predict(X_test)

# Score the predictions: plain accuracy, then F1 averaged with the 'weighted' scheme
rf_accuracy = accuracy_score(y_true=y_test, y_pred=rf_predictions)
rf_f1 = f1_score(y_true=y_test, y_pred=rf_predictions, average='weighted')

print("Random Forest Accuracy:", rf_accuracy)
print("Random Forest F1 Score:", rf_f1)


Random Forest Accuracy: 1.0
Random Forest F1 Score: 1.0


**6. Compare Models**

In [7]:
# Header and underline emitted by a single call, one per line
print("Model Comparison", "----------------", sep="\n")

# Side-by-side weighted F1 of the two models on the same test split
print("Decision Tree F1 Score:", dt_f1)
print("Random Forest F1 Score:", rf_f1)


Model Comparison
----------------
Decision Tree F1 Score: 1.0
Random Forest F1 Score: 1.0


**7. Hyperparameter Tuning**

In [8]:
# GridSearchCV is imported in the notebook's import cell with the other
# scikit-learn names, so this cell declares no dependencies of its own.

# Candidate values per hyperparameter; the search tries every combination
# (2 * 3 * 2 = 12 candidates)
param_grid = {
    'n_estimators': [50, 100],
    'max_depth': [2, 3, 4],
    'min_samples_split': [2, 5]
}

# Exhaustive search over the grid: each candidate is scored with weighted F1
# using 5-fold cross-validation on the training data only.
# refit=True is spelled out because best_estimator_ (used below) only exists
# when the winning configuration is retrained on the full training split.
grid_search = GridSearchCV(
    estimator=RandomForestClassifier(random_state=42),
    param_grid=param_grid,
    scoring='f1_weighted',
    cv=5,
    refit=True
)

# Run the search (fits one forest per candidate per fold, plus the final refit)
grid_search.fit(X_train, y_train)

# Forest retrained on all of X_train with the best-scoring parameters
best_model = grid_search.best_estimator_

print("Best Parameters:", grid_search.best_params_)


Best Parameters: {'max_depth': 3, 'min_samples_split': 2, 'n_estimators': 50}


**8. Test Best Model**

In [9]:
# Run the tuned forest on the held-out test samples
best_predictions = best_model.predict(X_test)

# Weighted F1 of the tuned model, computed the same way as for the earlier models
best_f1 = f1_score(y_true=y_test, y_pred=best_predictions, average='weighted')

print("Best Random Forest F1 Score:", best_f1)


Best Random Forest F1 Score: 1.0
