In [15]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_wine
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.metrics import f1_score, mean_squared_error
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor

In [16]:
# Load the Wine dataset and pull out the feature matrix and the class labels.
data = load_wine()
X, y = data.data, data.target

# Hold out 20% of the samples for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [17]:
# Fit a seeded decision tree on the training split (fit() returns the estimator itself).
dt_classifier = DecisionTreeClassifier(random_state=42).fit(X_train, y_train)

# Predict the held-out split and report the F1 score averaged with average='weighted'.
y_pred_dt = dt_classifier.predict(X_test)
f1_dt = f1_score(y_test, y_pred_dt, average='weighted')
print(f"Decision Tree Classifier F1 Score: {f1_dt:.4f}")


Decision Tree Classifier F1 Score: 0.9440


In [18]:
# Fit a seeded random forest on the training split (fit() returns the estimator itself).
rf_classifier = RandomForestClassifier(random_state=42).fit(X_train, y_train)

# Predict the held-out split and report the F1 score averaged with average='weighted'.
y_pred_rf = rf_classifier.predict(X_test)
f1_rf = f1_score(y_test, y_pred_rf, average='weighted')
print(f"Random Forest Classifier F1 Score: {f1_rf:.4f}")


Random Forest Classifier F1 Score: 1.0000


In [19]:
# Side-by-side recap of the two classifier F1 scores computed in the cells above.
print(
    f"Decision Tree Classifier F1 Score: {f1_dt:.4f}",
    f"Random Forest Classifier F1 Score: {f1_rf:.4f}",
    sep="\n",
)


Decision Tree Classifier F1 Score: 0.9440
Random Forest Classifier F1 Score: 1.0000


In [20]:
# Regression baselines on the same train/test split, evaluated with mean squared error.
# NOTE(review): y_train / y_test hold the wine class labels, so these regressors treat
# the class index as a numeric target -- confirm this is the intended experiment.
#
# Predictions are stored under their own names (y_pred_*_reg) so the classifier
# predictions y_pred_dt / y_pred_rf from the earlier cells are not overwritten.

# Train a Decision Tree Regressor
dt_regressor = DecisionTreeRegressor(random_state=42)
dt_regressor.fit(X_train, y_train)
y_pred_dt_reg = dt_regressor.predict(X_test)
mse_dt = mean_squared_error(y_test, y_pred_dt_reg)
print(f"Decision Tree Regressor MSE: {mse_dt:.4f}")

# Train a Random Forest Regressor
rf_regressor = RandomForestRegressor(random_state=42)
rf_regressor.fit(X_train, y_train)
y_pred_rf_reg = rf_regressor.predict(X_test)
mse_rf = mean_squared_error(y_test, y_pred_rf_reg)
print(f"Random Forest Regressor MSE: {mse_rf:.4f}")


Decision Tree Regressor MSE: 0.1667
Random Forest Regressor MSE: 0.0648


In [23]:
# Hyperparameter grid for the random forest: 3 x 4 x 3 = 36 candidate combinations.
param_grid = {
    'n_estimators': [50, 100, 200],
    'max_depth': [None, 10, 20, 30],
    'min_samples_split': [2, 5, 10]
}

# Exhaustive search over the grid with 5-fold cross-validation on the training split,
# ranking candidates by weighted F1. GridSearchCV works on clones of the estimator,
# so the rf_classifier fitted earlier is not modified.
grid_search = GridSearchCV(
    estimator=rf_classifier,
    param_grid=param_grid,
    cv=5,
    scoring='f1_weighted',
)
grid_search.fit(X_train, y_train)

# Get the best parameters
best_params = grid_search.best_params_
print(f"Best Hyperparameters: {best_params}")

# With the default refit=True, GridSearchCV has already refitted the best configuration
# on the whole training split (same seed as rf_classifier), so reuse that model instead
# of constructing and training an identical forest a second time.
best_rf_classifier = grid_search.best_estimator_

# Predict on the test set
y_pred_best_rf = best_rf_classifier.predict(X_test)

# Calculate the F1 score
f1_best_rf = f1_score(y_test, y_pred_best_rf, average='weighted')
print(f"Optimized Random Forest Classifier F1 Score: {f1_best_rf:.4f}")


Best Hyperparameters: {'max_depth': None, 'min_samples_split': 2, 'n_estimators': 100}
Optimized Random Forest Classifier F1 Score: 1.0000
