In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, classification_report
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import load_breast_cancer, load_iris
from sklearn.preprocessing import StandardScaler

In [2]:
# Pull both toy datasets from scikit-learn; each loader result exposes
# .data, .target and .feature_names, which are used below.
breast_cancer = load_breast_cancer()
iris = load_iris()

# Breast cancer: feature matrix as a column-labelled DataFrame, labels as a
# Series named "target".
x = pd.DataFrame(data=breast_cancer.data, columns=breast_cancer.feature_names)
y = pd.Series(data=breast_cancer.target, name="target")

# Iris: same layout, kept under *_iris names so both datasets can coexist.
x_iris = pd.DataFrame(data=iris.data, columns=iris.feature_names)
y_iris = pd.Series(data=iris.target, name="target")


In [3]:
# Hold out 20% of each dataset for evaluation. The fixed random_state makes
# the shuffle, and therefore the split, reproducible across runs.

# Breast cancer split
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)

# Iris split
x_train_iris, x_test_iris, y_train_iris, y_test_iris = train_test_split(
    x_iris,
    y_iris,
    test_size=0.2,
    random_state=42,
)


In [4]:
# Standardize the features. Each scaler is fitted on the training split only
# and then applied unchanged to the matching test split, so no test-set
# statistics leak into the transform.
#
# The frames are rebuilt with BOTH the original column names and the original
# row index. Omitting `index=` would put the features on a fresh 0..n-1 index
# while y_train / y_test keep the shuffled labels produced by
# train_test_split, silently misaligning any index-based pandas operation
# (boolean masks, concat, joins) between features and targets.

# Breast cancer
scaler_bc = StandardScaler()
x_train = pd.DataFrame(
    scaler_bc.fit_transform(x_train),
    columns=x_train.columns,
    index=x_train.index,
)
x_test = pd.DataFrame(
    scaler_bc.transform(x_test),
    columns=x_test.columns,
    index=x_test.index,
)

# Iris
scaler_iris = StandardScaler()
x_train_iris = pd.DataFrame(
    scaler_iris.fit_transform(x_train_iris),
    columns=x_train_iris.columns,
    index=x_train_iris.index,
)
x_test_iris = pd.DataFrame(
    scaler_iris.transform(x_test_iris),
    columns=x_test_iris.columns,
    index=x_test_iris.index,
)


In [5]:
def _report_classification(dataset_name, y_true, y_pred, average="binary", y_score=None):
    """Print the standard classification metrics for one dataset.

    Parameters
    ----------
    dataset_name : str
        Label used as the prefix of every printed line ("<name> Dataset - ...").
    y_true : array-like
        Ground-truth class labels.
    y_pred : array-like
        Predicted class labels, in the same order as ``y_true``.
    average : str, default "binary"
        Averaging mode forwarded to precision/recall/F1. Use "weighted"
        (or another multiclass mode) when there are more than two classes.
    y_score : array-like, optional
        Scores/probabilities for the positive class. When given, ROC AUC is
        printed as well; when omitted, the ROC AUC line is skipped.
    """
    prefix = f"{dataset_name} Dataset"
    print(f"{prefix} - Accuracy: {accuracy_score(y_true, y_pred):.4f}")
    print(f"{prefix} - Precision: {precision_score(y_true, y_pred, average=average):.4f}")
    print(f"{prefix} - Recall: {recall_score(y_true, y_pred, average=average):.4f}")
    print(f"{prefix} - F1 Score: {f1_score(y_true, y_pred, average=average):.4f}")
    if y_score is not None:
        print(f"{prefix} - ROC AUC: {roc_auc_score(y_true, y_score):.4f}")
    confusion = pd.crosstab(y_true, y_pred, rownames=['Actual'], colnames=['Predicted'])
    print(f"{prefix} - Confusion Matrix:\n{confusion}")
    print(f"{prefix} - Classification Report:\n{classification_report(y_true, y_pred)}")


# Each dataset gets its own estimator. Refitting a single shared instance on
# Iris would discard the fitted breast-cancer model, leaving no way to make
# further breast-cancer predictions after this cell has run.

# Train Random Forest on Breast Cancer
rf_bc = RandomForestClassifier(n_estimators=100, random_state=42)
rf_bc.fit(x_train, y_train)

# Predict and evaluate Breast Cancer
y_pred = rf_bc.predict(x_test)
# Column 1 of predict_proba is the probability of the second class in
# rf_bc.classes_ (label 1 here); roc_auc_score needs a score, not a hard label.
y_pred_proba = rf_bc.predict_proba(x_test)[:, 1]

_report_classification("Breast Cancer", y_test, y_pred, y_score=y_pred_proba)

# Train Random Forest on Iris
rf_iris = RandomForestClassifier(n_estimators=100, random_state=42)
rf_iris.fit(x_train_iris, y_train_iris)

# Predict and evaluate Iris
y_pred_iris = rf_iris.predict(x_test_iris)

print()  # blank line separating the two reports
# Iris has three classes, so precision/recall/F1 use weighted averaging and
# the binary ROC AUC line is omitted.
_report_classification("Iris", y_test_iris, y_pred_iris, average="weighted")

# Backward-compatible alias: this cell previously ended with `rf` fitted on
# Iris, so keep that name bound to the Iris model.
rf = rf_iris


Breast Cancer Dataset - Accuracy: 0.9649
Breast Cancer Dataset - Precision: 0.9589
Breast Cancer Dataset - Recall: 0.9859
Breast Cancer Dataset - F1 Score: 0.9722
Breast Cancer Dataset - ROC AUC: 0.9953
Breast Cancer Dataset - Confusion Matrix:
Predicted   0   1
Actual           
0          40   3
1           1  70
Breast Cancer Dataset - Classification Report:
              precision    recall  f1-score   support

           0       0.98      0.93      0.95        43
           1       0.96      0.99      0.97        71

    accuracy                           0.96       114
   macro avg       0.97      0.96      0.96       114
weighted avg       0.97      0.96      0.96       114


Iris Dataset - Accuracy: 1.0000
Iris Dataset - Precision: 1.0000
Iris Dataset - Recall: 1.0000
Iris Dataset - F1 Score: 1.0000
Iris Dataset - Confusion Matrix:
Predicted   0  1   2
Actual              
0          10  0   0
1           0  9   0
2           0  0  11
Iris Dataset - Classification Report:
     