<a href="https://colab.research.google.com/github/KuzmenkoO/amazinum_home_work/blob/main/lesson_10_evaluation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:

import numpy as np
from sklearn.datasets import load_breast_cancer, load_diabetes, fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression, LinearRegression
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, roc_auc_score,
    mean_absolute_error, mean_squared_error, r2_score
)
from sklearn.preprocessing import StandardScaler

# Logistic Regression: Breast Cancer
# Load the dataset and hold out 20% of the rows for evaluation (fixed seed).
data = load_breast_cancer()
X, y = data.data, data.target
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=0
)

# Features are fed to the model unscaled; max_iter is set very high,
# presumably so the solver has room to converge -- TODO confirm.
clf = LogisticRegression(max_iter=10000).fit(X_train, y_train)
y_pred = clf.predict(X_test)
# Column 1 of predict_proba holds the probability of the second class (label 1).
y_prob = clf.predict_proba(X_test)[:, 1]

print("Logistic Regression: Breast Cancer")
# These four metrics are all computed from the hard label predictions.
for label, metric in (
    ("Accuracy:", accuracy_score),
    ("Precision:", precision_score),
    ("Recall:", recall_score),
    ("F1 Score:", f1_score),
):
    print(label, metric(y_test, y_pred))
# ROC AUC is scored on the class-1 probabilities rather than the hard labels.
print("ROC AUC:", roc_auc_score(y_test, y_prob))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))


Logistic Regression: Breast Cancer
Accuracy: 0.9473684210526315
Precision: 0.9841269841269841
Recall: 0.9253731343283582
F1 Score: 0.9538461538461539
ROC AUC: 0.9936487773896475
Confusion Matrix:
 [[46  1]
 [ 5 62]]


In [2]:

# Decision Tree Classifier: Breast Cancer
# X_train / X_test / y_train / y_test are not created here; this cell relies on
# the breast-cancer split already being in the kernel from an earlier cell.
clf_tree = DecisionTreeClassifier(max_depth=5, random_state=0).fit(X_train, y_train)
y_pred_tree = clf_tree.predict(X_test)
# Column 1 of predict_proba holds the probability of the second class (label 1).
y_prob_tree = clf_tree.predict_proba(X_test)[:, 1]

print("\nDecision Tree Classifier: Breast Cancer")
# These four metrics are all computed from the hard label predictions.
for label, metric in (
    ("Accuracy:", accuracy_score),
    ("Precision:", precision_score),
    ("Recall:", recall_score),
    ("F1 Score:", f1_score),
):
    print(label, metric(y_test, y_pred_tree))
# ROC AUC is scored on the class-1 probabilities rather than the hard labels.
print("ROC AUC:", roc_auc_score(y_test, y_prob_tree))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred_tree))



Decision Tree Classifier: Breast Cancer
Accuracy: 0.9473684210526315
Precision: 0.9552238805970149
Recall: 0.9552238805970149
F1 Score: 0.9552238805970149
ROC AUC: 0.9266433788504287
Confusion Matrix:
 [[44  3]
 [ 3 64]]


In [3]:
# Linear Regression: Diabetes
diabetes = load_diabetes()
X, y = diabetes.data, diabetes.target

# Split BEFORE scaling: fitting the scaler on the full dataset would let the
# held-out rows influence the preprocessing statistics (data leakage).
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=0
)

# Learn mean/std from the training rows only, then apply them to the test rows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)
# Kept so any later cell that reads X_scaled still works; it is the full
# feature matrix transformed with the train-only statistics.
X_scaled = scaler.transform(X)

reg = LinearRegression()
reg.fit(X_train, y_train)
y_pred = reg.predict(X_test)

# Compute MSE once and derive RMSE from it instead of scoring twice.
mse = mean_squared_error(y_test, y_pred)

print("\nLinear Regression: Diabetes")
print("MAE:", mean_absolute_error(y_test, y_pred))
print("MSE:", mse)
print("RMSE:", np.sqrt(mse))
print("R2 Score:", r2_score(y_test, y_pred))



Linear Regression: Diabetes
MAE: 46.1735850037048
MSE: 3424.259334298694
RMSE: 58.51717127731563
R2 Score: 0.33223321731061806


In [4]:
# Decision Tree Regressor: California Housing
housing = fetch_california_housing()
X, y = housing.data, housing.target
# Keep a seeded random subsample of 5000 rows; the discarded remainder is unused.
X, _, y, _ = train_test_split(X, y, train_size=5000, random_state=0)

# Hold out 20% of the subsample for evaluation.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

# random_state is fixed (matching the classifier cell) because scikit-learn
# permutes features at every split, so an unseeded tree is not guaranteed to be
# identical from run to run.
tree_reg = DecisionTreeRegressor(max_depth=6, random_state=0)
tree_reg.fit(X_train, y_train)
y_pred = tree_reg.predict(X_test)

# Compute MSE once and derive RMSE from it instead of scoring twice.
mse = mean_squared_error(y_test, y_pred)

print("\nDecision Tree Regressor: California Housing")
print("MAE:", mean_absolute_error(y_test, y_pred))
print("MSE:", mse)
print("RMSE:", np.sqrt(mse))
print("R2 Score:", r2_score(y_test, y_pred))


Decision Tree Regressor: California Housing
MAE: 0.5150575077374525
MSE: 0.508654422092013
RMSE: 0.7132001276584384
R2 Score: 0.6286694615521109
