In [5]:
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

In [2]:
# Загрузка данных
data_class = load_breast_cancer()
X_class, y_class = data_class.data, data_class.target

# Разделение данных на тренировочную и тестовую выборки
X_train_class, X_test_class, y_train_class, y_test_class = train_test_split(X_class, y_class, test_size=0.2, random_state=42)

# Бейзлайн модель логистической регрессии
log_reg = LogisticRegression(max_iter=10000)
log_reg.fit(X_train_class, y_train_class)
y_pred_class = log_reg.predict(X_test_class)

# Оценка
accuracy_class = accuracy_score(y_test_class, y_pred_class)
precision_class = precision_score(y_test_class, y_pred_class)
recall_class = recall_score(y_test_class, y_pred_class)
f1_class = f1_score(y_test_class, y_pred_class)

print(f"Baseline Logistic Regression Classification Metrics:\nAccuracy: {accuracy_class}\nPrecision: {precision_class}\nRecall: {recall_class}\nF1 Score: {f1_class}")

Baseline Logistic Regression Classification Metrics:
Accuracy: 0.956140350877193
Precision: 0.9459459459459459
Recall: 0.9859154929577465
F1 Score: 0.9655172413793104


In [6]:
from sklearn.datasets import fetch_california_housing
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler

In [21]:
# Загрузка данных
data_reg = fetch_california_housing()
X_reg, y_reg = data_reg.data, data_reg.target

# Нормализация данных
scaler = StandardScaler()
X_reg_scaled = scaler.fit_transform(X_reg)

# Разделение данных на тренировочную и тестовую выборки
X_train_reg, X_test_reg, y_train_reg, y_test_reg = train_test_split(X_reg_scaled, y_reg, test_size=0.2, random_state=42)


lin_reg = LinearRegression()
lin_reg.fit(X_train_reg, y_train_reg)
y_pred_reg = lin_reg.predict(X_test_reg)

# Оценка
mae_reg = mean_absolute_error(y_test_reg, y_pred_reg)
mse_reg = mean_squared_error(y_test_reg, y_pred_reg)
r2_reg = r2_score(y_test_reg, y_pred_reg)

print(f"Baseline Linear Regression Metrics:\nMAE: {mae_reg}\nMSE: {mse_reg}\nR-squared: {r2_reg}")


Baseline Linear Regression Metrics:
MAE: 0.5332001304956561
MSE: 0.5558915986952441
R-squared: 0.575787706032451


In [22]:
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline

In [23]:
# Пайплайн для логистической регрессии
pipeline_class = Pipeline([
    ('scaler', StandardScaler()),
    ('log_reg', LogisticRegression(max_iter=10000))
])

param_grid_class = {
    'log_reg__C': [0.01, 0.1, 1, 10, 100],
    'log_reg__solver': ['liblinear', 'saga']
}

grid_search_class = GridSearchCV(pipeline_class, param_grid_class, cv=5, scoring='accuracy')
grid_search_class.fit(X_train_class, y_train_class)

# Лучший классификатор
best_log_reg = grid_search_class.best_estimator_

# Оценка
y_pred_class_improved = best_log_reg.predict(X_test_class)
accuracy_class_improved = accuracy_score(y_test_class, y_pred_class_improved)
precision_class_improved = precision_score(y_test_class, y_pred_class_improved)
recall_class_improved = recall_score(y_test_class, y_pred_class_improved)
f1_class_improved = f1_score(y_test_class, y_pred_class_improved)

print(f"Improved Logistic Regression Classification Metrics:\nAccuracy: {accuracy_class_improved}\nPrecision: {precision_class_improved}\nRecall: {recall_class_improved}\nF1 Score: {f1_class_improved}")


Improved Logistic Regression Classification Metrics:
Accuracy: 0.9912280701754386
Precision: 0.9861111111111112
Recall: 1.0
F1 Score: 0.993006993006993


In [10]:
from sklearn.preprocessing import PolynomialFeatures

In [24]:
# Пайплайн для линейной регрессии
pipeline_reg = Pipeline([
    ('scaler', StandardScaler()),
    ('poly_features', PolynomialFeatures(degree=2, include_bias=False)),
    ('lin_reg', LinearRegression())
])

pipeline_reg.fit(X_train_reg, y_train_reg)

# Оценка
y_pred_reg_improved = pipeline_reg.predict(X_test_reg)
mae_reg_improved = mean_absolute_error(y_test_reg, y_pred_reg_improved)
mse_reg_improved = mean_squared_error(y_test_reg, y_pred_reg_improved)
r2_reg_improved = r2_score(y_test_reg, y_pred_reg_improved)

print(f"Improved Linear Regression Metrics:\nMAE: {mae_reg_improved}\nMSE: {mse_reg_improved}\nR-squared: {r2_reg_improved}")

Improved Linear Regression Metrics:
MAE: 0.4670009334696574
MSE: 0.4643015238301253
R-squared: 0.6456819729261881


In [13]:
import numpy as np

In [14]:
class CustomLogisticRegression:
    def __init__(self, learning_rate=0.01, n_iters=1000):
        self.learning_rate = learning_rate
        self.n_iters = n_iters
        self.weights = None
        self.bias = None

    def fit(self, X, y):
        n_samples, n_features = X.shape
        self.weights = np.zeros(n_features)
        self.bias = 0

        for _ in range(self.n_iters):
            model = np.dot(X, self.weights) + self.bias
            y_predicted = self._sigmoid(model)

            dw = (1 / n_samples) * np.dot(X.T, (y_predicted - y))
            db = (1 / n_samples) * np.sum(y_predicted - y)

            self.weights -= self.learning_rate * dw
            self.bias -= self.learning_rate * db

    def predict(self, X):
        linear_model = np.dot(X, self.weights) + self.bias
        y_predicted = self._sigmoid(linear_model)
        y_predicted_labels = [1 if i > 0.5 else 0 for i in y_predicted]
        return np.array(y_predicted_labels)

    def _sigmoid(self, x):
        return 1 / (1 + np.exp(-x))

# Обучение и оценка пользовательской логистической регрессии
custom_log_reg = CustomLogisticRegression(learning_rate=0.01, n_iters=1000)
custom_log_reg.fit(X_train_class, y_train_class)
y_pred_class_custom = custom_log_reg.predict(X_test_class)

accuracy_class_custom = accuracy_score(y_test_class, y_pred_class_custom)
precision_class_custom = precision_score(y_test_class, y_pred_class_custom)
recall_class_custom = recall_score(y_test_class, y_pred_class_custom)
f1_class_custom = f1_score(y_test_class, y_pred_class_custom)

print(f"Custom Logistic Regression Classification Metrics:\nAccuracy: {accuracy_class_custom}\nPrecision: {precision_class_custom}\nRecall: {recall_class_custom}\nF1 Score: {f1_class_custom}")

Custom Logistic Regression Classification Metrics:
Accuracy: 0.9473684210526315
Precision: 1.0
Recall: 0.9154929577464789
F1 Score: 0.9558823529411765


  return 1 / (1 + np.exp(-x))


In [25]:
class CustomLinearRegression:
    def __init__(self, learning_rate=0.01, n_iters=1000):
        self.learning_rate = learning_rate
        self.n_iters = n_iters
        self.weights = None
        self.bias = None

    def fit(self, X, y):
        n_samples, n_features = X.shape
        self.weights = np.zeros(n_features)
        self.bias = 0

        for _ in range(self.n_iters):
            y_predicted = np.dot(X, self.weights) + self.bias
            dw = (1 / n_samples) * np.dot(X.T, (y_predicted - y))
            db = (1 / n_samples) * np.sum(y_predicted - y)

            self.weights -= self.learning_rate * dw
            self.bias -= self.learning_rate * db

    def predict(self, X):
        return np.dot(X, self.weights) + self.bias

# Обучение и оценка пользовательской линейной регрессии
custom_lin_reg = CustomLinearRegression(learning_rate=0.01, n_iters=1000)
custom_lin_reg.fit(X_train_reg, y_train_reg)
y_pred_reg_custom = custom_lin_reg.predict(X_test_reg)

mae_reg_custom = mean_absolute_error(y_test_reg, y_pred_reg_custom)
mse_reg_custom = mean_squared_error(y_test_reg, y_pred_reg_custom)
r2_reg_custom = r2_score(y_test_reg, y_pred_reg_custom)

print(f"Custom Linear Regression Metrics:\nMAE: {mae_reg_custom}\nMSE: {mse_reg_custom}\nR-squared: {r2_reg_custom}")

Custom Linear Regression Metrics:
MAE: 0.5482169312280617
MSE: 0.5673403507337071
R-squared: 0.5670509282565295
