## Линейные методы

Набор данных: выбран набор данных с информацией о пассажирах Титаника.

Импорт необходимых библиотек:

In [None]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.impute import SimpleImputer
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from build_data import get_processed_data
from lin_regression import (
    transform_labels as transform_labels_lr,
    add_intercept as add_intercept_lr,
    ridge_regression,
    predict as predict_lr,
    evaluate_accuracy as evaluate_accuracy_lr
)
from lin_classification import (
    transform_labels as transform_labels_lc,
    add_intercept as add_intercept_lc,
    LinearClassifier,
)
from svm import SVM_SMO

# Best hyperparameters found by the tuning functions further down; each one
# overwrites its own global so the learning-curve code can reuse the values.
best_params_lc = None  # assigned in linear_classification_with_gd
best_params_svm = None  # assigned in svm_with_smo
best_lambda_lr = None  # assigned in linear_regression_with_ridge

При помощи функций из build_data.py выполняются:

загрузка тренировочного и тестового наборов данных, предобработка данных и разделение тренировочного набора данных на тренировочную и валидационную выборки.

In [None]:
def load_data(train_path, test_path):
    """Read the training and test CSV sources into two DataFrames.

    Args:
        train_path: Path (or file-like object) of the training CSV.
        test_path: Path (or file-like object) of the test CSV.

    Returns:
        A ``(train_df, test_df)`` tuple.
    """
    frames = tuple(pd.read_csv(source) for source in (train_path, test_path))
    return frames

In [None]:
def preprocess_data(train_df, test_df):
    """Clean, encode and scale the training and test frames.

    Every imputer, encoder and scaler is fitted on the training frame only and
    then applied to the test frame, so no statistic of the test set influences
    the transformation.

    Args:
        train_df: Raw training frame; must contain the ``Survived`` column.
        test_df: Raw test frame with the same feature columns.

    Returns:
        ``(train_df, test_df)``: the processed training frame (still holding
        ``Survived``) and the processed test frame, whose columns match the
        training feature columns in name and order.
    """
    train_df = train_df.drop(['PassengerId', 'Name', 'Ticket', 'Cabin'], axis=1)
    test_df = test_df.drop(['Name', 'Ticket', 'Cabin'], axis=1)

    imputer_age = SimpleImputer(strategy='median')
    train_df['Age'] = imputer_age.fit_transform(train_df[['Age']])
    test_df['Age'] = imputer_age.transform(test_df[['Age']])

    # Embarked is imputed in both frames with the most frequent training value.
    imputer_embarked = SimpleImputer(strategy='most_frequent')
    train_df['Embarked'] = imputer_embarked.fit_transform(train_df[['Embarked']]).ravel()
    test_df['Embarked'] = imputer_embarked.transform(test_df[['Embarked']]).ravel()

    # Fare gaps are filled with the training median rather than a statistic
    # computed from the test frame itself.
    imputer_fare = SimpleImputer(strategy='median')
    train_df['Fare'] = imputer_fare.fit_transform(train_df[['Fare']])
    test_df['Fare'] = imputer_fare.transform(test_df[['Fare']])

    le = LabelEncoder()
    train_df['Sex'] = le.fit_transform(train_df['Sex'])
    test_df['Sex'] = le.transform(test_df['Sex'])

    train_df = pd.get_dummies(train_df, columns=['Embarked'], drop_first=True)
    test_df = pd.get_dummies(test_df, columns=['Embarked'], drop_first=True)

    # Align the test frame with the training features: dummy columns missing
    # from the test frame are added as zeros, extra columns (e.g. PassengerId)
    # are dropped, and the column order follows the training frame. reindex
    # returns a new frame, so the assignments below do not write into a slice.
    feature_columns = train_df.columns.drop('Survived')
    test_df = test_df.reindex(columns=feature_columns, fill_value=0)

    scaler = StandardScaler()
    numeric_features = ['Age', 'Fare', 'SibSp', 'Parch', 'Pclass']
    train_df[numeric_features] = scaler.fit_transform(train_df[numeric_features])
    test_df[numeric_features] = scaler.transform(test_df[numeric_features])

    return train_df, test_df

In [None]:
def split_data(train_df, test_size=0.2, random_state=42):
    """Split a labelled frame into stratified training and validation parts.

    Args:
        train_df: Frame containing the features and the ``Survived`` column.
        test_size: Share of rows assigned to the validation part.
        random_state: Seed forwarded to ``train_test_split``.

    Returns:
        ``(X_train, X_val, y_train, y_val)``.
    """
    target = train_df['Survived']
    features = train_df.drop(columns='Survived')

    # Stratify on the target so both parts keep the same class proportions.
    parts = train_test_split(
        features,
        target,
        test_size=test_size,
        random_state=random_state,
        stratify=target,
    )
    return tuple(parts)

Функция, вызов которой в main вызовет правильный процесс загрузки и обработки данных:

In [None]:
def get_processed_data(train_path='train.csv', test_path='test.csv'):
    """Run the whole pipeline: load, preprocess, then split the training data.

    Args:
        train_path: Location of the training CSV.
        test_path: Location of the test CSV.

    Returns:
        ``(X_train, X_val, y_train, y_val, test_df)``.
    """
    raw_train, raw_test = load_data(train_path, test_path)
    clean_train, clean_test = preprocess_data(raw_train, raw_test)
    return (*split_data(clean_train), clean_test)

# Алгоритм линейной регрессии с гребневой регуляризацией в матричном виде:

In [None]:
def transform_labels(y):
    """Map labels to +1 (where ``y == 1``) or -1 and return a column vector."""
    is_positive = y == 1
    signed = np.where(is_positive, 1, -1)
    return signed.reshape(-1, 1)

def add_intercept(X):
    """Return ``X`` with a leading column of ones (the intercept feature)."""
    ones_column = np.ones((X.shape[0], 1))
    return np.concatenate((ones_column, X), axis=1)

Реализация линейной регрессии с гребневой регуляризацией

In [None]:
def ridge_regression(X, y, lambda_reg):
    """Solve ridge regression in closed form.

    Finds ``w`` satisfying ``(X^T X + lambda_reg * I') w = X^T y``, where
    ``I'`` is the identity with its first diagonal entry zeroed so that the
    first column of ``X`` (the intercept) is not penalised.

    Args:
        X: Design matrix whose first column is the intercept column.
        y: Target column vector.
        lambda_reg: Regularisation strength.

    Returns:
        The weight vector ``w``.

    Raises:
        numpy.linalg.LinAlgError: If the regularised normal matrix is singular.
    """
    penalty = lambda_reg * np.eye(X.shape[1])
    penalty[0, 0] = 0.0  # leave the intercept unregularised
    # Solving the linear system is cheaper and numerically more stable than
    # forming the explicit inverse and multiplying by it.
    return np.linalg.solve(X.T @ X + penalty, X.T @ y)

Прогнозирование и оценка точности модели

In [None]:
def predict(X, w):
    """Predict class labels in {-1, +1} from the sign of ``X @ w``.

    A score of exactly zero is assigned to class +1, so the result never
    contains 0 (which is not a valid label); this matches the tie-breaking of
    ``LinearClassifier.predict``.

    Args:
        X: Feature matrix.
        w: Weight column vector.

    Returns:
        Array of predicted labels with the shape of ``X @ w``.
    """
    y_pred = np.sign(X @ w)
    y_pred[y_pred == 0] = 1
    return y_pred

In [None]:
def evaluate_accuracy(y_true, y_pred):
    """Return the fraction of predictions that equal the true labels.

    Both inputs are flattened first, so a 1-D label vector can be compared with
    a column vector of predictions without the comparison broadcasting into an
    ``(n, n)`` matrix.

    Args:
        y_true: True labels, any shape with ``n`` elements.
        y_pred: Predicted labels, any shape with ``n`` elements.

    Returns:
        Accuracy as a float between 0 and 1.
    """
    y_true = np.asarray(y_true).ravel()
    y_pred = np.asarray(y_pred).ravel()
    return np.mean(y_true == y_pred)

# Алгоритм линейной классификации

In [None]:
def transform_labels(y):
    """Convert labels to a column vector holding +1 where ``y == 1``, else -1."""
    return np.reshape(np.where(y == 1, 1, -1), (-1, 1))

def add_intercept(X):
    """Prepend a column of ones to ``X`` so the model can learn a bias term."""
    row_count = X.shape[0]
    bias = np.ones((row_count, 1))
    return np.concatenate([bias, X], axis=1)

In [None]:
class LinearClassifier:
    """Linear classifier for labels in {-1, +1}, trained by batch gradient descent.

    The per-sample loss is a function of the margin ``y * (x @ w)``; supported
    losses are ``'mse'``, ``'logistic'`` and ``'exponential'``. L1 (``lambda1``)
    and L2 (``lambda2``) penalties are applied to every weight.

    Attributes set by ``fit``:
        w: Weight column vector of shape ``(n_features, 1)``.
        loss_history: Training loss recorded before each update.
        test_loss_history / test_accuracy_history: Loss and accuracy on the
            optional evaluation set, recorded after each update.
    """

    def __init__(self, loss='mse', learning_rate=0.01, n_iterations=1000, lambda1=0.0, lambda2=0.0):
        self.loss = loss
        self.learning_rate = learning_rate
        self.n_iterations = n_iterations
        self.lambda1 = lambda1
        self.lambda2 = lambda2
        self.w = None
        self.loss_history = []
        self.test_loss_history = []
        self.test_accuracy_history = []

    def loss_and_gradient(self, X, y):
        """Return ``(mean regularised loss, gradient)`` at the current weights.

        Args:
            X: Feature matrix of shape ``(m, n_features)``.
            y: Labels in {-1, +1}; any shape with ``m`` elements.

        Raises:
            ValueError: If ``self.loss`` is not a supported loss name.
        """
        # Force a column vector: a 1-D y would broadcast against the (m, 1)
        # scores into an (m, m) matrix.
        y = np.asarray(y).reshape(-1, 1)
        m = X.shape[0]
        margins = y * (X @ self.w)

        # `slope` is minus the derivative of the per-sample loss with respect
        # to the margin, up to a constant factor.
        if self.loss == 'mse':
            residual = 1 - margins
            loss = residual ** 2
            # NOTE: the constant factor 2 of the derivative is omitted, which
            # only rescales the effective learning rate.
            slope = residual
        elif self.loss == 'logistic':
            # log(1 + exp(-margin)) and 1 - sigmoid(margin) written with
            # logaddexp so that large |margin| values cannot overflow.
            loss = np.logaddexp(0, -margins)
            slope = np.exp(-np.logaddexp(0, margins))
        elif self.loss == 'exponential':
            loss = np.exp(-margins)
            slope = loss
        else:
            raise ValueError("Unsupported loss function")

        gradient = -(X.T @ (y * slope)) / m
        gradient += self.lambda1 * np.sign(self.w) + 2 * self.lambda2 * self.w
        loss_mean = np.mean(loss) + self.lambda1 * np.sum(np.abs(self.w)) + self.lambda2 * np.sum(self.w ** 2)
        return loss_mean, gradient

    def fit(self, X_train, y_train, X_test=None, y_test=None):
        """Train from zero weights for ``n_iterations`` gradient steps.

        When both ``X_test`` and ``y_test`` are given, the loss and accuracy on
        that set are recorded after every step. All histories are cleared at
        the start, so refitting the same instance does not append to old runs.
        """
        m, n = X_train.shape
        self.w = np.zeros((n, 1))
        self.loss_history = []
        self.test_loss_history = []
        self.test_accuracy_history = []
        track_test = X_test is not None and y_test is not None

        for _ in range(self.n_iterations):
            loss, gradient = self.loss_and_gradient(X_train, y_train)
            self.w -= self.learning_rate * gradient
            self.loss_history.append(loss)

            if track_test:
                test_loss, _unused_gradient = self.loss_and_gradient(X_test, y_test)
                self.test_loss_history.append(test_loss)

                y_pred_test = self.predict(X_test)
                test_accuracy = self.evaluate_accuracy(y_test, y_pred_test)
                self.test_accuracy_history.append(test_accuracy)

    def predict(self, X):
        """Return labels in {-1, +1}; a score of exactly zero maps to +1."""
        y_pred = np.sign(X @ self.w)
        y_pred[y_pred == 0] = 1
        return y_pred

    def evaluate_accuracy(self, y_true, y_pred):
        """Return the fraction of matching labels after flattening both inputs."""
        return np.mean(np.asarray(y_true).ravel() == np.asarray(y_pred).ravel())

# svm через smo

Выбраны ядра: линейное, полиномиальное и RBF (гауссово).

In [None]:
def kernel_function(X1, X2, kernel_type='linear', degree=3, gamma=None):
    """Compute the kernel (Gram) matrix between the rows of ``X1`` and ``X2``.

    Args:
        X1: Array of shape ``(n1, d)``.
        X2: Array of shape ``(n2, d)``.
        kernel_type: ``'linear'``, ``'polynomial'`` or ``'rbf'``.
        degree: Exponent of the polynomial kernel ``(x @ z + 1) ** degree``.
        gamma: Width parameter of the RBF kernel ``exp(-gamma * ||x - z||^2)``;
            defaults to ``1 / d``. Ignored by the other kernels.

    Returns:
        Kernel matrix of shape ``(n1, n2)``.

    Raises:
        ValueError: If ``kernel_type`` is not one of the supported names.
    """
    if kernel_type == 'linear':
        return X1 @ X2.T
    if kernel_type == 'polynomial':
        return (X1 @ X2.T + 1) ** degree
    if kernel_type == 'rbf':
        if gamma is None:
            gamma = 1 / X1.shape[1]
        sq_dists = np.sum(X1 ** 2, axis=1).reshape(-1, 1) + \
                   np.sum(X2 ** 2, axis=1) - 2 * X1 @ X2.T
        # Cancellation can leave tiny negative "distances" (e.g. for identical
        # points); clamp them so kernel values never exceed 1.
        sq_dists = np.maximum(sq_dists, 0)
        return np.exp(-gamma * sq_dists)
    raise ValueError("Unsupported kernel type")

In [None]:
class SVM_SMO:
    """Kernel SVM for labels in {-1, +1}, trained with a simplified SMO loop.

    Attributes set by ``fit``:
        alpha: Dual coefficients, shape ``(m, 1)``.
        b: Bias term of the decision function.
        K: Kernel matrix of the training set.
        f: Kernel expansion ``K @ (alpha * y)`` for each training point
            (the bias ``b`` is NOT included).
        loss_history: Dual objective value after each pass over the data.
        test_accuracy_history / eval_points: Accuracy on the optional
            evaluation set after each pass, and the number of successful
            alpha updates performed up to that point.
        iteration: Total number of successful alpha updates.
    """

    def __init__(self, kernel='linear', C=0.1, tol=1e-3, max_passes=50, degree=3, gamma=None):
        self.kernel = kernel
        self.C = C
        self.tol = tol
        self.max_passes = max_passes
        self.degree = degree
        self.gamma = gamma
        self.alpha = None
        self.b = 0
        self.X = None
        self.y = None
        self.K = None
        self.f = None
        self.loss_history = []
        self.test_accuracy_history = []
        self.eval_points = []
        self.iteration = 0

    def fit(self, X, y, X_test=None, y_test=None):
        """Train on ``(X, y)``; stop after ``max_passes`` consecutive passes without an update.

        Args:
            X: Training features, shape ``(m, d)`` with ``m >= 2``.
            y: Training labels in {-1, +1}, any shape with ``m`` elements.
            X_test, y_test: Optional evaluation set; when both are given the
                accuracy on it is recorded after every pass.

        Raises:
            ValueError: If fewer than two training samples are given (SMO
                always optimises a pair of coefficients).
        """
        X = np.asarray(X, dtype=float)
        m = X.shape[0]
        if m < 2:
            raise ValueError("SVM_SMO.fit requires at least two training samples")

        self.X = X
        self.y = np.asarray(y, dtype=float).reshape(-1, 1)
        self.alpha = np.zeros((m, 1))
        self.b = 0.0
        self.f = np.zeros((m, 1))
        # Start every fit from clean histories so refitting does not append
        # to the records of a previous run.
        self.loss_history = []
        self.test_accuracy_history = []
        self.eval_points = []
        self.iteration = 0

        self.K = kernel_function(self.X, self.X, kernel_type=self.kernel,
                                 degree=self.degree, gamma=self.gamma)

        track_test = X_test is not None and y_test is not None

        passes = 0
        while passes < self.max_passes:
            num_changed_alphas = 0
            for i in range(m):
                if self._take_step(i, m):
                    num_changed_alphas += 1
                    self.iteration += 1

            if track_test:
                y_pred_test = self.predict(X_test)
                test_accuracy = self.evaluate_accuracy(y_test, y_pred_test)
                self.test_accuracy_history.append(test_accuracy)
                self.eval_points.append(self.iteration)

            self.loss_history.append(self._compute_loss())

            if num_changed_alphas == 0:
                passes += 1
            else:
                passes = 0

    def _error(self, i):
        """Return E_i = (kernel expansion + bias) - label for training point ``i``."""
        # The bias must be part of the error: the b1/b2 update formulas in
        # _take_step are derived for errors of the full decision function.
        return self.f[i, 0] + self.b - self.y[i, 0]

    def _take_step(self, i, m):
        """Try to optimise alpha[i] with a partner; return True if both were updated."""
        y_i = self.y[i, 0]
        E_i = self._error(i)
        alpha_i_old = self.alpha[i, 0]

        violates_kkt = (y_i * E_i < -self.tol and alpha_i_old < self.C) or \
                       (y_i * E_i > self.tol and alpha_i_old > 0)
        if not violates_kkt:
            return False

        j = self._select_j(i, m)
        y_j = self.y[j, 0]
        E_j = self._error(j)
        alpha_j_old = self.alpha[j, 0]

        # Box bounds for alpha[j] that keep y_i*alpha_i + y_j*alpha_j constant.
        if y_i != y_j:
            low = max(0.0, alpha_j_old - alpha_i_old)
            high = min(self.C, self.C + alpha_j_old - alpha_i_old)
        else:
            low = max(0.0, alpha_i_old + alpha_j_old - self.C)
            high = min(self.C, alpha_i_old + alpha_j_old)
        if low == high:
            return False

        eta = 2.0 * self.K[i, j] - self.K[i, i] - self.K[j, j]
        if eta >= 0:
            return False

        alpha_j_new = min(max(alpha_j_old - y_j * (E_i - E_j) / eta, low), high)
        # Nothing is written back for a negligible step, so alpha and f stay
        # consistent with each other when the step is rejected.
        if abs(alpha_j_new - alpha_j_old) < 1e-5:
            return False

        alpha_i_new = alpha_i_old + y_i * y_j * (alpha_j_old - alpha_j_new)
        delta_alpha_i = alpha_i_new - alpha_i_old
        delta_alpha_j = alpha_j_new - alpha_j_old

        b1 = self.b - E_i - \
             y_i * delta_alpha_i * self.K[i, i] - \
             y_j * delta_alpha_j * self.K[i, j]
        b2 = self.b - E_j - \
             y_i * delta_alpha_i * self.K[i, j] - \
             y_j * delta_alpha_j * self.K[j, j]

        if 0 < alpha_i_new < self.C:
            self.b = b1
        elif 0 < alpha_j_new < self.C:
            self.b = b2
        else:
            self.b = (b1 + b2) / 2.0

        self.alpha[i, 0] = alpha_i_new
        self.alpha[j, 0] = alpha_j_new
        self.f += (delta_alpha_i * y_i * self.K[:, i].reshape(-1, 1)) + \
                  (delta_alpha_j * y_j * self.K[:, j].reshape(-1, 1))
        return True

    def _select_j(self, i, m):
        """Pick the partner index ``j != i`` that maximises ``|E_j - E_i|``."""
        errors = (self.f + self.b - self.y).ravel()
        gaps = np.abs(errors - errors[i])
        gaps[i] = -1  # never pick i itself
        j = int(np.argmax(gaps))
        if j == i:
            # Only reachable when no other candidate is available.
            if m < 2:
                raise ValueError("cannot select a second index from a single sample")
            while j == i:
                j = np.random.randint(0, m)
        return j

    def _compute_loss(self):
        """Return the dual objective ``sum(alpha) - 0.5 * (alpha*y)^T K (alpha*y)``."""
        signed_alpha = self.alpha * self.y
        quadratic = (signed_alpha.T @ self.K @ signed_alpha).item()
        return float(np.sum(self.alpha) - 0.5 * quadratic)

    def predict(self, X):
        """Return labels in {-1, +1}; a decision value of exactly zero maps to +1."""
        K = kernel_function(np.asarray(X, dtype=float), self.X, kernel_type=self.kernel,
                            degree=self.degree, gamma=self.gamma)
        y_pred = np.sign((K @ (self.alpha * self.y)) + self.b)
        y_pred[y_pred == 0] = 1
        return y_pred

    def evaluate_accuracy(self, y_true, y_pred):
        """Return the fraction of matching labels after flattening both inputs."""
        y_true = np.asarray(y_true).ravel()
        y_pred = np.asarray(y_pred).ravel()
        return np.mean(y_true == y_pred)

## main

Загрузка и предобработка данных:

In [None]:
def load_and_preprocess_data():
    """Load the processed data and merge the train/validation parts back together.

    Prints the shapes of the two parts, then stacks them (training rows first)
    into one feature matrix and one label vector. The processed test frame
    returned by ``get_processed_data`` is not used.

    Returns:
        ``(X_full, y_full)`` as NumPy arrays.
    """
    X_train, X_val, y_train, y_val, _ = get_processed_data()
    print(f'Размер тренировочной выборки: {X_train.shape}')
    print(f'Размер валидационной выборки: {X_val.shape}')

    features = np.concatenate((X_train.values, X_val.values), axis=0)
    labels = np.concatenate((y_train.values, y_val.values))
    return features, labels

In [None]:
# Load the processed data once; X_full / y_full are the inputs of
# prepare_data_for_regression further down.
X_full, y_full = load_and_preprocess_data()

#### Линейная регрессия

In [None]:
def prepare_data_for_regression(X_full, y_full):
    """Build the float design matrix (with intercept column) and float ±1 label column.

    Returns:
        ``(X_with_intercept, y_transformed)``.
    """
    design_matrix = add_intercept_lr(X_full).astype(float)
    signed_labels = transform_labels_lr(y_full).astype(float)
    return design_matrix, signed_labels

In [None]:
def normalize_features(X):
    """Standardise every column except the first and put a column of ones in front.

    The first column of ``X`` is discarded and replaced by ones; the remaining
    columns are centred and divided by their standard deviation. Columns with
    zero standard deviation are only centred (divided by 1).
    """
    features = X[:, 1:]
    centered = features - features.mean(axis=0)
    scale = features.std(axis=0)
    scale[scale == 0] = 1  # avoid dividing constant columns by zero
    bias = np.ones((features.shape[0], 1))
    return np.concatenate((bias, centered / scale), axis=1)

In [None]:
def linear_regression_with_ridge(X_train, y_train, X_test, y_test):
    """Sweep the ridge penalty and keep the value with the best test accuracy.

    Thirty values of lambda, log-spaced between 1e-4 and 1e4, are tried. The
    first lambda reaching the highest accuracy on ``(X_test, y_test)`` wins and
    is also stored in the module-level ``best_lambda_lr``. The accuracy as a
    function of lambda is plotted.

    Returns:
        ``(w_best, final_accuracy, best_lambda)``.
    """
    global best_lambda_lr

    lambda_values = np.logspace(-4, 4, 30)
    best_lambda = None
    # Start below any attainable accuracy so the first candidate is always
    # accepted and best_lambda can never stay None.
    best_accuracy = -np.inf
    w_best = None
    accuracies = []

    for lambda_reg in lambda_values:
        w = ridge_regression(X_train, y_train, lambda_reg)
        y_pred = predict_lr(X_test, w)
        accuracy = evaluate_accuracy_lr(y_test, y_pred)
        accuracies.append(accuracy)
        print(f'Lambda: {lambda_reg:.4f}, Accuracy: {accuracy:.4f}')
        if accuracy > best_accuracy:
            best_accuracy = accuracy
            best_lambda = lambda_reg
            w_best = w

    print(f'Лучшее значение lambda для гребневой регрессии: {best_lambda}, Accuracy: {best_accuracy:.4f}')

    # The closed-form fit is deterministic, so the weights and accuracy kept
    # from the sweep equal what a refit with best_lambda would produce.
    final_accuracy = best_accuracy
    print(f'Итоговая точность на тестовой выборке (Линейная регрессия): {final_accuracy:.4f}')

    best_lambda_lr = best_lambda

    plt.figure()
    plt.semilogx(lambda_values, accuracies, marker='o')
    plt.xlabel('Lambda (регуляризация)')
    plt.ylabel('Точность')
    plt.title('Зависимость точности линейной регрессии от регуляризации')
    plt.grid(True)
    plt.show()

    return w_best, final_accuracy, best_lambda

In [None]:
X_lr, y_lr = prepare_data_for_regression(X_full, y_full)
X_lr = normalize_features(X_lr)
# split_data() expects a DataFrame with a 'Survived' column, so the NumPy
# arrays are split with train_test_split directly.
# NOTE(review): test_size / random_state mirror split_data's defaults —
# confirm this is the intended split.
X_train_lr, X_test_lr, y_train_lr, y_test_lr = train_test_split(
    X_lr, y_lr, test_size=0.2, random_state=42, stratify=y_lr.ravel()
)

print("=== Линейная регрессия с гребневой регуляризацией ===")
w_lr, final_accuracy_lr, best_lambda_lr = linear_regression_with_ridge(
    X_train_lr, y_train_lr, X_test_lr, y_test_lr
)

#### Линейная классификация

Построение графика потерь

In [None]:
def plot_loss_curve(loss_history, title):
    """Plot the recorded loss values against their index and show the figure.

    Args:
        loss_history: Sequence of loss values, one per recorded step.
        title: Title of the figure.
    """
    steps = range(len(loss_history))

    plt.figure(figsize=(8, 6))
    plt.plot(steps, loss_history, label='Функция потерь')
    plt.title(title)
    plt.xlabel('Итерация')
    plt.ylabel('Значение функции потерь')
    plt.grid(True)
    plt.legend()
    plt.show()

In [None]:
def linear_classification_with_gd(X_train, y_train, X_test, y_test):
    """Grid-search ``LinearClassifier`` and return the best fitted model.

    Every combination of loss, learning rate, L1 and L2 strength is trained for
    1000 iterations and scored on ``(X_test, y_test)``. The first combination
    reaching the highest accuracy wins; its parameters are stored in the
    module-level ``best_params_lc`` and its test-loss curve is plotted.

    Returns:
        ``(classifier_best, final_accuracy)``.
    """
    global best_params_lc
    from itertools import product

    loss_functions = ['mse', 'logistic', 'exponential']
    learning_rates = [0.001, 0.005, 0.01]
    lambda1_values = [0.0, 0.01, 0.1]
    lambda2_values = [0.0, 0.01, 0.1]
    n_iterations = 1000

    best_params = {}
    # Start below any attainable accuracy so the first candidate is always
    # accepted and best_params can never stay empty.
    best_accuracy = -1.0
    classifier_best = None

    # product() iterates in the same order as four nested loops would.
    for loss, lr, l1, l2 in product(loss_functions, learning_rates, lambda1_values, lambda2_values):
        classifier = LinearClassifier(
            loss=loss,
            learning_rate=lr,
            n_iterations=n_iterations,
            lambda1=l1,
            lambda2=l2
        )
        classifier.fit(X_train, y_train, X_test, y_test)
        y_pred = classifier.predict(X_test)
        accuracy = classifier.evaluate_accuracy(y_test, y_pred)
        print(f'Loss: {loss}, LR: {lr}, L1: {l1}, L2: {l2}, Accuracy: {accuracy:.4f}')
        if accuracy > best_accuracy:
            best_accuracy = accuracy
            best_params = {
                'loss': loss,
                'learning_rate': lr,
                'lambda1': l1,
                'lambda2': l2,
                'n_iterations': n_iterations
            }
            classifier_best = classifier

    print(f'Лучшие параметры для линейной классификации: {best_params}, Accuracy: {best_accuracy:.4f}')

    best_params_lc = best_params

    # Training starts from zero weights and is deterministic, so the model kept
    # from the search is identical to one retrained with best_params.
    y_pred_test = classifier_best.predict(X_test)
    final_accuracy = classifier_best.evaluate_accuracy(y_test, y_pred_test)
    print(f'Итоговая точность на тестовой выборке (Линейная классификация): {final_accuracy:.4f}')

    plot_loss_curve(classifier_best.test_loss_history, 'Кривая функции потерь на тестовом множестве (Линейная классификация)')

    return classifier_best, final_accuracy

In [None]:
# Tune the gradient-descent classifier on the same split (intercept column
# included) that was used for ridge regression.
print("=== Линейная классификация на основе градиентного спуска ===")
classifier_lc, final_accuracy_lc = linear_classification_with_gd(
    X_train_lr, y_train_lr, X_test_lr, y_test_lr
)

##### svm

In [None]:
def _iter_svm_param_grid():
    """Yield ``SVM_SMO`` keyword-argument dicts in the order the search visits them."""
    kernels = ['linear', 'polynomial', 'rbf']
    C_values = [0.01, 0.1, 1]
    tol_values = [1e-3, 1e-4]
    max_passes_values = [5, 10, 50, 100]
    # Kernel-specific extra hyperparameter; kernels not listed here have none.
    kernel_extras = {
        'polynomial': ('degree', [2, 3, 4]),
        'rbf': ('gamma', [0.1, 1.0]),
    }

    for kernel in kernels:
        for C in C_values:
            for tol in tol_values:
                for max_passes in max_passes_values:
                    base = {'kernel': kernel, 'C': C, 'tol': tol, 'max_passes': max_passes}
                    if kernel not in kernel_extras:
                        yield base
                        continue
                    extra_name, extra_values = kernel_extras[kernel]
                    for value in extra_values:
                        yield {**base, extra_name: value}


def svm_with_smo(X_train, y_train, X_test, y_test):
    """Grid-search ``SVM_SMO`` and return the best fitted model.

    Every kernel is combined with each ``C``, ``tol`` and ``max_passes`` value;
    the polynomial kernel additionally sweeps ``degree`` and the RBF kernel
    sweeps ``gamma``. Candidates are scored on ``(X_test, y_test)``; the first
    one reaching the highest accuracy wins, its parameters are stored in the
    module-level ``best_params_svm`` and its ``loss_history`` is plotted.

    Returns:
        ``(svm_best, final_accuracy)``.
    """
    global best_params_svm

    best_params = {}
    # Start below any attainable accuracy so the first candidate is always
    # accepted and best_params can never stay empty.
    best_accuracy = -1.0
    svm_best = None

    for params in _iter_svm_param_grid():
        svm = SVM_SMO(**params)
        svm.fit(X_train, y_train, X_test, y_test)
        y_pred = svm.predict(X_test)
        accuracy = svm.evaluate_accuracy(y_test, y_pred)

        # Only the kernel-specific parameter (if any) is reported.
        extra = ''.join(
            f'{name}: {params[name]}, ' for name in ('degree', 'gamma') if name in params
        )
        print(
            f"Kernel: {params['kernel']}, C: {params['C']}, tol: {params['tol']}, "
            f"max_passes: {params['max_passes']}, {extra}Accuracy: {accuracy:.4f}")

        if accuracy > best_accuracy:
            best_accuracy = accuracy
            best_params = params
            svm_best = svm

    print(f'Лучшие параметры для SVM: {best_params}, Accuracy: {best_accuracy:.4f}')

    best_params_svm = best_params

    # Each candidate is a fresh instance trained once, so the model kept from
    # the search stands in for one retrained with best_params.
    y_pred_test = svm_best.predict(X_test)
    final_accuracy = svm_best.evaluate_accuracy(y_test, y_pred_test)
    print(f'Итоговая точность на тестовой выборке (SVM): {final_accuracy:.4f}')

    plot_loss_curve(svm_best.loss_history, 'Кривая функции потерь на тестовом множестве (SVM)')

    return svm_best, final_accuracy

In [None]:
# The leading intercept column is dropped (SVM_SMO keeps its own bias term b)
# and the label columns are flattened to 1-D before tuning.
print("=== Метод опорных векторов (SVM) с SMO ===")
svm_model, final_accuracy_svm = svm_with_smo(
    X_train_lr[:, 1:], y_train_lr.flatten(), X_test_lr[:, 1:], y_test_lr.flatten()
)

Построение графиков

In [None]:
def collect_learning_curves(model_class, X_full, y_full, n_runs=50):
    """Collect test-accuracy curves over several random 50/50 stratified splits.

    For every run the data is split with ``random_state=run`` and the model is
    trained with the tuned hyperparameters stored in ``best_params_lc`` or
    ``best_params_svm``. Curves shorter than the longest one are padded by
    repeating their last value.

    Args:
        model_class: ``'linear_classifier'`` or ``'svm'``.
        X_full: Feature matrix.
        y_full: Labels.
        n_runs: Number of random splits (at least 1).

    Returns:
        ``(test_accuracies, eval_points)``: an array of shape
        ``(n_runs, max_length)`` and the evaluation points averaged over runs.

    Raises:
        ValueError: If ``model_class`` is unknown or ``n_runs`` is below 1.
    """
    supported = ('linear_classifier', 'svm')
    # Fail early with a clear message instead of collecting nothing and
    # crashing later on an empty sequence.
    if model_class not in supported:
        raise ValueError(f"Unknown model_class {model_class!r}; expected one of {supported}")
    if n_runs < 1:
        raise ValueError("n_runs must be at least 1")

    test_accuracies = []
    eval_points_list = []

    for run in range(n_runs):
        X_train, X_test, y_train, y_test = train_test_split(
            X_full, y_full, test_size=0.5, stratify=y_full, random_state=run)

        if model_class == 'linear_classifier':
            classifier = LinearClassifier(
                loss=best_params_lc['loss'],
                learning_rate=best_params_lc['learning_rate'],
                n_iterations=best_params_lc['n_iterations'],
                lambda1=best_params_lc['lambda1'],
                lambda2=best_params_lc['lambda2'],
            )
            classifier.fit(X_train, y_train, X_test, y_test)
            test_accuracies.append(classifier.test_accuracy_history)
            # One accuracy value is recorded per gradient step.
            eval_points_list.append(np.arange(0, best_params_lc['n_iterations']))
        else:
            svm = SVM_SMO(
                kernel=best_params_svm['kernel'],
                C=best_params_svm['C'],
                tol=best_params_svm['tol'],
                max_passes=best_params_svm['max_passes'],
                degree=best_params_svm.get('degree', 3),
                gamma=best_params_svm.get('gamma', None)
            )
            svm.fit(X_train, y_train, X_test, y_test)
            test_accuracies.append(svm.test_accuracy_history)
            eval_points_list.append(svm.eval_points)

    # Runs can record different numbers of points; extend the shorter ones by
    # repeating their final value so they stack into one array.
    max_length = max(len(acc) for acc in test_accuracies)
    test_accuracies_padded = [np.pad(acc, (0, max_length - len(acc)), 'edge') for acc in test_accuracies]
    eval_points_padded = [np.pad(points, (0, max_length - len(points)), 'edge') for points in eval_points_list]
    test_accuracies = np.array(test_accuracies_padded)
    eval_points = np.array(eval_points_padded).mean(axis=0)
    return test_accuracies, eval_points

In [None]:
def get_linear_regression_accuracy(X_train_lr, y_train_lr, X_test_lr, y_test_lr):
    """Fit ridge regression with the tuned ``best_lambda_lr`` and return its test accuracy."""
    weights = ridge_regression(X_train_lr, y_train_lr, best_lambda_lr)
    return evaluate_accuracy_lr(y_test_lr, predict_lr(X_test_lr, weights))

In [None]:
def plot_learning_curve_with_confidence(test_accuracies, eval_points, title, lr_accuracy):
    """Plot the mean test accuracy across runs with a 1.96-standard-error band.

    Args:
        test_accuracies: Array with one row per run and one column per
            evaluation point.
        eval_points: X coordinates of the evaluation points.
        title: Title of the figure.
        lr_accuracy: Accuracy drawn as a horizontal reference line labelled as
            linear regression.
    """
    run_count = test_accuracies.shape[0]
    center = np.mean(test_accuracies, axis=0)
    spread = np.std(test_accuracies, axis=0)
    half_width = 1.96 * (spread / np.sqrt(run_count))
    lower = center - half_width
    upper = center + half_width

    plt.figure(figsize=(8, 6))
    plt.plot(eval_points, center, label='Средняя точность на тестовом множестве')
    plt.fill_between(eval_points, lower, upper, alpha=0.2)
    plt.axhline(y=lr_accuracy, color='r', linestyle='--', label='Линейная регрессия')
    plt.title(title)
    plt.xlabel('Итерация')
    plt.ylabel('Точность на тестовом множестве')
    plt.grid(True)
    plt.legend()
    plt.show()

In [None]:
# Number of random splits averaged in each learning curve.
n_runs = 50

# Reference accuracy of the tuned ridge regression on the original split.
lr_accuracy = get_linear_regression_accuracy(
    X_train_lr, y_train_lr, X_test_lr, y_test_lr
)

# The linear classifier uses the matrix with the intercept column; the SVM
# gets the features without it.
test_accuracies_lc, eval_points_lc = collect_learning_curves(
    model_class='linear_classifier', X_full=X_lr, y_full=y_lr, n_runs=n_runs
)
test_accuracies_svm, eval_points_svm = collect_learning_curves(
    model_class='svm', X_full=X_lr[:, 1:], y_full=y_lr, n_runs=n_runs
)

plot_learning_curve_with_confidence(
    test_accuracies=test_accuracies_lc,
    eval_points=eval_points_lc,
    title='Кривая обучения для линейной классификации',
    lr_accuracy=lr_accuracy,
)

plot_learning_curve_with_confidence(
    test_accuracies=test_accuracies_svm,
    eval_points=eval_points_svm,
    title='Кривая обучения для SVM',
    lr_accuracy=lr_accuracy,
)