## Задача 1

Реализовать класс для работы с линейной регрессией

In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from numpy.linalg import inv, pinv

class MyLinearRegression:
    """Linear regression with optional L1 / L2 / combined regularization.

    Weights can be obtained in closed form (``weight_calc='matrix'``), with
    full-batch gradient descent (``'gd'``) or with mini-batch stochastic
    gradient descent (``'sgd'``). The intercept is never regularized.

    NOTE: the closed-form solver adds ``lambda_2`` to the un-normalized
    ``X^T X`` matrix, whereas the gradient solvers add ``2 * lambda_2 * w`` to
    the gradient of the *mean* squared error. The same ``lambda_2`` therefore
    corresponds to a different effective penalty strength in the two cases.

    Attributes set by ``fit``:
        coefs_: column vector of shape ``(n_features, 1)`` with feature weights.
        intercept_: scalar bias term.
        scaler_: fitted ``StandardScaler`` (only when ``standardize`` is true).
    """

    def __init__(self, regularization=None, weight_calc='matrix', lambda_1=None, lambda_2=None,
                 batch_size=20, learning_rate=0.01, max_iter=5000, early_stopping=0.0001, standardize=True):
        """Validate and store the hyper-parameters.

        Args:
            regularization: ``None``, ``'l1'``, ``'l2'`` or ``'l1l2'``.
            weight_calc: ``'matrix'``, ``'gd'`` or ``'sgd'``.
            lambda_1: L1 coefficient; required for ``'l1'`` and ``'l1l2'``.
            lambda_2: L2 coefficient; required for ``'l2'`` and ``'l1l2'``.
            batch_size: mini-batch size used by ``'sgd'``.
            learning_rate: step size of the gradient solvers.
            max_iter: maximum number of iterations (``'gd'``) or epochs (``'sgd'``).
            early_stopping: gradient-norm threshold below which training stops.
            standardize: whether to standardize features with ``StandardScaler``.

        Raises:
            TypeError: on an unsupported option value, on an L1 penalty combined
                with the closed-form solver, or on a missing lambda.
        """
        if regularization not in [None, 'l1', 'l2', 'l1l2']:
            raise TypeError(f"Параметр regularization не может принимать значение '{regularization}'")
        if weight_calc not in ['matrix', 'gd', 'sgd']:
            raise TypeError(f"Параметр weight_calc не может принимать значение '{weight_calc}'")
        # An L1 penalty has no closed-form solution, so it needs a gradient solver.
        if regularization in ['l1', 'l1l2'] and weight_calc == 'matrix':
            raise TypeError("При 'l1' или 'l1l2' нельзя использовать параметр 'matrix'")
        if regularization in ['l1', 'l1l2'] and lambda_1 is None:
            raise TypeError("Значение коэффициента регулризации l1 не задано")
        if regularization in ['l2', 'l1l2'] and lambda_2 is None:
            raise TypeError("Значение коэффициента регулризации l2 не задано")

        self.regularization = regularization
        self.weight_calc = weight_calc
        self.lambda_1 = lambda_1
        self.lambda_2 = lambda_2
        self.batch_size = batch_size
        self.learning_rate = learning_rate
        self.max_iter = max_iter
        self.early_stopping = early_stopping
        self.standardize = standardize

        self.coefs_ = None
        self.intercept_ = None
        self.scaler_ = None

    def _prepare_data(self, X, fit=False):
        """Convert ``X`` to an ndarray and standardize it when enabled.

        Args:
            X: feature matrix (DataFrame or array-like).
            fit: when true, a new scaler is fitted on ``X``; otherwise the
                previously fitted scaler is reused.

        Returns:
            The (possibly standardized) feature matrix as an ndarray.

        Raises:
            ValueError: if standardization is requested for prediction before
                a scaler has been fitted.
        """
        if isinstance(X, pd.DataFrame):
            X = X.values
        X = np.array(X)

        if self.standardize:
            if fit:
                self.scaler_ = StandardScaler()
                X = self.scaler_.fit_transform(X)
            else:
                if self.scaler_ is None:
                    raise ValueError("Модель не обучена. Сначала вызовите fit().")
                X = self.scaler_.transform(X)

        return X

    @staticmethod
    def _add_intercept_column(X):
        """Return ``X`` with a leading column of ones for the intercept."""
        return np.hstack([np.ones((X.shape[0], 1)), X])

    def _store_weights(self, weights):
        """Split a flat weight vector into ``intercept_`` and ``coefs_``."""
        self.intercept_ = weights[0]
        self.coefs_ = weights[1:].reshape(-1, 1)

    def _compute_gradient(self, X, y, weights):
        """Return the gradient of the (regularized) mean squared error.

        Args:
            X: design matrix whose first column is the intercept column.
            y: targets; reshaped to a column internally.
            weights: flat weight vector, intercept first.

        Returns:
            Flat gradient vector of the same length as ``weights``.
        """
        n = X.shape[0]
        residuals = X @ weights.reshape(-1, 1) - y.reshape(-1, 1)
        grad = ((2 / n) * X.T @ residuals).flatten()

        # Index 0 is the intercept and is deliberately left unpenalized.
        if self.regularization in ('l1', 'l1l2'):
            grad[1:] += self.lambda_1 * np.sign(weights[1:])
        if self.regularization in ('l2', 'l1l2'):
            grad[1:] += 2 * self.lambda_2 * weights[1:]

        return grad

    def _fit_matrix(self, X, y):
        """Solve the normal equations (ridge-regularized when ``'l2'``)."""
        X_with_ones = self._add_intercept_column(X)
        y = y.reshape(-1, 1)

        gram = X_with_ones.T @ X_with_ones
        if self.regularization == 'l2':
            penalty = np.eye(X_with_ones.shape[1])
            penalty[0, 0] = 0  # do not shrink the intercept
            gram = gram + self.lambda_2 * penalty

        try:
            gram_inv = inv(gram)
        except np.linalg.LinAlgError:
            # Singular matrix (e.g. collinear features): use the pseudo-inverse.
            gram_inv = pinv(gram)

        weights = gram_inv @ X_with_ones.T @ y
        self._store_weights(weights[:, 0])

    def _fit_gd(self, X, y):
        """Fit the weights with full-batch gradient descent."""
        X_with_ones = self._add_intercept_column(X)
        y = y.reshape(-1, 1)

        # A private generator keeps fitting reproducible without reseeding
        # NumPy's global random state behind the caller's back.
        rng = np.random.RandomState(42)
        weights = rng.uniform(-0.1, 0.1, size=(X_with_ones.shape[1],))

        for _ in range(self.max_iter):
            grad = self._compute_gradient(X_with_ones, y, weights)
            if np.linalg.norm(grad) < self.early_stopping:
                break
            weights = weights - self.learning_rate * grad

        self._store_weights(weights)

    def _fit_sgd(self, X, y):
        """Fit the weights with mini-batch stochastic gradient descent.

        Each epoch reshuffles the data and walks over it in chunks of
        ``batch_size``. Training stops as soon as a mini-batch gradient norm
        drops below ``early_stopping``.

        Raises:
            ValueError: if ``batch_size`` is smaller than 1.
        """
        if self.batch_size < 1:
            raise ValueError("Параметр batch_size должен быть положительным")

        X_with_ones = self._add_intercept_column(X)
        y = y.reshape(-1, 1)
        n = X_with_ones.shape[0]

        # Private generator: same draws as seeding with 42, no global side effect.
        rng = np.random.RandomState(42)
        weights = rng.uniform(-0.1, 0.1, size=(X_with_ones.shape[1],))

        for _ in range(self.max_iter):
            order = rng.permutation(n)
            X_shuffled = X_with_ones[order]
            y_shuffled = y[order]

            converged = False
            for start in range(0, n, self.batch_size):
                stop = min(start + self.batch_size, n)
                grad = self._compute_gradient(
                    X_shuffled[start:stop], y_shuffled[start:stop], weights
                )
                if np.linalg.norm(grad) < self.early_stopping:
                    converged = True
                    break
                weights = weights - self.learning_rate * grad

            if converged:
                break

        self._store_weights(weights)

    def fit(self, X: pd.DataFrame, y: pd.DataFrame):
        """Fit the model on features ``X`` and a single target ``y``.

        Args:
            X: two-dimensional feature matrix (DataFrame or array-like).
            y: targets, one value per row of ``X``.

        Returns:
            The fitted estimator itself.

        Raises:
            ValueError: if ``X`` is not two-dimensional, the training set is
                empty, or ``X`` and ``y`` disagree on the number of samples.
        """
        X = self._prepare_data(X, fit=True)
        if isinstance(y, pd.DataFrame):
            y = y.values
        y = np.array(y)

        if X.ndim != 2:
            raise ValueError("X должен быть двумерным массивом")
        if X.shape[0] == 0:
            raise ValueError("Обучающая выборка пуста")
        # Without this check a length-1 y would be silently broadcast by the
        # gradient solvers instead of being reported as an error.
        if y.size != X.shape[0]:
            raise ValueError(
                f"Число объектов в X ({X.shape[0]}) не совпадает с числом значений y ({y.size})"
            )

        if self.weight_calc == 'matrix':
            self._fit_matrix(X, y)
        elif self.weight_calc == 'gd':
            self._fit_gd(X, y)
        elif self.weight_calc == 'sgd':
            self._fit_sgd(X, y)

        return self

    def predict(self, X, ss=True):
        """Predict targets for ``X``.

        Args:
            X: feature matrix (DataFrame or array-like).
            ss: unused; kept only for backward compatibility of the signature.

        Returns:
            One-dimensional array of predictions.

        Raises:
            ValueError: if the model has not been fitted.
        """
        if self.coefs_ is None or self.intercept_ is None:
            raise ValueError("Модель не обучена. Сначала вызовите fit().")

        X = self._prepare_data(X, fit=False)
        predictions = X @ self.coefs_ + self.intercept_
        return predictions.flatten()

    def score(self, X, y):
        """Return the coefficient of determination R^2 on ``(X, y)``.

        For a constant target (zero total variance) the result is 1.0 when the
        predictions are exact and 0.0 otherwise.

        Raises:
            ValueError: if the model has not been fitted.
        """
        if self.coefs_ is None or self.intercept_ is None:
            raise ValueError("Модель не обучена. Сначала вызовите fit().")

        y_pred = self.predict(X)
        if isinstance(y, pd.DataFrame):
            y = y.values
        y = np.array(y).flatten()

        ss_res = np.sum((y - y_pred) ** 2)
        ss_tot = np.sum((y - np.mean(y)) ** 2)

        if ss_tot == 0:
            # R^2 is undefined here; a perfect fit should still score 1.
            return 1.0 if ss_res == 0 else 0.0

        return 1 - (ss_res / ss_tot)

Используя датасет про автомобили (целевой признак — price), сравнить (качество, скорость обучения и предсказания, важность признаков) модели `MyLinearRegression` с различными гиперпараметрами, сделать выводы. На этом же датасете сравнить модель `MyLinearRegression` с библиотечной реализацией из `sklearn`, составить таблицу(ы) (графики) результатов сравнения (качество, скорость обучения и предсказания, важность признаков).

## Задача 2

[Соревнование на Kaggle](https://kaggle.com/competitions/yadro-regression-2025)