In [9]:
import math
from typing import Any, Dict, List

import numpy as np
from sklearn.metrics import mean_absolute_error, mean_squared_error, accuracy_score, f1_score

# Error functions

In [10]:
class MY_Len_Exception(Exception):
    """
    Project-specific exception type.

    The metric functions in this notebook raise it when the predicted and
    the true data have different lengths; gradient_descent also raises it
    for functions it considers linear.

    Parameters:
    message - text of the error; a generic default is used when omitted.
    """

    def __init__(self, message="Ошибка"):
        # Explicit two-argument super(): same MRO-based dispatch as super().
        super(MY_Len_Exception, self).__init__(message)

def mae(model_data: List[float], ideal_data: List[float]) -> float:
    """
    MAE (Mean Absolute Error): the average of |true - predicted|.

    Parameters:
    model_data - sequence of values predicted by the model.
    ideal_data - sequence of true values (same length as model_data).

    Returns:
    MAE value.

    Raises:
    MY_Len_Exception - if the two sequences have different lengths.
    ValueError - if the sequences are empty (the mean is undefined).
    """
    if len(model_data) != len(ideal_data):
        raise MY_Len_Exception("Размер данных модели и правильных данных не совпадают")

    n = len(model_data)
    if n == 0:
        # Without this guard the division below fails with a bare ZeroDivisionError.
        raise ValueError("Невозможно вычислить метрику для пустых данных")

    error_sum = sum(abs(truth - predicted) for predicted, truth in zip(model_data, ideal_data))
    return error_sum / n


In [21]:
def mse(model_data: List[float], ideal_data: List[float]) -> float:
    """
    MSE (Mean Squared Error): the average of (true - predicted)**2.

    Parameters:
    model_data - sequence of values predicted by the model.
    ideal_data - sequence of true values (same length as model_data).

    Returns:
    MSE value.

    Raises:
    MY_Len_Exception - if the two sequences have different lengths.
    ValueError - if the sequences are empty (the mean is undefined).
    """
    if len(model_data) != len(ideal_data):
        # Same explicit message as the other metric functions, instead of the
        # generic default text of the exception class.
        raise MY_Len_Exception("Размер данных модели и правильных данных не совпадают")

    n = len(model_data)
    if n == 0:
        # Without this guard the division below fails with a bare ZeroDivisionError.
        raise ValueError("Невозможно вычислить метрику для пустых данных")

    error_sum = sum((truth - predicted) ** 2 for predicted, truth in zip(model_data, ideal_data))
    return error_sum / n

In [22]:
def accuracy_manual(model_data: List[Any], true_data: List[Any]) -> float:
    """
    Accuracy: the share of positions where the prediction equals the true value.

    Parameters:
    model_data - sequence of values predicted by the model.
    true_data - sequence of true values (same length as model_data).

    Returns:
    The accuracy value, between 0 and 1.

    Raises:
    MY_Len_Exception - if the two sequences have different lengths.
    ValueError - if the sequences are empty (the ratio is undefined).
    """
    if len(model_data) != len(true_data):
        raise MY_Len_Exception("Размер данных модели и правильных данных не совпадают")

    n = len(model_data)  # total number of objects
    if n == 0:
        # Without this guard the division below fails with a bare ZeroDivisionError.
        raise ValueError("Невозможно вычислить метрику для пустых данных")

    matches = sum(1 for predicted, truth in zip(model_data, true_data) if predicted == truth)
    return matches / n

In [23]:
def f1_score_manual(model_data: List[int], true_data: List[int]) -> tuple:
    """
    F1-score for binary labels, together with precision and recall.

    Parameters:
    model_data - sequence of predicted labels (0 and 1).
    true_data - sequence of true labels (0 and 1).

    Returns:
    (F1, Precision, Recall); a ratio whose denominator is zero is reported as 0.

    Raises:
    MY_Len_Exception - if the two sequences have different lengths.
    """
    if len(model_data) != len(true_data):
        raise MY_Len_Exception("Размер данных модели и правильных данных не совпадают")

    # One pass over the pairs; pairs that match none of the three cases
    # (true negatives, or labels other than 0/1) are not counted.
    true_pos = false_pos = false_neg = 0
    for predicted, actual in zip(model_data, true_data):
        if predicted == 1 and actual == 1:
            true_pos += 1
        elif predicted == 1 and actual == 0:
            false_pos += 1
        elif predicted == 0 and actual == 1:
            false_neg += 1

    predicted_pos = true_pos + false_pos
    actual_pos = true_pos + false_neg

    # Each ratio falls back to 0 when its denominator is zero.
    if predicted_pos > 0:
        precision = true_pos / predicted_pos
    else:
        precision = 0

    if actual_pos > 0:
        recall = true_pos / actual_pos
    else:
        recall = 0

    pr_sum = precision + recall
    if pr_sum > 0:
        f1 = 2 * (precision * recall) / pr_sum
    else:
        f1 = 0

    return f1, precision, recall

## Calculating metrics

### Generating data for the regression metrics (MAE, MSE)

In [5]:
# Reference ("ideal") targets: 20 draws from the uniform range 10-50.
# The seed is fixed so that re-running the cell reproduces the same numbers.
np.random.seed(42)
ideal_values = np.random.uniform(low=10, high=50, size=20)

# Simulated predictions: the targets plus Gaussian noise (mean 0, std 5).
model_values = ideal_values + np.random.normal(loc=0, scale=5, size=20)

print(f'ideal_values: {ideal_values}')
print(f'model_values: {model_values}')

ideal_values: [24.98160475 48.02857226 39.27975767 33.94633937 16.24074562 16.23978081
 12.32334449 44.64704583 34.04460047 38.32290311 10.82337977 48.79639409
 43.29770563 18.49356443 17.27299869 17.33618039 22.16968972 30.99025727
 27.27780075 21.64916561]
model_values: [19.91744915 49.59980892 34.73963729 26.88482086 23.56898946 15.11089931
 12.66098551 37.5233049  31.32268685 38.87751606  5.06841188 50.67488418
 40.29451218 17.03509568 14.26446563 26.59757132 22.10220359 25.70170262
 31.39052531 15.54494736]


### Calculating MAE

In [7]:
# Hand-written MAE next to the scikit-learn reference on the same arrays.
mae__manual_value = mae(model_values, ideal_values)
mae_sklearn = mean_absolute_error(y_true=ideal_values, y_pred=model_values)

print(f"mae__manual_value: {mae__manual_value:.4f}")
print(f"mae_sklearn: {mae_sklearn:.4f}")

mae__manual_value: 3.8685
mae_sklearn: 3.8685


### Calculating MSE

In [10]:
# Hand-written MSE next to the scikit-learn reference on the same arrays.
mse__manual_value = mse(model_values, ideal_values)
mse_sklearn = mean_squared_error(y_true=ideal_values, y_pred=model_values)

print(f"mse__manual_value: {mse__manual_value:.4f}")
print(f"mse_sklearn: {mse_sklearn:.4f}")

mse__manual_value: 21.8458
mse_sklearn: 21.8458


### Generating data for the classification metrics (Accuracy, F1)

In [16]:
# True binary labels: 20 draws of 0/1 with equal probability (seed fixed).
np.random.seed(42)
true_values = np.random.choice([0, 1], size=20, p=[0.5, 0.5])

# Pick 5 distinct positions where the simulated model will be wrong.
flip_indices = np.random.choice(true_values.size, size=5, replace=False)

# Predicted labels: a copy of the truth with the chosen positions inverted
# (XOR with 1 turns 0 into 1 and 1 into 0).
model_values = true_values.copy()
model_values[flip_indices] ^= 1

print(f'true_values: {true_values}')
print(f'model_values: {model_values}')


true_values: [0 1 1 1 0 0 0 1 1 1 0 1 1 0 0 0 0 1 0 0]
model_values: [1 0 1 1 0 1 0 0 1 1 0 1 1 0 0 0 0 1 1 0]


### Calculating Accuracy

In [16]:
# Compare the hand-written accuracy with scikit-learn on the labels produced
# by the classification data cell. The cell previously read y_pred / y_true,
# which are not defined anywhere in the notebook and broke "Restart & Run All".
accuracy_manual_value = accuracy_manual(model_data=model_values, true_data=true_values)
accuracy = accuracy_score(true_values, model_values)

print(f"Accuracy: {accuracy:.4f}")
print(f"accuracy_manual_value: {accuracy_manual_value:.4f}")

Accuracy: 0.7500
accuracy_manual_value: 0.7500


### F1-score

In [19]:
# Compare the hand-written F1 with scikit-learn on the labels produced by the
# classification data cell. The cell previously read y_pred / y_true, which
# are not defined anywhere in the notebook, and recomputed an unused accuracy.
f1 = f1_score(true_values, model_values)
f1_score_value, precision, recall = f1_score_manual(model_data=model_values, true_data=true_values)

print(f"F1-score: {f1:.4f}")
print(f"f1_score_value: {f1_score_value:.4f} precision {precision:.4f} recall {recall:.4f}")

F1-score: 0.7368
f1_score_value: 0.7368 precision 0.7000 recall 0.7778


## Gradient Descent

In [6]:
import sympy as sp
import random

def gradient_descent(func_str: str, start: tuple = None, eta: float = 0.1, lambda_decay: float = 0.01, iterations: int = 1000):
    """
    Gradient descent with a decaying, gradient-damped step and optional
    automatic selection of the starting point.

    Parameters:
    func_str - function as a string (e.g. "x**2 + y**2"). Only the letters
               "x" and "y" may appear; every other letter is rejected, which
               also rules out named functions such as "sin".
    start - initial coordinates (x, y) or (x,) for one variable, one value per
            variable found in func_str. If None, it is selected automatically:
            the first real, fully numeric solution of "gradient = 0" is used,
            otherwise a random point in [-10, 10] per variable.
    eta - initial learning rate.
    lambda_decay - step decrease factor.
    iterations - number of iterations.

    Returns:
    Tuple with the coordinates of the final point, ordered x then y.

    Raises:
    ValueError - if func_str contains letters other than x / y, contains no
                 variable at all, or start has the wrong number of coordinates.
    MY_Len_Exception - if the degree of the function is at most 1 in every
                       variable (this includes products such as x*y).

    Notes:
    - With an automatic start the point is a critical point, where the
      gradient is zero, so the update loop leaves it unchanged. The critical
      point is not checked to be a minimum (it may be a saddle or a maximum).
    - sp.sympify parses the string by evaluating it; the letter filter below
      limits what can reach it, but untrusted input should still be avoided.
    """
    # Every alphabetic character is treated as a one-letter variable name.
    letters = set(filter(str.isalpha, func_str))
    allowed_symbols = {"x", "y"}

    # Check that there are no extra variables in the string
    extra_symbols = letters - allowed_symbols
    if extra_symbols:
        # sorted() keeps the message deterministic (set order is arbitrary)
        raise ValueError(f"Функция содержит недопустимые переменные: {', '.join(sorted(extra_symbols))}")

    # Variable names actually used (x, y or just one of them), in a fixed order
    names = sorted(letters & allowed_symbols)
    if not names:
        raise ValueError("Функция не содержит переменных (x или y).")

    # Fail fast on a start point of the wrong dimension; otherwise the update
    # loop would hit an IndexError or silently keep symbolic values.
    if start is not None and len(start) != len(names):
        raise ValueError(
            f"Размерность начальной точки ({len(start)}) не совпадает с числом переменных ({len(names)})"
        )

    variables = [sp.Symbol(name) for name in names]
    f = sp.sympify(func_str)  # Convert the string to a sympy expression

    # Require a degree above 1 in at least one variable
    if all(sp.degree(f, var) <= 1 for var in variables):
        raise MY_Len_Exception("Функция линейная и не имеет минимума!")

    # Partial derivatives, one per variable
    gradients = [sp.diff(f, var) for var in variables]

    # If the starting point is not passed, try the critical points first
    if start is None:
        for solution in sp.solve(gradients, variables, dict=True):
            coords = [solution.get(var) for var in variables]
            # Skip parametric solutions (a variable missing or left symbolic)
            # and complex ones: they cannot serve as a numeric start point.
            if all(c is not None and c.is_number and c.is_real for c in coords):
                start = tuple(coords)
                break
        else:
            start = tuple(random.uniform(-10, 10) for _ in variables)

    point = list(start)

    for k in range(1, iterations + 1):
        substitutions = dict(zip(variables, point))
        grad_values = [grad.subs(substitutions).evalf() for grad in gradients]

        # Sum of the absolute values of the gradient components
        grad_norm = sum(abs(g) for g in grad_values)

        # Adaptive step: shrinks with the iteration number and with the gradient size
        alpha = eta / (1 + lambda_decay * k + grad_norm)

        # Updating the point
        point = [coord - alpha * g for coord, g in zip(point, grad_values)]

    return tuple(point)


In [57]:
# Two-variable example with an explicit start (3, 5) and only 100 iterations.
gradient_descent("x**3 +2*x*y +y**2", start=(3, 5), eta=0.1, lambda_decay=0.01, iterations=100)

(-3.00140672583879, 1.97859555749400)

In [60]:
# Same function and start, but with the defaults (eta=0.1, lambda_decay=0.01,
# iterations=1000). x**3 is unbounded below as x -> -inf, so this function has
# no global minimum; the recorded result shows x drifting far into the negatives.
gradient_descent("x**3 +2*x*y +y**2", start=(3, 5))

(-90.9571855963997, 3.72908027013101)

In [58]:
# No start given: gradient_descent solves "gradient = 0" and starts there.
# The gradient is zero at such a point, so the updates do not move it.
# The recorded (0, 0) is a saddle of this function (Hessian determinant -4),
# not a minimum.
gradient_descent("x**3 +2*x*y +y**2", eta=0.1, lambda_decay=0.01, iterations=100)

(0, 0)

In [61]:
# Automatic start with all defaults (1000 iterations): the start is again a
# critical point with zero gradient, so the number of iterations has no effect.
gradient_descent("x**3 +2*x*y +y**2")

(0, 0)

In [59]:
# One-variable example: start is a 1-tuple; the recorded result is close to
# the minimum of x**2 at x = 0.
gradient_descent("x**2", start=(2,), eta=0.1, lambda_decay=0.01, iterations=100)

(3.49822188055820e-5,)

In [11]:
# gradient_descent rejects functions of degree <= 1 with MY_Len_Exception.
# The error is the point of this cell, so catch and display it instead of
# letting it stop "Restart & Run All".
try:
    gradient_descent("5*x + 6")
except MY_Len_Exception as exc:
    print(f"{type(exc).__name__}: {exc}")

MY_Len_Exception: Функция линейная и не имеет минимума!

In [17]:
import numpy as np

def linear_regression_gd(X, y, lr=0.1, lambda_decay=0.01, iterations=1000):
    """
    Linear regression fitted by gradient descent on the mean squared error,
    with a learning rate that decays with the iteration number.

    Parameters:
    X - feature matrix, array-like of shape (n_samples, n_features)
    y - target values, array-like with n_samples elements; a column vector of
        shape (n_samples, 1) is flattened
    lr - base learning rate
    lambda_decay - decay coefficient: iteration k uses lr / (1 + lambda_decay * k)
    iterations - number of iterations

    Returns:
    w - vector of model weights (coefficients), shape (n_features,)
    b - intercept

    Raises:
    ValueError - if X is not 2-D, contains no samples, or y does not have
                 exactly n_samples elements.
    """
    # Accept lists as well as arrays, and flatten y so that a column vector
    # does not broadcast into an (n_samples, n_samples) error matrix.
    X = np.asarray(X, dtype=float)
    y = np.asarray(y, dtype=float).ravel()

    if X.ndim != 2:
        raise ValueError("X должен быть двумерным массивом (n_samples, n_features)")
    n_samples, n_features = X.shape
    if n_samples == 0:
        raise ValueError("X не содержит ни одного образца")
    if y.shape[0] != n_samples:
        raise ValueError("Число элементов y не совпадает с числом строк X")

    # Initialise the coefficients
    w = np.zeros(n_features)
    b = 0.0

    for k in range(1, iterations + 1):
        # Residuals of the current predictions
        error = np.dot(X, w) + b - y

        # Gradients of the MSE loss
        dw = (2 / n_samples) * np.dot(X.T, error)  # with respect to w
        db = (2 / n_samples) * np.sum(error)       # with respect to b

        # Decaying step; it depends only on k (the unused gradient-norm
        # computation was removed, the step formula itself is unchanged).
        alpha = lr / (1 + lambda_decay * k)

        # Parameter update
        w -= alpha * dw
        b -= alpha * db

    return w, b


In [18]:
# Synthetic data set: 100 samples with 2 features, each scaled to [0, 10)
np.random.seed(42)
X = 10 * np.random.rand(100, 2)
# Targets: linear dependence (weights 3 and 5, intercept 7) plus Gaussian noise
y = X[:, 0] * 3 + X[:, 1] * 5 + 7 + 2 * np.random.randn(100)

# Fit the model with the default hyper-parameters
w, b = linear_regression_gd(X, y)

print("Коэффициенты:", w)
print("Свободный член:", b)

Коэффициенты: [3.07596028 5.08049569]
Свободный член: 6.443772469364218
