In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPRegressor

In [2]:
# Load the prepared regression dataset; the relative path is resolved against the
# kernel's current working directory.
data_regression = pd.read_csv("data_reg_final.csv")

## Продолжение

In [3]:
# Preview the first rows to sanity-check the loaded columns and values.
data_regression.head()

Unnamed: 0,Year,Age,Transmission_Automatic,Engine_capacity,Distance,Transmission_Manual,Style_Crossover,Fuel_type_Diesel,Style_SUV,Make_Toyota,...,Make_Audi,Fuel_type_Metan/Propan,Model_M Class,Make_Hyundai,Model_Superb,Make_Honda,Model_GLC,Model_A6,Make_Ford,Price
0,2011.0,12.0,1.0,1800.0,195000.0,0.0,0.0,0.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,7750.0
1,2014.0,9.0,0.0,1500.0,135000.0,1.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,8550.0
2,2012.0,11.0,0.0,1500.0,110000.0,1.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6550.0
3,2006.0,17.0,0.0,1600.0,200000.0,1.0,0.0,0.0,0.0,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4100.0
4,2000.0,23.0,0.0,2200.0,300000.0,1.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3490.0


In [28]:
# Feature matrix: the first five columns, selected by position.
# NOTE(review): judging by the displayed training frame these appear to be Year, Age,
# Transmission_Automatic, Engine_capacity and Distance — confirm against the CSV.
X = data_regression.iloc[:, [0, 1, 2, 3, 4]]
# Regression target: the Price column.
# NOTE: both X and y are rebound later by the cell that trains the custom MLP.
y = data_regression["Price"]

In [29]:
# Hold out 20% of the rows for testing. random_state pins the shuffle so that a
# "Restart Kernel -> Run All" reproduces the same train/test partition.
X_regression_train, X_regression_test, y_regression_train, y_regression_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [30]:
# Display the training feature frame produced by the split above.
X_regression_train

Unnamed: 0,Year,Age,Transmission_Automatic,Engine_capacity,Distance
23931,2005.0,18.0,0.0,1600.0,233000.0
16875,2002.0,21.0,0.0,1800.0,280000.0
18067,2007.0,16.0,1.0,1364.0,176000.0
25252,2007.0,16.0,1.0,1591.0,184255.0
7799,2011.0,12.0,0.0,1496.0,166600.0
...,...,...,...,...,...
22747,2004.0,19.0,0.0,1300.0,123698.0
3092,1997.0,26.0,0.0,1400.0,210700.0
33308,2018.0,5.0,1.0,1995.0,24100.0
18974,2008.0,15.0,1.0,2500.0,170000.0


# Решение задачи регрессии

In [8]:
import pickle

class MLP_regression:
    """Fully connected feed-forward network trained with full-batch gradient descent.

    Each layer computes ``activation(x @ W + b)``. Recognised activation names are
    ``'sigmoid'``, ``'tanh'`` and ``'relu'``; any other name (e.g. ``'linear'``) is
    treated as the identity in both the forward and the backward pass.
    """

    def __init__(self, layer_sizes, activation_functions):
        """Create randomly initialised weights and biases.

        Args:
            layer_sizes: sequence of layer widths, input layer first, output layer last.
            activation_functions: one activation name per weight layer, i.e. at least
                ``len(layer_sizes) - 1`` entries.

        Raises:
            ValueError: if fewer activation names than weight layers are given
                (this would otherwise surface later as an IndexError in ``forward``).
        """
        n_layers = len(layer_sizes) - 1
        if len(activation_functions) < n_layers:
            raise ValueError(
                f"expected at least {n_layers} activation functions, "
                f"got {len(activation_functions)}"
            )
        self.layer_sizes = layer_sizes
        self.activation_functions = activation_functions
        self.weights = []
        self.biases = []
        self.activations = []
        for i in range(n_layers):
            # Standard-normal initialisation drawn from NumPy's global RNG;
            # seed it with np.random.seed(...) beforehand for reproducible runs.
            self.weights.append(np.random.randn(layer_sizes[i], layer_sizes[i + 1]))
            self.biases.append(np.random.randn(layer_sizes[i + 1]))
            # Placeholder only; overwritten with the real layer output by forward().
            self.activations.append(np.zeros(layer_sizes[i + 1]))

    def sigmoid(self, x):
        """Logistic function, evaluated without overflowing for large ``|x|``."""
        x = np.asarray(x, dtype=float)
        # exp(-|x|) never exceeds 1, so neither branch can overflow.
        decay = np.exp(-np.abs(x))
        return np.where(x >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))

    def tanh(self, x):
        """Hyperbolic tangent."""
        return np.tanh(x)

    def relu(self, x):
        """Rectified linear unit: element-wise ``max(0, x)``."""
        return np.maximum(0, x)

    def derivative(self, x, activation_function):
        """Derivative of an activation expressed in terms of its OUTPUT ``x``.

        ``x`` is the already-activated value (as cached by ``forward``), not the
        pre-activation. Unrecognised names yield a derivative of 1, matching the
        identity behaviour ``forward`` applies to them.
        """
        if activation_function == 'sigmoid':
            return x * (1 - x)
        if activation_function == 'tanh':
            return 1 - np.power(x, 2)
        if activation_function == 'relu':
            return (x > 0).astype(float)
        return np.ones_like(x, dtype=float)

    def forward(self, x):
        """Propagate ``x`` through every layer and return the network output.

        Each layer's output is cached in ``self.activations`` for use by ``backward``.
        """
        for i in range(len(self.weights)):
            x = np.dot(x, self.weights[i]) + self.biases[i]
            if self.activation_functions[i] == 'sigmoid':
                x = self.sigmoid(x)
            elif self.activation_functions[i] == 'tanh':
                x = self.tanh(x)
            elif self.activation_functions[i] == 'relu':
                x = self.relu(x)
            # Any other name leaves x unchanged (identity / linear layer).
            self.activations[i] = x
        return x

    def backward(self, x, y, learning_rate):
        """Run one forward pass and one gradient-descent update on ``(x, y)``.

        Gradients are summed over the batch (not averaged), so the effective step
        size grows with the number of samples.

        Args:
            x: inputs; a single 1-D sample is promoted to a batch of one.
            y: targets; reshaped to the prediction's shape when the element counts
                match, so a 1-D target cannot broadcast against an (n, 1) prediction.
            learning_rate: step size applied to the summed gradients.
        """
        x = np.atleast_2d(np.asarray(x, dtype=float))
        output = self.forward(x)
        y = np.asarray(y, dtype=float)
        if y.shape != output.shape and y.size == output.size:
            # Without this, (n, 1) - (n,) silently broadcasts to an (n, n) error matrix.
            y = y.reshape(output.shape)
        deltas = []
        for i in reversed(range(len(self.weights))):
            if i == len(self.weights) - 1:
                error = output - y
            else:
                # deltas[-1] is the delta of layer i + 1 (the list is built back to front).
                error = np.dot(deltas[-1], self.weights[i + 1].T)
            delta = error * self.derivative(self.activations[i], self.activation_functions[i])
            deltas.append(delta)
        deltas.reverse()
        # All deltas were computed with the pre-update weights, so updating in order is safe.
        for i in range(len(self.weights)):
            layer_input = x if i == 0 else self.activations[i - 1]
            self.weights[i] -= learning_rate * np.dot(layer_input.T, deltas[i])
            self.biases[i] -= learning_rate * np.sum(deltas[i], axis=0)

    def train(self, X, y, learning_rate, epochs):
        """Perform ``epochs`` full-batch updates on the whole of ``(X, y)``."""
        for epoch in range(epochs):
            self.backward(X, y, learning_rate)

    def r2_score(self, y_true, y_pred):
        """Coefficient of determination ``1 - SS_res / SS_tot``.

        ``y_pred`` is reshaped to ``y_true``'s shape when the element counts match,
        so mixing (n,) and (n, 1) arrays cannot broadcast into an (n, n) matrix.
        The result is not finite when ``y_true`` is constant (``SS_tot == 0``).
        """
        y_true = np.asarray(y_true, dtype=float)
        y_pred = np.asarray(y_pred, dtype=float)
        if y_pred.shape != y_true.shape and y_pred.size == y_true.size:
            y_pred = y_pred.reshape(y_true.shape)
        ss_res = np.sum(np.square(y_true - y_pred))
        ss_tot = np.sum(np.square(y_true - np.mean(y_true)))
        return 1 - ss_res / ss_tot

    def save_parameters(self, filename):
        """Pickle the current weights and biases to ``filename``."""
        parameters = {"weights": self.weights, "biases": self.biases}
        with open(filename, "wb") as file:
            pickle.dump(parameters, file)

    def load_parameters(self, filename):
        """Replace the weights and biases with those pickled in ``filename``.

        WARNING: unpickling can execute arbitrary code; only load files you trust.
        """
        with open(filename, "rb") as file:
            parameters = pickle.load(file)
        self.weights = parameters["weights"]
        self.biases = parameters["biases"]

# Регрессия

In [37]:
# Seed NumPy's global RNG so the random weight initialisation (and therefore the
# reported score) is reproducible across "Restart Kernel -> Run All".
np.random.seed(42)

data_cleaned = data_regression

# Trim price outliers: keep rows strictly between the 1st and 99th percentiles.
q_low = data_cleaned['Price'].quantile(0.01)
q_hi  = data_cleaned['Price'].quantile(0.99)

data_cleaned = data_cleaned[(data_cleaned['Price'] > q_low) & (data_cleaned['Price'] < q_hi)]
X = data_cleaned.drop(columns=['Price']).to_numpy()
y = data_cleaned['Price'].to_numpy().reshape(-1, 1)

# Standardise features and target. A column that became constant after the outlier
# filter has zero std; dividing by it would yield NaN and poison every weight, so
# such columns are scaled by 1 instead (they end up as all zeros).
X_std = X.std(axis=0)
X_std[X_std == 0] = 1.0
X = (X - X.mean(axis=0)) / X_std
y = (y - y.mean()) / y.std()

# Initialise and train the model
# NOTE(review): the output layer uses tanh, so predictions are confined to (-1, 1)
# while the standardised target is not limited to that range.
layer_sizes = [X.shape[1], 10, 5, 1]  # Example network architecture
activation_functions = ['tanh', 'relu', 'tanh']
mlp = MLP_regression(layer_sizes, activation_functions)
mlp.train(X, y, learning_rate=0.01, epochs=100)

In [39]:
# Predict on the same standardised X the model was trained on, so the score below
# is an in-sample (training) R^2, not a held-out estimate.
y_pred = mlp.forward(X)
r2 = mlp.r2_score(y, y_pred)
print(f"r2_score: {r2}")

r2_score: 0.22128962100231975
