In [3]:
import pandas as pd

In [31]:
from sklearn.preprocessing import MinMaxScaler

In [7]:
from sklearn.model_selection import train_test_split

In [36]:
import numpy as np

# Датасет

## Импорт

In [209]:
PATH_TO_FILE: str = 'Mall_Customers.csv'

df = pd.read_csv(PATH_TO_FILE)

In [210]:
df.head()

Unnamed: 0,CustomerID,Genre,Age,Annual Income (k$),Spending Score (1-100)
0,1,Male,19,15,39
1,2,Male,21,15,81
2,3,Female,20,16,6
3,4,Female,23,16,77
4,5,Female,31,17,40


In [211]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 200 entries, 0 to 199
Data columns (total 5 columns):
 #   Column                  Non-Null Count  Dtype 
---  ------                  --------------  ----- 
 0   CustomerID              200 non-null    int64 
 1   Genre                   200 non-null    object
 2   Age                     200 non-null    int64 
 3   Annual Income (k$)      200 non-null    int64 
 4   Spending Score (1-100)  200 non-null    int64 
dtypes: int64(4), object(1)
memory usage: 7.9+ KB


## Коректировка

### Преобразуем категориальные данные в числовые

In [212]:
df = pd.get_dummies(df, columns=['Genre'], drop_first=True)
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 200 entries, 0 to 199
Data columns (total 5 columns):
 #   Column                  Non-Null Count  Dtype
---  ------                  --------------  -----
 0   CustomerID              200 non-null    int64
 1   Age                     200 non-null    int64
 2   Annual Income (k$)      200 non-null    int64
 3   Spending Score (1-100)  200 non-null    int64
 4   Genre_Male              200 non-null    uint8
dtypes: int64(4), uint8(1)
memory usage: 6.6 KB


### Выделяем целевую переменную

In [219]:
X = df.drop('Spending Score (1-100)', axis=1)
Y = df['Spending Score (1-100)']

### Нормализация

In [220]:
# Нормализация данных
X = X_train
Y = y_train
X = (X - X.min()) / (X.max() - X.min())
Y = (Y - Y.min()) / (Y.max() - Y.min())

## Разделение на обучающую и тестовую выборки

In [221]:
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

In [222]:
X_train.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 128 entries, 4 to 185
Data columns (total 4 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   CustomerID          128 non-null    float64
 1   Age                 128 non-null    float64
 2   Annual Income (k$)  128 non-null    float64
 3   Genre_Male          128 non-null    float64
dtypes: float64(4)
memory usage: 5.0 KB


In [223]:
y_train.info()

<class 'pandas.core.series.Series'>
Int64Index: 128 entries, 4 to 185
Series name: Spending Score (1-100)
Non-Null Count  Dtype  
--------------  -----  
128 non-null    float64
dtypes: float64(1)
memory usage: 2.0 KB


# Создать нейронную сеть с нуля, т.е. не используя готовые библиотеки.


### Нейрон

In [224]:
class SimpleNeuralNetwork:
    def __init__(self, input_size, hidden_size, output_size, learning_rate=0.01):
        # Инициализация параметров сети
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.learning_rate = learning_rate

        # Веса и смещения
        self.weights_input_hidden = np.random.rand(self.input_size, self.hidden_size)
        self.bias_hidden = np.random.rand(self.hidden_size)
        self.weights_hidden_output = np.random.rand(self.hidden_size, self.output_size)
        self.bias_output = np.random.rand(self.output_size)

    def activation(self, x):
        # Используем сигмоидную функцию активации
        return 1 / (1 + np.exp(-x))

    def activation_derivative(self, x):
        # Производная сигмоидной функции активации
        return x * (1 - x)

    def forward(self, X):
        # Прямое распространение
        self.hidden_layer_input = np.dot(X, self.weights_input_hidden) + self.bias_hidden
        self.hidden_layer_output = self.activation(self.hidden_layer_input)
        self.output_layer_input = np.dot(self.hidden_layer_output, self.weights_hidden_output) + self.bias_output
        self.output = self.activation(self.output_layer_input)
        return self.output

    def backward(self, X, Y, output):
        # Обратное распространение ошибки
        output_error = Y - output
        output_delta = output_error * self.activation_derivative(output)

        hidden_layer_error = output_delta.dot(self.weights_hidden_output.T)
        hidden_layer_delta = hidden_layer_error * self.activation_derivative(self.hidden_layer_output)

        # Обновление весов и смещений
        self.weights_hidden_output += self.hidden_layer_output.T.dot(output_delta) * self.learning_rate
        self.bias_output += np.sum(output_delta, axis=0) * self.learning_rate
        self.weights_input_hidden += X.T.dot(hidden_layer_delta) * self.learning_rate
        self.bias_hidden += np.sum(hidden_layer_delta, axis=0) * self.learning_rate

    def train(self, X, Y, epochs):
        # Обучение сети
        for epoch in range(epochs):
            output = self.forward(X)
            self.backward(X, Y, output)

            if epoch % 1000 == 0:
                loss = np.mean(np.square(Y - output))
                print(f'Epoch {epoch}, Loss: {loss}')

    def predict(self, X, y):
        pre = self.forward(X)
        res = pd.DataFrame()
        res['Predicted'] = [p[0] for p in pre]
        res['Real'] = y
        return res

In [225]:
nn = SimpleNeuralNetwork(X_train.shape[1], 5, 1)

In [226]:
# # Инициализация и обучение нейронной сети
# input_size = X.shape[1]
# hidden_size = 5  # количество нейронов в скрытом слое
# output_size = 1  # для регрессии


nn.train(X_train.values, y_train.values.reshape(-1, 1), epochs=10000)

Epoch 0, Loss: 0.1944520811844379
Epoch 1000, Loss: 0.06270934431470657
Epoch 2000, Loss: 0.06177018236724931
Epoch 3000, Loss: 0.06148005936302501
Epoch 4000, Loss: 0.06121412272360739
Epoch 5000, Loss: 0.06098014805767147
Epoch 6000, Loss: 0.06079104296192475
Epoch 7000, Loss: 0.06064788100542891
Epoch 8000, Loss: 0.060542323930669176
Epoch 9000, Loss: 0.06046230617889395


## Тестирование

In [227]:
predictions = nn.predict(X_test.values, y_test.values)
predictions

Unnamed: 0,Predicted,Real
0,0.384761,0.132653
1,0.574683,0.408163
2,0.567466,0.540816
3,0.545095,0.153061
4,0.556481,0.612245
5,0.539465,0.061224
6,0.441806,0.132653
7,0.51195,0.938776
8,0.603099,0.316327
9,0.504838,0.908163
