In [3]:
import pandas as pd

In [31]:
from sklearn.preprocessing import MinMaxScaler

In [7]:
from sklearn.model_selection import train_test_split

In [36]:
import numpy as np

# Датасет

## Импорт

In [249]:
# File name of the CSV dataset; a bare name, so it is resolved against the
# current working directory.
PATH_TO_FILE: str = 'Mall_Customers.csv'

# Parse the CSV into the working DataFrame used by the rest of the notebook.
df = pd.read_csv(PATH_TO_FILE)

In [250]:
# Preview the first rows to sanity-check the parsed columns.
df.head()

Unnamed: 0,CustomerID,Genre,Age,Annual Income (k$),Spending Score (1-100)
0,1,Male,19,15,39
1,2,Male,21,15,81
2,3,Female,20,16,6
3,4,Female,23,16,77
4,5,Female,31,17,40


In [251]:
# Summarise column dtypes and non-null counts of the raw frame.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 200 entries, 0 to 199
Data columns (total 5 columns):
 #   Column                  Non-Null Count  Dtype 
---  ------                  --------------  ----- 
 0   CustomerID              200 non-null    int64 
 1   Genre                   200 non-null    object
 2   Age                     200 non-null    int64 
 3   Annual Income (k$)      200 non-null    int64 
 4   Spending Score (1-100)  200 non-null    int64 
dtypes: int64(4), object(1)
memory usage: 7.9+ KB


## Корректировка

In [252]:
# CustomerID is only a row identifier, so it is removed from the frame.
df = df.drop(columns=['CustomerID'])

### Преобразуем категориальные данные в числовые

In [253]:
# One-hot encode the categorical 'Genre' column. drop_first=True keeps a single
# indicator column (Genre_Male) instead of two redundant ones.
# The dtype is pinned explicitly: pandas >= 2.0 defaults to bool indicators,
# which can break the arithmetic min-max scaling applied later, whereas uint8
# reproduces the dtype shown in the recorded output on every pandas version.
df = pd.get_dummies(df, columns=['Genre'], drop_first=True, dtype='uint8')
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 200 entries, 0 to 199
Data columns (total 4 columns):
 #   Column                  Non-Null Count  Dtype
---  ------                  --------------  -----
 0   Age                     200 non-null    int64
 1   Annual Income (k$)      200 non-null    int64
 2   Spending Score (1-100)  200 non-null    int64
 3   Genre_Male              200 non-null    uint8
dtypes: int64(3), uint8(1)
memory usage: 5.0 KB


### Выделяем целевую переменную

In [254]:
# Target: the spending score column; features: every remaining column.
Y = df.loc[:, 'Spending Score (1-100)']
X = df.drop(columns=['Spending Score (1-100)'])

### Нормализация

In [255]:
# Min-max scale every feature column and the target into the [0, 1] range.
# NOTE(review): the min/max statistics are taken over the full dataset, i.e.
# before the train/test split below, so test rows influence the scaling.
X = X.sub(X.min()).div(X.max().sub(X.min()))
Y = Y.sub(Y.min()).div(Y.max() - Y.min())

## Разделение на обучающую и тестовую выборки

In [256]:
# Hold out 20% of the rows for testing; the fixed random_state makes the
# shuffle reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=42,
)

In [257]:
# Check the size and dtypes of the training feature matrix.
X_train.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 160 entries, 79 to 102
Data columns (total 3 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   Age                 160 non-null    float64
 1   Annual Income (k$)  160 non-null    float64
 2   Genre_Male          160 non-null    float64
dtypes: float64(3)
memory usage: 5.0 KB


In [258]:
# Check the size and dtype of the training target.
y_train.info()

<class 'pandas.core.series.Series'>
Int64Index: 160 entries, 79 to 102
Series name: Spending Score (1-100)
Non-Null Count  Dtype  
--------------  -----  
160 non-null    float64
dtypes: float64(1)
memory usage: 2.5 KB


# Создать нейронную сеть с нуля, т.е. не используя готовые библиотеки.


### Нейрон

In [259]:
class SimpleNeuralNetwork:
    """Feed-forward network with one hidden layer, written with NumPy only.

    Both layers use the sigmoid activation. Training is full-batch gradient
    descent on the squared error; gradients are summed (not averaged) over
    the batch before being scaled by ``learning_rate``.
    """

    def __init__(self, input_size, hidden_size, output_size, learning_rate=0.01, seed=None):
        """Create the network and randomly initialise its parameters.

        Args:
            input_size: Number of input features.
            hidden_size: Number of units in the hidden layer.
            output_size: Number of output units.
            learning_rate: Step size applied to every parameter update.
            seed: Optional seed for a private random generator. With the
                default ``None`` the parameters are drawn from NumPy's global
                random state, exactly as before this parameter existed.
        """
        # Network hyper-parameters
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.learning_rate = learning_rate

        # Weights and biases, drawn uniformly from [0, 1). The draw order is
        # kept stable so seeded runs are reproducible.
        rng = np.random if seed is None else np.random.RandomState(seed)
        self.weights_input_hidden = rng.rand(self.input_size, self.hidden_size)
        self.bias_hidden = rng.rand(self.hidden_size)
        self.weights_hidden_output = rng.rand(self.hidden_size, self.output_size)
        self.bias_output = rng.rand(self.output_size)

    def _as_targets(self, Y, n_samples):
        """Return ``Y`` as a float array of shape ``(n_samples, output_size)``.

        A 1-D target vector would otherwise broadcast against the 2-D network
        output into an ``(n, n)`` matrix instead of an element-wise difference.

        Raises:
            ValueError: If ``Y`` does not hold ``n_samples * output_size`` values.
        """
        return np.asarray(Y, dtype=float).reshape(n_samples, self.output_size)

    def activation(self, x):
        """Sigmoid activation, applied element-wise."""
        return 1 / (1 + np.exp(-x))

    def activation_derivative(self, x):
        """Sigmoid derivative expressed through the sigmoid's own output.

        ``x`` must already be an activation value ``s = sigmoid(z)`` (this is
        how ``backward`` calls it), not the pre-activation ``z``; the result
        is ``s * (1 - s)``.
        """
        return x * (1 - x)

    def forward(self, X):
        """Run a forward pass and return the output-layer activations.

        The intermediate layer inputs/outputs are stored on the instance
        because ``backward`` reads ``hidden_layer_output`` from the most
        recent forward pass.
        """
        X = np.asarray(X, dtype=float)
        self.hidden_layer_input = np.dot(X, self.weights_input_hidden) + self.bias_hidden
        self.hidden_layer_output = self.activation(self.hidden_layer_input)
        self.output_layer_input = np.dot(self.hidden_layer_output, self.weights_hidden_output) + self.bias_output
        self.output = self.activation(self.output_layer_input)
        return self.output

    def backward(self, X, Y, output):
        """Back-propagate the error and update all weights and biases in place.

        Args:
            X: Inputs that produced ``output`` in the latest ``forward`` call.
            Y: Targets; either ``(n_samples, output_size)`` or, for a single
                output unit, a flat vector of length ``n_samples``.
            output: Network output returned by ``forward`` for ``X``.
        """
        X = np.asarray(X, dtype=float)
        Y = self._as_targets(Y, np.shape(output)[0])

        output_error = Y - output
        output_delta = output_error * self.activation_derivative(output)

        # The hidden error must use the hidden->output weights *before* they
        # are updated below.
        hidden_layer_error = output_delta.dot(self.weights_hidden_output.T)
        hidden_layer_delta = hidden_layer_error * self.activation_derivative(self.hidden_layer_output)

        # error = Y - output, so adding delta-based terms descends the squared error
        self.weights_hidden_output += self.hidden_layer_output.T.dot(output_delta) * self.learning_rate
        self.bias_output += np.sum(output_delta, axis=0) * self.learning_rate
        self.weights_input_hidden += X.T.dot(hidden_layer_delta) * self.learning_rate
        self.bias_hidden += np.sum(hidden_layer_delta, axis=0) * self.learning_rate

    def train(self, X, Y, epochs, log_every=1000):
        """Train on the full batch ``(X, Y)`` for ``epochs`` iterations.

        Args:
            X: Input matrix of shape ``(n_samples, input_size)``.
            Y: Targets; ``(n_samples, output_size)`` or a flat vector when the
                network has a single output unit.
            epochs: Number of forward/backward passes.
            log_every: Print the mean squared error every ``log_every``
                epochs; ``0`` or ``None`` disables logging.

        The reported loss is computed from the output produced *before* that
        epoch's parameter update.
        """
        X = np.asarray(X, dtype=float)
        Y = self._as_targets(Y, X.shape[0])

        for epoch in range(epochs):
            output = self.forward(X)
            self.backward(X, Y, output)

            if log_every and epoch % log_every == 0:
                loss = np.mean(np.square(Y - output))
                print(f'Epoch {epoch}, Loss: {loss}')

    def predict(self, X, y=None):
        """Predict for ``X`` and return the results as a DataFrame.

        Args:
            X: Input matrix of shape ``(n_samples, input_size)``.
            y: Optional true targets to show next to the predictions.

        Returns:
            DataFrame with a ``'Predicted'`` column (first output unit) and,
            when ``y`` is given, a ``'Real'`` column.
        """
        pre = np.atleast_2d(self.forward(X))
        res = pd.DataFrame({'Predicted': pre[:, 0]})
        if y is not None:
            # Strip any pandas index: assigning a Series would align on index
            # labels rather than row position and silently produce NaNs.
            res['Real'] = np.asarray(y).ravel()
        return res

In [260]:
# Seed NumPy's global RNG before the weights are drawn so that the training
# log and the predictions below are reproducible across kernel restarts.
np.random.seed(42)

# Three inputs (one per feature column), 5 hidden units, 1 output.
nn = SimpleNeuralNetwork(X_train.shape[1], 5, 1)

In [261]:
# Train on plain NumPy arrays; the target is reshaped into a column vector so
# it lines up element-wise with the network's (n_samples, 1) output.
nn.train(
    X_train.to_numpy(),
    y_train.to_numpy().reshape(-1, 1),
    epochs=10000,
)

Epoch 0, Loss: 0.24865560794591532
Epoch 1000, Loss: 0.0659215031398039
Epoch 2000, Loss: 0.06439140339883875
Epoch 3000, Loss: 0.064082659327283
Epoch 4000, Loss: 0.06385450383783131
Epoch 5000, Loss: 0.06373003140877868
Epoch 6000, Loss: 0.06366019569549079
Epoch 7000, Loss: 0.0636077865928576
Epoch 8000, Loss: 0.06355416397866082
Epoch 9000, Loss: 0.06349042361778583


## Тестирование

In [262]:
# Predict on the held-out rows; the result table shows each prediction next to
# the true (scaled) spending score.
predictions = nn.predict(X_test.to_numpy(), y_test.to_numpy())
predictions

Unnamed: 0,Predicted,Real
0,0.580335,0.520408
1,0.574298,0.795918
2,0.380462,0.030612
3,0.550698,0.0
4,0.432224,0.102041
5,0.694773,0.5
6,0.568081,0.469388
7,0.532942,0.122449
8,0.426684,0.122449
9,0.634897,0.653061
