In [3]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

# Загрузка данных
data_path = "data/"
sales = pd.read_csv(data_path + 'sales_train.csv')
items = pd.read_csv(data_path + 'items.csv')
item_categories = pd.read_csv(data_path + 'item_categories.csv')
shops = pd.read_csv(data_path + 'shops.csv')

# Агрегация данных по месяцам
sales['date'] = pd.to_datetime(sales['date'], format='%d.%m.%Y')
sales['month'] = sales['date'].dt.to_period('M')
monthly_sales = sales.groupby(['shop_id', 'item_id', 'month']).agg(
    {'item_cnt_day': 'sum'}
).reset_index()
monthly_sales.rename(columns={'item_cnt_day': 'item_cnt_month'}, inplace=True)

# Удаление выбросов
monthly_sales = monthly_sales[(monthly_sales['item_cnt_month'] > 0) & (monthly_sales['item_cnt_month'] < 1000)]


  from pandas.core import (


In [4]:
# Присваиваем уникальные индексы пользователям (магазины) и товарам
shop_mapping = {shop: idx for idx, shop in enumerate(sales['shop_id'].unique())}
item_mapping = {item: idx for idx, item in enumerate(sales['item_id'].unique())}

monthly_sales['shop_id'] = monthly_sales['shop_id'].map(shop_mapping)
monthly_sales['item_id'] = monthly_sales['item_id'].map(item_mapping)

# Нормализация продаж
monthly_sales['item_cnt_month'] = np.log1p(monthly_sales['item_cnt_month'])

# Разделение данных на обучающую и тестовую выборки
train_data, test_data = train_test_split(monthly_sales, test_size=0.2, random_state=42)


In [5]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Embedding, Flatten, Dense, Concatenate
from tensorflow.keras.optimizers import Adam

In [6]:
# Параметры
num_shops = len(shop_mapping)
num_items = len(item_mapping)
embedding_dim = 50

# Входные данные
shop_input = Input(shape=(1,), name='shop_input')
item_input = Input(shape=(1,), name='item_input')

# Эмбеддинги
shop_embedding = Embedding(input_dim=num_shops, output_dim=embedding_dim, name='shop_embedding')(shop_input)
item_embedding = Embedding(input_dim=num_items, output_dim=embedding_dim, name='item_embedding')(item_input)

# Уплощение
shop_vector = Flatten()(shop_embedding)
item_vector = Flatten()(item_embedding)

# Объединение и полносвязные слои
concat = Concatenate()([shop_vector, item_vector])
dense_1 = Dense(128, activation='relu')(concat)
dense_2 = Dense(64, activation='relu')(dense_1)
output = Dense(1, activation='linear')(dense_2)

# Модель
ncf_model = Model(inputs=[shop_input, item_input], outputs=output)
ncf_model.compile(optimizer=Adam(learning_rate=0.001), loss='mse', metrics=['mae'])

ncf_model.summary()


In [7]:
    # Подготовка входных данных
    train_shops = train_data['shop_id'].values
    train_items = train_data['item_id'].values
    train_sales = train_data['item_cnt_month'].values

    test_shops = test_data['shop_id'].values
    test_items = test_data['item_id'].values
    test_sales = test_data['item_cnt_month'].values

    # Обучение модели
    ncf_model.fit(
        [train_shops, train_items],
        train_sales,
        validation_data=([test_shops, test_items], test_sales),
        epochs=10,
        batch_size=256,
        verbose=1
    )


Epoch 1/10
[1m5018/5018[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 8ms/step - loss: 0.1825 - mae: 0.2867 - val_loss: 0.1472 - val_mae: 0.2617
Epoch 2/10
[1m5018/5018[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 7ms/step - loss: 0.1444 - mae: 0.2519 - val_loss: 0.1432 - val_mae: 0.2494
Epoch 3/10
[1m5018/5018[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 7ms/step - loss: 0.1386 - mae: 0.2447 - val_loss: 0.1427 - val_mae: 0.2554
Epoch 4/10
[1m5018/5018[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 7ms/step - loss: 0.1355 - mae: 0.2408 - val_loss: 0.1421 - val_mae: 0.2455
Epoch 5/10
[1m5018/5018[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 7ms/step - loss: 0.1326 - mae: 0.2371 - val_loss: 0.1435 - val_mae: 0.2571
Epoch 6/10
[1m5018/5018[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 8ms/step - loss: 0.1308 - mae: 0.2348 - val_loss: 0.1441 - val_mae: 0.2580
Epoch 7/10
[1m5018/5018[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [

<keras.src.callbacks.history.History at 0x19588b94f70>

In [8]:
# Предсказания
predictions = ncf_model.predict([test_shops, test_items])

# Добавление результатов в DataFrame
test_data['predicted_sales'] = predictions


[1m10036/10036[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 865us/step


In [9]:
# Топ товаров для каждого магазина
recommendations = test_data.groupby('shop_id').apply(
    lambda x: x.sort_values('predicted_sales', ascending=False).head(10)
)

# Вывод рекомендаций
recommendations.head()


  recommendations = test_data.groupby('shop_id').apply(


Unnamed: 0_level_0,Unnamed: 1_level_0,shop_id,item_id,month,item_cnt_month,predicted_sales
shop_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,1607976,0,10408,2013-04,3.258097,4.217931
0,1608000,0,10408,2015-04,3.89182,4.217931
0,1607999,0,10408,2015-03,4.234107,4.217931
0,1607980,0,10408,2013-08,4.553877,4.217931
0,1607985,0,10408,2014-01,4.394449,4.217931


In [10]:
ncf_model.save('model_rec_seller.h5')

