### Решение от Абрамовой Анна Константиновны направление Data Science

### Построение модели

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from statsmodels.tsa.statespace.sarimax import SARIMAX
from statsmodels.tsa.statespace.sarimax import SARIMAXResults
from sklearn.metrics import mean_absolute_error

In [2]:
plt.rcParams["figure.figsize"] = (12, 8)
plt.rcParams.update({'font.size': 11})

In [3]:
df_transaction = pd.read_parquet("../data/transaction_df.parquet")
df_competitors = pd.read_parquet("../data/df_competitors.parquet")
df_cost = pd.read_parquet("../data/df_cost.parquet")

In [4]:
df_cost=df_cost.rename(columns={'date':'datetime'})
df_cost

Unnamed: 0,place,product,cost,datetime
0,Анор Лондо,Целебные травы,2.07,2216-01-02
1,Анор Лондо,Целебные травы,3.07,2216-01-26
2,Анор Лондо,Целебные травы,3.30,2216-02-12
3,Анор Лондо,Целебные травы,4.04,2216-03-06
4,Анор Лондо,Целебные травы,3.88,2216-03-27
...,...,...,...,...
1112,Фалькония,Эстус,3.10,2218-07-29
1113,Фалькония,Эстус,7.20,2218-08-10
1114,Фалькония,Эстус,3.31,2218-08-27
1115,Фалькония,Эстус,3.26,2218-09-16


In [5]:
df_transaction['datetime'] = pd.to_datetime(df_transaction['datetime']).dt.date

In [6]:
df_transaction = df_transaction.set_index('datetime')

In [7]:
df_transaction = df_transaction.dropna()

In [8]:
df_transaction = df_transaction[(df_transaction['amount'] > 0) & (df_transaction['price'] > 0)]

In [9]:
df_transaction.groupby(['datetime', 'price', 'product','place']).agg({'amount': ['sum']}).reset_index().to_csv('../data/transaction_group.csv', index=False)  

In [10]:
df_group = pd.read_csv("../data/transaction_group.csv", parse_dates=['datetime'],index_col='datetime')

In [11]:
df_group=df_group.dropna()

In [12]:
df_competitors=df_competitors.rename(columns={'price':'compet_price','date':'datetime'})

In [13]:
df_group.merge(df_competitors, on=['datetime','product','place']).to_csv('../data/transaction_with_competitors.csv', index=False)

In [14]:
df_transaction_competitors = pd.read_csv("../data/transaction_with_competitors.csv", parse_dates=['datetime'],index_col='datetime')

In [15]:
df_transaction_competitors['amount'] = df_transaction_competitors['amount'].astype(float)
df_transaction_competitors['price'] =df_transaction_competitors['price'].astype(float)
df_transaction_competitors['compet_price'] =df_transaction_competitors['compet_price'].astype(float)

In [16]:
# df_transaction_competitors=df_transaction_competitors.reset_index()

In [17]:
df_transaction_competitors.merge(df_cost, on=['datetime','product','place']).to_csv('../data/df_with_cost.csv', index=False)

In [18]:
df_with_cost=pd.read_csv("../data/df_with_cost.csv", parse_dates=['datetime'],index_col='datetime')
df_with_cost

Unnamed: 0_level_0,price,product,place,amount,competitor,compet_price,cost
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2216-01-02,2.73,Целебные травы,Нокрон,44.869785,Арториас&Co,3.04,1.47
2216-01-02,2.73,Целебные травы,Нокрон,44.869785,Длань господня,3.03,1.47
2216-01-02,2.73,Целебные травы,Нокрон,44.869785,ЛилIT,3.07,1.47
2216-01-02,2.87,Эстус,Нокрон,40.394523,Арториас&Co,2.86,2.08
2216-01-02,2.87,Эстус,Нокрон,40.394523,ЛилIT,3.09,2.08
...,...,...,...,...,...,...,...
2218-09-27,20.33,Эстус,Врата Балдура,25.646198,Светлые Души,17.99,7.86
2218-09-27,24.42,Эстус,Кеджистан,45.163825,Арториас&Co,20.13,10.15
2218-09-27,27.47,Эльфийская пыльца,Врата Балдура,26.922775,Длань господня,25.93,21.68
2218-09-27,27.47,Эльфийская пыльца,Врата Балдура,26.922775,ЛилIT,27.49,21.68


In [30]:
products = df_with_cost['product'].unique()
places = df_with_cost['place'].unique()

In [35]:
forecast_table = pd.DataFrame(columns=['day_num', 'product', 'place', 'price'])

# Цикл по каждому продукту и городу
for product in products:
    for place in places:
        # Фильтрация данных для каждого продукта и города
        subset = df_with_cost[(df_with_cost['product'] == product) & (df_with_cost['place'] == place)]
        
        # Создание SARIMAX модели и обучение
        model = SARIMAX(subset['price'], exog=subset['amount'], order=(1, 0, 1), seasonal_order=(1, 1, 1, 12))
        model_fit = model.fit(disp=False)
        
        # Прогноз на 90 дней вперед
        exog_forecast = pd.DataFrame({'Объем продаж': [subset['amount'].iloc[-3]]*90})
        forecast = model_fit.get_forecast(steps=90, exog=exog_forecast)
        forecast_values = forecast.predicted_mean
        
        # Создание таблицы прогноза
        forecast_table = forecast_table.append(pd.DataFrame({'day_num': range(1, 91),
                                                             'product': [product]*90,
                                                             'place': [place]*90,
                                                             'price': forecast_values}))
        forecast_table['price']=round(forecast_table['price'], 2)
        forecast_table.reset_index(drop=True, inplace=True)



  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Non-stationary starting seasonal autoregressive'
  warn('Non-invertible starting seasonal moving average'
  return get_prediction_index(
  forecast_table = forecast_table.append(pd.DataFrame({'day_num': range(1, 91),
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  forecast_table = forecast_table.append(pd.DataFrame({'day_num': range(1, 91),
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  forecast_table = forecast_table.append(pd.DataFrame({'day_num': range(1, 91),
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Non-invertible starting seasonal moving average'
  return get_prediction_index(
  forecast_table = forecast_table.append(pd.DataFrame({'day_num': range(1, 91),
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  forecast_table = forecast_table.

  return get_prediction_index(
  forecast_table = forecast_table.append(pd.DataFrame({'day_num': range(1, 91),
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  forecast_table = forecast_table.append(pd.DataFrame({'day_num': range(1, 91),
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  forecast_table = forecast_table.append(pd.DataFrame({'day_num': range(1, 91),
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  forecast_table = forecast_table.append(pd.DataFrame({'day_num': range(1, 91),
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  forecast_table = forecast_table.append(pd.DataFrame({'day_num': range(1, 91),
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  forecast_table = forecast_table.append(pd.DataFrame({'day_num': range(1, 91),
  self._init_d

  return get_prediction_index(
  forecast_table = forecast_table.append(pd.DataFrame({'day_num': range(1, 91),
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  forecast_table = forecast_table.append(pd.DataFrame({'day_num': range(1, 91),


In [37]:
forecast_table[(forecast_table['product'] == 'Эстус') & (forecast_table['place'] == 'Фалькония')]

Unnamed: 0,day_num,product,place,price
157,1,Эстус,Фалькония,9.34
158,2,Эстус,Фалькония,9.34
159,3,Эстус,Фалькония,9.34
160,4,Эстус,Фалькония,9.31
161,5,Эстус,Фалькония,9.34
...,...,...,...,...
242,86,Эстус,Фалькония,10.43
243,87,Эстус,Фалькония,10.39
244,88,Эстус,Фалькония,10.34
245,89,Эстус,Фалькония,10.37


In [23]:
import math

In [24]:
def get_optimal_price(last_known_price, competitor_price):
    max_price = round(competitor_price * 0.8, 2)
    min_price = last_known_price
    
    for price in np.arange(min_price, max_price + 0.01, 0.01):  # Итерация по возможным ценам
        rounded_price = round(price, 2)  # Округление до сотых
        
        if math.isclose(rounded_price, price):
            return rounded_price
    
    return None

In [25]:
df_with_cost['price'] =round(df_with_cost['price'], 2)

In [26]:
forecast_table = pd.DataFrame(columns=['day_num', 'product', 'place', 'price'])

for product in products:
    for place in places:
        # Фильтрация данных для каждого продукта и города
        subset = df_with_cost[(df_with_cost['product'] == product) & (df_with_cost['place'] == place)]
        
        # Создание SARIMAX модели и обучение
        model = SARIMAX(subset['price'], уxog=subset['amount'],order=(1, 1, 1), seasonal_order=(1, 1, 1, 12))
        model_fit = model.fit(disp=False)
        
        # Прогноз на 90 дней вперед
        exog_forecast = pd.DataFrame({'Объем продаж': [subset['amount'].iloc[-3]]*90})
        forecast = model_fit.get_forecast(steps=90, exog=exog_forecast)
        forecast_values = forecast.predicted_mean
        
        last_known_price = subset['price'].iloc[-1]
        competitor_price = subset['compet_price'].iloc[-1]
        
        # Прогноз с учетом условий изменения цены
        modified_forecast = []
        current_price = last_known_price  # Инициализация текущей цены
        consecutive_days = 1
        
        for i in range(90):
            if i % 3 == 0:
                optimal_price = get_optimal_price(current_price, competitor_price)  # Определение оптимальной цены
                
                if optimal_price is not None:
                    current_price = optimal_price
                    consecutive_days = 1
            else:
                if consecutive_days < 3:
                    current_price = round(modified_forecast[-1], 2)
                    consecutive_days += 1
            modified_forecast.append(current_price)
        
        # Создание таблицы прогноза
        forecast_table = forecast_table.append(pd.DataFrame({'day_num': range(1, 91),
                                                             'product': [product]*90,
                                                             'place': [place]*90,
                                                             'price': forecast_values}))

  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  forecast_table = forecast_table.append(pd.DataFrame({'day_num': range(1, 91),
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  forecast_table = forecast_table.append(pd.DataFrame({'day_num': range(1, 91),
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'
  return get_prediction_index(
  forecast_table = forecast_table.append(pd.DataFrame({'day_num': range(1, 91),
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'
  return get_prediction_index(
  forecast_table = forecast_table.append(pd.DataFrame({'day_num': range(1, 91),
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('No

  return get_prediction_index(
  forecast_table = forecast_table.append(pd.DataFrame({'day_num': range(1, 91),
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  forecast_table = forecast_table.append(pd.DataFrame({'day_num': range(1, 91),
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  forecast_table = forecast_table.append(pd.DataFrame({'day_num': range(1, 91),
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  forecast_table = forecast_table.append(pd.DataFrame({'day_num': range(1, 91),
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  forecast_table = forecast_table.append(pd.DataFrame({'day_num': range(1, 91),
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  forecast_table = forecast_table.append(pd.DataFrame({'day_num': range(1, 91),
  self._init_d

  return get_prediction_index(
  forecast_table = forecast_table.append(pd.DataFrame({'day_num': range(1, 91),
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  forecast_table = forecast_table.append(pd.DataFrame({'day_num': range(1, 91),


In [27]:
forecast_table['price']=round(forecast_table['price'], 2)

In [28]:
forecast_table

Unnamed: 0,day_num,product,place,price
129,1,Целебные травы,Нокрон,9.49
130,2,Целебные травы,Нокрон,9.53
131,3,Целебные травы,Нокрон,9.50
132,4,Целебные травы,Нокрон,9.55
133,5,Целебные травы,Нокрон,9.57
...,...,...,...,...
281,86,Эльфийская пыльца,Врата Балдура,33.19
282,87,Эльфийская пыльца,Врата Балдура,33.25
283,88,Эльфийская пыльца,Врата Балдура,33.23
284,89,Эльфийская пыльца,Врата Балдура,33.29


In [None]:
forecast_table.to_parquet("../data/df_baseline.parquet")