In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from statsmodels.tsa.statespace.sarimax import SARIMAX
from statsmodels.tsa.statespace.sarimax import SARIMAXResults
from sklearn.metrics import mean_absolute_error

In [2]:
# Notebook-wide plotting defaults: figure size and base font size.
plt.rcParams.update({
    "figure.figsize": (12, 8),
    "font.size": 11,
})

In [42]:
# Load the three input tables (transactions, competitor prices, costs) in one pass.
df_transaction, df_competitors, df_cost = (
    pd.read_parquet(parquet_path)
    for parquet_path in (
        "../data/transaction_df.parquet",
        "../data/df_competitors.parquet",
        "../data/df_cost.parquet",
    )
)

In [43]:
# Preview the cost table (columns shown in the output: place, product, cost, date).
df_cost

Unnamed: 0,place,product,cost,date
0,Анор Лондо,Целебные травы,2.07,2216-01-02
1,Анор Лондо,Целебные травы,3.07,2216-01-26
2,Анор Лондо,Целебные травы,3.30,2216-02-12
3,Анор Лондо,Целебные травы,4.04,2216-03-06
4,Анор Лондо,Целебные травы,3.88,2216-03-27
...,...,...,...,...
1112,Фалькония,Эстус,3.10,2218-07-29
1113,Фалькония,Эстус,7.20,2218-08-10
1114,Фалькония,Эстус,3.31,2218-08-27
1115,Фалькония,Эстус,3.26,2218-09-16


In [44]:
# Parse the timestamps, then keep only the calendar date so rows can be grouped per day.
parsed_timestamps = pd.to_datetime(df_transaction['datetime'])
df_transaction['datetime'] = parsed_timestamps.dt.date

In [45]:
# Move 'datetime' into the index; the later groupby still refers to it by its level name.
df_transaction = df_transaction.set_index('datetime')

In [46]:
# Discard every transaction row that has a missing value in any column.
df_transaction = df_transaction.dropna()

In [47]:
# Keep only rows whose sold amount and price are both strictly positive.
has_positive_amount = df_transaction['amount'] > 0
has_positive_price = df_transaction['price'] > 0
df_transaction = df_transaction[has_positive_amount & has_positive_price]

In [48]:
# Total amount sold per (date, price, product, place), saved for the next stage.
# The aggregation is the plain string 'sum', not the list ['sum']: a list produces
# two-level column labels, which to_csv writes as TWO header rows. Reading that file
# back with a single header yields a junk ",,,,sum" first row and a text-typed
# 'amount' column.
(
    df_transaction
    .groupby(['datetime', 'price', 'product', 'place'])
    .agg({'amount': 'sum'})
    .reset_index()
    .to_csv('../data/transaction_group.csv', index=False)
)

In [49]:
# Reload the grouped totals from CSV, parsing 'datetime' and using it as the index.
df_group = pd.read_csv(
    "../data/transaction_group.csv",
    index_col='datetime',
    parse_dates=['datetime'],
)

In [50]:
# Drop any row of the reloaded grouped table that contains a missing value.
df_group=df_group.dropna()

In [51]:
# Align competitor column names with the transaction table before merging:
# the competitor price gets its own name and the date column becomes 'datetime'.
competitor_column_names = {'price': 'compet_price', 'date': 'datetime'}
df_competitors = df_competitors.rename(columns=competitor_column_names)

In [52]:
# Attach competitor prices to the grouped transactions (merge's default inner join
# on date, product and place) and save the combined table.
transaction_with_competitors = df_group.merge(
    df_competitors,
    on=['datetime', 'product', 'place'],
)
transaction_with_competitors.to_csv('../data/transaction_with_competitors.csv', index=False)

In [53]:
# Reload the merged transactions + competitor prices, with parsed dates as the index.
df_transaction_competitors = pd.read_csv(
    "../data/transaction_with_competitors.csv",
    index_col='datetime',
    parse_dates=['datetime'],
)

In [54]:
# Make sure the three numeric columns are floats, one column at a time.
for numeric_column in ('amount', 'price', 'compet_price'):
    df_transaction_competitors[numeric_column] = (
        df_transaction_competitors[numeric_column].astype(float)
    )

In [55]:
# Turn the 'datetime' index back into a regular column; sarimax_model selects it by column name.
df_transaction_competitors=df_transaction_competitors.reset_index()

In [56]:
# def adjust_predicted_prices(predicted_prices, competitor_prices):
#     adjusted_prices = predicted_prices
#     for i in range(len(predicted_prices)):
#         if predicted_prices[i] > (1.2 * competitor_prices[i]):
#             adjusted_prices[i] = 1.2 * competitor_prices[i]
#     return adjusted_prices

In [67]:
# def sarimax_forecast(data,product, place):
#     # Создание модели SARIMAX
#     endog =df_transaction_competitors[(df_transaction_competitors['product'] == product) & (df_transaction_competitors['place'] == place)]['amount']
#     exog = df_transaction_competitors[(df_transaction_competitors['product'] == product) & (df_transaction_competitors['place'] == place)]['price']
#     model = SARIMAX(endog=endog,
#                         order=(1, 0, 0),
#                         seasonal_order=(1, 0, 0, 12),exog=exog)
#     results = model.fit()

#         # Получение прогнозов
#     forecast = results.get_forecast(steps=90, exog=[10]*10)
    
    
# #     last_price = exog.iloc[-1]['price']
# #     competitor_price = exog.iloc[-1]['compet_price']
# #     forecast[forecast > 0.8 * competitor_price] = 0.8 * competitor_price

# #     # Создание датафрейма с прогнозом
# #     forecast_df = pd.DataFrame({'Дата': pd.DataFrame(range(1, 91)), 
# #                                 'product': [product] * 90, 
# #                                 'place': [place] * 90, 
# #                                 'price': forecast})

#     return forecast

def sarimax_model(df_transaction_competitors, product, city, steps=90):
    """Fit a SARIMAX model on one product/place sales series and forecast ahead.

    Parameters
    ----------
    df_transaction_competitors : pandas.DataFrame
        Table with at least the columns 'datetime', 'amount', 'product' and 'place'.
    product : str
        Value of the 'product' column to select.
    city : str
        Value of the 'place' column to select.
    steps : int, default 90
        Number of periods to forecast after the last observation.

    Returns
    -------
    The output of ``model_fit.predict`` for the ``steps`` positions that
    immediately follow the end of the selected series.

    Raises
    ------
    ValueError
        If ``steps`` is smaller than 1, or no rows match ``product`` / ``city``.
    """
    if steps < 1:
        raise ValueError(f"steps must be >= 1, got {steps}")

    # Filter on the `city` argument. The previous body compared against a name
    # `place` that is not a parameter, so it silently depended on a global
    # variable that happened to exist in the calling loop.
    selection = (
        (df_transaction_competitors['product'] == product)
        & (df_transaction_competitors['place'] == city)
    )
    # .copy() so the column assignment below works on an independent frame and
    # never writes into (or warns about) a slice of the caller's DataFrame.
    df = df_transaction_competitors.loc[selection, ['datetime', 'amount']].copy()
    if df.empty:
        raise ValueError(
            f"no rows for product={product!r}, place={city!r}; cannot fit a model"
        )

    df['datetime'] = pd.to_datetime(df['datetime'])
    df = df.set_index('datetime')
    # NOTE(review): the series may hold several rows per date (the upstream grouping
    # also splits by price) -- confirm whether amounts should be summed per day first.

    model = SARIMAX(df['amount'], order=(1, 1, 1), seasonal_order=(1, 1, 0, 12))
    model_fit = model.fit(disp=False)

    # Positions len(df) .. len(df) + steps - 1 are the first `steps` out-of-sample points.
    forecast = model_fit.predict(start=len(df), end=len(df) + steps - 1)

    return forecast

In [68]:
# Distinct product and place values, in order of first appearance.
products = pd.unique(df_transaction_competitors['product'])
places = pd.unique(df_transaction_competitors['place'])

In [None]:
FORECAST_DAYS = 90  # sarimax_model returns 90 forecast steps per series

# Forecast dates start the day after the last observed date. The date column of
# df_transaction_competitors is named 'datetime' (there is no 'date' column), and the
# range is the same for every series, so it is built once outside the loops.
forecast_dates = pd.date_range(
    start=df_transaction_competitors['datetime'].max() + pd.DateOffset(1),
    periods=FORECAST_DAYS,
)

# Not every product is necessarily sold in every place; a model cannot be fitted on
# an empty series, so only (product, place) pairs that actually occur are forecast.
observed_pairs = set(
    df_transaction_competitors[['product', 'place']]
    .drop_duplicates()
    .itertuples(index=False, name=None)
)

# Collect one frame per series and concatenate once at the end, instead of
# re-copying a growing DataFrame on every iteration.
forecast_frames = []
for product in products:
    for place in places:
        if (product, place) not in observed_pairs:
            continue
        forecast = sarimax_model(df_transaction_competitors, product, place)
        forecast_frames.append(
            pd.DataFrame({
                'date': forecast_dates,
                'product': product,
                'place': place,
                # Plain values: the forecast's own index is irrelevant to the output table.
                'Forecast': np.asarray(forecast),
            })
        )

predictions = (
    pd.concat(forecast_frames, ignore_index=True)
    if forecast_frames
    else pd.DataFrame(columns=['date', 'product', 'place', 'Forecast'])
)

  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'


In [66]:
# df_baseline = []
# for (place, product) in df_transaction_competitors.groupby(["place", "product"]):
#     temp_df = pd.DataFrame(range(1, 91), columns=["day_num"])
#     temp_df["place"] = place
#     temp_df["product"] = product
#     forecast = sarimax_forecast(df_transaction_competitors, product, place)
#     temp_df["price"] = forecast.sort_values("datetime").iloc[-1]["price"]
#     df_baseline.append(temp_df)
# df_baseline = pd.concat(df_baseline)

In [60]:
# # prices= df_transaction_competitors['price']
# results = []

# for product in df_transaction_competitors['product'].unique():
#     for place in  df_transaction_competitors['place'].unique():
#         df_transaction_competitors[(df_transaction_competitors['product'] == product) & (df_transaction_competitors['place'] == place)]['price']
#         forecast = sarimax_forecast(df_transaction_competitors, product, place)
#         results.append(forecast)
# results_df = pd.concat(results)

In [61]:
# def make_forecast(model, prices):
#     forecast = model.get_forecast(steps=90, exog=prices)
#     return forecast

In [62]:

#     # Создание таблицы с предсказанными ценами
#     forecast_prices = pd.DataFrame({'day_num': pd.DataFrame(range(1, 91)),
#                                     'product': product,
#                                     'place': place,
#                                     'price': forecast.predicted_mean})

#     # Корректировка предсказанных цен
#     forecast_prices['price'] = adjust_predicted_prices(forecast_prices['price'], competitor_prices)

In [64]:
# predicted_prices_table = pd.DataFrame(columns=['Product', 'City', 'Predicted_price'])
# for product in df_transaction_competitors['product'].unique():
#     for place in  df_transaction_competitors['place'].unique():
#         prices = df_transaction_competitors[(df_transaction_competitors['product'] == product) & (df_transaction_competitors['place'] == place)]['price']
#         competitor_prices = df_transaction_competitors[(df_transaction_competitors['product'] == product) & (df_transaction_competitors['place'] == place)]['compet_price']
#         forecast = sarimax_forecast(product, place, prices, competitor_prices)
# #         forecast_table = forecast_table.append(forecast)
#         competitor_prices = df_transaction_competitors[(df_transaction_competitors['product'] == product) & (df_transaction_competitors['place'] == place)]['compet_price'].values
#         forecast_prices = adjust_predicted_prices(forecast, competitor_prices)
        
#         # Добавление результатов в таблицу
#         temp_df = pd.DataFrame({'Product': [product]*90, 'place': [place]*90, 'price': adjusted_prices})
#         predicted_prices_table = pd.concat([predicted_prices_table, temp_df])

# # Вывод таблицы с предсказанными ценами
# print(forecast_table)

In [None]:
# # Функция для корректировки предсказанных цен
# def adjust_predicted_prices(predicted_prices, competitor_prices):
#     adjusted_prices = predicted_prices
#     for i in range(len(predicted_prices)):
#         if predicted_prices[i] > (1.2 * competitor_prices[i]):
#             adjusted_prices[i] = 1.2 * competitor_prices[i]
#     return adjusted_prices

In [None]:
# predicted_prices_table = pd.DataFrame(columns=['product', 'place', 'price'])
# for product in df_transaction_competitors['product'].unique():
#     for place in df_transaction_competitors['place'].unique():
#         # Построение модели и предсказание объема продаж
#         forecast = build_sarimax_model(df_transaction_competitors, product, place)
#         predicted_prices=forecast.predicted_mean
        
#         # Коррекция предсказанных цен
#         competitor_prices = df_transaction_competitors[(df_transaction_competitors['product'] == product) & (df_transaction_competitors['place'] == place)]['compet_price'].values
#         adjusted_prices = adjust_predicted_prices(predicted_prices, competitor_prices)
        
#         # Добавление результатов в таблицу
#         temp_df = pd.DataFrame({'product': [product]*90, 'place': [place]*90, 'price': adjusted_prices})
#         predicted_prices_table = pd.concat([predicted_prices_table, temp_df])

# # Вывод таблицы с предсказанными ценами
# print(predicted_prices_table)

In [65]:
# def forecast_sales(df, product, city):
#     # Выбор нужных столбцов
#     sales = df[['Дата продажи', 'Объем продаж ' + product + ' в ' + city, 'Цена ' + product, 'Цена конкурента ' + product]]
#     sales['Дата продажи'] = pd.to_datetime(sales['Дата продажи'])
#     sales.set_index('Дата продажи', inplace=True)
    
#     # Создание модели SARIMAX
#     model = SARIMAX(sales['Объем продаж ' + product + ' в ' + city])

#     # Обучение модели
#     model_fit = model.fit()

#     # Прогнозирование продаж на 90 дней вперед
#     forecast = model_fit.forecast(steps=90)

#     # Проверка условия на цену (не больше 20% от цены конкурента)
#     last_price = sales.iloc[-1]['Цена ' + product]
#     competitor_price = sales.iloc[-1]['Цена конкурента ' + product]
#     forecast[forecast > 0.8 * competitor_price] = 0.8 * competitor_price

#     # Создание датафрейма с прогнозом
#     forecast_df = pd.DataFrame({'Дата': pd.date_range(start=sales.index[-1] + pd.DateOffset(1), periods=90), 
#                                 'Продукт': [product] * 90, 
#                                 'Город': [city] * 90, 
#                                 'Цена': forecast})

#     return forecast_df
