In [1]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import random

from statsmodels.tsa.api import ExponentialSmoothing
from statsmodels.tsa.statespace.sarimax import SARIMAX

from sklearn.metrics import mean_absolute_percentage_error
from sklearn.model_selection import ParameterGrid

from tqdm.auto import tqdm

import json

import warnings
# Suppress every warning category for the rest of the session.
# NOTE(review): this also hides deprecation notices from the imported libraries;
# consider narrowing the filter to specific categories if warnings become relevant.
warnings.filterwarnings('ignore')

## Витрина для GMV

In [2]:
# Seed both RNGs used by the generation cell below (stdlib `random` for the
# quantity, `np.random` for the price) so that Restart & Run All reproduces
# the same synthetic data mart.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)

# 135 consecutive calendar days ending on 2023-12-15 (inclusive).
dates = list(pd.date_range(end="2023-12-15", periods=135, freq="D"))
# Rows for a given business date are stamped as created on the following day.
dates_update = [day + timedelta(days=1) for day in dates]
categories = ["Detective", "Fantasy", "Horror", "Romance novel", "Graphic novel"]

# Mean and standard deviation of the normal distribution the unit price is drawn from.
mu = 400
sigma = 1000
# Inclusive upper bound on the number of units sold per (date, category) row.
max_count = 10

In [3]:
# Build the synthetic GMV data mart: one row per (business date, category).
# Rows are collected in a plain list and turned into a DataFrame once at the
# end; enlarging the frame cell by cell with `df.loc[row, col] = ...` copies
# data on every new row and leaves every column with `object` dtype.
records = []

for business_ts, created_ts in zip(dates, dates_update):
    for cat in categories:
        # Unit price: redraw from N(mu, sigma) until the rounded value is positive.
        price = np.round(np.random.normal(mu, sigma))
        while price <= 0:
            price = np.round(np.random.normal(mu, sigma))
        # Quantity sold; 0 is allowed, which yields a zero-GMV row.
        count = random.randint(0, max_count)
        records.append(
            {
                "created_at": created_ts.date(),
                "business_date": business_ts.date(),
                "category_name": cat,
                "category_gmv": int(price * count),
            }
        )

# Passing `columns` fixes the column order and keeps the header even when
# `records` is empty.
df = pd.DataFrame(
    records,
    columns=["created_at", "business_date", "category_name", "category_gmv"],
)

In [5]:
# Preview the generated GMV frame (pandas truncates the middle rows in the rendered output).
df

Unnamed: 0,created_at,business_date,category_name,category_gmv
0,2023-08-04,2023-08-03,Detective,106
1,2023-08-04,2023-08-03,Fantasy,0
2,2023-08-04,2023-08-03,Horror,5360
3,2023-08-04,2023-08-03,Romance novel,3510
4,2023-08-04,2023-08-03,Graphic novel,3460
...,...,...,...,...
670,2023-12-16,2023-12-15,Detective,0
671,2023-12-16,2023-12-15,Fantasy,7140
672,2023-12-16,2023-12-15,Horror,1294
673,2023-12-16,2023-12-15,Romance novel,12321


In [14]:
# Persist the GMV data mart next to the notebook; the index is not written.
df.to_csv(
    path_or_buf="data_gmv_example.csv",
    index=False,
)

## Витрина по клиентам

In [15]:
# Seed both RNGs used by the customer generation cell below so this section
# is reproducible even when it is run without the GMV section above it.
CUSTOMER_SEED = 43  # arbitrary fixed value
random.seed(CUSTOMER_SEED)
np.random.seed(CUSTOMER_SEED)

# 365 consecutive calendar days ending on 2023-12-14 (inclusive).
dates = list(pd.date_range(end="2023-12-14", periods=365, freq="D"))
# Rows for a given business date are stamped as created on the following day.
dates_update = [day + timedelta(days=1) for day in dates]

# Mean and standard deviation of the normal distribution the unit price is drawn from.
mu = 300
sigma = 500

In [16]:
# Build the synthetic customer data mart: one purchase row per calendar day.
# Rows are collected in a plain list and turned into a DataFrame once at the
# end; enlarging the frame cell by cell with `df.loc[row, col] = ...` copies
# data on every new row and leaves every column with `object` dtype.

# Inclusive upper bounds for the randomly drawn customer number, customer
# category number and purchased quantity (all drawn starting from 1).
n_customers = 75
n_customer_categories = 10
max_quantity = 6

records = []

for business_ts, created_ts in zip(dates, dates_update):
    # The draw order (id, category, price, quantity) is kept fixed so a seeded
    # run always yields the same rows.
    customer_id = "client_" + str(random.randint(1, n_customers))
    customer_category = "cat" + str(random.randint(1, n_customer_categories))
    # Unit price: redraw from N(mu, sigma) until the rounded value is positive.
    price = np.round(np.random.normal(mu, sigma))
    while price <= 0:
        price = np.round(np.random.normal(mu, sigma))
    # Quantity purchased; at least one unit, so GMV is always positive.
    quantity = random.randint(1, max_quantity)
    records.append(
        {
            "created_at": created_ts.date(),
            "business_date": business_ts.date(),
            "customer_id": customer_id,
            "customer_gmv": int(price * quantity),
            "customer_category": customer_category,
        }
    )

# Passing `columns` fixes the column order and keeps the header even when
# `records` is empty.
df = pd.DataFrame(
    records,
    columns=["created_at", "business_date", "customer_id", "customer_gmv", "customer_category"],
)


In [21]:
# The customer data mart is exported without `business_date`.
# Re-bind the result instead of mutating in place, and tolerate the column
# already being gone, so re-running this cell does not raise KeyError.
df = df.drop(columns="business_date", errors="ignore")

In [22]:
# Preview the customer frame after `business_date` was dropped (pandas truncates the middle rows).
df

Unnamed: 0,created_at,customer_id,customer_gmv,customer_category
0,2022-12-16,client_2,2235,cat1
1,2022-12-17,client_3,2238,cat7
2,2022-12-18,client_26,4794,cat2
3,2022-12-19,client_38,2928,cat6
4,2022-12-20,client_64,805,cat6
...,...,...,...,...
360,2023-12-11,client_8,843,cat6
361,2023-12-12,client_30,600,cat6
362,2023-12-13,client_17,5514,cat9
363,2023-12-14,client_68,1608,cat2


In [23]:
# Persist the customer data mart next to the notebook; the index is not written.
df.to_csv(
    path_or_buf="data_client_example.csv",
    index=False,
)