In [None]:
import itertools
import pandas as pd

from tree_predictor import TreePredictor
from utils import Utils

In [None]:
# Read the configuration through Utils.read_config_for_env and hand it to TreePredictor.
# NOTE(review): the path is relative, so this presumably expects the notebook's
# working directory to sit one level below the folder containing config/ — confirm.
config = Utils.read_config_for_env(config_path='../config/config.yml')
treepredictor = TreePredictor(config)

In [None]:
# Working frame for the rest of the notebook: the predictor's `data_daily_train` attribute.
# NOTE(review): later cells read the columns shop_id, item_id, item_category_id,
# monthly_period, price and amount from it; presumably one row per daily sale — confirm.
df = treepredictor.data_daily_train

In [None]:
def create_df_full(df, columns):
    """Build the cartesian grid of the unique values found in three columns.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing the columns named in ``columns``.
    columns : sequence of str
        Column names; the first three entries are read (used in this notebook
        as shops, items and dates) and the whole sequence becomes the header
        of the result.

    Returns
    -------
    pandas.DataFrame
        One row per combination of unique values. The first column varies
        slowest and the third fastest, following ``itertools.product``.
    """
    # Unique values per axis, in order of first appearance in `df`.
    axis_values = [df[columns[position]].unique() for position in range(3)]
    grid_rows = list(itertools.product(*axis_values))
    return pd.DataFrame(grid_rows, columns=columns)

def create_df_all(df, coldict):
    """Pair every observed (shop, item) combination with every observed date.

    Unlike a full shop x item x date product, only shop-item pairs that
    actually occur in ``df`` are expanded across the dates.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing the three key columns named in ``coldict``.
    coldict : dict
        Maps the roles ``'shops'``, ``'items'`` and ``'date'`` to the
        corresponding column names in ``df``.

    Returns
    -------
    pandas.DataFrame
        Columns ordered as (date, shops, items) with a fresh RangeIndex.
        Rows are grouped by shop-item pair (in order of first appearance),
        each pair repeated once per unique date.

    Raises
    ------
    KeyError
        If ``coldict`` lacks one of the roles or ``df`` lacks a named column.
    """
    shop_col = coldict['shops']
    item_col = coldict['items']
    date_col = coldict['date']

    shop_item_pairs = df[[shop_col, item_col]].drop_duplicates()
    dates = df[[date_col]].drop_duplicates()

    # A cross merge builds the grid column-wise inside pandas. The previous
    # approach materialised a Python list of (ndarray, date) tuples and split
    # it back into columns, which is slow and memory hungry on large inputs
    # and re-infers the key dtypes instead of keeping them.
    df_all = shop_item_pairs.merge(dates, how='cross')

    # Keep the column order callers have seen so far: date first, then keys.
    return df_all[[date_col, shop_col, item_col]]

def add_avgprice_to_df_all(df_all, df):
    """Placeholder for attaching average prices to ``df_all``.

    Not implemented yet: ``df`` is ignored and ``df_all`` is returned
    unchanged (the very same object).
    """
    # TODO: add prices (avg)
    return df_all

def add_catgory_to_df_all(df_all, df):
    """Placeholder for attaching the item category to ``df_all``.

    Not implemented yet: ``df`` is ignored and ``df_all`` is returned
    unchanged (the very same object).
    """
    # TODO: add category
    return df_all

def create_df_with_zero_sales(df, df_all, coldict):
    """Left-join the sales in ``df`` onto the grid ``df_all`` and zero-fill ``amount``.

    Parameters
    ----------
    df : pandas.DataFrame
        Sales data; must contain the three key columns named in ``coldict``
        and an ``'amount'`` column.
    df_all : pandas.DataFrame
        Grid of key combinations that should all appear in the result.
    coldict : dict
        Maps the roles ``'shops'``, ``'items'`` and ``'date'`` to column names
        present in both frames.

    Returns
    -------
    pandas.DataFrame
        Every row of ``df_all`` (repeated when ``df`` has several matches for
        a key). Grid rows without a match get ``amount == 0``; any other
        column coming from ``df`` stays NaN for those rows.
    """
    merge_keys = [coldict['shops'], coldict['items'], coldict['date']]
    df_merged = pd.merge(df_all, df, on=merge_keys, how='left')

    # Assign the filled column back explicitly. The chained
    # `df_merged['amount'].fillna(0, inplace=True)` operates on a temporary
    # Series: it warns on pandas 2.x and leaves the frame unchanged once
    # Copy-on-Write is active.
    df_merged['amount'] = df_merged['amount'].fillna(0)
    return df_merged

In [None]:
# Collapse the rows of `df` to one row per (shop, item, monthly period).
df_items_monthly_grouped = df.groupby(['shop_id', 'item_id', 'monthly_period'])

# Per group: first category id seen, mean price, total amount.
df_items_monthly = df_items_monthly_grouped.agg({
    'item_category_id': 'first',
    'price': 'mean',
    'amount': 'sum',
}).reset_index()

df_items_monthly

In [None]:
# Role -> column-name mapping consumed by the grid helpers.
coldict = dict(shops='shop_id', items='item_id', date='monthly_period')

# Expand every observed shop-item pair across all monthly periods.
df_all = create_df_all(df_items_monthly, coldict)
df_all.shape

In [None]:
# The two enrichment helpers are still placeholders that return df_all unchanged,
# so their calls stay disabled for now.
# df_all = add_avgprice_to_df_all(df_all, df)

# df_all = add_catgory_to_df_all(df_all, df)

# Left-join sales onto the grid and zero-fill `amount`; the result is only displayed, not stored.
# NOTE(review): this merges `df` (the frame the monthly aggregate was built from), while
# df_all was built from df_items_monthly — presumably df_items_monthly was intended here; confirm.
create_df_with_zero_sales(df, df_all, coldict)


In [None]:
# Collapse the rows of `df` to one row per (shop, item category, monthly period).
df_categories_monthly_grouped = df.groupby(['shop_id', 'item_category_id', 'monthly_period'])

# Total amount per group.
df_categories_monthly = df_categories_monthly_grouped.agg({'amount': 'sum'}).reset_index()

df_categories_monthly

In [None]:
# Attach the category-level monthly totals to each item-level monthly row.
# Both frames carry an `amount` column, so the overlap is disambiguated as
# `amount_item` (item total) and `amount_cat` (category total).
df_monthly = df_items_monthly.merge(
    df_categories_monthly,
    on=['shop_id', 'item_category_id', 'monthly_period'],
    how='left',
    suffixes=('_item', '_cat'),
)
df_monthly