In [1]:
import os

# Folder holding stores.csv, promo_results.csv and the orders*.csv exports.
# Set the PROMOS_DIR environment variable to run the notebook on another
# machine without editing this cell; the original location stays the default.
BASE_DIR = os.environ.get('PROMOS_DIR', '/Users/efraflores/Desktop/EF/Corner/Promos')

In [2]:
from datetime import timedelta, date
from math import ceil
from pathlib import Path

from pandas import DataFrame, Series, concat, read_csv, to_datetime


class PromoResults:
    """Compare a store's orders before, during and after each promo.

    Intended call order (each step stores its result on the instance):
    ``read_files`` -> ``mod_stores_data`` -> ``mod_promo_data`` ->
    ``mod_orders_data`` -> ``get_store`` -> ``summary``.
    """

    # Labels kept when only comparable periods are requested, mapped to the
    # shorter names used in the output column headers.
    _COMPARABLE_LABELS = {
        '01 Pre comparable': '01 Pre',
        '02 Promo running': '02 Promo',
        '03 Post comparable': '03 Post',
    }

    def __init__(self, base_dir: str) -> None:
        """Locate the input files inside ``base_dir``.

        Args:
            base_dir: folder expected to contain ``stores.csv``,
                ``promo_results.csv`` and any number of ``orders*.csv`` files.

        A missing stores/promo file is only reported with ``print``; the
        failure itself surfaces later, when ``read_files`` tries to open it.
        """
        self.base_dir = Path(base_dir)
        self.stores_file = self.base_dir.joinpath('stores.csv')
        self.promo_file = self.base_dir.joinpath('promo_results.csv')
        for needed_file in [self.stores_file, self.promo_file]:
            if not needed_file.is_file():
                # Path.name is separator-agnostic, unlike splitting on '/'.
                print(f'There should be a file called "{needed_file.name}" at:\n{self.base_dir}\nAdd it and try again!')
        # Materialised (and sorted) so read_files can be re-run: a bare glob
        # generator is exhausted after the first pass and yields no files.
        self.orders_files = sorted(self.base_dir.glob('orders*.csv'))


    def read_files(self) -> None:
        """Load stores, promos and every order chunk into DataFrames.

        Sets ``self.stores``, ``self.promo`` (tab-separated, UTF-16) and
        ``self.orders`` (all ``orders*.csv`` files stacked, or an empty frame
        when there are none).
        """
        self.stores = read_csv(self.stores_file)
        self.promo = read_csv(self.promo_file, sep='\t', encoding='utf-16')
        chunks = [read_csv(file_chunk) for file_chunk in self.orders_files]
        # concat raises on an empty list, so keep the empty-frame fallback.
        self.orders = concat(chunks, ignore_index=True) if chunks else DataFrame()


    def mod_stores_data(self, id_col: str) -> None:
        """Clean the stores table and remember its id column.

        Args:
            id_col: column holding the store id; thousands separators are
                stripped and the value cast to ``int``.

        The ``store`` column is title-cased and rows with a repeated id are
        dropped (first occurrence kept).
        """
        stores = self.stores.copy()
        # Going through str() lets this work whether read_csv parsed the ids
        # as integers or as strings such as "1,001".
        stores[id_col] = stores[id_col].map(lambda raw: int(str(raw).replace(',', '')))
        stores = stores.drop_duplicates(id_col).reset_index(drop=True)
        stores['store'] = stores['store'].str.title()
        self.stores = stores
        self.stores_id_col = id_col


    def mod_promo_data(self, id_col: str, start_date_col: str, end_date_col: str) -> None:
        """De-duplicate promos and parse their start/end columns as datetimes.

        Args:
            id_col: promo identifier column (duplicates are dropped).
            start_date_col: column with the promo start, parsed day-first.
            end_date_col: column with the promo end, parsed day-first.
        """
        promo = self.promo.drop_duplicates(id_col).reset_index(drop=True)
        for col in [start_date_col, end_date_col]:
            promo[col] = to_datetime(promo[col], dayfirst=True)
        self.promo = promo
        self.promo_id_col = id_col
        self.promo_start_date = start_date_col
        self.promo_end_date = end_date_col


    def mod_orders_data(self, id_col: str, date_col: str, store_col: str, sum_ticket_col: str) -> None:
        """Clean the orders table.

        Requires ``mod_stores_data`` to have run, because the store column is
        renamed to the stores id column recorded there.

        Args:
            id_col: order identifier column (duplicates are dropped).
            date_col: raw order timestamp column; parsed day-first into a new
                ``'date order'`` column.
            store_col: column with the store id of each order.
            sum_ticket_col: ticket amount column; ``$`` and ``,`` are removed
                and the value cast to ``float``.
        """
        new_date_col = 'date order'
        orders = self.orders.drop_duplicates(id_col).reset_index(drop=True)
        orders[new_date_col] = to_datetime(orders[date_col], dayfirst=True)
        orders[sum_ticket_col] = orders[sum_ticket_col].map(
            lambda raw: float(str(raw).replace('$', '').replace(',', ''))
        )
        self.orders = orders.rename(columns={store_col: self.stores_id_col})
        self.orders_id_col = id_col
        self.orders_date_col = new_date_col
        self.orders_ticket_col = sum_ticket_col


    @staticmethod
    def _first_words(text, n_words: int) -> str:
        """Return the first ``n_words`` whitespace-separated words, title-cased."""
        return ' '.join(str(text).split()[:n_words]).title()


    def get_store(self, col_to_search: str, n_words: int=1) -> None:
        """Attach store information to each promo by matching on the store name.

        The first ``n_words`` words of ``col_to_search`` are matched against
        ``stores['store']``; promos without a match are retried once using
        ``n_words + 1`` words. Promos that still do not match are kept with
        empty store columns.

        Args:
            col_to_search: promo column whose text starts with the store name.
            n_words: number of leading words tried in the first pass.
        """
        promo_cols = list(self.promo.columns)
        first_pass = (
            self.promo
            .assign(store=self.promo[col_to_search].map(lambda text: self._first_words(text, n_words)))
            # The indicator marks unmatched rows reliably, whatever the order
            # or null-ness of the stores columns.
            .merge(self.stores, on='store', how='left', indicator='_store_match')
        )
        found = first_pass['_store_match'] == 'both'
        matched = first_pass[found].drop(columns='_store_match')
        unmatched = first_pass.loc[~found, promo_cols]

        if unmatched.empty:
            combined = matched
        else:
            retry = (
                unmatched
                .assign(store=unmatched[col_to_search].map(lambda text: self._first_words(text, n_words + 1)))
                .merge(self.stores, on='store', how='left')
            )
            combined = concat([matched, retry])

        self.promo = combined.reset_index(drop=True)


    @staticmethod
    def _label_period(order_date, new_start, start_date, end_date, duration: int) -> str:
        """Classify one order timestamp relative to the promo window."""
        if order_date >= end_date:
            # The first 7 days after the promo are set apart from the rest.
            if order_date < end_date + timedelta(days=7):
                return '04 Post not comparable'
            return '03 Post comparable'
        if order_date >= start_date:
            return '02 Promo running'
        if order_date >= new_start:
            # Only the first `duration` days of the look-back are comparable.
            if order_date > new_start + timedelta(days=duration):
                return '01.5 Pre not comparable'
            return '01 Pre comparable'
        return '???'


    def query_orders(self, project_id: int, store_id: int, start_date: date, end_date: date, user_col: str, move_n_period: int, just_comparable: bool=False) -> DataFrame:
        """Aggregate one store's orders by period around one promo.

        Args:
            project_id: promo identifier written to the result.
            store_id: store whose orders are analysed.
            start_date: promo start (must support ``-``/comparison with the
                parsed order timestamps).
            end_date: promo end.
            user_col: orders column used to count distinct users.
            move_n_period: how many periods to look back and forward; the
                look-back is ``ceil(duration / 7)`` weeks per period and the
                look-forward ``7 + duration`` days per period.
            just_comparable: keep only the pre/promo/post comparable periods
                and shorten their labels to ``01 Pre``/``02 Promo``/``03 Post``.

        Returns:
            One row for the store with columns named
            ``'<period label> <orders column> <aggregation>'`` plus the store
            and promo id columns, or an empty DataFrame when no order falls in
            the analysed window.
        """
        duration = (end_date - start_date).days
        n_weeks = ceil(duration/7)
        new_start = start_date - timedelta(days=n_weeks*7)*move_n_period
        new_end = end_date + timedelta(days=7 + duration)*move_n_period
        # NOTE(review): the pre-comparable window is `duration` days long while
        # the post-comparable one runs until new_end - confirm this is intended.

        order_dates = self.orders[self.orders_date_col]
        in_scope = (
            (self.orders[self.stores_id_col] == store_id)
            & (order_dates >= new_start)
            & (order_dates <= new_end)
        )
        store_orders = self.orders[in_scope].copy()
        if store_orders.empty:
            return DataFrame()

        store_orders['promo_label'] = store_orders[self.orders_date_col].map(
            lambda order_date: self._label_period(order_date, new_start, start_date, end_date, duration)
        )
        # Dates become text from here on, so the min/max below are computed on
        # these strings. The leading apostrophe is part of the output format
        # (presumably to force spreadsheets to treat it as text - confirm).
        store_orders[self.orders_date_col] = store_orders[self.orders_date_col].dt.strftime(r"'%Y-%m-%d %H:%M")

        if just_comparable:
            keep = store_orders['promo_label'].isin(list(self._COMPARABLE_LABELS))
            store_orders = store_orders[keep].copy()
            if store_orders.empty:
                return DataFrame()
            # Rename within the label column only, not across the whole frame.
            store_orders['promo_label'] = store_orders['promo_label'].map(self._COMPARABLE_LABELS)

        store_results = store_orders.pivot_table(
            index=self.stores_id_col,
            columns='promo_label',
            aggfunc={
                self.orders_date_col: ['min', 'max'],
                self.orders_id_col: ['count'],
                user_col: ['nunique'],
                self.orders_ticket_col: ['sum', 'mean'],
            }
        )

        # Flatten (orders column, aggregation, label) into "label column agg".
        store_results.columns = [f'{label} {value_col} {agg}' for value_col, agg, label in store_results.columns]
        store_results = store_results[sorted(store_results.columns)].reset_index()
        store_results[self.promo_id_col] = project_id
        return store_results


    def summary(self, **kwargs) -> None:
        """Run ``query_orders`` for every promo and write ``summary.csv``.

        Args:
            **kwargs: forwarded to ``query_orders`` (``user_col``,
                ``move_n_period`` and optionally ``just_comparable``).

        Sets ``self.acum`` (stacked per-promo results) and ``self.total``
        (promos joined to their results, restricted to promos with at least
        one order while the promo was running). ``self.total`` is also saved
        as a tab-separated UTF-16 file in ``base_dir``.

        Raises:
            ValueError: when no promo has any order in its analysed window.
        """
        results = []
        for project, store, start_date, end_date in zip(self.promo[self.promo_id_col], self.promo[self.stores_id_col], self.promo[self.promo_start_date], self.promo[self.promo_end_date]):
            store_results = self.query_orders(project, store, start_date, end_date, **kwargs)
            if not store_results.empty:
                results.append(store_results)

        if not results:
            raise ValueError('No orders were found for any promo window; nothing to summarise.')
        self.acum = concat(results, ignore_index=True)

        # A stray 'index' column may or may not be present depending on the inputs.
        self.total = self.promo.merge(self.acum).drop(columns='index', errors='ignore')

        # The promo-period count column depends on the label style and on the
        # order id column chosen by the caller, so build its name dynamically.
        promo_label = '02 Promo' if kwargs.get('just_comparable', False) else '02 Promo running'
        promo_count_col = f'{promo_label} {self.orders_id_col} count'
        if promo_count_col in self.total.columns:
            self.total = self.total.dropna(subset=[promo_count_col])
        else:
            # No promo had orders while running: nothing qualifies.
            self.total = self.total.iloc[0:0]
        self.total.to_csv(self.base_dir.joinpath('summary.csv'), index=False, sep='\t', encoding='utf-16')

In [3]:
# Load the raw files, then clean each table before matching promos to stores.
pr = PromoResults(BASE_DIR)
pr.read_files()

# Stores must be cleaned first: the orders step reuses the stores id column.
pr.mod_stores_data(id_col='store_id')
pr.mod_promo_data(
    id_col='project ID',
    start_date_col='project valid from',
    end_date_col='project valid until',
)
pr.mod_orders_data(
    id_col='Order ID',
    date_col='Created at Local Time Time',
    store_col='Store ID',
    sum_ticket_col='Total Ticket',
)

# The store name is taken from the leading word(s) of the promo name.
pr.get_store(col_to_search='project name')

In [4]:
pr.summary(user_col='User ID', move_n_period=4, just_comparable=True)

# Preview a few rows. The size is capped so the cell still runs when fewer
# than 4 promos survive, and the seed keeps the preview stable across runs.
pr.total.sample(min(4, len(pr.total)), random_state=0)

Unnamed: 0,CPG,project ID,project name,project valid from,project valid until,project orders,budget used,project duration (days),completed days (%),sales,...,02 Promo Total Ticket sum,02 Promo User ID nunique,02 Promo date order max,02 Promo date order min,03 Post Order ID count,03 Post Total Ticket mean,03 Post Total Ticket sum,03 Post User ID nunique,03 Post date order max,03 Post date order min
293,Partners (Dummy),29007,Chedraui - Mascotas 17-29 Enero 2022,2022-01-17 06:00:00,2022-01-30 02:00:00,439,6085.46,13,100%,71310.55,...,65914210.0,38181.0,'2022-01-30 01:55,'2022-01-17 06:01,306207.0,1100.748333,337056800.0,99498.0,'2022-04-16 01:58,'2022-02-06 02:02
299,Partners (Dummy),28887,Costco Cuponera01 Farmacia 12 - 23ene 2022,2022-01-12 06:00:00,2022-01-24 05:55:55,930,6553.15,12,100%,460803.33,...,46324150.0,23638.0,'2022-01-24 05:40,'2022-01-12 06:05,137557.0,1612.947595,221872200.0,68357.0,'2022-04-06 05:32,'2022-01-31 06:18
359,Partners (Dummy),33756,Carnes Ramos DDM,2022-05-02 05:00:00,2022-05-12 05:00:00,706,34742.45,10,100%,348974.21,...,449945.8,671.0,'2022-05-11 23:07,'2022-05-02 08:17,,,,,,
379,Partners (Dummy),32907,La europea Semana Santa 022,2022-04-11 05:00:00,2022-04-25 05:00:00,101,17716.95,14,93%,70203.43,...,506323.2,509.0,'2022-04-24 21:37,'2022-04-11 10:53,846.0,1106.081572,935745.0,773.0,'2022-05-17 18:53,'2022-05-02 08:23
