# Parámetro

In [9]:
# Folder that PromoResults scans for 'stores.csv', 'promo_results.csv' and
# every 'orders*.csv' file; 'summary.csv' is written back into it at the end.
BASE_DIR = '/Users/efrain.flores/Desktop/EF/Corner/Promos'

# Código

In [10]:
from datetime import date, timedelta
from math import ceil
from pathlib import Path

from pandas import DataFrame, Series, Timedelta, concat, read_csv, to_datetime


class PromoResults:
    """Compare a store's orders before, during and after each promo.

    Expected call order: ``read_files`` -> ``mod_stores_data`` /
    ``mod_promo_data`` / ``mod_orders_data`` -> ``get_store`` -> ``summary``.
    The ``mod_*`` methods record the column names that later steps read from
    ``self``, so they have to run before ``get_store`` and ``summary``.
    """

    _SECONDS_PER_DAY = 60 * 60 * 24
    # get_store tries name prefixes of n_words, n_words+1, ... n_words+3 words.
    _EXTRA_WORDS_TRIED = 3

    def __init__(self, base_dir: str) -> None:
        """Locate the input files inside *base_dir*.

        A missing 'stores.csv' or 'promo_results.csv' is only reported with a
        printed message (no exception is raised here); reading will fail later.
        """
        self.base_dir = Path(base_dir)
        self.stores_file = self.base_dir.joinpath('stores.csv')
        self.promo_file = self.base_dir.joinpath('promo_results.csv')
        for needed_file in [self.stores_file, self.promo_file]:
            if not needed_file.is_file():
                # Path.name is separator-agnostic, unlike splitting on '/'.
                file_name = needed_file.name
                print(f'There should be a file called "{file_name}" at:\n{self.base_dir}\nAdd it and try again!')
        # Materialise the glob: a bare generator would be exhausted after the
        # first read_files() call. Sorting makes the load order deterministic.
        self.orders_files = sorted(self.base_dir.glob('orders*.csv'))

    def read_files(self) -> None:
        """Load stores, promos and all order chunks into DataFrames.

        Sets ``self.stores``, ``self.promo`` and ``self.orders``; the latter is
        an empty DataFrame when no 'orders*.csv' file exists.
        """
        self.stores = read_csv(self.stores_file)
        self.promo = read_csv(self.promo_file, sep='\t', encoding='utf-16')
        order_chunks = [read_csv(file_chunk) for file_chunk in self.orders_files]
        # One concat instead of repeated appends (DataFrame.append no longer
        # exists in pandas 2.x and copied the whole frame on every call).
        self.orders = concat(order_chunks, ignore_index=True) if order_chunks else DataFrame()

    def mod_stores_data(self, id_col: str) -> None:
        """Turn *id_col* into integers and lower-case the 'store' names.

        Ids written with thousands separators ('1,234') are supported; casting
        to ``str`` first also accepts a column that was already parsed as
        numeric. Remembers *id_col* as ``self.stores_id_col``.
        """
        raw_ids = self.stores[id_col].astype(str).str.replace(',', '', regex=False)
        self.stores[id_col] = raw_ids.map(int)
        self.stores['store'] = self.stores['store'].astype(str).str.lower()
        self.stores_id_col = id_col

    def mod_promo_data(self, id_col: str, start_date_col: str, end_date_col: str) -> None:
        """De-duplicate promos by *id_col* and parse both date columns (day first).

        Remembers the three column names on ``self`` for later steps.
        """
        self.promo.drop_duplicates(id_col, inplace=True)
        self.promo.reset_index(drop=True, inplace=True)
        for col in [start_date_col, end_date_col]:
            self.promo[col] = to_datetime(self.promo[col], dayfirst=True)
        self.promo_id_col = id_col
        self.promo_start_date = start_date_col
        self.promo_end_date = end_date_col

    @staticmethod
    def _parse_money(value) -> float:
        """Convert a value such as '$1,234.50' to a float."""
        return float(str(value).replace('$', '').replace(',', ''))

    def mod_orders_data(self, id_col: str, date_col: str, store_col: str, sum_ticket_col: str) -> None:
        """De-duplicate orders and normalise their date, ticket and store columns.

        Adds a parsed 'order_date' column, converts *sum_ticket_col* to float
        and renames *store_col* to the stores id column, so
        ``mod_stores_data`` must have been called before.
        """
        self.orders.drop_duplicates(id_col, inplace=True)
        self.orders.reset_index(drop=True, inplace=True)
        new_date_col = 'order_date'
        self.orders[new_date_col] = to_datetime(self.orders[date_col], yearfirst=True)
        self.orders[sum_ticket_col] = self.orders[sum_ticket_col].map(self._parse_money)

        self.orders.rename(columns={store_col: self.stores_id_col}, inplace=True)
        self.orders_id_col = id_col
        self.orders_date_col = new_date_col
        self.orders_ticket_col = sum_ticket_col
        self.max_order_date = self.orders[new_date_col].max()

    @staticmethod
    def _first_words(text: str, n_words: int) -> str:
        """Return the first *n_words* whitespace-separated words of *text*."""
        return ' '.join(text.split()[:n_words])

    def get_store(self, col_to_search: str, n_words: int=1) -> None:
        """Attach store columns to each promo by matching a name prefix.

        The lower-cased first *n_words* words of *col_to_search* are looked up
        in ``self.stores['store']``. Promos without a match are retried with
        one, two and three additional words. Promos that never match are kept,
        with null store columns. ``self.promo`` is replaced by the result and
        de-duplicated by promo id.
        """
        # A null in the last stores column marks a promo that found no store.
        match_col = self.stores.columns[-1]
        promo_cols = self.promo.columns

        pending = self.promo.copy()
        pending[col_to_search] = pending[col_to_search].astype(str).str.lower()

        pieces = []
        for extra in range(self._EXTRA_WORDS_TRIED + 1):
            prefix_len = n_words + extra
            pending['store'] = pending[col_to_search].map(
                lambda name, k=prefix_len: self._first_words(name, k)
            )
            merged = pending.merge(self.stores, on='store', how='left')
            unmatched = merged[match_col].isnull()
            if extra == self._EXTRA_WORDS_TRIED or not unmatched.any():
                # Last attempt (keep leftovers as they are) or nothing left to retry.
                pieces.append(merged)
                break
            found = merged[~unmatched]
            if not found.empty:
                pieces.append(found)
            # Retry only the unmatched promos, starting from their original columns.
            pending = merged.loc[unmatched, promo_cols].copy()

        self.promo = concat(pieces).reset_index(drop=True)
        self.promo.drop_duplicates(self.promo_id_col, inplace=True)

    @staticmethod
    def _promo_label(order_date, pre_start, pre_end, promo_start, promo_end, post_start) -> str:
        """Classify one order date relative to the promo and comparison windows."""
        if order_date >= promo_end:
            return '04 Post not comparable' if order_date < post_start else '03 Post comparable'
        if order_date >= promo_start:
            return '02 Promo running'
        if order_date >= pre_start:
            return '01.5 Pre not comparable' if order_date > pre_end else '01 Pre comparable'
        return '???'

    def query_orders(self, project_id: int, store_id: int, start_date: date, end_date: date, user_col: str, move_n_period: int, verbose: bool=False, just_comparable: bool=False) -> DataFrame:
        """Aggregate one store's orders around one promo.

        The promo window ``[start_date, end_date)`` is compared with windows of
        the same length starting ``n_weeks`` weeks before and after
        ``start_date``, where ``n_weeks`` is the larger of *move_n_period* and
        the promo length rounded up to whole weeks.

        Args:
            project_id: Promo id, written to the promo id column of the result.
            store_id: Value matched against the stores id column of the orders.
            start_date: Promo start; must support subtraction and comparison
                with the order dates.
            end_date: Promo end.
            user_col: Orders column whose distinct values are counted.
            move_n_period: Minimum shift, in weeks, of the comparison windows.
            verbose: Print the computed pre-promo window.
            just_comparable: Keep only the pre/promo/post comparable labels and
                shorten them to '01 Pre', '02 Promo', '03 Post'.

        Returns:
            A pivoted frame with one '<label> <column> <aggregate>' column per
            combination, plus the store id and promo id columns.
        """
        # Works for both pandas Timedelta and datetime.timedelta differences.
        duration_seconds = (end_date - start_date).total_seconds()
        duration_days = duration_seconds / self._SECONDS_PER_DAY
        duration_weeks = ceil(duration_days / 7)
        n_weeks = max(move_n_period, duration_weeks)

        shift = timedelta(days=7 * n_weeks)
        promo_length = timedelta(seconds=duration_seconds)
        new_start_start = start_date - shift
        new_start_end = new_start_start + promo_length
        new_end_start = start_date + shift
        new_end_end = new_end_start + promo_length

        if verbose:
            pre_window_days = (new_start_end - new_start_start).total_seconds() / self._SECONDS_PER_DAY
            print(f'''
            For project {int(project_id)}: {store_id}
            promo was from {start_date} to {end_date} ({duration_days:.2f} days ~{duration_weeks} weeks)
            and it will be analized -{n_weeks} weeks:
            from {new_start_start} to {new_start_end} ({pre_window_days:.2f} days)
            ''')

        order_dates = self.orders[self.orders_date_col]
        in_scope = (
            (self.orders[self.stores_id_col] == store_id)
            & (order_dates >= new_start_start)
            & (order_dates <= new_end_end)
        )
        store_orders = self.orders[in_scope].copy()

        store_orders['promo_label'] = [
            self._promo_label(row_date, new_start_start, new_start_end, start_date, end_date, new_end_start)
            for row_date in store_orders[self.orders_date_col]
        ]
        # The format string starts with an apostrophe, so it is part of every
        # value; min/max below therefore operate on these strings.
        store_orders[self.orders_date_col] = store_orders[self.orders_date_col].map(
            lambda order_date: order_date.strftime(r"'%Y-%m-%d %H:%M")
        )

        if just_comparable:
            store_orders = store_orders[store_orders['promo_label'].isin(['01 Pre comparable','02 Promo running','03 Post comparable'])]
            store_orders = store_orders.replace({'01 Pre comparable':'01 Pre','02 Promo running':'02 Promo','03 Post comparable':'03 Post'})

        store_results = store_orders.pivot_table(
            index=self.stores_id_col,
            columns='promo_label',
            aggfunc={
                self.orders_date_col: ['min', 'max'],
                self.orders_id_col: 'count',
                user_col: Series.nunique,
                self.orders_ticket_col: ['sum', 'mean'],
            }
        )

        # Flatten (column, aggregate, label) into '<label> <column> <aggregate>'.
        store_results.columns = [
            f'{label} {value_col} {agg_name}'
            for value_col, agg_name, label in store_results.columns
        ]
        store_results = store_results[sorted(store_results.columns)].copy()

        store_results.reset_index(inplace=True)
        store_results[self.promo_id_col] = project_id
        return store_results

    def summary(self, **kwargs) -> None:
        """Run ``query_orders`` for every promo and stack the results in ``self.acum``.

        *kwargs* are forwarded to ``query_orders`` (``user_col`` and
        ``move_n_period`` are required there). Results for promos without
        orders are kept on purpose so the columns of ``self.acum`` do not
        depend on which promos happened to have data.
        """
        promo_rows = zip(
            self.promo[self.promo_id_col],
            self.promo[self.stores_id_col],
            self.promo[self.promo_start_date],
            self.promo[self.promo_end_date],
        )
        per_promo = [
            self.query_orders(project, store, start_date, end_date, **kwargs)
            for project, store, start_date, end_date in promo_rows
        ]
        self.acum = concat(per_promo, ignore_index=True) if per_promo else DataFrame()

# Importar

In [11]:
# Point the helper at BASE_DIR (it prints a message if 'stores.csv' or
# 'promo_results.csv' is missing) and load stores, promos and all order files.
pr = PromoResults(BASE_DIR)
pr.read_files()

# Transformar

In [12]:
# Clean each table and register the column names later steps rely on.
# Stores must go first: mod_orders_data renames the orders' store column to
# the stores id column recorded by mod_stores_data.
pr.mod_stores_data(id_col='store_id')
pr.mod_promo_data(id_col='project ID', start_date_col='project valid from', end_date_col='project valid until')
pr.mod_orders_data(id_col='Order ID', store_col='Store ID', date_col='Created at Local Time Time', sum_ticket_col='Total Ticket')

In [13]:
# Match every promo to a store using the leading words of its name, then show
# (rows, columns) of the promos that ended up without a store_id.
pr.get_store(col_to_search='project name')
pr.promo[pr.promo['store_id'].isnull()].shape

(2349, 26)

# Resumen

In [14]:
# Aggregate orders around each promo into pr.acum. The comparison windows sit
# at least 4 weeks before/after the promo start (more for longer promos), and
# the "not comparable" periods are kept because just_comparable is False.
pr.summary(user_col='User ID', move_n_period=4, just_comparable=False, verbose=False)

In [17]:
# Join the promo metadata with the per-promo order aggregates; the inner join
# keeps only promos that produced a row in pr.acum for the same store.
# NOTE(review): the 'index' column presumably comes from results whose index
# was unnamed when reset — confirm. errors='ignore' keeps this cell working
# when that column is absent instead of raising KeyError.
pr.total = pr.promo.merge(pr.acum, on=['project ID', 'store_id'], how='inner').drop(columns='index', errors='ignore')
pr.total.head()

Unnamed: 0,CPG,project ID,project name,project valid from,project valid until,project orders,budget used,project duration (days),completed days (%),sales,...,03 Post comparable Total Ticket sum,03 Post comparable User ID nunique,03 Post comparable order_date max,03 Post comparable order_date min,04 Post not comparable Order ID count,04 Post not comparable Total Ticket mean,04 Post not comparable Total Ticket sum,04 Post not comparable User ID nunique,04 Post not comparable order_date max,04 Post not comparable order_date min
0,2 Cerritos,25045,miniso 30off toda la tienda 16-31 oct,2021-10-16 06:00:00,2021-11-01 03:00:00,453,70599.18,16,100%,238238.25,...,344423.46,634.0,'2021-11-23 21:31,'2021-11-08 04:09,204.0,535.178137,109176.34,200.0,'2021-11-08 02:48,'2021-11-01 08:16
1,2 Cerritos,21247,soriana jr vinos y licores 16-17 jul 21,2021-07-16 05:00:34,2021-07-17 20:19:29,272,67070.38,1,100%,239105.55,...,2042126.96,2008.0,'2021-07-26 11:38,'2021-07-24 20:20,9626.0,921.343381,8868851.39,8535.0,'2021-07-24 20:19,'2021-07-17 20:20
2,Chedraui,28229,chedraui - martimiércoles 14 y 15 diciembre,2021-12-21 18:00:00,2021-12-23 06:00:00,2269,42443.31,2,100%,192163.66,...,6200151.55,5974.0,'2021-12-31 17:57,'2021-12-30 06:04,22373.0,1040.87692,23287539.34,17847.0,'2021-12-30 05:57,'2021-12-23 06:02
3,Cornershop (Dummy),35982,sanborns tddp 20% off gdl- hmo- merida- mexica...,2022-06-11 05:00:00,2022-06-20 05:00:00,35,5827.6,9,89%,29138.0,...,62660.11,99.0,'2022-06-30 16:53,'2022-06-27 09:05,169.0,662.238166,111918.25,163.0,'2022-06-26 22:42,'2022-06-20 07:50
4,Cornershop (Dummy),35979,sanborns tddp 20% off slp-toluca,2022-06-11 05:00:00,2022-06-20 05:00:00,19,3794.2,9,100%,18971.0,...,62660.11,99.0,'2022-06-30 16:53,'2022-06-27 09:05,169.0,662.238166,111918.25,163.0,'2022-06-26 22:42,'2022-06-20 07:50


# Exportar

In [18]:
# Save the merged table as 'summary.csv' inside the base directory,
# tab-separated and UTF-16 encoded, without the DataFrame index.
pr.total.to_csv(pr.base_dir.joinpath('summary.csv'), index=False, sep='\t', encoding='utf-16')