# Parameters

In [1]:
# Directory handed to NewUsers: it is scanned for the input CSV files and the
# result CSV is written back into it.
BASE_DIR = '/Users/efraflores/Desktop/EF/Corner/Requests/new_users'
# Common file-name prefix: inputs are the files named `<prefix>_<something>.csv`
# (anything starting with `<prefix>_result` is skipped) and the output is
# written as `<prefix>_result.csv`.
FILE_BASE_NAME = 'new_users'

# Code

In [2]:

from pathlib import Path
from IPython.display import display

# Ingeniería de variables
from numpy import array
from re import escape as re_escape, search as re_search
from pandas import DataFrame, Series, concat, offsets, read_csv, to_datetime

class NewUsers:
    '''
    Builds a per-quarter summary of orders, active users and new users from a
    set of CSV exports that share a common file-name prefix.

    Typical use is `NewUsers(base_dir, file_base_name).full_pipeline()`, which
    runs `read_files` -> `get_quarter` -> `user_rank` -> `summary` in order.
    Each step reads and overwrites `self.df`, so the steps are not idempotent
    and must be executed in that order.
    '''

    def __init__(self, base_dir: str, file_base_name: str) -> None:
        '''
        Parameters
        ----------
        base_dir : directory that holds the input CSV files; the result file
            is written there as well.
        file_base_name : prefix shared by the input files. A file is picked up
            when its name is `<file_base_name>_<something>.csv` and the
            `<something>` part does not start with `result`.
        '''
        # Path object so directories can be joined and globbed
        self.base_dir = Path(base_dir)
        # Keep the base name; it is also used to name the result file
        self.file_name = file_base_name
        # Match on the file name only (not the full path), escape the prefix and
        # anchor both ends, so neither a parent directory nor something like
        # `*.csv.bak` can produce a false hit.
        pattern = rf'^{re_escape(self.file_name)}_(?!result).+\.csv$'
        # Sorted: glob order is filesystem dependent, sorting makes runs reproducible
        self.files_list = sorted(
            path for path in self.base_dir.glob('*') if re_search(pattern, path.name)
        )

    def read_files(self) -> DataFrame:
        '''
        Read every file in `self.files_list` and stack them into `self.df`.

        A `file` column holding the source file name is added to each chunk.
        Progress is printed per file.

        Returns
        -------
        The concatenated frame (the same object stored in `self.df`); an empty
        DataFrame when no file matched.
        '''
        # Only informative, used in the progress message
        total_files = len(self.files_list)
        chunks = []

        for i, file_chunk in enumerate(self.files_list):
            # File name only, without its full location
            sub_name = Path(file_chunk).name
            aux = read_csv(file_chunk)
            aux['file'] = sub_name
            chunks.append(aux)

            # Report progress to the user
            print(f'Archivo {i+1}/{total_files} con nombre: {sub_name} es importado exitosamente')

        # One concat at the end: DataFrame.append no longer exists in pandas 2.x
        # and growing a frame inside the loop copies all previous rows each time.
        self.df = concat(chunks, ignore_index=True) if chunks else DataFrame()
        return self.df

    def get_quarter(self, date_col: str='Delivered at Local Time Dynamic')-> None:
        '''
        Parse `date_col` as datetime (in place) and derive the calendar columns
        `year`, `month`, `quarter` and `year_quarter` (formatted like `2015_02`).
        '''
        dates = to_datetime(self.df[date_col])
        self.df[date_col] = dates
        self.df['year'] = dates.dt.year
        self.df['month'] = dates.dt.month
        self.df['quarter'] = dates.dt.quarter
        # Zero-padded quarter so the label sorts chronologically as plain text
        self.df['year_quarter'] = (
            self.df['year'].astype(str) + '_' + self.df['quarter'].astype(str).str.zfill(2)
        )

    def user_rank(self, quarter_col: str='year_quarter', cols_to_group: list=['User ID', 'Order Platform', 'year_quarter'], cols_to_sum: list=['Orders Count']) -> None:
        '''
        Collapse `self.df` to one row per `cols_to_group` combination and
        describe each row's position in its user history.

        Parameters
        ----------
        quarter_col : column holding the sortable quarter label; it must be one
            of `cols_to_group`.
        cols_to_group : grouping keys. Every key other than `quarter_col`
            identifies the entity (by default user + platform) being tracked.
        cols_to_sum : numeric columns that are summed per group.

        Columns added to `self.df`
        --------------------------
        user_rank : 1 for the entity's first quarter with data, 2 for the next...
        index : position of the quarter among the distinct quarters present in
            the data (0-based, sorted by label).
        prev_quarter : `index` of the entity's previous active quarter
            (NaN on its first one).
        quarter_diff : `index - prev_quarter`.
        quarter_diff_group : `quarter_diff` capped at 3.

        NOTE(review): `index` counts only quarters that appear in the data, so a
        quarter missing from every file would shrink the measured gaps.
        '''
        # The default lists are never mutated; copies keep it that way
        group_cols = list(cols_to_group)
        # Everything except the quarter identifies who is being ranked.
        # (The quarter column used to be hard-coded here, which silently ranked
        # every row as 1 whenever a different `quarter_col` was passed.)
        entity_cols = [col for col in group_cols if col != quarter_col]
        sort_cols = entity_cols + [quarter_col]

        totals = self.df.groupby(group_cols)[list(cols_to_sum)].sum().reset_index()
        # Explicit chronological order inside each entity before numbering rows
        totals = totals.sort_values(sort_cols).reset_index(drop=True)
        totals['user_rank'] = totals.groupby(entity_cols).cumcount() + 1

        # Ordinal position of each distinct quarter; lands in a column named 'index'
        tot_quarters = (
            totals[[quarter_col]]
            .drop_duplicates()
            .sort_values(quarter_col)
            .reset_index(drop=True)
            .reset_index()
        )
        totals = totals.merge(tot_quarters, on=quarter_col).sort_values(sort_cols)

        totals['prev_quarter'] = totals.groupby(entity_cols)['index'].shift()
        totals['quarter_diff'] = totals['index'] - totals['prev_quarter']
        # Gaps of three or more quarters share one bucket; NaN (first quarter) stays NaN
        totals['quarter_diff_group'] = totals['quarter_diff'].clip(upper=3)
        self.df = totals

    def summary(self, quarter_col: str='year_quarter') -> None:
        '''
        Build the per-quarter report, store it in `self.total`, write it to
        `<base_dir>/<file_name>_result.csv` and display it.

        For every `Order Platform` the report has: summed `Orders Count`,
        distinct users (`*_total_users`), distinct users whose `user_rank` is 1
        (`*_new_users`) and the number of rows per `quarter_diff_group`
        (`*_<n>_Q_diff`).
        '''
        platform_col = 'Order Platform'

        orders = self.df.pivot_table(index=quarter_col, columns=platform_col, values='Orders Count', aggfunc='sum')

        total_users = self.df.pivot_table(index=quarter_col, columns=platform_col, values='User ID', aggfunc='nunique')
        first_timers = self.df[self.df['user_rank'] == 1]
        new_users = first_timers.pivot_table(index=quarter_col, columns=platform_col, values='User ID', aggfunc='nunique')

        total_users = total_users.add_suffix('_total_users')
        new_users = new_users.add_suffix('_new_users')

        self.total = orders.join(total_users.join(new_users))

        # Rows per (platform, gap bucket); columns come back as a two-level index
        prev_vs_actual = self.df.pivot_table(index=quarter_col, columns=[platform_col, 'quarter_diff_group'], values='index', aggfunc='count')
        prev_vs_actual.columns = [f'{x[0]}_{int(x[-1])}_Q_diff' for x in prev_vs_actual.columns]

        self.total = self.total.join(prev_vs_actual)
        self.total.to_csv(self.base_dir.joinpath(f'{self.file_name}_result.csv'))
        display(self.total)

    def full_pipeline(self) -> None:
        '''
        Run the whole process: read the files, derive quarters, rank the users
        and produce the summary.
        '''
        self.read_files()
        self.get_quarter()
        self.user_rank()
        self.summary()


In [7]:
# Ad-hoc export: writes the rows whose `quarter_diff_group` equals 1 to
# `test.csv` inside the base directory.
# NOTE(review): `nu` is only created in the cell below (In [3]); on a fresh
# kernel this cell raises NameError unless that cell has been run first.
nu.df[nu.df['quarter_diff_group']==1].to_csv(nu.base_dir.joinpath('test.csv'))

In [3]:
# Build the report from the files configured in the parameters cell:
# read -> derive quarters -> rank users -> summarise, save and display.
nu = NewUsers(base_dir=BASE_DIR, file_base_name=FILE_BASE_NAME)
nu.full_pipeline()

Archivo 1/4 con nombre: new_users_2015_2019.csv es importado exitosamente
Archivo 2/4 con nombre: new_users_2021.csv es importado exitosamente
Archivo 3/4 con nombre: new_users_2020.csv es importado exitosamente
Archivo 4/4 con nombre: new_users_2022.csv es importado exitosamente


Unnamed: 0_level_0,Cornershop,Uber,Uber Postmates,Cornershop_total_users,Uber_total_users,Uber Postmates_total_users,Cornershop_new_users,Uber_new_users,Uber Postmates_new_users,Cornershop_0_Q_diff,Cornershop_1_Q_diff,Cornershop_2_Q_diff,Cornershop_3_Q_diff,Uber_0_Q_diff,Uber_1_Q_diff,Uber_2_Q_diff,Uber_3_Q_diff,Uber Postmates_0_Q_diff
year_quarter,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
2015_02,192.0,,,108.0,,,108.0,,,108.0,,,,,,,,
2015_03,2990.0,,,1335.0,,,1268.0,,,1268.0,67.0,,,,,,,
2015_04,7936.0,,,2454.0,,,1859.0,,,1859.0,586.0,9.0,,,,,,
2016_01,17096.0,,,4894.0,,,3354.0,,,3354.0,1424.0,112.0,4.0,,,,,
2016_02,33596.0,,,7980.0,,,4735.0,,,4735.0,2996.0,195.0,54.0,,,,,
2016_03,44853.0,,,10509.0,,,5240.0,,,5240.0,4830.0,321.0,118.0,,,,,
2016_04,55600.0,,,12269.0,,,4835.0,,,4835.0,6621.0,547.0,266.0,,,,,
2017_01,70941.0,,,15929.0,,,6739.0,,,6739.0,7948.0,748.0,494.0,,,,,
2017_02,91593.0,,,23216.0,,,11020.0,,,11020.0,10459.0,1017.0,720.0,,,,,
2017_03,113250.0,,,25179.0,,,10301.0,,,10301.0,12957.0,1076.0,845.0,,,,,
