# Always On

## Parámetros

In [1]:
# Directory that holds the input CSV; the pipeline also writes its export here.
BASE_DIR = "/Users/efraflores/Desktop/EF/Corner/AlwaysOn/data"
# Name of the input CSV file, looked up inside BASE_DIR.
FILE_NAME = "op.csv"

## Código

In [2]:
from pathlib import Path
from pandas import DataFrame, read_csv, cut

class AlwaysOn:
    """Select the most frequent categories/products per group and compute their found rate.

    The pipeline reads a CSV file, keeps only the rows that belong to the
    top-N product categories and top-N product "first words" within each
    store category, aggregates requested vs. found quantities, and buckets
    the resulting found rate into fixed ranges.
    """

    # Bin edges (right-inclusive) used to bucket the found rate. The first edge
    # sits just below zero so that a found rate of exactly 0 falls in the first bin.
    FR_BINS = (-.01, 0.7, 0.8, 0.9, 0.95, 1)

    def __init__(self, base_dir, file_name) -> None:
        """Store the input location and warn (without raising) if the file is missing.

        Args:
            base_dir: Directory containing the input file; exports are written here too.
            file_name: Name of the CSV file inside ``base_dir``.
        """
        self.base_dir = Path(base_dir)
        self.file_name = file_name
        self.file_path = self.base_dir.joinpath(self.file_name)
        if not self.file_path.is_file():
            print(f'Debería existir un archivo llamado {self.file_name} en el directorio:\n{self.base_dir}\n\nAgrega el archivo e intenta de nuevo!')

    def get_file(self) -> None:
        """Load the CSV at ``self.file_path`` into ``self.df``.

        Raises:
            Exception: Whatever ``read_csv`` raised (e.g. ``FileNotFoundError``).
                The user-facing message is printed first, then the error is
                re-raised so the pipeline never continues with a missing or
                stale ``self.df``.
        """
        try:
            self.df = read_csv(self.file_path)
        except Exception:
            print(f'No se pudo importar el archivo {self.file_name} desde el directorio:\n{self.base_dir}')
            raise

    def top_n(self, group_col: str, count_col: str, n: int) -> DataFrame:
        """Return the ``n`` most frequent ``count_col`` values within each ``group_col``.

        Args:
            group_col: Column whose values define the groups.
            count_col: Column whose values are counted (by number of rows) per group.
            n: How many top-ranked values to keep per group.

        Returns:
            DataFrame with columns ``group_col``, ``count_col`` and
            ``rank_<count_col>``, sorted by group and rank. Rank 1 is the most
            frequent value; ties are broken by order of appearance
            (``method="first"``), so each group yields at most ``n`` rows.
        """
        rank_col = f'rank_{count_col}'
        n_rows_col = '_n_rows'
        counts = self.df.groupby([group_col, count_col]).size().reset_index(name=n_rows_col)
        counts[rank_col] = counts.groupby(group_col)[n_rows_col].rank(method="first", ascending=False)
        top = counts[counts[rank_col] <= n].sort_values([group_col, rank_col])
        return top.drop(n_rows_col, axis=1)

    @staticmethod
    def _first_word(value) -> str:
        """Return the first whitespace-separated token of ``value`` ('' if there is none)."""
        tokens = str(value).split()
        return tokens[0] if tokens else ''

    @staticmethod
    def _fr_labels(bins) -> list:
        """Build 'lo - hi' percentage labels for consecutive bin edges.

        The lower bound is shifted by one percentage point because the bins
        are open on the left; ``round`` avoids float truncation artifacts.
        """
        return [
            f'{round((low + .01) * 100):02d} - {round(high * 100):02d}'
            for low, high in zip(bins[:-1], bins[1:])
        ]

    def full_pipeline(
        self,
        n_category: int,
        n_product: int,
        export_result: bool = False,
        product_col: str = 'product',
        qty_col: str = 'quantity',
        qty_found_col: str = 'quantity_found',
        store_category_col: str = 'store_category',
        product_category_col='product_category',
        cols: list = None,
    ) -> DataFrame:
        """Run the whole analysis and return the aggregated result.

        Args:
            n_category: Number of top product categories kept per store category.
            n_product: Number of top product first-words kept per store category.
            export_result: When True, write the result to
                ``always_on_products.csv`` (tab-separated, UTF-16) in ``base_dir``.
            product_col: Column holding the product name.
            qty_col: Column holding the requested quantity.
            qty_found_col: Column holding the found quantity.
            store_category_col: Column holding the store category.
            product_category_col: Column holding the product category.
            cols: Columns to group the final aggregation by. Defaults to the
                store category, ``'product_id'``, product, product category and
                both rank columns, derived from the column-name arguments.

        Returns:
            The aggregated DataFrame (also stored in ``self.df``), with the
            summed quantities, ``fr`` (found rate capped at 1) and ``fr_range``.

        Raises:
            Exception: Propagated from ``get_file`` when the CSV cannot be read.
        """
        if cols is None:
            cols = [
                store_category_col,
                'product_id',
                product_col,
                product_category_col,
                f'rank_{product_category_col}',
                'rank_first_word',
            ]

        self.get_file()
        top_categories = self.top_n(group_col=store_category_col, count_col=product_category_col, n=n_category)

        # Products are grouped by the first word of their name.
        self.df['first_word'] = self.df[product_col].map(self._first_word)
        top_products = self.top_n(group_col=store_category_col, count_col='first_word', n=n_product)

        # Inner joins keep only rows in a top category AND a top first-word.
        selected = (
            self.df
            .merge(top_categories, on=[store_category_col, product_category_col])
            .merge(top_products, on=[store_category_col, 'first_word'])
        )
        self.df = selected.pivot_table(index=list(cols), values=[qty_col, qty_found_col], aggfunc='sum').reset_index()

        # The tiny epsilon avoids division by zero; the rate is capped at 1.
        self.df['fr'] = (self.df[qty_found_col] / (self.df[qty_col] + 1e-10)).clip(upper=1)
        fr_bins = list(self.FR_BINS)
        self.df['fr_range'] = cut(self.df['fr'], bins=fr_bins, labels=self._fr_labels(fr_bins))
        print('\nDistribución de productos por rango de FoundRate:\n\n', self.df['fr_range'].value_counts(normalize=True, dropna=False).sort_index())

        self.df.sort_values([store_category_col, qty_col], ascending=[True, False], inplace=True)

        if export_result:
            self.df.to_csv(self.base_dir.joinpath('always_on_products.csv'), index=False, sep='\t', encoding='utf-16')
        return self.df

# Build the pipeline for the configured input file, keep the top 5 product
# categories and top 5 product first-words per store category, and export
# the aggregated result next to the input.
ao = AlwaysOn(base_dir=BASE_DIR, file_name=FILE_NAME)
df = ao.full_pipeline(
    n_category=5,
    n_product=5,
    export_result=True,
)


Distribución de productos por rango de FoundRate:

 00 - 70     0.156291
71 - 80     0.035099
81 - 90     0.058278
91 - 95     0.064901
96 - 100    0.685430
Name: fr_range, dtype: float64
