# Parámetros

In [21]:
# Directory that holds the input CSV; passed to TopProducts as base_dir.
BASE_DIR = '/Users/efraflores/Desktop/EF/Corner/Requests'
# Name of the input CSV inside BASE_DIR; passed to TopProducts as file_name.
FILE_NAME = 'orderproduct_freshmarket.csv'

# Código

In [22]:
from pathlib import Path
from typing import Optional

from pandas import DataFrame, concat, read_csv

class TopProducts:
    """Keep, per store, the products that make up the top share of orders.

    The input CSV is aggregated by product, each product's share of its
    store's order count is accumulated from the most-ordered product down,
    and only the rows whose cumulative share stays within a threshold are
    kept. The result is stored in ``self.df`` and written next to the input.
    """

    # Default column sets for transform(); kept as immutable tuples so the
    # method signature does not carry shared mutable default arguments.
    _DEFAULT_GROUP_COLS = ('store', 'category', 'product', 'SKU', 'product_id')
    _DEFAULT_SUM_COLS = ('sales', 'qty_requested', 'qty_found')
    # Number of leading chunks loaded by get_file(in_chunks=True).
    _MAX_CHUNKS = 3

    def __init__(self, base_dir: str, file_name: str) -> None:
        """Store the input location and warn if the file is missing.

        Args:
            base_dir: Directory containing the input CSV; the output file is
                written here as well.
            file_name: Name of the input CSV inside ``base_dir``.
        """
        self.base_dir = Path(base_dir)
        self.file_name = file_name
        self.just_name = self._strip_extension(file_name)
        self.file_path = self.base_dir.joinpath(file_name)
        # is_file must be *called*: the bound method itself is always truthy,
        # which would silence this warning forever.
        if not self.file_path.is_file():
            print(f'Debería haber un archivo llamado "{self.file_name}" en el directorio:\n{self.base_dir}\nAgrégalo e intenta de nuevo!')

    @staticmethod
    def _strip_extension(file_name: str) -> str:
        """Return ``file_name`` without its last extension, keeping inner dots."""
        path = Path(file_name)
        if not path.suffix:
            # No extension to remove; keep the name instead of returning ''.
            return file_name
        return str(path.with_suffix(''))

    def get_file(self, in_chunks: bool = False, chunksize: int = 10000) -> None:
        """Load the input CSV into ``self.df``.

        Args:
            in_chunks: When true, load only the first three chunks of the
                file instead of the whole file.
            chunksize: Number of rows per chunk when ``in_chunks`` is true.
        """
        if not in_chunks:
            self.df = read_csv(self.file_path)
            return

        with read_csv(self.file_path, chunksize=chunksize) as reader:
            # zip() stops as soon as the range is exhausted, so no chunk
            # beyond the limit is parsed.
            chunks = [chunk for _, chunk in zip(range(self._MAX_CHUNKS), reader)]
        # concat() rejects an empty list, hence the explicit fallback.
        self.df = concat(chunks, ignore_index=True) if chunks else DataFrame()

    def transform(
        self,
        cumsum_threshold: float,
        order_col: str = 'order_id',
        store_col: str = 'store',
        cols_to_group: Optional[list] = None,
        cols_to_sum: Optional[list] = None,
    ) -> None:
        """Aggregate the file and keep each store's top products.

        Reads the whole input file, counts distinct orders and sums the
        numeric columns per product, then keeps the rows whose cumulative
        share of the store's order count is at most ``cumsum_threshold``.
        The result is stored in ``self.df`` (with ``order_col`` renamed to
        ``orders``) and written to
        ``<base_dir>/<just_name>_top<cumsum_threshold>.csv`` as tab-separated
        UTF-16 text.

        Args:
            cumsum_threshold: Maximum cumulative share of orders to keep.
            order_col: Column whose distinct values are counted per group.
            store_col: Column within which shares are computed; it must be
                one of the grouping columns.
            cols_to_group: Columns identifying a product. Defaults to
                store, category, product, SKU and product_id.
            cols_to_sum: Columns summed per group. Defaults to sales,
                qty_requested and qty_found.
        """
        group_cols = list(self._DEFAULT_GROUP_COLS if cols_to_group is None else cols_to_group)
        sum_cols = list(self._DEFAULT_SUM_COLS if cols_to_sum is None else cols_to_sum)

        self.get_file()

        # String aggregator names avoid the deprecated builtin-callable path.
        aggregations = {order_col: 'nunique', **{col: 'sum' for col in sum_cols}}
        grouped = self.df.groupby(group_cols).agg(aggregations).reset_index()

        # Most-ordered products first within each store, so the running
        # share below accumulates from the top seller downwards.
        grouped = grouped.sort_values([store_col, order_col], ascending=[True, False])
        store_totals = grouped.groupby(store_col)[order_col].transform('sum')
        grouped['cumsum'] = grouped[order_col] / store_totals
        grouped['cumsum'] = grouped.groupby(store_col)['cumsum'].cumsum()

        kept = grouped[grouped['cumsum'] <= cumsum_threshold].reset_index(drop=True)
        self.df = kept.rename(columns={order_col: 'orders'})

        output_path = self.base_dir.joinpath(f'{self.just_name}_top{cumsum_threshold}.csv')
        self.df.to_csv(output_path, index=False, sep='\t', encoding='utf-16')

# Transformar

In [23]:
# Build the helper for the configured file, keep the products that cover
# 80% of each store's orders, and preview the first rows of the result.
tp = TopProducts(base_dir=BASE_DIR, file_name=FILE_NAME)
tp.transform(cumsum_threshold=0.8)
tp.df.head(n=7)

Unnamed: 0,store,category,product,SKU,product_id,orders,sales,qty_requested,qty_found,cumsum
0,1452|Fresh Market,134|Frutas frescas,"Plátano exportación Precio por kg, unidad 160 ...",422,1466607,1762,40918.1046,1543.42,1841.174,0.218475
1,1452|Fresh Market,134|Frutas frescas,"Papaya maradol Precio por kg, unidad: 2.6 kg a...",444,1466608,557,30829.3525,1094.735,932.375,0.287539
2,1452|Fresh Market,136|Verduras frescas,Aguacate hass Malla 5 piezas,535,1298234,454,22167.3,511.0,497.0,0.343831
3,1452|Fresh Market,136|Verduras frescas,"Aguacate Hass Precio por kg, unidad: 270 g aprox",4,1231639,452,18822.2368,433.82,261.232,0.399876
4,1452|Fresh Market,134|Frutas frescas,"Guayaba Precio por kg, unidad: 112 g aprox",58,1463713,333,4847.9337,343.07,163.123,0.441166
5,1452|Fresh Market,137|Verduras y frutas envasadas,Espinaca Popeye Bolsa 284 g,522,335438,323,17165.6,346.0,344.0,0.481215
6,1452|Fresh Market,136|Verduras frescas,"Aguacate primera Precio por kg, unidad: 400 g ...",919,1466609,318,18119.5809,479.3,222.291,0.520645
