# SoM

## Parameters

In [3]:
# Folder under which SoM looks for the `<CPG_NAME>` sub-folder (absolute, machine-specific path)
BASE_DIR = '/Users/efraflores/Desktop/EF/Corner/Brands/New_SoM'
# CPG to process: SoM uses it as the sub-folder name and as the prefix of the file names
CPG_NAME = 'Nestle'

## Class

In [61]:
from time import sleep
from pathlib import Path
from re import sub, UNICODE
from unicodedata import normalize
from pandas import DataFrame, read_csv
from IPython.display import clear_output

class SoM:
    """Prepare the SoM data of a single CPG from its Tableau CSV export.

    Files are looked up inside ``<base_dir>/<cpg_name>`` and are named after
    the normalised CPG name, e.g. ``Nestle_data.csv``.

    Attributes:
        base_dir: ``base_dir`` joined with ``cpg_name`` exactly as it was given.
        cpg_name: CPG name stripped, spaces replaced by underscores and
            title-cased; used as the file-name prefix.
    """

    # Answers accepted by the interactive yes/no prompts (compared lower-cased)
    _YES_NO = ('y', 'n')
    # Spanish month abbreviations in calendar order, used to build "07_jul"-like labels
    _MONTHS = ('ene', 'feb', 'mar', 'abr', 'may', 'jun', 'jul', 'ago', 'sep', 'oct', 'nov', 'dic')

    def __init__(self, base_dir, cpg_name):
        # NOTE: the folder uses the name as given, the file names use the normalised one
        self.base_dir = Path(base_dir).joinpath(cpg_name)
        self.cpg_name = cpg_name.strip().replace(' ', '_').title()

    def __str__(self) -> str:
        return f'CPG:\t{self.cpg_name}\nPath:\t{self.base_dir}'

    def clean_text_column(self, text, pattern=r"[^a-zA-Z0-9\s]") -> str:
        """Return ``text`` as a lower-case, ASCII, underscore-separated label.

        Args:
            text: Any value; it is converted with ``str`` first.
            pattern: Regex of the characters to replace with a space.

        Returns:
            The cleaned label, or ``''`` when the value is the string "nan".
        """
        # Decompose accented characters and drop whatever is not ASCII (á --> a)
        ascii_text = normalize('NFD', str(text)).encode('ascii', 'ignore').decode('utf-8')
        clean = sub(pattern, ' ', ascii_text, flags=UNICODE).strip().lower()
        # Any run of whitespace (spaces, tabs, line breaks) becomes one space,
        # so no stray tab/newline can survive the underscore replacement below
        clean = sub(r'\s+', ' ', clean)
        # Clean any null string and replace spaces with underscore
        clean = sub(r'^nan$', '', clean).replace(' ', '_')
        return clean

    def read_tableau_data(self) -> DataFrame:
        """Read ``<cpg_name>_data.csv`` (tab-separated, UTF-16) with clean column names.

        Returns:
            The data without its last row and with every column name passed
            through ``clean_text_column``.
        """
        df = read_csv(self.base_dir.joinpath(f'{self.cpg_name}_data.csv'), sep='\t', encoding='utf-16', low_memory=False)
        # Drop the last row, which is assumed to be the "Total" summary row
        df = df.iloc[:-1, :].copy()
        # Clean every column name (a list, not a lazy map, so it is evaluated here)
        df.columns = [self.clean_text_column(col) for col in df.columns]
        return df

    def create_catalog(self, category_col='category_en', cols_keep=['category_id','brand_name','category_en'], cols_before=['CPG_real','CPG','brand_id'], export=False) -> DataFrame:
        """Build the catalog template to be completed manually.

        Args:
            category_col: Column copied into ``category_CPG``; it must be one
                of ``cols_keep``.
            cols_keep: Columns taken from the data (duplicated rows removed).
            cols_before: Placeholder columns placed first, filled with
                ``'EMPTY FOR NOW'``.
            export: When true, also write ``<cpg_name>_catalog.csv``
                (tab-separated, UTF-16) into ``base_dir``.

        Returns:
            The catalog with columns ``cols_before + cols_keep + ['category_CPG']``.

        Raises:
            KeyError: If a requested column is not present in the data.
        """
        # Work on private lists: the defaults are shared between calls and any
        # sequence (e.g. a tuple) is accepted
        cols_keep, cols_before = list(cols_keep), list(cols_before)
        # Without duplicates of parameter cols
        df = self.read_tableau_data()[cols_keep].drop_duplicates().reset_index(drop=True)
        # Create columns to fill manually
        for col in cols_before:
            df[col] = 'EMPTY FOR NOW'
        # Copy the category name as the default category name for the CPG
        df['category_CPG'] = df[category_col]
        # Sort in the correct format order
        catalog = df[cols_before + cols_keep + ['category_CPG']].copy()
        # Export it as csv
        if export:
            catalog.to_csv(self.base_dir.joinpath(f'{self.cpg_name}_catalog.csv'), index=False, sep='\t', encoding='utf-16')
        return catalog

    def clean_tableau_data(self, col_month_year='month_year', to_drop=['share_sales','avg_ticket_currency','found_rate','fulfillment','frequency']) -> DataFrame:
        """Drop unused columns and split the "month year" column in two.

        Args:
            col_month_year: Column holding values such as ``"julio 2021"``.
            to_drop: Columns removed from the data.

        Returns:
            The data with ``month`` (e.g. ``"07_jul"``; missing when the
            abbreviation is unknown) and ``year`` (text) columns instead of
            ``col_month_year``.

        Raises:
            KeyError: If a column of ``to_drop`` is not present in the data.
        """
        df = self.read_tableau_data().drop(columns=list(to_drop))
        # Split every row by its space "julio 2021" --> ['julio','2021']
        df[col_month_year] = df[col_month_year].str.split()
        # First 3 characters of the 1st element, lower-cased so that "Julio"
        # also matches the lower-case keys of the month mapping
        df['month'] = df[col_month_year].str[0].str[:3].str.lower()
        # Get the second element
        df['year'] = df[col_month_year].str[1]
        # Map "jul" --> "07_jul"
        dict_month = {abbr: f'{number:02d}_{abbr}' for number, abbr in enumerate(self._MONTHS, start=1)}
        df['month'] = df['month'].map(dict_month)
        # Remove the "month_year" column
        df.drop(columns=col_month_year, inplace=True)
        return df

    def cool_print(self, text):
        """Print ``text`` as if it were being typed, one character at a time."""
        typed = ''
        for char in text:
            typed += char
            # Redraw the whole output with one more character each time
            clear_output(wait=True)
            sleep(.03)
            print(typed)

    def _ask_yes_no(self, prompt, reader=None) -> bool:
        """Repeat ``prompt`` until the answer is y/n (any case); True means "y".

        ``reader`` replaces ``input`` when given; it is resolved at call time so
        that an ``input`` patched by the running front-end is honoured.
        """
        reader = input if reader is None else reader
        answer = ''
        while answer not in self._YES_NO:
            answer = str(reader(prompt)).strip().lower()
        return answer == 'y'

    def user_input(self) -> bool:
        """Ask whether to continue; return True for "y" and False for "n".

        The previous version evaluated the bare name ``exit`` without calling
        it, so answering "n" had no effect; callers now get the answer back.
        """
        return self._ask_yes_no('Type "y" to continue or "n" to exit\n')

    def create_som(self, **kwargs):
        """Run the interactive flow that builds the SoM data.

        Args:
            **kwargs: Accepted for compatibility; currently not used.

        Returns:
            The catalog DataFrame when the user asks for it, otherwise None
            (also when the user chooses to exit at the first prompt).
        """
        self.cool_print(f'Welcome to SoM creator!\nWe are about to build the data for {self.cpg_name.upper()}!')
        # Stop right away when the user answers "n"
        if not self.user_input():
            return None
        if self._ask_yes_no('Do you want to create a catalog? y/n\n'):
            return self.create_catalog()
        return None
# Build the helper for the configured CPG and show its name and working folder
som = SoM(BASE_DIR, CPG_NAME)
print(som)

# Start the interactive flow (it asks the user for y/n answers)
som.create_som()

Welcome to SoM creator!
We are about to build the data for NESTLE!
