# SoM

## Parameters

In [1]:
# Root folder of the project. With the default colab=False, SoM expects
# brand_cpg.csv directly inside it and the Tableau export inside the
# sub-folder named exactly like CPG_NAME.
BASE_DIR = '/Users/efraflores/Desktop/EF/Corner/Brands/New_SoM'
# CPG to process. Used verbatim as the sub-folder name; SoM normalises it
# (strip, spaces -> underscores, title case) to build the file names.
CPG_NAME = 'HyHo'

## Code

In [2]:
from time import sleep
from pathlib import Path
from unicodedata import normalize
from pandas import DataFrame, read_csv
from IPython.display import clear_output
from re import sub, search, UNICODE, I

class SoM:
    """Build the SoM dataset of one CPG from a Tableau export and a brand->CPG catalog.

    Input files (checked, but not required, at construction time):
      * ``<cpg_name>_Tableau.csv`` - tab separated, UTF-16, with a trailing "Total" row.
      * ``brand_cpg.csv``          - brand -> CPG catalog.

    Output files (written to ``base_dir``, tab separated, UTF-16):
      * ``<cpg_name>_catalog.csv`` - brand + category catalog.
      * ``<cpg_name>_som.csv``     - cleaned Tableau data merged with the catalog.
    """

    def __init__(self, base_dir, cpg_name, colab=False) -> None:
        """Resolve every path used by the workflow and warn about missing input files.

        Args:
            base_dir: Root folder of the project.
            cpg_name: Name of the CPG. It is normalised (strip, spaces -> underscores,
                title case) for file names, but used verbatim as the sub-folder name.
            colab: When True every file lives directly in ``base_dir``. Otherwise the
                Tableau export lives in ``base_dir/cpg_name`` and ``brand_cpg.csv`` in
                the parent of that folder.
        """
        self.cpg_name = cpg_name.strip().replace(' ', '_').title()
        root = Path(base_dir)
        self.base_dir = root if colab else root.joinpath(cpg_name)
        self.cpg_data = self.base_dir.joinpath(f'{self.cpg_name}_Tableau.csv')
        catalog_dir = self.base_dir if colab else self.base_dir.parent
        self.brand_cpg = catalog_dir.joinpath('brand_cpg.csv')
        # Best effort: only warn, the failure will surface when the file is actually read
        for search_file in (self.cpg_data, self.brand_cpg):
            if not search_file.is_file():
                # Path.name / Path.parent keep the separators and work on any OS
                print(f'There should be a file called: {search_file.name} at path:\n{search_file.parent}\n\nAdd this file and try again!\n')

    def __str__(self) -> str:
        """Return a tab-aligned summary of the CPG name and the resolved paths."""
        return f'CPG:\t{self.cpg_name}\nPath:\t{self.base_dir}\nFile:\t{self.cpg_data}\nBrand-CPG:\t{self.brand_cpg}'

    def clean_text_column(self, text: str, pattern=r"[^a-zA-Z0-9\s]") -> str:
        """Normalise a text into a lowercase, ASCII-only, underscore separated token.

        Args:
            text: Value to clean; it is converted with ``str`` first.
            pattern: Regex of the characters to replace with a space.

        Returns:
            The cleaned text. The literal string ``nan`` becomes an empty string.
        """
        # Decompose accented characters (NFD) and drop whatever is not ASCII: "á" --> "a"
        clean = normalize('NFD', str(text).replace('\n', ' \n ')).encode('ascii', 'ignore')
        clean = sub(pattern, ' ', clean.decode('utf-8'), flags=UNICODE).strip().lower()
        # Two or more whitespace characters will be replaced with one space
        clean = sub(r'\s{2,}', ' ', clean)
        # Clean any null string and replace spaces with underscore
        return sub(r'^nan$', '', clean).replace(' ', '_')

    def read_tableau_data(self) -> DataFrame:
        """Read the Tableau export, drop its last ("Total") row and clean the column names."""
        df = read_csv(self.cpg_data, sep='\t', encoding='utf-16', low_memory=False)
        # Drop last "Total" row
        df = df.iloc[:-1, :].copy()
        # Clean every column name
        df.columns = [self.clean_text_column(column) for column in df.columns]
        return df

    def create_catalog(self, category_col='category_en', brand_col='brand_id', cols_keep=None, cols_catalog=None, export=False) -> DataFrame:
        """Create the brand + category catalog, flagging every brand as own CPG or competitor.

        Args:
            category_col: Column with the category name.
            brand_col: Column used to merge the Tableau data with the brand->CPG catalog.
            cols_keep: Tableau columns to keep. Defaults to
                ``['category_id', 'brand_id', 'brand_name', 'category_en']``.
            cols_catalog: Columns taken from ``brand_cpg.csv``; the last one must hold the
                CPG names. Defaults to ``['brand_id', 'cpg_names']``.
            export: When True the catalog is also written to ``<cpg_name>_catalog.csv``.

        Returns:
            The catalog sorted by CPG, brand and category.
        """
        # None sentinels instead of mutable list defaults
        cols_keep = ['category_id', 'brand_id', 'brand_name', 'category_en'] if cols_keep is None else list(cols_keep)
        cols_catalog = ['brand_id', 'cpg_names'] if cols_catalog is None else list(cols_catalog)
        cpg_col = cols_catalog[-1]
        # Without duplicates of parameter cols
        df = self.read_tableau_data()[cols_keep].drop_duplicates().reset_index(drop=True)
        # Import the brand->CPG catalog
        bc = read_csv(self.brand_cpg, low_memory=False).astype(str)
        # Merge it with brand_id; brands missing from the catalog get the 'EMPTY' placeholder
        df = df.merge(bc[cols_catalog].astype(str), on=brand_col, how='left').fillna('EMPTY')
        # Create a column to group competitors: the CPG name is searched (case-insensitive)
        # inside the cleaned CPG names of every brand
        df['CPG'] = df[cpg_col].map(
            lambda x: self.cpg_name
            if search(self.cpg_name, self.clean_text_column(str(x)), flags=I) is not None
            else 'Comp'
        )
        # Copy the category name as the default category name for the CPG
        df['category_CPG'] = df[category_col]
        # Sort in the correct format order
        catalog = df[['CPG', cpg_col] + cols_keep + ['category_CPG']].rename(columns={cpg_col: 'CPG_real'})
        catalog = catalog.sort_values(['CPG', brand_col, category_col])
        # Export it as csv
        if export:
            catalog.to_csv(self.base_dir.joinpath(f'{self.cpg_name}_catalog.csv'), index=False, sep='\t', encoding='utf-16')
        return catalog

    def clean_tableau_data(self, col_month_year='month_year', to_drop=None) -> DataFrame:
        """Read the Tableau export, drop unused metrics and split the month-year column.

        Args:
            col_month_year: Column holding values like "<month name> <year>".
            to_drop: Columns to remove. Defaults to ``['share_sales',
                'avg_ticket_currency', 'found_rate', 'fulfillment', 'frequency']``.

        Returns:
            The data with a ``month`` column such as "07_jul" and a ``year`` column
            instead of ``col_month_year``.
        """
        if to_drop is None:
            to_drop = ['share_sales', 'avg_ticket_currency', 'found_rate', 'fulfillment', 'frequency']
        df = self.read_tableau_data().drop(columns=list(to_drop))
        # Split every row by its space "julio 2021" --> ['julio','2021']
        month_year = df[col_month_year].str.split()
        # First 3 characters of the 1st elem, lower-cased so "Julio" also matches --> 'jul'
        df['month'] = month_year.str[0].str[:3].str.lower()
        # Get the second element
        df['year'] = month_year.str[1]
        # Map "jul" --> "07_jul". The abbreviations are the Spanish ones; any other
        # value is left as a missing value by Series.map
        list_month = ['ene', 'feb', 'mar', 'abr', 'may', 'jun', 'jul', 'ago', 'sep', 'oct', 'nov', 'dic']
        dict_month = {month: f'{number:02d}_{month}' for number, month in enumerate(list_month, start=1)}
        df['month'] = df['month'].map(dict_month)
        # Remove the "month_year" column
        return df.drop(columns=col_month_year)

    def cool_print(self, text: str, sleep_time=0.03, by_word=False) -> None:
        """Print ``text`` progressively, as if it were being typed.

        Args:
            text: Message to show.
            sleep_time: Seconds between characters (multiplied by 9 between words).
            by_word: When True the text is revealed word by word instead of char by char.
        """
        # Print as typing: the output is cleared and the accumulated text printed again
        acum = ''
        for x in text.split() if by_word else text:
            acum += x+' ' if by_word else x
            clear_output(wait=True)
            sleep(sleep_time*(9 if by_word else 1))
            print(acum)
        sleep(0.9)

    def _ask_yes_no(self, prompt: str) -> bool:
        """Keep prompting until the user types y/n (any case); return True for "y"."""
        answer = ''
        while answer not in ('y', 'n', 'Y', 'N'):
            answer = input(prompt)
        return answer in ('y', 'Y')

    def user_exit(self) -> bool:
        """Ask the user whether to continue; return True when they want to exit."""
        return not self._ask_yes_no('Enter "y" to continue or "n" to exit\n')

    def create_som(self, kwargs_catalog=None, kwargs_tableau=None) -> None:
        """Run the interactive workflow that ends with ``<cpg_name>_som.csv`` on disk.

        Steps: create (or load) the catalog, clean the Tableau data, merge both on
        ``brand_id`` + ``category_id`` and export the result. The user can exit before
        each step.

        Args:
            kwargs_catalog: Extra keyword arguments for ``create_catalog``.
            kwargs_tableau: Extra keyword arguments for ``clean_tableau_data``.
        """
        # None sentinels instead of mutable dict defaults
        kwargs_catalog = {} if kwargs_catalog is None else kwargs_catalog
        kwargs_tableau = {} if kwargs_tableau is None else kwargs_tableau
        catalog_name = f'{self.cpg_name}_catalog.csv'
        catalog_path = self.base_dir.joinpath(catalog_name)
        # Connect all methods to create the SoM data interacting with an user
        self.cool_print(f'Welcome to SoM Creator!\nWe are about to build the data for {self.cpg_name.upper()}!\nFirst step:\tBrand + Category catalog!')
        # Continue?
        if self.user_exit():
            self.cool_print('Have a nice day!')
            return None
        # Ask for the creation of catalog
        if self._ask_yes_no('Do you want to create a catalog? y/n\n'):
            catalog = self.create_catalog(export=True, **kwargs_catalog)
            self.cool_print(f'Catalog was created at path:\n{self.base_dir}\nnamed: {catalog_name}')
        else:
            # Import the existing one
            try:
                catalog = read_csv(catalog_path, low_memory=False, sep='\t', encoding='utf-16')
            except UnicodeError:
                # The file is not valid UTF-16: retry with the pandas defaults
                catalog = read_csv(catalog_path, low_memory=False)
            except FileNotFoundError:
                # 404, exit
                self.cool_print(f'File with name {catalog_name} was not found at path:\n{self.base_dir}\n\nAdd it and try again!\nHave a nice day!')
                return None
            self.cool_print(f'{catalog_name} was found at path:\n{self.base_dir}')
        # Merge data with catalog
        self.cool_print('Now, it is time to merge the data with the catalog')
        # Continue?
        if self.user_exit():
            self.cool_print('Have a nice day!')
            return None
        # Read data and merge it; both sides are cast to str so the keys compare equal
        df = self.clean_tableau_data(**kwargs_tableau).astype(str)
        df = df.merge(catalog.astype(str), on=['brand_id', 'category_id'], suffixes=('_tableau', ''))
        # Export it, process finished
        df.to_csv(self.base_dir.joinpath(f'{self.cpg_name}_som.csv'), index=False, sep='\t', encoding='utf-16')
        self.cool_print(f'A file named: {self.cpg_name}_som.csv was created at path:\n{self.base_dir}\n\nHave a nice day!')

## Create SoM file

In [3]:
# Run the interactive workflow for the CPG configured in the Parameters cell.
# It asks for y/n answers through input() and exports <cpg_name>_som.csv when finished.
SoM(BASE_DIR, CPG_NAME).create_som()

A file named: Hyho_som.csv was created at path:
/Users/efraflores/Desktop/EF/Corner/Brands/New_SoM/HyHo

Have a nice day!
