### This file contains the functions needed by the main file "Walmart Sales Prediction - Project"

In [None]:
def groupby_sum(df, group_vars, agg_var='Total', sort_var='Total'):
    '''
    Group a dataframe by one or more columns and sum a single variable within each group.

    Return: a new Pandas dataframe object with one row per combination of the grouping columns,
            holding the grouping columns plus the summed variable. Rows are sorted from the
            highest to the lowest value of "sort_var" and the index is reset to 0..n-1.
    Input parameters:
        - df -> Pandas dataframe object: a dataframe with the grouping columns and the variable to sum.
        - group_vars -> list object: the names of the columns to group by (e.g.: ['Store', 'Dept']).
        - agg_var -> string: the name of the column to be summed. Defaults to 'Total'.
        - sort_var -> string: the name of the column to sort the result by. It has to be a column of
                      the grouped result (one of "group_vars" or "agg_var"). Defaults to 'Total'.
    '''
    # as_index=False keeps the grouping columns as regular columns instead of moving them to the index.
    grouped = df.groupby(group_vars, as_index=False)
    totals = grouped.agg({agg_var: 'sum'})
    ranked = totals.sort_values(by=sort_var, ascending=False)
    return ranked.reset_index(drop=True)

In [None]:
def cat_var(df, cols):
    '''
    Summarise the unique values taken by a group of categorical variables.

    Return: a Pandas dataframe object with one row per variable in "cols", sorted descending by
            "number_of_possible_values" and with the index reset. It has the following columns:
        - "categorical_variable" => the name of every categorical variable included as an input parameter (string).
        - "number_of_possible_values" => the number of unique values that a given categorical variable takes (integer).
        - "values" => the unique values of every categorical variable, as returned by Series.unique().
            If "cols" is empty, an empty dataframe with these three columns is returned.
    Input parameters:
        - df -> Pandas dataframe object: a dataframe with categorical variables.
        - cols -> list object: a list with the name (string) of every categorical variable to analyse.
    '''
    summary_columns = ["categorical_variable", "number_of_possible_values", "values"]
    rows = []
    for col in cols:
        unique_values = df[col].unique()
        rows.append({"categorical_variable": col,
                     "number_of_possible_values": len(unique_values),
                     "values": unique_values})
    # The column names are passed explicitly so that an empty "cols" still yields a frame with the
    # expected columns; otherwise sorting by "number_of_possible_values" would raise a KeyError.
    summary = pd.DataFrame(rows, columns=summary_columns)
    summary = summary.sort_values(by="number_of_possible_values", ascending=False)
    return summary.reset_index(drop=True)