In [None]:
import subprocess

%run "{os.path.dirname(os.getcwd())}\general_functions\generalFunctions.ipynb" 

## PPTX Functions

### Slide 1

In [43]:
def promoEvolutionNew(prs, promotionsBrandSortedTotalFinal, numOfDuplicates, position=0):
    """
    Fill the promo evolution slides: one table and two bar charts per market.

    One slide is consumed per entry of ``promotionsBrandSortedTotalFinal``, in
    iteration order, starting at ``prs.slides[position]``.

    Args:
    - prs (pptx.presentation): PowerPoint presentation object; edited in place.
    - promotionsBrandSortedTotalFinal (dict): Maps a key made of two ' | '-separated parts
      to a DataFrame. The first key part replaces 'Category' and the second replaces
      'National' in the slide title. Columns read here: 'Top Brands', 'Promo Value',
      'VSOD', 'VSOD IYA', 'VSOD Evaluation vs YA' and 'Promo Value Uplift vs YA'.
    - numOfDuplicates (int): Not used by this function; kept so existing calls keep working.
    - position (int): Index of the first slide to fill.

    Returns:
    - None. The slides are updated in place.
    """
    def percent_text(frame, column, row_idx):
        # Render a ratio as a whole-number percentage (no '%' sign); NaN counts as 0.
        ratio = frame[column].replace(np.nan, 0).iloc[row_idx]
        return str(int(round(ratio * 100, 0)))

    chart_columns = ['VSOD Evaluation vs YA', 'Promo Value Uplift vs YA']

    for slidenum, (key, df) in enumerate(promotionsBrandSortedTotalFinal.items()):
        df = df.reset_index(drop=True)

        # Update title and header. The data source text goes into the shape
        # located just before the title shape.
        shapes = prs.slides[slidenum + position].shapes
        titlNumber = get_shape_number(shapes, "Evolution (%) Promo efficiency across brands | Category | National | P12M")
        headerNumber = get_shape_number(shapes, "Promo evolution (Replace With SO WHAT)")
        shapes[titlNumber - 1].text = data_source
        header_paragraph = shapes[headerNumber].text_frame.paragraphs[0]
        header_paragraph.font.size = Pt(16)
        header_paragraph.font.name = 'Nexa Bold (Headings)'
        key_parts = key.split(' | ')
        shapes[titlNumber].text = shapes[titlNumber].text.replace('Category', key_parts[0]).replace('National', key_parts[1])
        # Fetch the paragraph only after the text was replaced.
        title_paragraph = shapes[titlNumber].text_frame.paragraphs[0]
        title_paragraph.font.size = Pt(12)
        title_paragraph.font.name = 'Nexa Bold (Headings)'

        # Create table and chart
        tables, charts = createTableAndChart(shapes)
        table = tables[0].table

        # Remove unnecessary rows: keep the header plus one row per distinct brand.
        # The height is measured before trimming so the table keeps its footprint.
        num_rows_to_remove = len(table.rows) - df['Top Brands'].nunique() - 1
        table_height = get_table_height(table)
        for _ in range(num_rows_to_remove):
            if len(table.rows) > 1:
                remove_row(table, table.rows[1])

        # Adjust row heights: share the space below the header evenly.
        total_row_height = table_height - table.rows[0].height
        num_rows = len(table.rows) - 1
        if num_rows > 0:
            cell_height = int(total_row_height / num_rows)
            for row_idx in range(1, len(table.rows)):
                table.rows[row_idx].height = cell_height

        # Populate table cells (the header row keeps the template text).
        for i, row in enumerate(table.rows):
            if i == 0:
                continue
            record = i - 1
            for j, cell in enumerate(row.cells):
                if j == 0:  # Brand column
                    cell.text = df['Top Brands'].iloc[record]
                    cell.text_frame.paragraphs[0].font.name = 'Nexa Bold'
                elif j == 1:  # Promo Value Sales
                    value = df['Promo Value'].iloc[record]
                    # NOTE(review): the thousands separator is only applied when the
                    # string form is longer than 3 characters, so e.g. 5.0 prints as
                    # '5.0' while 12.0 prints as '12'. Behaviour kept as is - confirm intent.
                    if len(str(value)) > 3:
                        cell.text = '{:,}'.format(int(value))
                    else:
                        cell.text = str(value)
                    cell.text_frame.paragraphs[0].font.name = 'Nexa Book'
                elif j == 2:  # VSOD column
                    cell.text = percent_text(df, 'VSOD', record) + '%'
                    cell.text_frame.paragraphs[0].font.name = 'Nexa Book'
                elif j == 3:  # VSOD IYA column
                    cell.text = percent_text(df, 'VSOD IYA', record)
                    cell.text_frame.paragraphs[0].font.name = 'Nexa Book'
                # Set font size and alignment (also applied to any further columns)
                paragraph = cell.text_frame.paragraphs[0]
                paragraph.font.size = Pt(8)
                paragraph.alignment = PP_ALIGN.CENTER

        # Configure charts: chart i shows chart_columns[i] for the first 11 brands.
        for i, column in enumerate(chart_columns):
            chart = charts[i].chart
            chart_data = CategoryChartData()
            chart_data.categories = df['Top Brands'].tolist()[:11]
            series_values = df[column].tolist()[:11]
            chart_data.add_series(column, series_values if series_values else [None])
            chart.series[0].invert_if_negative = False
            chart.replace_data(chart_data)

            # Color the points in the chart based on value. The values are read
            # once instead of once per point.
            series = chart.series[0]
            values = series.values
            for idx, point in enumerate(series.points):
                fill = point.format.fill
                fill.solid()
                value = values[idx]
                if value == 0:
                    point.data_label.text_frame.text = ''
                # A point without a value (None) cannot be compared with 0; it gets
                # the neutral color instead of raising TypeError.
                if value is not None and value < 0:
                    fill.fore_color.rgb = RGBColor(255, 191, 191)  # Red color for negative values
                else:
                    fill.fore_color.rgb = RGBColor(222, 221, 221)  # Grey color for positive values

            # NOTE(review): this repeats the replace_data call above with the same
            # data and looks redundant; kept to preserve the existing output.
            chart.replace_data(chart_data)
            # Leave both value-axis limits on automatic scaling.
            value_axis = chart.value_axis
            value_axis.minimum_scale = None
            value_axis.maximum_scale = None

In [None]:
def promoValueSales(prs, promotionsBrandDF, numOfDuplicates, position=0):
    """
    Fill the "Promo Value Sales" table slides, one slide per market.

    For every entry of ``promotionsBrandDF`` the brand list is reduced to the
    client brands plus the best-selling other brands (enough to reach six rows
    when there are fewer than six client brands), sorted by 'Promo Value'.

    Args:
    - prs (pptx.presentation): PowerPoint presentation object; edited in place.
    - promotionsBrandDF (dict): Maps a key made of two ' | '-separated parts to a DataFrame.
      The first key part replaces 'Category' and the second replaces 'National' in the title.
      Columns read here: 'Top Brands', 'Value Share', 'Promo Value', 'VSOD', 'VSOD IYA'.
    - numOfDuplicates (int): Not used by this function.
    - position (int): Index of the first slide to fill.

    Returns:
    - None. The slides are updated in place.
    """
    def as_percent(frame, column, idx):
        # Whole-number percentage without the '%' sign; NaN counts as 0.
        ratio = frame[column].replace(np.nan, 0).iloc[idx]
        return str(int(round(ratio * 100, 0)))

    def write_cell(cell, text, font_name):
        cell.text = text
        cell.text_frame.paragraphs[0].font.name = font_name

    for slide_offset, (key, market_df) in enumerate(promotionsBrandDF.items()):
        market_df = market_df.reset_index(drop=True)

        # Drop the 'Others' / 'Grand Total' rows and brands whose value share
        # is not above 1%.
        brand_names = market_df['Top Brands']
        keep_rows = (
            ~brand_names.str.contains('Others', case=False)
            & ~brand_names.str.contains('Grand Total', case=False)
            & (market_df['Value Share'] > 0.01)
        )
        market_df = market_df[keep_rows]

        # Client brands always stay; other brands fill the remaining slots.
        df_client = selectClientBrands(market_df, 'Top Brands', 'Promo Value')
        open_slots = max(6 - len(df_client), 0)
        other_brands = market_df[~market_df['Top Brands'].isin(client_brands)]
        other_brands = other_brands.sort_values(by='Promo Value', ascending=False).head(open_slots)
        market_df = pd.concat([other_brands, df_client], ignore_index=True)
        market_df = market_df.sort_values(by='Promo Value', ascending=False)

        # Title, header and data source (written into the shape just before the title)
        shapes = prs.slides[slide_offset + position].shapes
        titlNumber = get_shape_number(shapes, "Promo Value Sales | Category | National | P12M")
        headerNumber = get_shape_number(shapes, "Promo Value Sales (Replace With SO WHAT)")
        shapes[titlNumber - 1].text = data_source
        header_paragraph = shapes[headerNumber].text_frame.paragraphs[0]
        header_paragraph.font.size = Pt(16)
        header_paragraph.font.name = 'Nexa Bold (Headings)'
        key_parts = key.split(' | ')
        shapes[titlNumber].text = (
            shapes[titlNumber].text
            .replace('Category', key_parts[0])
            .replace('National', key_parts[1])
        )
        title_paragraph = shapes[titlNumber].text_frame.paragraphs[0]
        title_paragraph.font.size = Pt(12)
        title_paragraph.font.name = 'Nexa Bold (Headings)'

        # Locate the table on the slide
        tables, charts = createTableAndChart(shapes)
        table = tables[0].table

        # Trim the template down to header + one row per distinct brand; the
        # height is measured first so it can be redistributed afterwards.
        surplus_rows = len(table.rows) - market_df['Top Brands'].nunique() - 1
        table_height = get_table_height(table)
        for _ in range(surplus_rows):
            if len(table.rows) > 1:
                remove_row(table, table.rows[1])

        # Spread the height below the header evenly over the remaining rows
        body_height = table_height - table.rows[0].height
        body_rows = len(table.rows) - 1
        if body_rows > 0:
            per_row = int(body_height / body_rows)
            for row_idx in range(1, len(table.rows)):
                table.rows[row_idx].height = per_row

        # Fill the body rows; the header row keeps the template text
        for i, row in enumerate(table.rows):
            if i == 0:
                continue
            record = i - 1
            for j, cell in enumerate(row.cells):
                if j == 0:  # Brand column
                    write_cell(cell, market_df['Top Brands'].iloc[record], 'Nexa Bold')
                elif j == 1:  # Promo Value sales column
                    promo_value = market_df['Promo Value'].iloc[record]
                    if len(str(promo_value)) > 3:
                        promo_text = format(int(promo_value), ',')
                    else:
                        promo_text = str(promo_value)
                    write_cell(cell, promo_text, 'Nexa Book')
                elif j == 2:  # Volume Sold on Deal (VSOD) column
                    write_cell(cell, as_percent(market_df, 'VSOD', record) + '%', 'Nexa Book')
                else:  # VSOD IYA column (and any column after it)
                    write_cell(cell, as_percent(market_df, 'VSOD IYA', record), 'Nexa Book')
                # Common font size and alignment
                paragraph = cell.text_frame.paragraphs[0]
                paragraph.font.size = Pt(8)
                paragraph.alignment = PP_ALIGN.CENTER


In [None]:
def promoValueSales_no(prs, promotionsBrandDF, numOfDuplicates, position=0):
    """
    Fill the "Promo Value Sales" table slides without any client-brand selection.

    Same slide layout as ``promoValueSales``, but every brand that survives the
    filters is listed, sorted by 'Promo Value' in descending order.

    Args:
    - prs (pptx.presentation): PowerPoint presentation object; edited in place.
    - promotionsBrandDF (dict): Maps a key made of two ' | '-separated parts to a DataFrame.
      The first key part replaces 'Category' and the second replaces 'National' in the title.
      Columns read here: 'Top Brands', 'Value Share', 'Promo Value', 'VSOD', 'VSOD IYA'.
    - numOfDuplicates (int): Not used by this function.
    - position (int): Index of the first slide to fill.

    Returns:
    - None. The slides are updated in place.
    """
    def ratio_as_text(frame, column, idx):
        # Whole-number percentage without the '%' sign; NaN counts as 0.
        ratio = frame[column].replace(np.nan, 0).iloc[idx]
        return str(int(round(ratio * 100, 0)))

    for slide_offset, (key, brand_df) in enumerate(promotionsBrandDF.items()):
        brand_df = brand_df.reset_index(drop=True)

        # Exclude the 'Others' / 'Grand Total' rows and brands whose value
        # share is not above 1%, then rank by promo value.
        names = brand_df['Top Brands']
        wanted = (
            ~names.str.contains('Others', case=False)
            & ~names.str.contains('Grand Total', case=False)
            & (brand_df['Value Share'] > 0.01)
        )
        brand_df = brand_df[wanted].sort_values(by='Promo Value', ascending=False)

        # Title, header and data source (written into the shape just before the title)
        shapes = prs.slides[slide_offset + position].shapes
        titlNumber = get_shape_number(shapes, "Promo Value Sales | Category | National | P12M")
        headerNumber = get_shape_number(shapes, "Promo Value Sales (Replace With SO WHAT)")
        shapes[titlNumber - 1].text = data_source
        header_par = shapes[headerNumber].text_frame.paragraphs[0]
        header_par.font.size = Pt(16)
        header_par.font.name = 'Nexa Bold (Headings)'
        pieces = key.split(' | ')
        new_title = shapes[titlNumber].text.replace('Category', pieces[0]).replace('National', pieces[1])
        shapes[titlNumber].text = new_title
        title_par = shapes[titlNumber].text_frame.paragraphs[0]
        title_par.font.size = Pt(12)
        title_par.font.name = 'Nexa Bold (Headings)'

        # Locate the table on the slide
        tables, charts = createTableAndChart(shapes)
        table = tables[0].table

        # Keep the header plus one row per distinct brand; measure the height
        # before trimming so the rows can be stretched back afterwards.
        extra_rows = len(table.rows) - brand_df['Top Brands'].nunique() - 1
        table_height = get_table_height(table)
        for _ in range(extra_rows):
            if len(table.rows) > 1:
                remove_row(table, table.rows[1])

        # Even out the body row heights
        available = table_height - table.rows[0].height
        n_body = len(table.rows) - 1
        if n_body > 0:
            row_height = int(available / n_body)
            for row_idx in range(1, len(table.rows)):
                table.rows[row_idx].height = row_height

        # Fill the body rows; the header row keeps the template text
        for i, row in enumerate(table.rows):
            if i == 0:
                continue
            src = i - 1
            for j, cell in enumerate(row.cells):
                if j == 0:  # Brand column
                    cell_text, cell_font = brand_df['Top Brands'].iloc[src], 'Nexa Bold'
                elif j == 1:  # Promo Value sales column
                    amount = brand_df['Promo Value'].iloc[src]
                    cell_text = format(int(amount), ',') if len(str(amount)) > 3 else str(amount)
                    cell_font = 'Nexa Book'
                elif j == 2:  # Volume Sold on Deal (VSOD) column
                    cell_text, cell_font = ratio_as_text(brand_df, 'VSOD', src) + '%', 'Nexa Book'
                else:  # VSOD IYA column (and any column after it)
                    cell_text, cell_font = ratio_as_text(brand_df, 'VSOD IYA', src), 'Nexa Book'
                cell.text = cell_text
                paragraph = cell.text_frame.paragraphs[0]
                paragraph.font.name = cell_font
                # Common font size and alignment
                paragraph.font.size = Pt(8)
                paragraph.alignment = PP_ALIGN.CENTER

### Slide 2

In [None]:
def promoEvolution(prs, promotionsBrandSortedTotalFinal, numOfDuplicates, position=0):
    """
    Fill the promo evolution slides: a brand/VSOD table and two bar charts per market.

    One slide is consumed per entry of ``promotionsBrandSortedTotalFinal``, in
    iteration order, starting at ``prs.slides[position]``.

    Args:
    - prs (pptx.presentation): PowerPoint presentation object; edited in place.
    - promotionsBrandSortedTotalFinal (dict): Maps a key made of two ' | '-separated parts
      to a DataFrame. The first key part replaces 'Category' and the second replaces
      'National' in the slide title. Columns read here: 'Top Brands', 'VSOD',
      'VSOD Evaluation vs YA' and 'Promo Value Uplift vs YA'.
    - numOfDuplicates (int): Not used by this function; kept so existing calls keep working.
    - position (int): Index of the first slide to fill.

    Returns:
    - None. The slides are updated in place.
    """
    chart_columns = ['VSOD Evaluation vs YA', 'Promo Value Uplift vs YA']

    for slidenum, (key, df) in enumerate(promotionsBrandSortedTotalFinal.items()):
        df = df.reset_index(drop=True)

        # Update title and header. The data source text goes into the shape
        # located just before the title shape.
        shapes = prs.slides[slidenum + position].shapes
        titlNumber = get_shape_number(shapes, "Evolution (%) Promo efficiency across brands | Category | National | P12M")
        headerNumber = get_shape_number(shapes, "Promo evolution (Replace With SO WHAT)")
        shapes[titlNumber - 1].text = data_source
        header_paragraph = shapes[headerNumber].text_frame.paragraphs[0]
        header_paragraph.font.size = Pt(16)
        header_paragraph.font.name = 'Nexa Bold (Headings)'
        key_parts = key.split(' | ')
        shapes[titlNumber].text = shapes[titlNumber].text.replace('Category', key_parts[0]).replace('National', key_parts[1])
        # Fetch the paragraph only after the text was replaced.
        title_paragraph = shapes[titlNumber].text_frame.paragraphs[0]
        title_paragraph.font.size = Pt(12)
        title_paragraph.font.name = 'Nexa Bold (Headings)'

        # Create table and chart
        tables, charts = createTableAndChart(shapes)
        table = tables[0].table

        # Remove unnecessary rows: keep the header plus one row per distinct brand.
        # The height is measured before trimming so the table keeps its footprint.
        num_rows_to_remove = len(table.rows) - df['Top Brands'].nunique() - 1
        table_height = get_table_height(table)
        for _ in range(num_rows_to_remove):
            if len(table.rows) > 1:
                remove_row(table, table.rows[1])

        # Adjust row heights: share the space below the header evenly.
        total_row_height = table_height - table.rows[0].height
        num_rows = len(table.rows) - 1
        if num_rows > 0:
            cell_height = int(total_row_height / num_rows)
            for row_idx in range(1, len(table.rows)):
                table.rows[row_idx].height = cell_height

        # Populate table cells (the header row keeps the template text).
        for i, row in enumerate(table.rows):
            if i == 0:
                continue
            record = i - 1
            for j, cell in enumerate(row.cells):
                if j == 0:  # Brand column
                    cell.text = df['Top Brands'].iloc[record]
                    cell.text_frame.paragraphs[0].font.name = 'Nexa Bold'
                elif j == 1:  # VSOD column; NaN is shown as 0%
                    vsod = df['VSOD'].replace(np.nan, 0).iloc[record]
                    cell.text = str(int(round(vsod * 100, 0))) + '%'
                    cell.text_frame.paragraphs[0].font.name = 'Nexa Book'
                # Font size and alignment are applied to every body cell
                paragraph = cell.text_frame.paragraphs[0]
                paragraph.font.size = Pt(8)
                paragraph.alignment = PP_ALIGN.CENTER

        # Configure charts: chart i shows chart_columns[i] for the first 11 brands.
        for i, column in enumerate(chart_columns):
            chart = charts[i].chart
            chart_data = CategoryChartData()
            chart_data.categories = df['Top Brands'].tolist()[:11]
            series_values = df[column].tolist()[:11]
            chart_data.add_series(column, series_values if series_values else [None])
            chart.series[0].invert_if_negative = False
            chart.replace_data(chart_data)

            # Color the points in the chart based on value. The values are read
            # once instead of once per point.
            series = chart.series[0]
            values = series.values
            for idx, point in enumerate(series.points):
                fill = point.format.fill
                fill.solid()
                value = values[idx]
                if value == 0:
                    point.data_label.text_frame.text = ''
                # A point without a value (None) cannot be compared with 0; it gets
                # the neutral color instead of raising TypeError.
                if value is not None and value < 0:
                    fill.fore_color.rgb = RGBColor(255, 191, 191)  # Red color for negative values
                else:
                    fill.fore_color.rgb = RGBColor(222, 221, 221)  # Grey color for positive values

            # NOTE(review): this repeats the replace_data call above with the same
            # data and looks redundant; kept to preserve the existing output.
            chart.replace_data(chart_data)
            # Leave both value-axis limits on automatic scaling.
            value_axis = chart.value_axis
            value_axis.minimum_scale = None
            value_axis.maximum_scale = None

### Slide 3

In [47]:
def VSOD1(prs, data, numOfDuplicates, position=0):
    """
    Fill the "Volume sold on deal Summary" slides with a brand x sector VSOD table.

    Slide ``position + k`` is filled from the k-th key of ``data`` for
    k in ``range(numOfDuplicates)``.

    Parameters:
    prs (Presentation): The PowerPoint presentation object to modify.
    data (dict): Maps a key made of two ' | '-separated parts to a DataFrame with the
                 columns 'Top Brands', 'SOURCE' and 'VSOD'. The first key part replaces
                 'National' in the title; the second one decides whether 'Sector' in the
                 title becomes 'Segment', 'SubSegments' or 'SubCategory'.
    numOfDuplicates (int): The number of slides (and keys of ``data``) to process.
    position (int, optional): The starting slide position in the presentation. Defaults to 0.

    Returns:
    None. The slides are updated in place. Note that 'Grand Total' is renamed to
    'Total' directly in the DataFrames stored in ``data``.
    """
    for slidenum in range(numOfDuplicates):
        # Key that belongs to the current slide
        market = list(data.keys())[slidenum]
        market_parts = market.split(' | ')

        shapes = prs.slides[slidenum + position].shapes

        # Title, header and data source (written into the shape just before the title)
        titleNumber = get_shape_number(shapes, "Volume sold on deal Summary | By Brand | By Sector | Category | National | P12M")
        headerNumber = get_shape_number(shapes, "VSOD Summary by Sector (Replace With SO WHAT)")
        shapes[titleNumber-1].text = data_source
        title_shape = shapes[titleNumber]
        title_shape.text = title_shape.text.replace('National', market_parts[0])

        # Rename the 'Sector' wording according to the level of the second key part
        level = market_parts[1]
        if level in segments:
            level_label = 'Segment'
        elif level in subsegments:
            level_label = 'SubSegments'
        elif level in subcategories:
            level_label = 'SubCategory'
        else:
            level_label = None
        if level_label is not None:
            title_shape.text = title_shape.text.replace('Sector', level_label)

        title_shape.text = title_shape.text.replace(' Category', ' '+categories[0])
        title_paragraph = title_shape.text_frame.paragraphs[0]
        title_paragraph.font.size = Pt(12)
        title_paragraph.font.name = 'Nexa Bold (Headings)'
        header_paragraph = shapes[headerNumber].text_frame.paragraphs[0]
        header_paragraph.font.size = Pt(16)
        header_paragraph.font.name = 'Nexa Bold (Headings)'

        # Locate the table on the slide
        tables, charts = createTableAndChart(shapes)
        table = tables[0].table

        # NOTE(review): this writes into the caller's DataFrame, not a copy.
        df = data[market]
        df['Top Brands'] = df['Top Brands'].replace('Grand Total', 'Total')

        # Row labels and column labels, in order of first appearance
        brands = df['Top Brands'].unique()
        uniqueSector = df['SOURCE'].unique()

        # Shrink the template to (1 + brands) rows and (1 + sectors) columns
        surplus_rows = len(table.rows) - df['Top Brands'].nunique() - 1
        table = removeRowFromTable(table, surplus_rows, rowToExclude=1)
        surplus_columns = len(table.columns) - df['SOURCE'].nunique() - 1
        table = col_cell_remove(table, surplus_columns)

        # Fill the table
        for i, row in enumerate(table.rows):
            indexed_cells = enumerate(row.cells)

            if i == 0:
                # Header row: sector names, top-left cell left untouched
                for j, cell in indexed_cells:
                    if j == 0:
                        continue
                    cell.text = uniqueSector[j-1]
                    heading = cell.text_frame.paragraphs[0]
                    heading.font.size = Pt(10)
                    heading.alignment = PP_ALIGN.CENTER
                    heading.font.color.rgb = RGBColor(87, 85, 85)
                    heading.font.name = 'Nexa Bold'
                continue

            for j, cell in indexed_cells:
                if j == 0:
                    # First column: brand name
                    cell.text = brands[i-1]
                else:
                    # VSOD of this brand in this sector; a value of 0 leaves the cell blank
                    brand = brands[i-1]
                    selection = (df['SOURCE'] == uniqueSector[j-1]) & (df['Top Brands'] == brand)
                    value = df[selection]['VSOD'].to_list()[0]
                    cell.text = str(int(round(value * 100, 0))) + '%' if value != 0 else ''

                paragraph = cell.text_frame.paragraphs[0]
                paragraph.font.name = 'Nexa Bold'
                paragraph.font.size = Pt(8)
                paragraph.alignment = PP_ALIGN.CENTER


In [None]:
def VSODSectors(prs, promotionsBrandsWithMarket, numOfDuplicates, position=0):
    """
    Fill the "Volume sold on deal Summary" slides with a brand x sector VSOD table,
    listing client brands first.

    One slide is consumed per entry of ``promotionsBrandsWithMarket``, in iteration
    order, starting at ``prs.slides[position]``.

    Parameters:
    prs (Presentation): The PowerPoint presentation object to modify.
    promotionsBrandsWithMarket (dict): Maps a key made of two ' | '-separated parts to a
                                       DataFrame with the columns 'Top Brands', 'SOURCE'
                                       and 'VSOD'. The first key part replaces 'National'
                                       in the title; the second parts of all keys become
                                       the table's column headers.
    numOfDuplicates (int): Not used by this function.
    position (int, optional): The starting slide position in the presentation. Defaults to 0.

    Returns:
    None. The slides are updated in place.
    """
    for slide_offset, (key, df) in enumerate(promotionsBrandsWithMarket.items()):
        # First key part is the market shown in the title
        market = key.split(' | ')[0]

        shapes = prs.slides[slide_offset + position].shapes

        # Title, header and data source (written into the shape just before the title)
        titleNumber = get_shape_number(shapes, "Volume sold on deal Summary | By Brand | By Sector | Category | National | P12M")
        headerNumber = get_shape_number(shapes, "VSOD Summary by Sector (Replace With SO WHAT)")
        shapes[titleNumber-1].text = data_source
        title_shape = shapes[titleNumber]
        title_shape.text = title_shape.text.replace('National', market)
        title_shape.text = title_shape.text.replace('Category', categories[0])
        title_paragraph = title_shape.text_frame.paragraphs[0]
        title_paragraph.font.size = Pt(12)
        title_paragraph.font.name = 'Nexa Bold (Headings)'
        header_paragraph = shapes[headerNumber].text_frame.paragraphs[0]
        header_paragraph.font.size = Pt(16)
        header_paragraph.font.name = 'Nexa Bold (Headings)'

        # Locate the table on the slide
        tables, charts = createTableAndChart(shapes)
        table = tables[0].table

        # Client brands (in client_brands order) come first, then the remaining brands
        found_brands = df['Top Brands'].unique()
        own_brands = [name for name in client_brands if name in found_brands]
        rival_brands = [name for name in found_brands if name not in client_brands]
        brands = own_brands + rival_brands

        # Column headers: second part of every key of the input dictionary
        uniqueSector = [entry.split(' | ')[1] for entry in promotionsBrandsWithMarket.keys()]

        # Shrink the template table to the required number of rows and columns
        surplus_rows = len(table.rows) - df['Top Brands'].nunique() - 2
        table = removeRowFromTable(table, surplus_rows, rowToExclude=1)
        surplus_columns = len(table.columns) - len(promotionsBrandsWithMarket.keys()) - 1
        table = col_cell_remove(table, surplus_columns)

        # Fill the table
        for i, row in enumerate(table.rows):
            indexed_cells = enumerate(row.cells)

            if i == 0:
                # Header row: sector names, top-left cell left untouched
                for j, cell in indexed_cells:
                    if j == 0:
                        continue
                    cell.text = uniqueSector[j-1]
                    heading = cell.text_frame.paragraphs[0]
                    heading.font.size = Pt(10)
                    heading.alignment = PP_ALIGN.CENTER
                    heading.font.color.rgb = RGBColor(87, 85, 85)
                    heading.font.name = 'Nexa Bold'
                continue

            for j, cell in indexed_cells:
                if j == 0:
                    # First column: brand name
                    cell.text = brands[i-1]
                    cell.text_frame.paragraphs[0].font.name = 'Nexa Bold'
                else:
                    # VSOD of this brand in this sector; cells without a matching
                    # record keep their current text, a value of 0 blanks the cell
                    brand = brands[i-1]
                    selection = (df['SOURCE'] == uniqueSector[j-1]) & (df['Top Brands'] == brand)
                    matches = df[selection]['VSOD']
                    if not matches.empty:
                        value = matches.to_list()[0]
                        cell.text = str(int(round(value * 100, 0))) + '%' if value != 0 else ''
                        cell.text_frame.paragraphs[0].font.name = 'Nexa Bold' if brand in client_brands else 'Nexa Book'

                paragraph = cell.text_frame.paragraphs[0]
                if i == 1:
                    # The first body row is always bold
                    paragraph.font.name = 'Nexa Bold'
                paragraph.font.size = Pt(8)
                paragraph.alignment = PP_ALIGN.CENTER

In [49]:
def VSODSegments(prs, promotionsBrandsWithMarket, numOfDuplicates, position=0):
    """
    Fill the "Volume sold on deal Summary | By Segment" slides: for every sector and
    every market, a brand x segment VSOD table with client brands listed first.

    Slides are laid out sector by sector: the slide for sector number ``s`` and market
    number ``m`` is ``prs.slides[len(promotionsBrandsWithMarket) * s + m + position]``.
    The sector list and the segment-to-sector mapping come from the notebook-level
    ``sectors``, ``segments`` and ``sectorsegments`` variables.

    Parameters:
    prs (Presentation): The PowerPoint presentation object to modify.
    promotionsBrandsWithMarket (dict): Maps a market name to a DataFrame with the columns
                                       'Top Brands', 'SOURCE' and 'VSOD'.
    numOfDuplicates (int): Ignored; it is replaced by the number of markets in
                           ``promotionsBrandsWithMarket``.
    position (int, optional): The starting slide position in the presentation. Defaults to 0.

    Returns:
    None. The slides are updated in place.
    """
    numOfDuplicates = len(promotionsBrandsWithMarket)

    # Build {sector: array of its segments} from the two parallel notebook-level lists
    pairing = pd.DataFrame({"Segment": segments})
    pairing["Sectorsegment"] = sectorsegments
    pairing = pairing.groupby("Sectorsegment")["Segment"].unique().reset_index()
    segsector = dict(zip(pairing["Sectorsegment"], pairing["Segment"]))

    markets = list(promotionsBrandsWithMarket.keys())

    for sectornum, sector in enumerate(sectors):
        for slidenum, market in enumerate(markets):
            # Segments that belong to the current sector (table columns)
            uniquesegment = segsector[sector]

            shapes = prs.slides[numOfDuplicates*sectornum+slidenum + position].shapes

            # Title, header and data source (written into the shape just before the title)
            titleNumber = get_shape_number(shapes, "Volume sold on deal Summary | By Brand | By Segment | Sector | National | P12M")
            headerNumber = get_shape_number(shapes, "VSOD Summary by Segment (Replace With SO WHAT)")
            shapes[titleNumber-1].text = data_source
            title_shape = shapes[titleNumber]
            title_shape.text = title_shape.text.replace('National', market)
            title_shape.text = title_shape.text.replace('Sector', sector)
            title_paragraph = title_shape.text_frame.paragraphs[0]
            title_paragraph.font.size = Pt(12)
            title_paragraph.font.name = 'Nexa Bold (Headings)'
            header_paragraph = shapes[headerNumber].text_frame.paragraphs[0]
            header_paragraph.font.size = Pt(16)
            header_paragraph.font.name = 'Nexa Bold (Headings)'

            # Locate the table on the slide
            tables, charts = createTableAndChart(shapes)
            table = tables[0].table

            # Data of the current market
            df = promotionsBrandsWithMarket[market]

            # Brands present for this sector: client brands (in client_brands
            # order) first, then the remaining brands
            sector_brands = df[df['SOURCE'] == sector]['Top Brands'].unique()
            own_brands = [name for name in client_brands if name in sector_brands]
            rival_brands = [name for name in sector_brands if name not in client_brands]
            brands = own_brands + rival_brands

            # Shrink the template table to the required number of rows and columns
            surplus_rows = len(table.rows) - df['Top Brands'].nunique() - 1
            table = removeRowFromTable(table, surplus_rows, rowToExclude=1)
            surplus_columns = len(table.columns) - len(uniquesegment) - 1
            table = col_cell_remove(table, surplus_columns)

            # Fill the table
            for i, row in enumerate(table.rows):
                indexed_cells = enumerate(row.cells)

                if i == 0:
                    # Header row: segment names, top-left cell left untouched
                    for j, cell in indexed_cells:
                        if j == 0:
                            continue
                        cell.text = uniquesegment[j-1]
                        heading = cell.text_frame.paragraphs[0]
                        heading.font.size = Pt(10)
                        heading.alignment = PP_ALIGN.CENTER
                        heading.font.color.rgb = RGBColor(87, 85, 85)
                        heading.font.name = 'Nexa Bold'
                    continue

                for j, cell in indexed_cells:
                    if j == 0:
                        # First column: brand name
                        cell.text = brands[i-1]
                        cell.text_frame.paragraphs[0].font.name = 'Nexa Bold'
                    else:
                        # VSOD of this brand in this segment; a value of 0 blanks the cell
                        brand = brands[i-1]
                        selection = (df['SOURCE'] == uniquesegment[j-1]) & (df['Top Brands'] == brand)
                        value = df[selection]['VSOD'].to_list()[0]
                        cell.text = str(int(round(value * 100, 0))) + '%' if value != 0 else ''
                        cell.text_frame.paragraphs[0].font.name = 'Nexa Bold' if brand in client_brands else 'Nexa Book'

                    paragraph = cell.text_frame.paragraphs[0]
                    if i == 1:
                        # The first body row is always bold
                        paragraph.font.name = 'Nexa Bold'
                    paragraph.font.size = Pt(8)
                    paragraph.alignment = PP_ALIGN.CENTER


### Slide 4

In [None]:
def valueUpliftRetailer(prs, concated, numOfDuplicates, position=0):
    """
    Refresh the "Value Uplift by brand" slides, one slide per entry of ``concated``.

    For each key, the slide at index ``position + n`` gets its data-source and
    title text updated, its table reduced to one column per market (``SOURCE``)
    and its bubble chart rebuilt. Per market the chart shows the rows returned
    by ``selectClientBrands`` plus the largest non-client brands (by
    'Value Share') needed to reach ten rows. Rows whose brand contains 'Others'
    and rows with a 'Value Share' not above 0.01 are left out.

    Relies on the notebook-level names ``data_source``, ``normalized`` and
    ``client_brands``.

    Parameters:
    prs (Presentation): The PowerPoint presentation object to modify.
    concated (dict): Category/sector name -> DataFrame. The columns read here
        are 'Promo Value', 'Top Brands', 'SOURCE', 'Value Share' and
        'Value Uplift (v. base)' or 'Value Uplift (v. base) Normalized'.
    numOfDuplicates (int): Maximum number of slides to update; iteration stops
        as soon as either ``concated`` or this count is exhausted.
    position (int, optional): Index of the first slide to update. Defaults to 0.

    Returns:
    None. The slides are modified in place.
    """
    for key, slide_offset in zip(concated, range(numOfDuplicates)):
        # Keep rows with a positive promo value, then drop the grand-total line
        frame = concated[key]
        frame = frame[frame['Promo Value'] > 0].reset_index(drop=True)
        df = frame[~frame['Top Brands'].str.contains('Grand Total', case=False)]

        slide = prs.slides[slide_offset + position]
        shapes = slide.shapes

        # Locate the text shapes by their template text before touching them
        title_idx = get_shape_number(shapes, "Value Uplift by brand | Category/Sector | P12M")
        header_idx = get_shape_number(shapes, "Value uplift by retailer by brand (Replace With SO WHAT)")
        # The shape just before the title holds the data-source line
        shapes[title_idx - 1].text = data_source
        shapes[title_idx].text = shapes[title_idx].text.replace('Category/Sector', key)
        title_paragraph = shapes[title_idx].text_frame.paragraphs[0]
        title_paragraph.font.size = Pt(12)
        title_paragraph.font.name = 'Nexa Bold (Headings)'
        header_paragraph = shapes[header_idx].text_frame.paragraphs[0]
        header_paragraph.font.size = Pt(16)
        header_paragraph.font.name = 'Nexa Bold (Headings)'

        tables, charts = createTableAndChart(slide.shapes)
        chart = charts[0].chart
        table = tables[0].table

        markets = list(df['SOURCE'].unique())

        # Drop surplus columns (always the left-most one) until the table has
        # one column per market, then spread the original width evenly.
        surplus_columns = len(table.columns) - len(markets)
        original_width = get_table_width(table)
        for _ in range(surplus_columns):
            if len(table.columns):
                remove_col(table, table.columns[0])
        if surplus_columns:
            remaining_columns = len(table.columns)
            if remaining_columns > 0:
                even_width = int(original_width / remaining_columns)
                for col_idx in range(remaining_columns):
                    table.columns[col_idx].width = even_width

        # Every row of the table receives the market names, column by column
        for row in table.rows:
            for col_idx, cell in enumerate(row.cells):
                cell.text = str(markets[col_idx])
                set_cell_font(cell, 'Nexa Bold', 8)
                cell_paragraph = cell.text_frame.paragraphs[0]
                cell_paragraph.alignment = PP_ALIGN.CENTER
                cell_paragraph.font.color.rgb = RGBColor(87, 85, 85)
                cell_paragraph.font.bold = False

        # One bubble series; each market is plotted at an odd x position
        # (1, 3, 5, ...) so that the bubbles of one market share a column.
        chart_data = BubbleChartData()
        chart_data.categories = list(range((df['SOURCE'].nunique() * 2) + 1))
        series = chart_data.add_series('Average Value Uplift')
        x_positions = list(range(1, len(chart_data.categories), 2))

        collected_brand_frames = []

        for market_idx, source in enumerate(df['SOURCE'].unique()):
            candidates = df[df['SOURCE'] == source]
            candidates = candidates[~candidates['Top Brands'].str.contains('Others', case=False)]
            candidates = candidates[candidates['Value Share'] > 0.01]
            client_rows = selectClientBrands(candidates, 'Top Brands', 'Value Share')
            open_slots = max(10 - len(client_rows), 0)
            competitors = candidates[~candidates['Top Brands'].isin(client_brands)]
            competitors = competitors.sort_values(by='Value Share', ascending=False).head(open_slots)
            selection = pd.concat([competitors, client_rows], ignore_index=True)
            series.has_data_labels = True
            # Points added for this market start at this index in the series
            first_new_point = len(series)
            brands = selection['Top Brands'].unique()
            if isinstance(selection, pd.DataFrame) and 'Top Brands' in selection.columns:
                collected_brand_frames.append(selection[['Top Brands']].copy())
            # Brand names of all markets processed so far, in plotting order
            all_top_brands = pd.concat(collected_brand_frames, ignore_index=True)

            for brand in brands:
                brand_rows = selection[selection['Top Brands'] == brand]
                uplift_column = (
                    'Value Uplift (v. base) Normalized' if normalized else 'Value Uplift (v. base)'
                )
                series.add_data_point(
                    x_positions[market_idx],
                    brand_rows[uplift_column].unique()[0],
                    brand_rows['Promo Value'].unique()[0],
                )

            chart.replace_data(chart_data)
            x_axis = chart.category_axis
            x_axis.minimum_scale = 0
            x_axis.maximum_scale = (df['SOURCE'].nunique() * 2)

            # Re-write the embedded workbook with an extra "labels" column
            # (column E) holding the brand names.
            xlsx_file = BytesIO()
            workbook_writer = chart_data._workbook_writer
            with workbook_writer._open_worksheet(xlsx_file) as (workbook, worksheet):
                workbook_writer._populate_worksheet(workbook, worksheet)
                worksheet.write(0, 4, "labels")
                worksheet.write_column(1, 4, all_top_brands['Top Brands'], None)
            chart._workbook.update_from_xlsx_blob(xlsx_file.getvalue())

            # Colour the bubbles added for this market: client brands get the
            # teal fill, all other brands the light grey one.
            last_point = len(chart.series[0].points)
            label_brands = all_top_brands['Top Brands']
            for point_idx in range(first_new_point, last_point):
                fill = chart.series[0].points[point_idx].format.fill
                fill.solid()
                is_client = label_brands[point_idx % len(label_brands)] in client_brands
                fill.fore_color.rgb = (
                    RGBColor(126, 202, 196) if is_client else RGBColor(230, 229, 229)
                )

        # Final x-axis range: 0 .. 2 * number of markets
        x_axis = chart.category_axis
        x_axis.minimum_scale = 0
        x_axis.maximum_scale = (df['SOURCE'].nunique() * 2)
 

In [None]:
def valueUpliftRetailer_no(prs, concated, numOfDuplicates, position=0):
    """
    Update the "Value Uplift by brand" slides, plotting every qualifying brand.

    Same slide layout as ``valueUpliftRetailer``, but no client-brand / top-ten
    selection is applied: for each market (``SOURCE``) every brand that is not
    an 'Others' row and whose 'Value Share' is above 0.01 is plotted, ordered
    by descending 'Value Share'.

    Relies on the notebook-level names ``data_source``, ``normalized`` and
    ``client_brands``.

    Parameters:
    prs (Presentation): The PowerPoint presentation object to modify.
    concated (dict): Category/sector name -> DataFrame. The columns read here
        are 'Promo Value', 'Top Brands', 'SOURCE', 'Value Share' and
        'Value Uplift (v. base)' or 'Value Uplift (v. base) Normalized'.
    numOfDuplicates (int): Maximum number of slides to update; iteration stops
        as soon as either ``concated`` or this count is exhausted.
    position (int, optional): Index of the first slide to update. Defaults to 0.

    Returns:
    None. The slides are modified in place.
    """
    for key, slide_num in zip(concated, range(numOfDuplicates)):
        # Keep rows with a positive promo value, then drop the grand-total line
        df = concated[key]
        df = df[df['Promo Value'] > 0].reset_index(drop=True)
        df = df[~df['Top Brands'].str.contains('Grand Total', case=False)]

        # Get the current slide and its shapes
        slide = prs.slides[slide_num + position]
        shapes = slide.shapes

        # Update the data-source line (shape just before the title), the title
        # placeholder text and the heading fonts
        titleNumber = get_shape_number(shapes, "Value Uplift by brand | Category/Sector | P12M")
        headereNumber = get_shape_number(shapes, "Value uplift by retailer by brand (Replace With SO WHAT)")
        shapes[titleNumber-1].text = data_source
        shapes[titleNumber].text = shapes[titleNumber].text.replace('Category/Sector', key)
        shapes[titleNumber].text_frame.paragraphs[0].font.size = Pt(12)
        shapes[titleNumber].text_frame.paragraphs[0].font.name = 'Nexa Bold (Headings)'
        shapes[headereNumber].text_frame.paragraphs[0].font.size = Pt(16)
        shapes[headereNumber].text_frame.paragraphs[0].font.name = 'Nexa Bold (Headings)'

        # Create and retrieve table and chart shapes
        tables, charts = createTableAndChart(slide.shapes)
        chart = charts[0].chart
        table = tables[0].table

        # Get the list of markets (sources)
        market_list = list(df['SOURCE'].unique())

        # Remove surplus columns (always the left-most one) until the table has
        # one column per market; the guard only protects against an empty table
        num_cols_to_remove = len(table.columns) - len(market_list)
        table_width = get_table_width(table)
        for _ in range(num_cols_to_remove):
            if len(table.columns):
                remove_col(table, table.columns[0])
        # Spread the original table width evenly over the remaining columns
        if num_cols_to_remove:
            num_cols = len(table.columns)
            if num_cols > 0:
                cell_width = int(table_width / num_cols)
                for col in range(num_cols):
                    table.columns[col].width = cell_width

        # Populate every table row with the market names
        for row in table.rows:
            for column_num, cell in enumerate(row.cells):
                cell.text = str(market_list[column_num])
                set_cell_font(cell, 'Nexa Bold', 8)
                cell.text_frame.paragraphs[0].alignment = PP_ALIGN.CENTER
                cell.text_frame.paragraphs[0].font.color.rgb = RGBColor(87, 85, 85)
                cell.text_frame.paragraphs[0].font.bold = False

        # Prepare chart data: each market is plotted at an odd x position
        # (1, 3, 5, ...) so that the bubbles of one market share a column
        chart_data = BubbleChartData()
        chart_data.categories = list(range((df['SOURCE'].nunique() * 2) + 1))
        series = chart_data.add_series('Average Value Uplift')
        catPos = list(range(1, len(chart_data.categories), 2))
        filtered_brands_list = []

        # Populate the chart with value uplift data, one market at a time
        for i, source in enumerate(df['SOURCE'].unique()):
            dfSource = df[df['SOURCE'] == source]
            dfSource = dfSource[~dfSource['Top Brands'].str.contains('Others', case=False)]
            dfSource = dfSource[dfSource['Value Share'] > 0.01]
            dfSource = dfSource.sort_values(by='Value Share', ascending=False)
            series.has_data_labels = True
            # Points added for this market start at this index in the series
            start = len(series)
            brands = dfSource['Top Brands'].unique()
            # dfSource is always a DataFrame with a 'Top Brands' column here
            # (the line above would have raised otherwise), so no guard is needed
            filtered_brands_list.append(dfSource[['Top Brands']].copy())
            # Brand names of all markets processed so far, in plotting order
            all_top_brands = pd.concat(filtered_brands_list, ignore_index=True)

            for brand in brands:
                brand_rows = dfSource[dfSource['Top Brands'] == brand]
                if normalized:
                    uplift = brand_rows['Value Uplift (v. base) Normalized'].unique()[0]
                else:
                    uplift = brand_rows['Value Uplift (v. base)'].unique()[0]
                series.add_data_point(catPos[i], uplift, brand_rows['Promo Value'].unique()[0])

            # One replace_data per market is enough: the call rewrites the
            # series from the cumulative chart_data, so repeating it with the
            # same data changes nothing.
            chart.replace_data(chart_data)

            # Re-write the embedded workbook with an extra "labels" column
            # (column E) holding the brand names
            xlsx_file = BytesIO()
            with chart_data._workbook_writer._open_worksheet(xlsx_file) as (workbook, worksheet):
                chart_data._workbook_writer._populate_worksheet(workbook, worksheet)
                worksheet.write(0, 4, "labels")
                worksheet.write_column(1, 4, all_top_brands['Top Brands'], None)
            chart._workbook.update_from_xlsx_blob(xlsx_file.getvalue())

            # Colour the bubbles added for this market: client brands get the
            # teal fill, all other brands the light grey one
            clibrands = all_top_brands['Top Brands']
            stop = len(chart.series[0].points)
            for j in range(start, stop):
                point = chart.series[0].points[j]
                point.format.fill.solid()
                if clibrands[j % len(clibrands)] in client_brands:
                    point.format.fill.fore_color.rgb = RGBColor(126, 202, 196)
                else:
                    point.format.fill.fore_color.rgb = RGBColor(230, 229, 229)

        # Set the x-axis range once per slide: 0 .. 2 * number of markets
        # (chart.category_axis is the horizontal axis of the chart)
        category_axis = chart.category_axis
        category_axis.minimum_scale = 0
        category_axis.maximum_scale = (df['SOURCE'].nunique() * 2)
 


In [None]:
# def valueUpliftRetailer(prs, concated, numOfDuplicates, position=0):
#     """
#     This function updates a PowerPoint presentation with value uplift by brand for different categories/sectors.
#     It modifies the slide shapes and populates tables and charts with the value uplift data.
 
#     Parameters:
#     prs (Presentation): The PowerPoint presentation object to modify.
#     concated (dict): A dictionary where keys are category/sector names and values are DataFrames containing value uplift data.
#     numOfDuplicates (int): The number of slides to duplicate and update.
#     position (int, optional): The starting slide position in the presentation. Defaults to 0.
 
#     Returns:
#     Replace the slides with new data
#     """
#     for key, slide_num in zip(concated, range(numOfDuplicates)):
#         # Get the DataFrame for the current category/sector
#         df = concated[key]
#         df = df[df['Promo Value'] > 0].reset_index(drop=True)
#         df = df[~df['Top Brands'].str.contains('Grand Total', case=False)]
 
#         # Get the current slide and its shapes
#         slide = prs.slides[slide_num + position]
#         shapes = slide.shapes
       
#         # Update the title shape with the category/sector name
#         titleNumber = get_shape_number(shapes, "Value Uplift by brand | Category/Sector | P12M")
#         headereNumber = get_shape_number(shapes, "Value uplift by retailer by brand (Replace With SO WHAT)")
#         shapes[titleNumber-1].text = data_source
#         shapes[titleNumber].text = shapes[titleNumber].text.replace('Category/Sector', key)
#         shapes[titleNumber].text_frame.paragraphs[0].font.size = Pt(12)
#         shapes[titleNumber].text_frame.paragraphs[0].font.name = 'Nexa Bold (Headings)'
#         shapes[headereNumber].text_frame.paragraphs[0].font.size = Pt(16)
#         shapes[headereNumber].text_frame.paragraphs[0].font.name = 'Nexa Bold (Headings)'
 
#         # Create and retrieve table and chart shapes
#         tables, charts = createTableAndChart(slide.shapes)
#         chart = charts[0].chart
#         table = tables[0].table
       
#         # Get the list of markets (sources)
#         market_list = list(df['SOURCE'].unique())
       
#         # Adjust the number of columns in the table
#         num_cols_to_remove = len(table.columns) - len(market_list)
#         table_width = get_table_width(table)
#         for _ in range(num_cols_to_remove):
#             if len(table.columns):  # Skip removing the first row if there is more than one row
#                 col = table.columns[0]
#                 remove_col(table, col)
#         if num_cols_to_remove:
#             total_col_width = table_width
#             num_cols = len(table.columns)
#             if num_cols > 0:
#                 cell_width = total_col_width / num_cols
#                 for col in range(0, table.columns.__len__()):
#                     table.columns[col].width = int(cell_width)
 
#         # Populate the table with market names
#         for row_number, row in enumerate(table.rows, start=0):
#             for column_num, cell in enumerate(row.cells):
#                 value = market_list[column_num]
#                 cell.text = str(value)
#                 set_cell_font(cell, 'Nexa Bold', 8)
#                 cell.text_frame.paragraphs[0].alignment = PP_ALIGN.CENTER
#                 cell.text_frame.paragraphs[0].font.color.rgb = RGBColor(87, 85, 85)
#                 cell.text_frame.paragraphs[0].font.bold = False
       
#         # Prepare chart data 
#         chart_data = BubbleChartData()
#         chart_data.categories = [i for i in range((df['SOURCE'].nunique() * 2) + 1)]
#         series = chart_data.add_series('Average Value Uplift')
#         catPos = [i for i in range(1, len(chart_data.categories), 2)]
 
#         # Populate the chart with value uplift data
#         filtered_brands_list = []

#         for i, source in enumerate(df['SOURCE'].unique()):
#             dfSource = df[df['SOURCE'] == source]
#             dfSource = dfSource[~dfSource['Top Brands'].str.contains('Others', case=False)]
#             dfSource = dfSource[dfSource['Value Share'] > 0.01]
#             df_client = selectClientBrands(dfSource,'Top Brands', 'Value Share')
#             number_of_brands_needed = max(10 - len(df_client),0)
#             dfSource = dfSource[~dfSource['Top Brands'].isin(client_brands)]
#             dfSource = dfSource.sort_values(by='Value Share', ascending=False).head(number_of_brands_needed)
#             dfSource = pd.concat([dfSource, df_client], ignore_index=True)
#             series.has_data_labels = True
#             start = len(series)
#             brands = dfSource['Top Brands'].unique()
#              # ✅ Collect filtered brands
#             if isinstance(dfSource, pd.DataFrame) and 'Top Brands' in dfSource.columns:
#                 filtered_brands_list.append(dfSource[['Top Brands']].copy())

#             all_top_brands = pd.concat(filtered_brands_list, ignore_index=True)

#             for brand in brands:
#                 if normalized:
#                     series.add_data_point(catPos[i], dfSource[dfSource['Top Brands'] == brand]['Value Uplift (v. base) Normalized'].unique()[0], dfSource[dfSource['Top Brands'] == brand]['Promo Value'].unique()[0])
#                 else: 
#                     series.add_data_point(catPos[i], dfSource[dfSource['Top Brands'] == brand]['Value Uplift (v. base)'].unique()[0], dfSource[dfSource['Top Brands'] == brand]['Promo Value'].unique()[0])
#             chart.replace_data(chart_data)
#             value_axis = chart.category_axis
#             value_axis.minimum_scale = 0
#             value_axis.maximum_scale = (df['SOURCE'].nunique() * 2)
 
#             # chart.replace_data(chart_data)
            
#             # all_top_brands = pd.concat(filtered_brands_list, ignore_index=True)
            
#             # print(f"{key}: All Top Brands after filtering and concat:\n", all_top_brands['Top Brands'])
#             xlsx_file=BytesIO()
#             with chart_data._workbook_writer._open_worksheet(xlsx_file) as (workbook, worksheet):
#                 chart_data._workbook_writer._populate_worksheet(workbook, worksheet)
#                 worksheet.write(0, 4, "labels")
#                 worksheet.write_column(1, 4,all_top_brands['Top Brands'], None)
#             chart._workbook.update_from_xlsx_blob(xlsx_file.getvalue())
 
#             stop = len(chart.series[0].points)
#             clibrands=all_top_brands['Top Brands']
#             for j in range(start, stop):
#                 point = chart.series[0].points[j]
#                 point.format.fill.solid()
#                 if clibrands[j % len(clibrands)] in client_brands:
#                     point.format.fill.fore_color.rgb = RGBColor(126, 202, 196)
#                 else:
#                     point.format.fill.fore_color.rgb = RGBColor(230, 229, 229)
 
#         # Set the axis scale for the chart
#         value_axis = chart.category_axis
#         value_axis.minimum_scale = 0
#         value_axis.maximum_scale = (df['SOURCE'].nunique() * 2)
 


### Slide 5

In [53]:
def _padded_axis_range(values):
    """
    Return ``(lower, upper)`` axis bounds enclosing ``values`` with 20% padding.

    The padding always moves a bound away from the data: a non-negative
    minimum is scaled by 0.8 and a negative one by 1.2 (and the reverse for
    the maximum), so negative values are never pushed outside the axis.
    An empty list is treated as ``[0]``.
    """
    if not values:
        values = [0]
    lowest, highest = min(values), max(values)
    lower = 0.8 * lowest if lowest >= 0 else 1.2 * lowest
    upper = 1.2 * highest if highest >= 0 else 0.8 * highest
    return lower, upper


def VolumeUplift(prs, modified_promotionProductsP12M, numOfDuplication, position=0):
    """
    Update the "Volume Uplift vs discount depth" slides, one per dictionary entry.

    For each key the slide at index ``position + n`` gets its title and
    data-source text refreshed and its scatter chart rebuilt from the rows that
    have a non-empty product/item name and 'Promo Sales' of at least 10000,
    ordered by descending 'Promo Value'. Each point is labelled with its
    product/item name, and both axes are scaled to the data with 20% padding.

    Relies on the notebook-level names ``data_source``, ``normalized`` and
    ``prodORitem`` (the name of the product/item column).

    Parameters:
    prs (Presentation): The PowerPoint presentation object to modify.
    modified_promotionProductsP12M (dict): Maps a key of the form
        "<part 0> | <part 1>" to a DataFrame. Part 0 replaces the
        'Coop Alleanza' placeholder in the title and part 1 replaces 'Category'.
    numOfDuplication (int): Maximum number of slides to update; iteration stops
        as soon as either the dictionary or this count is exhausted.
    position (int, optional): Index of the first slide to update. Defaults to 0.

    Returns:
    None. The slides are modified in place.
    """
    for key, slide_num in zip(modified_promotionProductsP12M, range(numOfDuplication)):
        # Get the current slide and its data
        slide = prs.slides[slide_num + position]
        df = modified_promotionProductsP12M[key]

        # Filter and sort the DataFrame
        df = df[df[f'{prodORitem}'] != '']
        df = df[df['Promo Sales'] >= 10000]
        df = df.sort_values(by='Promo Value', ascending=False).reset_index(drop=True)

        shapes = slide.shapes

        # Update the title and data source text
        headerNumber = get_shape_number(shapes, "Volume Uplift vs discount depth (Replace With SO WHAT)")
        titleNumber = get_shape_number(shapes, "Volume Uplift vs discount depth | By Product | Category | Coop Alleanza | P12M")
        datasourcenum = get_shape_number(shapes, "DATA SOURCE: Trade Panel/Retailer Data | Ending Dec 2022")
        shapes[datasourcenum].text = data_source
        key_parts = key.split(' | ')
        shapes[titleNumber].text = shapes[titleNumber].text.replace('Category', key_parts[1]).replace('Coop Alleanza', key_parts[0])
        shapes[titleNumber].text_frame.paragraphs[0].font.name = 'Nexa Bold (Headings)'
        shapes[titleNumber].text_frame.paragraphs[0].font.size = Pt(12)
        shapes[headerNumber].text_frame.paragraphs[0].font.size = Pt(16)
        shapes[headerNumber].text_frame.paragraphs[0].font.name = 'Nexa Bold (Headings)'

        # Create and retrieve table and chart shapes
        tables, charts = createTableAndChart(slide.shapes)
        chart = charts[0].chart

        # Prepare chart data: x = discount depth, y = volume uplift
        category = df[f'{prodORitem}'].tolist()
        if normalized:
            y_values = df['Volume Uplift (v. Base) Normalized'].tolist()
        else:
            y_values = df['Volume Uplift (v. Base)'].tolist()
        x_values = df['Discount Depth (%)'].tolist()
        chart_data = XyChartData()
        series = chart_data.add_series('Scatter')

        # Populate the chart with volume uplift data
        for x_value, y_value in zip(x_values, y_values):
            series.add_data_point(x_value, y_value)
        if category:
            series.has_data_labels = True

        chart.replace_data(chart_data)

        # Label every point with its product/item name
        for k, point in enumerate(chart.series[0].points):
            data_label = point.data_label
            data_label.text_frame.text = category[k]
            data_label.position = XL_LABEL_POSITION.ABOVE

        # NOTE(review): this second replace_data looks redundant; it is kept to
        # preserve the original call sequence.
        chart.replace_data(chart_data)

        # Scale both axes to the data. The padding widens the range for
        # negative values as well, so no point falls outside the plot area.
        value_axis = chart.value_axis
        value_axis.minimum_scale, value_axis.maximum_scale = _padded_axis_range(y_values)
        category_axis = chart.category_axis
        category_axis.minimum_scale, category_axis.maximum_scale = _padded_axis_range(x_values)

        # Re-write the embedded workbook with two extra columns:
        # promo value (column D) and product/item name (column E)
        xlsx_file = BytesIO()
        with chart_data._workbook_writer._open_worksheet(xlsx_file) as (workbook, worksheet):
            chart_data._workbook_writer._populate_worksheet(workbook, worksheet)
            worksheet.write(0, 3, "Promo Value")
            worksheet.write_column(1, 3, df['Promo Value'].to_list(), None)
            worksheet.write(0, 4, f"{prodORitem}")
            worksheet.write_column(1, 4, df[f'{prodORitem}'].to_list(), None)

        chart._workbook.update_from_xlsx_blob(xlsx_file.getvalue())

### Slide 6

In [None]:
def _set_quadrant_cross(axis, average):
    """
    Set the crossing point of ``axis`` to ``average``, or to 1 when it is below 1.

    When the crossing point falls back to 1 the axis maximum is also fixed at
    1.2. A NaN average (e.g. from an empty DataFrame) takes the fallback branch
    as well, so that no NaN is written into the chart.
    """
    axis.crosses = XL_AXIS_CROSSES.CUSTOM
    if average >= 1:
        axis.crosses_at = average
    else:
        axis.crosses_at = 1
        axis.maximum_scale = 1.2


def ValueUpliftvsPromoEfficiencyQuadrant(prs, new_modified_promotionProductsP12M, numOfDuplication, position=0):
    """
    Update the "Value Uplift vs Promo Efficiency" quadrant slides, one per entry.

    For each key the slide at index ``position + n`` gets its title and
    data-source text refreshed, its table filled with a running number and the
    product/item name of each row, and its bubble chart rebuilt
    (x = 'Trade Effectiveness', y = value uplift, size = 'Promo Sales').
    Every bubble is labelled with the running number of its row, matching the
    first table column. The axes are made to cross at the column averages, or
    at 1 when an average is below 1.

    Relies on the notebook-level names ``data_source``, ``normalized`` and
    ``prodORitem`` (the name of the product/item column).

    Parameters:
    prs (Presentation): The PowerPoint presentation object to modify.
    new_modified_promotionProductsP12M (dict): Maps a key of the form
        "<part 0> | <part 1>" to a DataFrame. Part 0 replaces the 'Retailer'
        placeholder in the title and part 1 replaces 'Category/Sector'. The
        DataFrame must provide the columns 'index', 'Trade Effectiveness',
        'Promo Sales', the product/item column and the value-uplift column.
    numOfDuplication (int): Maximum number of slides to update; iteration stops
        as soon as either the dictionary or this count is exhausted.
    position (int, optional): Index of the first slide to update. Defaults to 0.

    Returns:
    None. The slides are modified in place.
    """
    for key, slide_num in zip(new_modified_promotionProductsP12M, range(numOfDuplication)):
        # Get the current slide and its data
        slide = prs.slides[slide_num + position]
        df = new_modified_promotionProductsP12M[key]
        if normalized:
            y_avg = df["Value Uplift (v. base) Normalized"].mean()
        else:
            y_avg = df["Value Uplift (v. base)"].mean()
        x_avg = df["Trade Effectiveness"].mean()

        shapes = slide.shapes

        # Update the title and data source text
        titleNumber = get_shape_number(shapes, 'Value Uplift vs Promo Efficiency | By Product | Category/Sector | Retailer | P12M\nBubble Size: Promo Sales\n')
        headerNumber = get_shape_number(shapes, 'Value Uplift vs Promo Efficiency Quadrant (Replace With SO WHAT)')
        datasourcenum = get_shape_number(shapes, "DATA SOURCE: Trade Panel/Retailer Data | Ending Dec 2022")
        if titleNumber is not None:
            shapes[datasourcenum].text = data_source
            key_parts = key.split(' | ')
            title_frame = shapes[titleNumber].text_frame
            first_paragraph = title_frame.paragraphs[0]
            first_paragraph.font.name = 'Nexa Bold (Headings)'
            # Read the placeholder text from the title shape itself rather than
            # from a hard-coded shape index.
            first_run = first_paragraph.runs[0]
            first_run.text = first_run.text.replace('Category/Sector', key_parts[1]).replace('Retailer', key_parts[0])
            first_paragraph.font.size = Pt(12)
            second_paragraph = title_frame.paragraphs[1]
            second_paragraph.font.name = 'Nexa Bold (Headings)'
            second_paragraph.font.size = Pt(12)
            shapes[headerNumber].text_frame.paragraphs[0].font.size = Pt(16)
            shapes[headerNumber].text_frame.paragraphs[0].font.name = 'Nexa Bold (Headings)'

        # Create and retrieve table and chart shapes
        tables, charts = createTableAndChart(slide.shapes)
        chart = charts[0].chart
        table = tables[0].table

        # Adjust the number of rows in the table based on the DataFrame
        num_rows_to_remove = len(table.rows) - df.shape[0]
        table = removeRowFromTable(table, num_rows_to_remove, rowToExclude=0)

        # Product/item names in row order. Positional, like the chart values
        # below, so table numbering and bubble numbering always agree.
        products = df[f'{prodORitem}'].to_list()

        # Populate the table: column 0 = running number, column 1 = product/item
        for row_number, row in enumerate(table.rows):
            if row_number >= len(products):
                continue
            for column_num, cell in enumerate(row.cells):
                if column_num == 0:
                    cell.text = str(row_number + 1)
                    set_cell_font(cell, 'Nexa Book (Body)', 4)
                elif column_num == 1:
                    cell.text = str(products[row_number])
                    set_cell_font(cell, 'Nexa Book (Body)', 4)

        # Prepare bubble chart data
        chart_data = BubbleChartData()
        x_values = df['Trade Effectiveness'].tolist()
        if normalized:
            y_values = df['Value Uplift (v. base) Normalized'].tolist()
        else:
            y_values = df['Value Uplift (v. base)'].tolist()
        bubble_sizes = df['Promo Sales'].tolist()
        chart_data.categories = list(df['index'])
        series_1 = chart_data.add_series('Value Uplift')

        # Add data points to the bubble chart
        for x_value, y_value, bubble_size in zip(x_values, y_values, bubble_sizes):
            series_1.add_data_point(x_value, y_value, bubble_size)

        chart.replace_data(chart_data)

        # Re-write the embedded workbook with two extra columns:
        # running-number labels (column E) and product/item names (column F)
        point_count = len(chart.series[0].points)
        xlsx_file = BytesIO()
        with chart_data._workbook_writer._open_worksheet(xlsx_file) as (workbook, worksheet):
            chart_data._workbook_writer._populate_worksheet(workbook, worksheet)
            worksheet.write(0, 4, "labels")
            worksheet.write(0, 5, f"{prodORitem}")
            worksheet.write_column(1, 5, products, None)
            labels = [str(number) for number in range(1, point_count + 1)]
            worksheet.write_column(1, 4, labels)

        chart._workbook.update_from_xlsx_blob(xlsx_file.getvalue())

        # Label every bubble with its running number, centred on the bubble.
        # This is done per point, so an empty chart is simply left unlabelled.
        for number, point in enumerate(chart.series[0].points, start=1):
            data_label = point.data_label
            data_label.has_text_frame = True
            data_label.text_frame.text = str(number)
            data_label.position = XL_LABEL_POSITION.CENTER

        # Place the quadrant cross: average uplift on the value axis, average
        # trade effectiveness on the horizontal axis (never below 1)
        _set_quadrant_cross(chart.value_axis, y_avg)
        _set_quadrant_cross(chart.category_axis, x_avg)
 

### Slide 7

In [None]:
def top20(prs, new_modified_promotionProductsP12M, numOfDuplication, position=0, pos='before'):
    """
    Fill the "Top 20 promotions" slides, one slide per entry of the input dictionary.

    For each entry the slide title receives the entry key, the data source text is
    refreshed from the module-level ``data_source`` and the first table of the slide is
    populated from the first 20 rows of the entry's DataFrame. The module-level
    ``currency``, ``prodORitem`` and ``normalized`` are read while filling the table.

    Parameters:
    prs (Presentation): The PowerPoint presentation object to modify.
    new_modified_promotionProductsP12M (dict): Maps the name written into the slide title
                                               to a DataFrame containing promotion data.
    numOfDuplication (int): Maximum number of slides to update.
    position (int, optional): Index of the first slide to update. Defaults to 0.
    pos (str, optional): 'before' pads the price figures with a leading space, any other
                         value with a trailing one. With 'after', cells of columns 5, 6
                         and 9 that hold two space-separated tokens get them swapped.
                         Defaults to 'before'.

    Returns:
    None. The slides of ``prs`` are modified in place.
    """
    # Header labels that carry the currency in brackets, keyed by table column.
    currency_headers = {4: 'Shelf Price/Unit', 5: 'Promo Price/Unit', 6: 'Promo Value', 9: 'Incr Value'}

    def percent_text(ratio):
        """Format a ratio such as 0.25 as '25%'."""
        return str(round(ratio * 100)) + '%'

    def price_text(amount):
        """Format a price with two decimals, padded on the side selected by ``pos``."""
        template = ' {:.2f}' if pos == 'before' else '{:.2f} '
        return template.format(float(amount))

    def data_text(df, column_num, record):
        """Return the text of one data cell, or None when the column is not populated."""
        # Values are read by index label.
        # NOTE(review): this assumes df carries a default 0..n-1 index - confirm upstream.
        if column_num == 0:
            return str(df['Top Brands'][record])
        if column_num == 1:
            return str(df[f'{prodORitem}'][record])
        if column_num == 2:
            return percent_text(df['Discount Depth (%)'][record])
        if column_num == 3:
            return percent_text(df['VSOD'][record])
        if column_num == 4:
            return price_text(df['Base Price/Unit'][record])
        if column_num == 5:
            return price_text(df['Promo Price/Unit'][record])
        if column_num == 6:
            return format_number(str(df['Promo Value'][record]), use_decimals=False, decimals=0,
                                 use_apostrophes=True, currency_symbol=None, currency_before=False)
        if column_num == 7:
            source = 'Value Uplift (v. base) Normalized' if normalized else 'Value Uplift (v. base)'
            return percent_text(df[source][record])
        if column_num == 8:
            source = 'Volume Uplift (v. Base) Normalized' if normalized else 'Volume Uplift (v. Base)'
            return percent_text(df[source][record])
        if column_num == 9:
            return str(round(df['Incr Value'][record]))
        if column_num == 10:
            return percent_text(df['Trade Effectiveness'][record])
        if column_num == 11:
            margin = round(df['Gross Margin %'][record] * 100)
            # A zero margin is shown as an empty cell.
            return '' if margin == 0 else str(margin) + '%'
        return None

    for key, slide_num in zip(new_modified_promotionProductsP12M, range(numOfDuplication)):
        # Get the current slide, its shapes and the rows to display
        slide = prs.slides[slide_num + position]
        shapes = slide.shapes
        df = new_modified_promotionProductsP12M[key].head(20)

        # Update the title and data source text
        titleNumber = get_shape_number(shapes, "Top 20 Promotions By Value and Incremental | Carrefour | P12M")
        headerNumber = get_shape_number(shapes, 'Top 20 promotions (Replace With SO WHAT)')
        datasourcenum = get_shape_number(shapes, "Data Source | Trade Panel | Ending October 2022")
        if titleNumber is not None:
            # Each lookup is checked on its own so a missing shape cannot be used as an index.
            if datasourcenum is not None:
                shapes[datasourcenum].text = data_source
            title_shape = shapes[titleNumber]
            title_shape.text = title_shape.text.replace('Carrefour', key)
            title_font = title_shape.text_frame.paragraphs[0].font
            title_font.name = 'Nexa Bold (Headings)'
            title_font.size = Pt(12)
            if headerNumber is not None:
                header_font = shapes[headerNumber].text_frame.paragraphs[0].font
                header_font.size = Pt(16)
                header_font.name = 'Nexa Bold (Headings)'

        # Retrieve the table shape (the charts are not used on this slide)
        tables, _charts = createTableAndChart(shapes)
        table = tables[0].table

        # Adjust the number of rows in the table based on the DataFrame (one extra row for the header)
        num_rows_to_remove = len(table.rows) - df.shape[0] - 1
        table = removeRowFromTable(table, num_rows_to_remove, rowToExclude=1)

        # Populate the table with promotion data
        for row_number, row in enumerate(table.rows):
            for column_num, cell in enumerate(row.cells):
                if row_number == 0:
                    # Header row: only the currency-dependent labels and the margin label are rewritten
                    if column_num in currency_headers:
                        cell.text = currency_headers[column_num] + ' (' + currency + ')'
                        set_cell_font(cell, 'Nexa Bold', 7)
                    elif column_num == 11:
                        cell.text = 'Gross Margin %'
                        set_cell_font(cell, 'Nexa Bold', 7)
                    continue

                text = data_text(df, column_num, row_number - 1)
                if text is None:
                    continue
                cell.text = text
                set_cell_font(cell, 'Nexa Book', 6)
                if column_num in (0, 1):
                    cell.text_frame.paragraphs[0].alignment = PP_ALIGN.LEFT

                # Legacy token swap for pos == 'after'. It is applied only when the cell
                # really holds two space-separated tokens; cells without a space (e.g. the
                # incremental value) previously raised IndexError here.
                if pos == 'after' and column_num in (5, 6, 9):
                    tokens = cell.text.split(' ')
                    if len(tokens) > 1:
                        cell.text = tokens[1] + ' ' + tokens[0]
                        # Assigning cell.text replaces the cell content, so the font is applied again.
                        set_cell_font(cell, 'Nexa Book', 6)


### Slide 8

In [58]:
def top20Client(prs, top20clientonly, numOfDuplication, position=0, pos='before'):
    """
    Fill the client-only "Top 20 promotions" slides, one slide per dictionary entry.

    For each entry the two '|'-separated parts of the key are written into the slide
    title, the data source text is refreshed from the module-level ``data_source`` and
    the first table of the slide is populated from the entry's DataFrame. The
    module-level ``currency``, ``prodORitem`` and ``normalized`` are read while filling
    the table.

    Parameters:
    prs (Presentation): The PowerPoint presentation object to modify.
    top20clientonly (dict): Maps a key of the form '<first>|<second>' to a DataFrame
                            containing promotion data. The first part replaces
                            'Carrefour' and the second part 'Findus' in the title.
    numOfDuplication (int): Maximum number of slides to update.
    position (int, optional): Index of the first slide to update. Defaults to 0.
    pos (str, optional): 'before' pads the price figures with a leading space, any other
                         value with a trailing one. With 'after', cells of columns 5, 6
                         and 9 that hold two space-separated tokens get them swapped.
                         Defaults to 'before'.

    Returns:
    None. The slides of ``prs`` are modified in place.
    """
    # Header labels that carry the currency in brackets, keyed by table column.
    currency_headers = {4: 'Shelf Price/Unit', 5: 'Promo Price/Unit', 6: 'Promo Value', 9: 'Incr Value'}

    def percent_text(ratio):
        """Format a ratio such as 0.25 as '25%'."""
        return str(round(ratio * 100)) + '%'

    def price_text(amount):
        """Format a price with two decimals, padded on the side selected by ``pos``."""
        template = ' {:.2f}' if pos == 'before' else '{:.2f} '
        return template.format(float(amount))

    def data_text(df, column_num, record):
        """Return the text of one data cell, or None when the column is not populated."""
        # Values are read by index label.
        # NOTE(review): this assumes df carries a default 0..n-1 index - confirm upstream.
        if column_num == 0:
            return str(df['Top Brands'][record])
        if column_num == 1:
            return str(df[f'{prodORitem}'][record])
        if column_num == 2:
            return percent_text(df['Discount Depth (%)'][record])
        if column_num == 3:
            return percent_text(df['VSOD'][record])
        if column_num == 4:
            return price_text(df['Base Price/Unit'][record])
        if column_num == 5:
            return price_text(df['Promo Price/Unit'][record])
        if column_num == 6:
            return format_number(str(df['Promo Value'][record]), use_decimals=False, decimals=0,
                                 use_apostrophes=True, currency_symbol=None, currency_before=False)
        if column_num == 7:
            source = 'Value Uplift (v. base) Normalized' if normalized else 'Value Uplift (v. base)'
            return percent_text(df[source][record])
        if column_num == 8:
            source = 'Volume Uplift (v. Base) Normalized' if normalized else 'Volume Uplift (v. Base)'
            return percent_text(df[source][record])
        if column_num == 9:
            return str(round(df['Incr Value'][record]))
        if column_num == 10:
            return percent_text(df['Trade Effectiveness'][record])
        if column_num == 11:
            margin = round(df['Gross Margin %'][record] * 100)
            # A zero margin is shown as an empty cell.
            return '' if margin == 0 else str(margin) + '%'
        return None

    for key, slide_num in zip(top20clientonly, range(numOfDuplication)):
        slide = prs.slides[slide_num + position]
        shapes = slide.shapes
        df = top20clientonly[key]

        # Update the title and data source text
        titleNumber = get_shape_number(shapes, "Top 20 Promotions By Value and Incremental | Carrefour | Findus | P12M")
        headerNumber = get_shape_number(shapes, 'Top 20 promotions CLIENT ONLY (Replace With SO WHAT)')
        datasourcenum = get_shape_number(shapes, "Data Source | Trade Panel | Ending October 2022")
        if titleNumber is not None:
            # Each lookup is checked on its own so a missing shape cannot be used as an index.
            if datasourcenum is not None:
                shapes[datasourcenum].text = data_source
            # Strip the parts so a key written as 'A | B' does not double the spacing
            # around the separators already present in the title.
            key_parts = key.split('|')
            first_label = key_parts[0].strip()
            second_label = key_parts[1].strip()
            title_shape = shapes[titleNumber]
            title_shape.text = title_shape.text.replace('Carrefour', first_label).replace('Findus', second_label)
            title_font = title_shape.text_frame.paragraphs[0].font
            title_font.name = 'Nexa Bold (Headings)'
            title_font.size = Pt(12)
            if headerNumber is not None:
                header_font = shapes[headerNumber].text_frame.paragraphs[0].font
                header_font.size = Pt(16)
                header_font.name = 'Nexa Bold (Headings)'

        # Retrieve the table shape (the charts are not used on this slide)
        tables, _charts = createTableAndChart(shapes)
        table = tables[0].table

        # Adjust the number of rows in the table based on the DataFrame (one extra row for the header)
        num_rows_to_remove = len(table.rows) - df.shape[0] - 1
        table = removeRowFromTable(table, num_rows_to_remove, rowToExclude=1)

        # Populate the table with promotion data
        for row_number, row in enumerate(table.rows):
            for column_num, cell in enumerate(row.cells):
                if row_number == 0:
                    # Header row: only the currency-dependent labels and the margin label are rewritten
                    if column_num in currency_headers:
                        cell.text = currency_headers[column_num] + ' (' + currency + ')'
                        set_cell_font(cell, 'Nexa Bold', 7)
                    elif column_num == 11:
                        cell.text = 'Gross Margin %'
                        set_cell_font(cell, 'Nexa Bold', 7)
                    continue

                text = data_text(df, column_num, row_number - 1)
                if text is None:
                    continue
                cell.text = text
                set_cell_font(cell, 'Nexa Book', 6)
                if column_num in (0, 1):
                    cell.text_frame.paragraphs[0].alignment = PP_ALIGN.LEFT

                # Legacy token swap for pos == 'after'. It is applied only when the cell
                # really holds two space-separated tokens; cells without a space (e.g. the
                # incremental value) previously raised IndexError here.
                if pos == 'after' and column_num in (5, 6, 9):
                    tokens = cell.text.split(' ')
                    if len(tokens) > 1:
                        cell.text = tokens[1] + ' ' + tokens[0]
                        # Assigning cell.text replaces the cell content, so the font is applied again.
                        set_cell_font(cell, 'Nexa Book', 6)


### Slide 9

In [59]:
def bot20Client(prs, bottom20clientonly, numOfDuplication, position=0, pos='before'):
    """
    Fill the client-only "Bottom 20 promotions" slides, one slide per dictionary entry.

    For each entry the two '|'-separated parts of the key are written into the slide
    title, the data source text is refreshed from the module-level ``data_source`` and
    the first table of the slide is populated from the entry's DataFrame. The
    module-level ``currency``, ``prodORitem`` and ``normalized`` are read while filling
    the table.

    Parameters:
    prs (Presentation): The PowerPoint presentation object to modify.
    bottom20clientonly (dict): Maps a key of the form '<first>|<second>' to a DataFrame
                               containing promotion data. The first part replaces
                               'Carrefour' and the second part 'Findus' in the title.
    numOfDuplication (int): Maximum number of slides to update.
    position (int, optional): Index of the first slide to update. Defaults to 0.
    pos (str, optional): 'before' pads the price figures with a leading space, any other
                         value with a trailing one. With 'after', cells of columns 5, 6
                         and 9 that hold two space-separated tokens get them swapped.
                         Defaults to 'before'.

    Returns:
    None. The slides of ``prs`` are modified in place.
    """
    # Header labels that carry the currency in brackets, keyed by table column.
    currency_headers = {4: 'Shelf Price/Unit', 5: 'Promo Price/Unit', 6: 'Promo Value', 9: 'Incr Value'}

    def percent_text(ratio):
        """Format a ratio such as 0.25 as '25%'."""
        return str(round(ratio * 100)) + '%'

    def price_text(amount):
        """Format a price with two decimals, padded on the side selected by ``pos``."""
        template = ' {:.2f}' if pos == 'before' else '{:.2f} '
        return template.format(float(amount))

    def data_text(df, column_num, record):
        """Return the text of one data cell, or None when the column is not populated."""
        # Values are read by index label.
        # NOTE(review): this assumes df carries a default 0..n-1 index - confirm upstream.
        if column_num == 0:
            return str(df['Top Brands'][record])
        if column_num == 1:
            return str(df[f'{prodORitem}'][record])
        if column_num == 2:
            return percent_text(df['Discount Depth (%)'][record])
        if column_num == 3:
            return percent_text(df['VSOD'][record])
        if column_num == 4:
            return price_text(df['Base Price/Unit'][record])
        if column_num == 5:
            return price_text(df['Promo Price/Unit'][record])
        if column_num == 6:
            return format_number(str(df['Promo Value'][record]), use_decimals=False, decimals=0,
                                 use_apostrophes=True, currency_symbol=None, currency_before=False)
        if column_num == 7:
            source = 'Value Uplift (v. base) Normalized' if normalized else 'Value Uplift (v. base)'
            return percent_text(df[source][record])
        if column_num == 8:
            source = 'Volume Uplift (v. Base) Normalized' if normalized else 'Volume Uplift (v. Base)'
            return percent_text(df[source][record])
        if column_num == 9:
            return str(round(df['Incr Value'][record]))
        if column_num == 10:
            return percent_text(df['Trade Effectiveness'][record])
        if column_num == 11:
            margin = round(df['Gross Margin %'][record] * 100)
            # A zero margin is shown as an empty cell.
            return '' if margin == 0 else str(margin) + '%'
        return None

    for key, slide_num in zip(bottom20clientonly, range(numOfDuplication)):
        slide = prs.slides[slide_num + position]
        shapes = slide.shapes
        df = bottom20clientonly[key]

        # Update the title and data source text
        titleNumber = get_shape_number(shapes, "Bottom 20 Promotions By Value and Incremental | Carrefour | Findus | P12M")
        headerNumber = get_shape_number(shapes, 'Bottom 20 promotions CLIENT ONLY (Replace With SO WHAT)')
        datasourcenum = get_shape_number(shapes, "Data Source | Trade Panel | Ending October 2022")
        if titleNumber is not None:
            # Each lookup is checked on its own so a missing shape cannot be used as an index.
            if datasourcenum is not None:
                shapes[datasourcenum].text = data_source
            # Strip the parts so a key written as 'A | B' does not double the spacing
            # around the separators already present in the title.
            key_parts = key.split('|')
            first_label = key_parts[0].strip()
            second_label = key_parts[1].strip()
            title_shape = shapes[titleNumber]
            title_shape.text = title_shape.text.replace('Carrefour', first_label).replace('Findus', second_label)
            title_font = title_shape.text_frame.paragraphs[0].font
            title_font.name = 'Nexa Bold (Headings)'
            title_font.size = Pt(12)
            if headerNumber is not None:
                header_font = shapes[headerNumber].text_frame.paragraphs[0].font
                header_font.size = Pt(16)
                header_font.name = 'Nexa Bold (Headings)'

        # Retrieve the table shape (the charts are not used on this slide)
        tables, _charts = createTableAndChart(shapes)
        table = tables[0].table

        # Adjust the number of rows in the table based on the DataFrame (one extra row for the header)
        num_rows_to_remove = len(table.rows) - df.shape[0] - 1
        table = removeRowFromTable(table, num_rows_to_remove, rowToExclude=1)

        # Populate the table with promotion data
        for row_number, row in enumerate(table.rows):
            for column_num, cell in enumerate(row.cells):
                if row_number == 0:
                    # Header row: only the currency-dependent labels and the margin label are rewritten
                    if column_num in currency_headers:
                        cell.text = currency_headers[column_num] + ' (' + currency + ')'
                        set_cell_font(cell, 'Nexa Bold', 7)
                    elif column_num == 11:
                        cell.text = 'Gross Margin %'
                        set_cell_font(cell, 'Nexa Bold', 7)
                    continue

                text = data_text(df, column_num, row_number - 1)
                if text is None:
                    continue
                cell.text = text
                set_cell_font(cell, 'Nexa Book', 6)
                if column_num in (0, 1):
                    cell.text_frame.paragraphs[0].alignment = PP_ALIGN.LEFT

                # Legacy token swap for pos == 'after'. It is applied only when the cell
                # really holds two space-separated tokens; cells without a space (e.g. the
                # incremental value) previously raised IndexError here.
                if pos == 'after' and column_num in (5, 6, 9):
                    tokens = cell.text.split(' ')
                    if len(tokens) > 1:
                        cell.text = tokens[1] + ' ' + tokens[0]
                        # Assigning cell.text replaces the cell content, so the font is applied again.
                        set_cell_font(cell, 'Nexa Book', 6)


### Slide 10

In [None]:
def newVolumeSold(prs, data, position=0, parent='Category', child = 'Sector'):
    """
    Fill the "Volume Sold on Deal" slides, one slide per (data entry, client) pair.

    For every non-empty DataFrame in ``data`` and every name of the module-level
    ``client_manuf + client_brands`` that is a column of that DataFrame, the next slide
    (counting from ``position``) is updated: title and data source text, the first table
    and the first two charts. Rows whose ``parent`` value ends with "Total" are moved to
    the top before the slide is filled. The module-level ``categories`` and
    ``data_source`` are read as well.

    Parameters:
    prs (Presentation): The PowerPoint presentation object to modify.
    data (dict): Maps a key of the form '<first> | <second>' to a DataFrame. The second
                 part of the key replaces 'National' in the slide title. The DataFrame
                 must provide the ``parent`` and ``child`` columns, a 'VSOD' column and
                 one column per client to display.
    position (int, optional): Index of the first slide to update. Defaults to 0.
    parent (str, optional): Column holding the total marker; also used as the label of
                            the first chart column. Defaults to 'Category'.
    child (str, optional): Column providing the row labels and chart categories.
                           Defaults to 'Sector'.

    Returns:
    None. The slides of ``prs`` are modified in place.
    """
    slidenum = 0
    for key, df in data.items():
        if df.shape[0] == 0:
            continue

        # Only clients that have a column in this DataFrame get a slide.
        clients_on_slide = [name for name in client_manuf + client_brands if name in df.columns]
        if not clients_on_slide:
            continue

        # Move the total row(s) to the top. A single mask is used for both halves so a
        # row can never land in both of them (the previous case-sensitive complement
        # duplicated lower-case "total" rows) and missing values count as "not total".
        is_total = df[parent].str.contains(r'Total$', case=False, na=False)
        df = pd.concat([df[is_total], df[~is_total]]).reset_index(drop=True)

        for client in clients_on_slide:
            slide = prs.slides[slidenum + position]
            shapes = slide.shapes

            # Update the title and data source text
            titleNumber = get_shape_number(shapes, "Volume Sold on Deal | National | P12M")
            titleNumber1 = get_shape_number(shapes, "Data Source l Trade Panel l Ending March 2022")
            if titleNumber is not None:
                # The data source shape is checked on its own so a failed lookup is not used as an index.
                if titleNumber1 is not None:
                    shapes[titleNumber1].text = data_source
                title_shape = shapes[titleNumber]
                title_shape.text = title_shape.text.replace('National', key.split(' | ')[1])
                title_font = title_shape.text_frame.paragraphs[0].font
                title_font.size = Pt(12)
                title_font.name = 'Nexa Bold (Headings)'
                # The shape right after the title is styled as the slide header.
                header_font = shapes[titleNumber + 1].text_frame.paragraphs[0].font
                header_font.size = Pt(16)
                header_font.name = 'Nexa Bold (Headings)'

            tables, charts = createTableAndChart(shapes)
            table = tables[0].table
            # Keep one table row per DataFrame row plus the header row
            num_rows_to_remove = len(table.rows) - df.shape[0] - 1
            table = removeRowFromTable(table, num_rows_to_remove, rowToExclude=1)

            for row_number, row in enumerate(table.rows):
                if row_number == 1:
                    # Highlight the total row
                    for total_cell in row.cells:
                        total_cell.fill.solid()
                        total_cell.fill.fore_color.rgb = RGBColor(229, 244, 243)

                for column_num, cell in enumerate(row.cells):
                    if column_num == 0 and row_number >= 1:
                        if row_number > 1:
                            cell.text = str(df[child][row_number - 1])
                        elif parent == 'Category':
                            cell.text = str(categories[0])
                        else:
                            cell.text = str(df[parent][0]).replace('Total', '').strip()
                        set_cell_font(cell, 'Nexa Bold', 8)
                        cell.text_frame.paragraphs[0].alignment = PP_ALIGN.CENTER
                    elif row_number == 0 and column_num in (1, 2):
                        # Column 1 is labelled with the parent level, column 2 with the client
                        owner = parent if column_num == 1 else client
                        cell.text = f"{owner}\nVolume Sold on Deal"
                        set_cell_font(cell, 'Nexa Bold', 9)
                        # The line break yields two paragraphs; both are styled
                        for paragraph_index in (0, 1):
                            paragraph = cell.text_frame.paragraphs[paragraph_index]
                            paragraph.alignment = PP_ALIGN.CENTER
                            paragraph.font.color.rgb = RGBColor(87, 85, 85)

            # Prepare data for the charts: 0 (resp. 1) becomes None in the client
            # series and NaN becomes 0 in the VSOD series
            modified_list = [None if value == 0 else value for value in df[client].tolist()]
            non_volume = [None if value == 1 else value for value in (1 - df[client]).tolist()]
            cleaned_vsod = [0 if np.isnan(x) else x for x in df['VSOD'].tolist()]
            cleaned_vsod_subtracted = [0 if np.isnan(x) else x for x in (1 - df['VSOD']).tolist()]

            # First chart: overall VSOD, second chart: the client's VSOD
            chart_series = (
                (0, cleaned_vsod, cleaned_vsod_subtracted),
                (1, modified_list, non_volume),
            )
            for chart_index, on_deal, remainder in chart_series:
                chart = charts[chart_index].chart
                chart_data = CategoryChartData()
                chart_data.categories = df[child].tolist()
                chart_data.add_series('Volume Sold on Deal', on_deal)
                chart_data.add_series('', remainder)
                chart.replace_data(chart_data)

            slidenum += 1


In [62]:
def VolumeSold(prs, VSOD_data, duplication, position=0):
    """
    Fill one "Volume Sold on Deal" slide per (data entry, client brand) combination.

    Every entry of ``VSOD_data`` is paired with every name in the module-level
    ``client_brands``; each pair updates the next slide, counting from ``position``.
    The total row of the entry is moved to the top, then the slide title, the data
    source text, the first table and the first two charts are refreshed.

    Parameters:
    prs (Presentation): The PowerPoint presentation object to modify.
    VSOD_data (dict): Maps a ' | '-separated key to a DataFrame with 'Sector', 'Segment'
                      and 'VSOD' columns plus one column per client brand.
    duplication (int): Not used by this function.
    position (int, optional): Index of the first slide to update. Defaults to 0.

    Returns:
    None. The slides of ``prs`` are modified in place.
    """
    slide_num = 0
    for (key, final), client in itertools.product(VSOD_data.items(), client_brands):
        slide = prs.slides[slide_num + position]

        # Locate the total row: the 'Grand Total' sector when a sector mentions 'Grand',
        # otherwise the first row whose segment equals 0
        if final['Sector'].str.contains('Grand').any():
            total_rows = final[final['Sector'] == 'Grand Total']
        else:
            total_rows = final[final['Segment'] == 0]
        total_Index = total_rows.index[0]

        # Put the total row first, followed by all remaining rows
        leading = final.loc[total_Index].to_frame().T
        trailing = final.drop(index=total_Index)
        df = pd.concat([leading, trailing]).reset_index(drop=True)

        # Remove the word "total" from the labels
        df['Sector'] = df['Sector'].str.replace('total', '', case=False).str.strip()
        df['Segment'] = df['Segment'].astype(str).str.replace('total', '', case=False).str.strip()

        shapes = slide.shapes

        # Update the title and data source text
        titleNumber = get_shape_number(shapes, "Volume Sold on Deal | National | P12M")
        titleNumber1 = get_shape_number(shapes, "Data Source l Trade Panel l Ending March 2022")
        if titleNumber is not None:
            shapes[titleNumber1].text = data_source
            shapes[titleNumber].text = shapes[titleNumber].text.replace('National', key.split(' | ')[1])
            for shape_index, font_size in ((titleNumber, 12), (titleNumber + 1, 16)):
                first_paragraph_font = shapes[shape_index].text_frame.paragraphs[0].font
                first_paragraph_font.size = Pt(font_size)
                first_paragraph_font.name = 'Nexa Bold (Headings)'

        # Retrieve table and chart shapes
        tables, charts = createTableAndChart(slide.shapes)
        table = tables[0].table

        # Keep one table row per DataFrame row plus the header row
        rows_to_drop = len(table.rows) - df.shape[0] - 1
        table = removeRowFromTable(table, rows_to_drop, 3.84, rowToExclude=1)

        # Keys made of exactly two parts label the rows by sector, the others by segment
        two_part_key = len(key.split(' | ')) == 2

        # Populate the table with volume sold data
        for row_number, row in enumerate(table.rows):
            for column_num, cell in enumerate(row.cells):
                if column_num == 0 and row_number >= 1:
                    if row_number == 1:
                        cell.text = str(categories[0]) if two_part_key else df['Sector'][0]
                    else:
                        label_column = 'Sector' if two_part_key else 'Segment'
                        cell.text = str(df[label_column][row_number - 1])
                    set_cell_font(cell, 'Nexa Bold', 8)
                    cell.text_frame.paragraphs[0].alignment = PP_ALIGN.CENTER
                elif column_num == 2 and row_number == 0:
                    cell.text = f"{client}\nVolume Sold on Deal"
                    set_cell_font(cell, 'Nexa Bold', 9)
                    for paragraph_index in (0, 1):
                        cell.text_frame.paragraphs[paragraph_index].alignment = PP_ALIGN.CENTER
                        cell.text_frame.paragraphs[paragraph_index].font.color.rgb = RGBColor(87, 85, 85)

        # Client series for the second chart: 0 (resp. 1) is replaced by None
        client_on_deal = [None if share == 0 else share for share in df[client].tolist()]
        client_remainder = [None if share == 1 else share for share in (1 - df[client]).tolist()]

        # First chart: overall VSOD, second chart: the client's VSOD
        for chart_index in (0, 1):
            chart = charts[chart_index].chart
            chart_data = CategoryChartData()
            chart_data.categories = df['Sector'].tolist()
            if chart_index == 0:
                chart_data.add_series('Volume Sold on Deal', df['VSOD'].tolist())
                chart_data.add_series('', (1 - df['VSOD']).tolist())
            else:
                chart_data.add_series('Volume Sold on Deal', client_on_deal)
                chart_data.add_series('', client_remainder)
            chart.replace_data(chart_data)

        slide_num += 1

### Slide 11

In [63]:
def PromoShare_vs_ValueShare(prs, modified_promotionBrandsP12M, numOfDuplication, position=0):
    """
    Update PowerPoint presentation with Promo Share vs. Value Share data.

    One slide is updated per dictionary key, starting at ``position``; at most
    ``numOfDuplication`` slides are touched. On each slide the title, data
    source and header texts are refreshed, the first chart receives the
    Value Share / Promo Share / VSOD series, and the first table receives the
    Promo Share / Value Share ratio (x100, rounded) for each brand.

    Parameters:
    prs (Presentation): PowerPoint presentation object to modify.
    modified_promotionBrandsP12M (dict): Dictionary of DataFrames with the columns
        'Top Brands', 'Value Share', 'Promo Share' and 'VSOD'. Each key is split on
        ' | '; its first part replaces 'National' and its second part replaces
        'Category' in the slide title.
    numOfDuplication (int): Maximum number of slides to update.
    position (int, optional): Starting slide position in the presentation. Defaults to 0.

    Returns:
    None. The slides are modified in place.
    """
    max_brands = 9  # the chart shows at most 9 brands; the table must match it

    for key, slide_num in zip(modified_promotionBrandsP12M, range(numOfDuplication)):
        slide = prs.slides[slide_num + position]
        df = modified_promotionBrandsP12M[key]

        # Drop the 'Others' / 'Grand Total' rows and brands at or below 1% value share
        df = df[~df['Top Brands'].str.contains('Others', case=False)]
        df = df[~df['Top Brands'].str.contains('Grand Total', case=False)]
        df = df[df['Value Share'] > 0.01]

        # Client brands are always kept; the largest remaining brands fill the free slots
        df_client = selectClientBrands(df, 'Top Brands', 'Value Share')
        number_of_brands_needed = max(max_brands - len(df_client), 0)

        competitors = df[~df['Top Brands'].isin(client_brands)]
        competitors = competitors.sort_values(by='Value Share', ascending=False).reset_index(drop=True)
        competitors = competitors.head(number_of_brands_needed)

        df = pd.concat([competitors, df_client], ignore_index=True)
        df = df.sort_values(by='Value Share', ascending=False).reset_index(drop=True)

        # Cap once so that the chart and the table are built from the same brands
        # (the client selection alone may already exceed the limit).
        top_brands = df.head(max_brands)

        # Update slide title, data source and header
        shapes = slide.shapes
        titleNumber = get_shape_number(shapes, "Promo Share vs. Fair Share | Category | National | P12M | Top Brands")
        datasourcenum = get_shape_number(shapes, "Data Source l Trade Panel l Ending March 2022")
        headerNumber = get_shape_number(shapes, 'Promo share vs Value Share (Replace With SO WHAT)')
        if titleNumber is not None:
            # Each lookup is checked on its own: a missing data-source or header
            # placeholder must not abort the update of the whole slide.
            if datasourcenum is not None:
                shapes[datasourcenum].text = data_source

            key_parts = key.split(' | ')
            title_shape = shapes[titleNumber]
            title_shape.text = title_shape.text.replace('National', key_parts[0]) \
                .replace('Category', key_parts[1])
            title_paragraph = title_shape.text_frame.paragraphs[0]
            title_paragraph.font.name = 'Nexa Bold (Headings)'
            title_paragraph.font.size = Pt(12)

            if headerNumber is not None:
                header_paragraph = shapes[headerNumber].text_frame.paragraphs[0]
                header_paragraph.font.size = Pt(16)
                header_paragraph.font.name = 'Nexa Bold (Headings)'

        # Update chart with Promo Share vs. Value Share data
        tables, charts = createTableAndChart(slide.shapes)
        chart = charts[0].chart
        chart_data = CategoryChartData()
        chart_data.categories = top_brands['Top Brands'].tolist()
        chart_data.add_series("Value Share", top_brands['Value Share'].tolist())
        chart_data.add_series("Promo Share", top_brands["Promo Share"].tolist())
        chart_data.add_series("VSOD", top_brands['VSOD'].tolist())
        chart.replace_data(chart_data)

        # Update table: the first column is left for the label, then one column per brand
        table = tables[0].table
        num_columns_to_remove = (len(table.columns) - len(top_brands)) - 1  # surplus columns at the end
        table = col_cell_remove(table, num_columns_to_remove)

        # Promo Share relative to Value Share, as an index where 100 means parity
        promo_index = (top_brands['Promo Share'] / top_brands['Value Share'] * 100).tolist()

        for row in table.rows:
            for column_num, cell in enumerate(row.cells):
                paragraph_needs_color = False
                if 0 < column_num <= len(promo_index):
                    cell.text = str(round(promo_index[column_num - 1]))
                    paragraph_needs_color = True

                # Fetch the paragraph after the text assignment, which rebuilds it
                paragraph = cell.text_frame.paragraphs[0]
                if paragraph_needs_color:
                    paragraph.font.name = 'Nexa Book'
                    paragraph.font.color.rgb = RGBColor(87, 85, 85)

                paragraph.alignment = PP_ALIGN.CENTER
                paragraph.font.size = Pt(8)
                paragraph.font.bold = False


In [None]:
def PromoShare_vs_ValueShare_no(prs, modified_promotionBrandsP12M, numOfDuplication, position=0):
    """
    Update PowerPoint presentation with Promo Share vs. Value Share data.

    Variant without client-brand selection: after filtering, the brands are
    simply ranked by 'Value Share'. One slide is updated per dictionary key,
    starting at ``position``; at most ``numOfDuplication`` slides are touched.

    Parameters:
    prs (Presentation): PowerPoint presentation object to modify.
    modified_promotionBrandsP12M (dict): Dictionary of DataFrames with the columns
        'Top Brands', 'Value Share', 'Promo Share' and 'VSOD'. Each key is split on
        ' | '; its first part replaces 'National' and its second part replaces
        'Category' in the slide title.
    numOfDuplication (int): Maximum number of slides to update.
    position (int, optional): Starting slide position in the presentation. Defaults to 0.

    Returns:
    None. The slides are modified in place.
    """
    max_brands = 9  # the chart shows at most 9 brands; the table must match it

    for key, slide_num in zip(modified_promotionBrandsP12M, range(numOfDuplication)):
        slide = prs.slides[slide_num + position]
        df = modified_promotionBrandsP12M[key]

        # Drop the 'Others' / 'Grand Total' rows and brands at or below 1% value share
        df = df[~df['Top Brands'].str.contains('Others', case=False)]
        df = df[~df['Top Brands'].str.contains('Grand Total', case=False)]
        df = df[df['Value Share'] > 0.01]
        df = df.sort_values(by='Value Share', ascending=False).reset_index(drop=True)

        # Cap once so that the chart and the table are built from the same brands;
        # nothing above limits the number of rows in this variant.
        top_brands = df.head(max_brands)

        # Update slide title, data source and header
        shapes = slide.shapes
        titleNumber = get_shape_number(shapes, "Promo Share vs. Fair Share | Category | National | P12M | Top Brands")
        datasourcenum = get_shape_number(shapes, "Data Source l Trade Panel l Ending March 2022")
        headerNumber = get_shape_number(shapes, 'Promo share vs Value Share (Replace With SO WHAT)')
        if titleNumber is not None:
            # Each lookup is checked on its own: a missing data-source or header
            # placeholder must not abort the update of the whole slide.
            if datasourcenum is not None:
                shapes[datasourcenum].text = data_source

            key_parts = key.split(' | ')
            title_shape = shapes[titleNumber]
            title_shape.text = title_shape.text.replace('National', key_parts[0]) \
                .replace('Category', key_parts[1])
            title_paragraph = title_shape.text_frame.paragraphs[0]
            title_paragraph.font.name = 'Nexa Bold (Headings)'
            title_paragraph.font.size = Pt(12)

            if headerNumber is not None:
                header_paragraph = shapes[headerNumber].text_frame.paragraphs[0]
                header_paragraph.font.size = Pt(16)
                header_paragraph.font.name = 'Nexa Bold (Headings)'

        # Update chart with Promo Share vs. Value Share data
        tables, charts = createTableAndChart(slide.shapes)
        chart = charts[0].chart
        chart_data = CategoryChartData()
        chart_data.categories = top_brands['Top Brands'].tolist()
        chart_data.add_series("Value Share", top_brands['Value Share'].tolist())
        chart_data.add_series("Promo Share", top_brands["Promo Share"].tolist())
        chart_data.add_series("VSOD", top_brands['VSOD'].tolist())
        chart.replace_data(chart_data)

        # Update table: the first column is left for the label, then one column per brand
        table = tables[0].table
        num_columns_to_remove = (len(table.columns) - len(top_brands)) - 1  # surplus columns at the end
        table = col_cell_remove(table, num_columns_to_remove)

        # Promo Share relative to Value Share, as an index where 100 means parity
        promo_index = (top_brands['Promo Share'] / top_brands['Value Share'] * 100).tolist()

        for row in table.rows:
            for column_num, cell in enumerate(row.cells):
                paragraph_needs_color = False
                if 0 < column_num <= len(promo_index):
                    cell.text = str(round(promo_index[column_num - 1]))
                    paragraph_needs_color = True

                # Fetch the paragraph after the text assignment, which rebuilds it
                paragraph = cell.text_frame.paragraphs[0]
                if paragraph_needs_color:
                    paragraph.font.name = 'Nexa Book'
                    paragraph.font.color.rgb = RGBColor(87, 85, 85)

                paragraph.alignment = PP_ALIGN.CENTER
                paragraph.font.size = Pt(8)
                paragraph.font.bold = False


### Slide 12

In [None]:
def PromoSalesTotalSize(prs, newModifiedBrands, numOfDuplication, position=0):
    """
    Update PowerPoint presentation with Promo Sales by Total Size data.

    One slide is updated per dictionary key, starting at ``position``; at most
    ``numOfDuplication`` slides are touched. Client brands come first, followed
    by the largest other brands, up to 10 brands in total. The first table gets
    the brand names and the first chart gets the 'Recruitment' and
    'Consumption' series.

    Parameters:
    prs (Presentation): PowerPoint presentation object to modify.
    newModifiedBrands (dict): Dictionary of DataFrames with the columns 'Top Brands',
        'Value Share', 'Recruitment' and 'Consumption'. Each key is split on ' | ' into
        a category and a market; the part found in the global ``categories`` is the
        category.
    numOfDuplication (int): Maximum number of slides to update.
    position (int, optional): Starting slide position in the presentation. Defaults to 0.

    Returns:
    None. The slides are modified in place.
    """
    max_brands = 10  # rows shown in both the table and the chart

    for key, slide_num in zip(newModifiedBrands, range(numOfDuplication)):
        slide = prs.slides[slide_num + position]
        df = newModifiedBrands[key]

        # The key holds category and market in either order
        key_parts = key.split(' | ')
        if key_parts[0] in categories:
            cat, market = key_parts[0], key_parts[1]
        else:
            market, cat = key_parts[0], key_parts[1]

        # Drop the 'Others' / 'Grand Total' rows and brands at or below 1% value share
        df = df[~df['Top Brands'].str.contains('Others', case=False)]
        df = df[~df['Top Brands'].str.contains('Grand Total', case=False)]
        df = df[df['Value Share'] > 0.01]

        # Client brands are always kept; the largest remaining brands fill the free slots
        df_client = selectClientBrands(df, 'Top Brands', 'Value Share')
        number_of_brands_needed = max(max_brands - len(df_client), 0)

        competitors = df[~df['Top Brands'].isin(client_brands)]
        competitors = competitors.sort_values(by='Value Share', ascending=False).head(number_of_brands_needed)

        # Client brands first, then the additional brands
        df = pd.concat([df_client, competitors], ignore_index=True)

        # Update slide title, data source and header
        shapes = slide.shapes
        titleNumber = get_shape_number(shapes, "% Promo sales by total size | Total Category | Carrefour | P12M")
        datasourcenum = get_shape_number(shapes, "DATA SOURCE: Trade Panel/Retailer Data | Ending Nov 2022")
        headerNumber = get_shape_number(shapes, 'Promo Sales by total size (Replace With SO WHAT)')

        if titleNumber is not None:
            # Each lookup is checked on its own: a missing data-source or header
            # placeholder must not abort the update of the whole slide.
            if datasourcenum is not None:
                shapes[datasourcenum].text = data_source

            # Rebuild the title from the title shape itself rather than from a
            # fixed shape index, which breaks as soon as the shape order changes.
            title_shape = shapes[titleNumber]
            title_shape.text = title_shape.text.replace('Total Category', cat) \
                .replace('Carrefour', market)
            title_paragraph = title_shape.text_frame.paragraphs[0]
            title_paragraph.font.size = Pt(12)
            title_paragraph.font.name = 'Nexa Bold (Headings)'

            if headerNumber is not None:
                header_paragraph = shapes[headerNumber].text_frame.paragraphs[0]
                header_paragraph.font.size = Pt(16)
                header_paragraph.font.name = 'Nexa Bold (Headings)'

        # The same capped selection feeds the table and the chart
        top_brands = df.head(max_brands)
        category = top_brands['Top Brands'].tolist()
        tables, charts = createTableAndChart(slide.shapes)

        # Update table with category data; size it from the labels that will be
        # written, not from the uncapped frame.
        table = tables[0].table
        num_rows_to_remove = len(table.rows) - len(category)
        table = removeRowFromTable(table, num_rows_to_remove, rowToExclude=0)
        for row_number, row in enumerate(table.rows):
            for column_num, cell in enumerate(row.cells):
                if column_num != 0:
                    continue
                if row_number < len(category):
                    cell.text = str(category[row_number])
                else:
                    # More rows than brands: blank the label instead of failing
                    cell.text = ""
                    print(f"[Warning] row_number {row_number} is out of range for category with length {len(category)}")
                set_cell_font(cell, 'Nexa Bold', 9)
                cell.text_frame.paragraphs[0].font.bold = False
                cell.text_frame.paragraphs[0].alignment = PP_ALIGN.LEFT

        # Update chart with Promo Sales data
        chart = charts[0].chart
        chart_data = CategoryChartData()
        chart_data.categories = category
        chart_data.add_series('Recruitment', top_brands['Recruitment'].tolist())
        chart_data.add_series('Consumption', top_brands['Consumption'].tolist())
        chart.replace_data(chart_data)


In [None]:
def PromoSalesTotalSize_no(prs, newModifiedBrands, numOfDuplication, position=0):
    """
    Update PowerPoint presentation with Promo Sales by Total Size data.

    Variant without client-brand selection: after filtering, the brands are
    ranked by 'Value Share' and the first 10 are shown. One slide is updated per
    dictionary key, starting at ``position``; at most ``numOfDuplication`` slides
    are touched.

    Parameters:
    prs (Presentation): PowerPoint presentation object to modify.
    newModifiedBrands (dict): Dictionary of DataFrames with the columns 'Top Brands',
        'Value Share', 'Recruitment' and 'Consumption'. Each key is split on ' | ' into
        a category and a market; the part found in the global ``categories`` is the
        category.
    numOfDuplication (int): Maximum number of slides to update.
    position (int, optional): Starting slide position in the presentation. Defaults to 0.

    Returns:
    None. The slides are modified in place.
    """
    max_brands = 10  # rows shown in both the table and the chart

    for key, slide_num in zip(newModifiedBrands, range(numOfDuplication)):
        slide = prs.slides[slide_num + position]
        df = newModifiedBrands[key]

        # The key holds category and market in either order
        key_parts = key.split(' | ')
        if key_parts[0] in categories:
            cat, market = key_parts[0], key_parts[1]
        else:
            market, cat = key_parts[0], key_parts[1]

        # Drop the 'Others' / 'Grand Total' rows and brands at or below 1% value share
        df = df[~df['Top Brands'].str.contains('Others', case=False)]
        df = df[~df['Top Brands'].str.contains('Grand Total', case=False)]
        df = df[df['Value Share'] > 0.01]
        df = df.sort_values(by='Value Share', ascending=False).reset_index(drop=True)

        # Update slide title, data source and header
        shapes = slide.shapes
        titleNumber = get_shape_number(shapes, "% Promo sales by total size | Total Category | Carrefour | P12M")
        datasourcenum = get_shape_number(shapes, "DATA SOURCE: Trade Panel/Retailer Data | Ending Nov 2022")
        headerNumber = get_shape_number(shapes, 'Promo Sales by total size (Replace With SO WHAT)')

        if titleNumber is not None:
            # Each lookup is checked on its own: a missing data-source or header
            # placeholder must not abort the update of the whole slide.
            if datasourcenum is not None:
                shapes[datasourcenum].text = data_source

            # Rebuild the title from the title shape itself rather than from a
            # fixed shape index, which breaks as soon as the shape order changes.
            title_shape = shapes[titleNumber]
            title_shape.text = title_shape.text.replace('Total Category', cat) \
                .replace('Carrefour', market)
            title_paragraph = title_shape.text_frame.paragraphs[0]
            title_paragraph.font.size = Pt(12)
            title_paragraph.font.name = 'Nexa Bold (Headings)'

            if headerNumber is not None:
                header_paragraph = shapes[headerNumber].text_frame.paragraphs[0]
                header_paragraph.font.size = Pt(16)
                header_paragraph.font.name = 'Nexa Bold (Headings)'

        # The same capped selection feeds the table and the chart
        top_brands = df.head(max_brands)
        category = top_brands['Top Brands'].tolist()
        tables, charts = createTableAndChart(slide.shapes)

        # Update table with category data
        table = tables[0].table
        num_rows_to_remove = len(table.rows) - len(category)
        table = removeRowFromTable(table, num_rows_to_remove, rowToExclude=0)
        for row_number, row in enumerate(table.rows):
            for column_num, cell in enumerate(row.cells):
                if column_num != 0:
                    continue
                if row_number < len(category):
                    cell.text = str(category[row_number])
                else:
                    # More rows than brands: blank the label instead of failing
                    cell.text = ""
                    print(f"[Warning] row_number {row_number} is out of range for category with length {len(category)}")
                set_cell_font(cell, 'Nexa Bold', 9)
                cell.text_frame.paragraphs[0].font.bold = False
                cell.text_frame.paragraphs[0].alignment = PP_ALIGN.LEFT

        # Update chart with Promo Sales data
        chart = charts[0].chart
        chart_data = CategoryChartData()
        chart_data.categories = category
        chart_data.add_series('Recruitment', top_brands['Recruitment'].tolist())
        chart_data.add_series('Consumption', top_brands['Consumption'].tolist())
        chart.replace_data(chart_data)


### Slide 13

In [None]:
def PromoSalesTypes(prs, modified_promotionBrandsP12M, numOfDuplication, position=0):
    """
    Update PowerPoint presentation with Promo Sales by Promo Type data.

    One slide is updated per dictionary key, starting at ``position``; at most
    ``numOfDuplication`` slides are touched. The first table gets one row label
    per brand and the first chart gets one series per promo type, with the
    brands as categories.

    Parameters:
    prs (Presentation): PowerPoint presentation object to modify.
    modified_promotionBrandsP12M (dict): Dictionary of DataFrames in long format with the
        columns 'Top Brands', 'Promo Type' and '% Promo Sales' (one row per brand and
        promo type). Each key is split on ' | ' into a category and a market; the part
        found in the global ``categories`` is the category.
    numOfDuplication (int): Maximum number of slides to update.
    position (int, optional): Starting slide position in the presentation. Defaults to 0.

    Returns:
    None. The slides are modified in place.
    """
    for key, slide_num in zip(modified_promotionBrandsP12M, range(numOfDuplication)):
        slide = prs.slides[slide_num + position]
        df = modified_promotionBrandsP12M[key]
        shapes = slide.shapes
        titleNumber = get_shape_number(shapes, "% Promo sales by Promo Type | Total Category | Carrefour | P12M")
        datasourcenum = get_shape_number(shapes, "DATA SOURCE: Trade Panel/Retailer Data | Ending Nov 2022")
        headerNumber = get_shape_number(shapes, 'Promo Sales by promo type (Replace With SO WHAT)')

        # The key holds category and market in either order
        key_parts = key.split(' | ')
        if key_parts[0] in categories:
            cat, market = key_parts[0], key_parts[1]
        else:
            cat, market = key_parts[1], key_parts[0]

        if titleNumber is not None:
            # Each lookup is checked on its own: a missing data-source or header
            # placeholder must not abort the update of the whole slide.
            if datasourcenum is not None:
                shapes[datasourcenum].text = data_source

            title_shape = shapes[titleNumber]
            title_shape.text = title_shape.text.replace('Total Category', cat) \
                .replace('Carrefour', market)
            title_paragraph = title_shape.text_frame.paragraphs[0]
            title_paragraph.font.size = Pt(12)
            title_paragraph.font.name = 'Nexa Bold (Headings)'

            if headerNumber is not None:
                header_paragraph = shapes[headerNumber].text_frame.paragraphs[0]
                header_paragraph.font.size = Pt(16)
                header_paragraph.font.name = 'Nexa Bold (Headings)'

        brands = list(df['Top Brands'].unique())  # Unique brands, in order of appearance
        # Brand -> chart category position, looked up once per data row below
        brand_position = {brand: index for index, brand in enumerate(brands)}

        tables, charts = createTableAndChart(slide.shapes)

        # Update table with one row label per brand
        table = tables[0].table
        num_rows_to_remove = len(table.rows) - len(brands)
        table = removeRowFromTable(table, num_rows_to_remove, rowToExclude=0)

        for row_number, row in enumerate(table.rows):
            for column_num, cell in enumerate(row.cells):
                if column_num == 0:
                    cell.text = str(brands[row_number])
                    set_cell_font(cell, 'Nexa Bold', 9)
                    cell.text_frame.paragraphs[0].alignment = PP_ALIGN.LEFT

        # Update chart with Promo Sales by Promo Type data
        chart = charts[0].chart
        chart_data = CategoryChartData()
        chart_data.categories = brands  # Brands are categories

        # One list of values per promo type, aligned with `brands`;
        # None marks a brand without that promo type.
        promo_series_data = defaultdict(lambda: [None] * len(brands))

        # Fill values where promo sales exist
        for brand, promo_type, sales in zip(df['Top Brands'], df['Promo Type'], df['% Promo Sales']):
            promo_series_data[promo_type][brand_position[brand]] = round(sales, 2)

        for promo_type, series_values in promo_series_data.items():
            if any(v is not None for v in series_values):  # Skip if all values are None
                # Replace None and 0.0 with '' for chart
                clean_values = [v if v not in [None, 0.0] else '' for v in series_values]
                chart_data.add_series(promo_type, clean_values)
        chart.replace_data(chart_data)
        chart.chart_style = 3

        # Apply colors; the palette wraps around when there are more series than colors
        for i, series in enumerate(chart.series):
            fill = series.format.fill
            fill.solid()
            fill.fore_color.rgb = custom_colors[i % len(custom_colors)]


In [None]:
def PromoSalesTypes_no(prs, modified_promotionBrandsP12M, numOfDuplication, position=0):
    """
    Update PowerPoint presentation with Promo Sales by Promo Type data.

    Variant working on a wide frame: every column listed in the global
    ``promo_col`` is one promo type, charted as its share of the row total.
    One slide is updated per dictionary key, starting at ``position``; at most
    ``numOfDuplication`` slides are touched, and at most 10 brands are shown.

    Parameters:
    prs (Presentation): PowerPoint presentation object to modify.
    modified_promotionBrandsP12M (dict): Dictionary of DataFrames with the columns
        'Top Brands', 'Value Share', 'Promo Value' and the ``promo_col`` columns. Each key
        is split on ' | '; its first part replaces 'Carrefour' and its second part
        replaces 'Total Category' in the slide title.
    numOfDuplication (int): Maximum number of slides to update.
    position (int, optional): Starting slide position in the presentation. Defaults to 0.

    Returns:
    None. The slides are modified in place.
    """
    max_brands = 10  # rows shown in both the table and the chart

    for key, slide_num in zip(modified_promotionBrandsP12M, range(numOfDuplication)):
        slide = prs.slides[slide_num + position]
        df = modified_promotionBrandsP12M[key]

        # Filter out 'Others', brands with low value share, and zero promo value
        df = df[~df['Top Brands'].str.contains('Others', case=False)]
        df = df[~df['Top Brands'].str.contains('Grand Total', case=False)]
        df = df[df['Value Share'] > 0.01]
        df = df[df['Promo Value'] > 0]
        df = df.sort_values(by='Value Share', ascending=False).reset_index(drop=True)

        # Row total over all promo types, used as the denominator of each share.
        # Kept as a local Series so the filtered frame is not written to.
        type_total = df[promo_col].sum(axis=1)

        # Update slide title, data source and header
        shapes = slide.shapes
        titleNumber = get_shape_number(shapes, "% Promo sales by Promo Type | Total Category | Carrefour | P12M")
        datasourcenum = get_shape_number(shapes, "DATA SOURCE: Trade Panel/Retailer Data | Ending Nov 2022")
        headerNumber = get_shape_number(shapes, 'Promo Sales by promo type (Replace With SO WHAT)')
        if titleNumber is not None:
            # Each lookup is checked on its own: a missing data-source or header
            # placeholder must not abort the update of the whole slide.
            if datasourcenum is not None:
                shapes[datasourcenum].text = data_source

            key_parts = key.split(' | ')
            title_shape = shapes[titleNumber]
            title_shape.text = title_shape.text.replace('Total Category', key_parts[1]) \
                .replace('Carrefour', key_parts[0])
            title_paragraph = title_shape.text_frame.paragraphs[0]
            title_paragraph.font.size = Pt(12)
            title_paragraph.font.name = 'Nexa Bold (Headings)'

            if headerNumber is not None:
                header_paragraph = shapes[headerNumber].text_frame.paragraphs[0]
                header_paragraph.font.size = Pt(16)
                header_paragraph.font.name = 'Nexa Bold (Headings)'

        category = df['Top Brands'].head(max_brands).tolist()
        tables, charts = createTableAndChart(slide.shapes)

        # Update table with category data; size it from the labels that will be
        # written, not from the uncapped frame.
        table = tables[0].table
        num_rows_to_remove = len(table.rows) - len(category)
        table = removeRowFromTable(table, num_rows_to_remove, rowToExclude=0)
        for row_number, row in enumerate(table.rows):
            for column_num, cell in enumerate(row.cells):
                if column_num != 0:
                    continue
                if row_number < len(category):
                    cell.text = str(category[row_number])
                else:
                    # More rows than brands: blank the label instead of failing
                    cell.text = ""
                    print(f"[Warning] row_number {row_number} is out of range for category with length {len(category)}")
                set_cell_font(cell, 'Nexa Bold', 9)
                cell.text_frame.paragraphs[0].alignment = PP_ALIGN.LEFT

        # Update chart with Promo Sales by Promo Type data
        chart = charts[0].chart
        chart_data = CategoryChartData()
        chart_data.categories = category
        # NOTE(review): values are truncated to int before dividing and a zero row
        # total yields inf/NaN - kept as in the original, confirm this is intended.
        denominator = type_total.astype(int).head(max_brands)
        for promo_type in promo_col:
            share = df[promo_type].astype(int).head(max_brands) / denominator
            chart_data.add_series(promo_type, share.tolist())
        chart.replace_data(chart_data)
        chart.chart_style = 3

        # Apply colors; the palette wraps around when there are more series than colors
        for i, series in enumerate(chart.series):
            fill = series.format.fill
            fill.solid()
            fill.fore_color.rgb = custom_colors[i % len(custom_colors)]


### Slide 14

In [70]:
def featureShare(prs, modified_promotionBrandsP12M, numOfDuplication, position=0):
    """
    Display feature share data on PowerPoint slides.

    One slide is updated per dictionary key, starting at ``position``; at most
    ``numOfDuplication`` slides are touched. The title, data source and header
    texts are refreshed and the first chart receives the 'Value Share' and
    'Feature Share' series per brand.

    Parameters:
    - prs (Presentation): PowerPoint presentation object.
    - modified_promotionBrandsP12M (dict): Dictionary of DataFrames with the columns
      'Top Brands', 'Value Share' and 'Feature Share'. Each key is split on ' | ' into
      a retailer and a category, which together replace 'Retailer' in the slide title.
    - numOfDuplication (int): Maximum number of slides to update.
    - position (int, optional): Index of the first slide to update. Default is 0.

    Returns:
    None
    """
    for key, slide_num in zip(modified_promotionBrandsP12M, range(numOfDuplication)):
        slide = prs.slides[slide_num + position]

        # NOTE(review): the first 10 rows are taken *before* filtering, so fewer than
        # 10 brands may remain and the result depends on the input order - confirm.
        df = modified_promotionBrandsP12M[key].head(10)
        # Keep brands above 1% value share, largest first
        df = df[df['Value Share'] > 0.01].sort_values(by='Value Share', ascending=False).reset_index(drop=True)
        # Drop the 'Others' / 'Grand Total' rows
        df = df[~df['Top Brands'].str.contains('Others', case=False)]
        df = df[~df['Top Brands'].str.contains('Grand Total', case=False)]

        shapes = slide.shapes
        titleNumber = get_shape_number(shapes, "Feature Share vs. Fair Share | Retailer | P12M")
        datasourcenum = get_shape_number(shapes, "Data Source | Nielsen | Ending Mar 2022")
        headerNumber = get_shape_number(shapes, 'Feature Share vs. Fair Share (Replace With SO WHAT)')
        if titleNumber is not None:
            # Each lookup is checked on its own: a missing data-source or header
            # placeholder must not abort the update of the whole slide.
            if datasourcenum is not None:
                shapes[datasourcenum].text = data_source

            # The key is only needed for the title, so it is parsed here
            key_parts = key.split(' | ')
            ret = key_parts[0]
            cat = key_parts[1]
            title_shape = shapes[titleNumber]
            # Replace the 'Retailer' placeholder with "<retailer> | <category>"
            title_shape.text = title_shape.text.replace('Retailer', ret + ' | ' + cat)
            title_paragraph = title_shape.text_frame.paragraphs[0]
            title_paragraph.font.size = Pt(12)
            title_paragraph.font.name = 'Nexa Bold (Headings)'

            if headerNumber is not None:
                header_paragraph = shapes[headerNumber].text_frame.paragraphs[0]
                header_paragraph.font.size = Pt(16)
                header_paragraph.font.name = 'Nexa Bold (Headings)'

        # Only the chart is updated on this slide; the tables are not used
        _, charts = createTableAndChart(slide.shapes)
        chart = charts[0].chart
        chart_data = CategoryChartData()
        chart_data.categories = df['Top Brands'].tolist()
        chart_data.add_series('Value Share', df['Value Share'].tolist())
        chart_data.add_series('Feature Share', df['Feature Share'].tolist())
        chart.replace_data(chart_data)

# Add documentation and inline comments


In [71]:
def featureShare_no(prs, modified_promotionBrandsP12M, numOfDuplication, position=0):
    """
    Display feature share data on PowerPoint slides.

    One slide is updated per dictionary key, starting at ``position``; at most
    ``numOfDuplication`` slides are touched. The title, data source and header
    texts are refreshed and the first chart receives the 'Value Share' and
    'Feature Share' series per brand.

    Parameters:
    - prs (Presentation): PowerPoint presentation object.
    - modified_promotionBrandsP12M (dict): Dictionary of DataFrames with the columns
      'Top Brands', 'Value Share' and 'Feature Share'. Each key is split on ' | ' into
      a retailer and a category, which together replace 'Retailer' in the slide title.
    - numOfDuplication (int): Maximum number of slides to update.
    - position (int, optional): Index of the first slide to update. Default is 0.

    Returns:
    None
    """
    for key, slide_num in zip(modified_promotionBrandsP12M, range(numOfDuplication)):
        slide = prs.slides[slide_num + position]

        # NOTE(review): the first 10 rows are taken *before* filtering, so fewer than
        # 10 brands may remain and the result depends on the input order - confirm.
        df = modified_promotionBrandsP12M[key].head(10)
        # Keep brands above 1% value share, largest first
        df = df[df['Value Share'] > 0.01].sort_values(by='Value Share', ascending=False).reset_index(drop=True)
        # Drop the 'Others' / 'Grand Total' rows
        df = df[~df['Top Brands'].str.contains('Others', case=False)]
        df = df[~df['Top Brands'].str.contains('Grand Total', case=False)]

        shapes = slide.shapes
        titleNumber = get_shape_number(shapes, "Feature Share vs. Fair Share | Retailer | P12M")
        datasourcenum = get_shape_number(shapes, "Data Source | Nielsen | Ending Mar 2022")
        headerNumber = get_shape_number(shapes, 'Feature Share vs. Fair Share (Replace With SO WHAT)')
        if titleNumber is not None:
            # Each lookup is checked on its own: a missing data-source or header
            # placeholder must not abort the update of the whole slide.
            if datasourcenum is not None:
                shapes[datasourcenum].text = data_source

            # The key is only needed for the title, so it is parsed here
            key_parts = key.split(' | ')
            ret = key_parts[0]
            cat = key_parts[1]
            title_shape = shapes[titleNumber]
            # Replace the 'Retailer' placeholder with "<retailer> | <category>"
            title_shape.text = title_shape.text.replace('Retailer', ret + ' | ' + cat)
            title_paragraph = title_shape.text_frame.paragraphs[0]
            title_paragraph.font.size = Pt(12)
            title_paragraph.font.name = 'Nexa Bold (Headings)'

            if headerNumber is not None:
                header_paragraph = shapes[headerNumber].text_frame.paragraphs[0]
                header_paragraph.font.size = Pt(16)
                header_paragraph.font.name = 'Nexa Bold (Headings)'

        # Only the chart is updated on this slide; the tables are not used
        _, charts = createTableAndChart(slide.shapes)
        chart = charts[0].chart
        chart_data = CategoryChartData()
        chart_data.categories = df['Top Brands'].tolist()
        chart_data.add_series('Value Share', df['Value Share'].tolist())
        chart_data.add_series('Feature Share', df['Feature Share'].tolist())
        chart.replace_data(chart_data)

# Add documentation and inline comments


### Slide 15

In [72]:
def displayShare(prs, modified_promotionBrandsP12M, numOfDuplication, position=0):
    """
    Display share data on PowerPoint slides.

    One slide is updated per dictionary key, starting at ``position``; at most
    ``numOfDuplication`` slides are touched. The largest non-client brands are
    listed first, followed by the client brands, aiming at 10 brands in total.
    The first chart receives the 'Value Share' and 'Display Share' series.

    Parameters:
    - prs (Presentation): PowerPoint presentation object.
    - modified_promotionBrandsP12M (dict): Dictionary of DataFrames with the columns
      'Top Brands', 'Value Share', 'Promo Value' and 'Display Share'. Each key is split
      on ' | ' into a retailer and a category, which together replace 'Retailer' in the
      slide title.
    - numOfDuplication (int): Maximum number of slides to update.
    - position (int, optional): Index of the first slide to update. Default is 0.

    Returns:
    None
    """
    for key, slide_num in zip(modified_promotionBrandsP12M, range(numOfDuplication)):
        slide = prs.slides[slide_num + position]

        df = modified_promotionBrandsP12M[key]

        # Filter out 'Others', brands with low value share, and zero promo value
        df = df[~df['Top Brands'].str.contains('Others', case=False)]
        df = df[~df['Top Brands'].str.contains('Grand Total', case=False)]
        df = df[df['Value Share'] > 0.01]
        df = df[df['Promo Value'] > 0]

        # Client brands are always kept; the largest remaining brands fill the free slots
        df_client = selectClientBrands(df, 'Top Brands', 'Value Share')
        number_of_brands_needed = max(10 - len(df_client), 0)
        competitors = df[~df['Top Brands'].isin(client_brands)]
        competitors = competitors.sort_values(by='Value Share', ascending=False).head(number_of_brands_needed)

        # Additional brands first, then the client brands
        df = pd.concat([competitors, df_client], ignore_index=True)
        # Re-applied on the combined frame, as in the original flow
        df = df[df['Promo Value'] > 0]
        df = df.reset_index(drop=True)

        shapes = slide.shapes
        titleNumber = get_shape_number(shapes, "Display Share vs. Fair Share | Retailer | P12M")
        datasourcenum = get_shape_number(shapes, "Data Source | Nielsen | Ending Mar 2022")
        headerNumber = get_shape_number(shapes, 'Display Share vs. Fair Share (Replace With SO WHAT)')
        if titleNumber is not None:
            # Each lookup is checked on its own: a missing data-source or header
            # placeholder must not abort the update of the whole slide.
            if datasourcenum is not None:
                shapes[datasourcenum].text = data_source

            # The key is only needed for the title, so it is parsed here
            key_parts = key.split(' | ')
            ret = key_parts[0]
            cat = key_parts[1]
            title_shape = shapes[titleNumber]
            # Replace the 'Retailer' placeholder with "<retailer> | <category>"
            title_shape.text = title_shape.text.replace('Retailer', ret + ' | ' + cat)
            title_paragraph = title_shape.text_frame.paragraphs[0]
            title_paragraph.font.size = Pt(12)
            title_paragraph.font.name = 'Nexa Bold (Headings)'

            if headerNumber is not None:
                header_paragraph = shapes[headerNumber].text_frame.paragraphs[0]
                header_paragraph.font.size = Pt(16)
                header_paragraph.font.name = 'Nexa Bold (Headings)'

        # Only the chart is updated on this slide; the tables are not used
        _, charts = createTableAndChart(slide.shapes)
        chart = charts[0].chart
        chart_data = CategoryChartData()
        chart_data.categories = df['Top Brands'].tolist()
        chart_data.add_series('Value Share', df['Value Share'].tolist())
        chart_data.add_series('Display Share', df['Display Share'].tolist())
        chart.replace_data(chart_data)

# Add documentation and inline comments


In [None]:
def displayShare_no(prs, modified_promotionBrandsP12M, numOfDuplication, position=0):
    """
    Fill the "Display Share vs. Fair Share" slides, one slide per dictionary entry.

    Parameters:
    - prs (Presentation): PowerPoint presentation whose slides are updated in place.
    - modified_promotionBrandsP12M (dict): Frames keyed by a ' | '-separated string.
      The first two key parts are written into the slide title. Each frame is read
      for 'Top Brands', 'Value Share', 'Promo Value' and 'Display Share'.
    - numOfDuplication (int): Upper bound on the number of slides to fill.
    - position (int, optional): Index of the first slide to fill. Default is 0.

    Returns:
    None

    Note: reads the module-level ``data_source`` for the data source text box.
    """
    heading_font = 'Nexa Bold (Headings)'

    for slide_offset, key in zip(range(numOfDuplication), modified_promotionBrandsP12M):
        slide = prs.slides[slide_offset + position]

        # Drop aggregate rows, then brands below the share / promo thresholds.
        brand_rows = modified_promotionBrandsP12M[key]
        for excluded_label in ('Others', 'Grand Total'):
            brand_rows = brand_rows[~brand_rows['Top Brands'].str.contains(excluded_label, case=False)]
        brand_rows = brand_rows[brand_rows['Value Share'] > 0.01]
        brand_rows = brand_rows[brand_rows['Promo Value'] > 0]
        brand_rows = brand_rows.sort_values(by='Value Share', ascending=False).reset_index(drop=True)

        # Locate the template text boxes by their placeholder text.
        shapes = slide.shapes
        titleNumber = get_shape_number(shapes, "Display Share vs. Fair Share | Retailer | P12M")
        datasourcenum = get_shape_number(shapes, "Data Source | Nielsen | Ending Mar 2022")
        headerNumber = get_shape_number(shapes, 'Display Share vs. Fair Share (Replace With SO WHAT)')

        key_parts = key.split(' | ')
        ret = key_parts[0]
        cat = key_parts[1]

        if titleNumber is not None:
            shapes[datasourcenum].text = data_source

            # The 'Retailer' placeholder becomes "<first key part> | <second key part>".
            title_shape = shapes[titleNumber]
            title_shape.text = title_shape.text.replace('Retailer', ret + ' | ' + cat)
            title_font = shapes[titleNumber].text_frame.paragraphs[0].font
            title_font.size = Pt(12)
            title_font.name = heading_font

            header_font = shapes[headerNumber].text_frame.paragraphs[0].font
            header_font.size = Pt(16)
            header_font.name = heading_font

        # Push the filtered brands into the first chart on the slide.
        tables, charts = createTableAndChart(slide.shapes)
        chart_data = CategoryChartData()
        chart_data.categories = brand_rows['Top Brands'].tolist()
        for series_name in ('Value Share', 'Display Share'):
            chart_data.add_series(series_name, brand_rows[series_name].tolist())
        charts[0].chart.replace_data(chart_data)

# Add documentation and inline comments


### Slide 16

In [74]:
def PromoFrequency(prs, modified_promotionEndOfWeek, numOfDuplication, position=0):
    """
    Fill the "Promo Frequency learnings" slides, one slide per dictionary entry.

    Parameters:
    prs (Presentation): PowerPoint presentation object to modify in place.
    modified_promotionEndOfWeek (dict): Frames keyed by a ' | '-separated string with at
        least three parts; the parts replace 'Brand', 'National' and 'Category' in the
        title (in that key order). Each frame is read for 'End of Week' plus the five
        series columns listed below.
    numOfDuplication (int): Upper bound on the number of slides to update.
    position (int, optional): Index of the first slide to update. Defaults to 0.

    Returns:
    None. The slides are updated in place.

    Note: reads the module-level ``data_source`` and ``currency`` values.
    """
    series_names = ('Promo Value', 'Non Promo Value', 'Value Sales', 'Base Sales', 'VSOD')
    heading_font = 'Nexa Bold (Headings)'

    for slide_offset, key in zip(range(numOfDuplication), modified_promotionEndOfWeek):
        slide = prs.slides[slide_offset + position]
        weekly = modified_promotionEndOfWeek[key]
        shapes = slide.shapes

        # Locate the template text boxes by their placeholder text.
        titleNumber = get_shape_number(shapes, "Promo Frequency learnings | Category | Brand | National")
        datasourcenum = get_shape_number(shapes, "Data Source | Trade Panel")
        headerNumber = get_shape_number(shapes, 'Promo Frequency learnings (Replace With SO WHAT)')
        currency_sign = get_shape_number(shapes, "Value Sales (€)")

        if titleNumber is not None:
            shapes[datasourcenum].text = data_source

            # Placeholders are substituted one after another: Category, National, Brand.
            title_text = shapes[titleNumber].text
            key_parts = key.split(' | ')
            title_text = title_text.replace('Category', key_parts[2])
            title_text = title_text.replace('National', key_parts[1])
            title_text = title_text.replace('Brand', key_parts[0])
            shapes[titleNumber].text = title_text

            for shape_index, point_size in ((titleNumber, 12), (headerNumber, 16)):
                heading = shapes[shape_index].text_frame.paragraphs[0].font
                heading.size = Pt(point_size)
                heading.name = heading_font

            # Swap the euro sign of the axis caption for the configured currency.
            caption = shapes[currency_sign]
            caption.text = caption.text.replace('€', currency)
            caption_font = shapes[currency_sign].text_frame.paragraphs[0].font
            caption_font.size = Pt(10)
            caption_font.name = 'Nexa Bold'
            caption_font.color.rgb = RGBColor(0,160,151)

        # Rebuild the first chart from the weekly frame.
        tables, charts = createTableAndChart(slide.shapes)
        chart_data = CategoryChartData()
        chart_data.categories = weekly['End of Week']
        for name in series_names:
            chart_data.add_series(name, weekly[name].tolist())
        charts[0].chart.replace_data(chart_data)


### Slide 15

In [None]:
def promoSalesPerRetailer(prs, endOfWeek, numOfDuplicates, dfGroup, position=0):
    """
    Update PowerPoint presentation with Promo Sales per Retailer data.

    Parameters:
    prs (Presentation): PowerPoint presentation object to modify in place.
    endOfWeek (dict): Frames keyed by the strings found in ``dfGroup``; each frame is
        read for 'End of Week', 'Non Promo Volume' and 'Promo Volume'.
    numOfDuplicates (int): Number of slides to update.
    dfGroup (list): One entry per slide; each entry is a sequence of ' | '-separated
        key strings, one per chart / table row on that slide.
    position (int, optional): Starting slide position in the presentation. Defaults to 0.

    Returns:
    None. The slides are updated in place.

    Note: reads the module-level ``data_source`` for the data source text box.
    """
    for slide_num in range(numOfDuplicates):
        # Keys of the frames shown on this slide.
        dfs = dfGroup[slide_num]

        # NOTE(review): brand and category are both taken from key part 1, so the
        # title shows the same text twice. One of them probably belongs to another
        # index - confirm the key layout with the caller before changing it.
        key_parts = dfs[0].split(' | ')
        brand = key_parts[1]
        cat = key_parts[1]

        shapes = prs.slides[slide_num + position].shapes

        # Find and update the title, data source and header shapes.
        titleNumber = get_shape_number(shapes, 'Promo sales per retailer | Findus | Fish Fingers')
        datasourcenum = get_shape_number(shapes, "Data Source | Trade Panel")
        headerNumber = get_shape_number(shapes, 'Promo sales per retailer (Replace With SO WHAT)')
        if titleNumber is not None:
            shapes[datasourcenum].text = data_source
            shapes[titleNumber].text = shapes[titleNumber].text.replace('Findus', brand).replace('Fish Fingers', cat)
            shapes[titleNumber].text_frame.paragraphs[0].font.size = Pt(12)
            shapes[titleNumber].text_frame.paragraphs[0].font.name = 'Nexa Bold (Headings)'
            shapes[headerNumber].text_frame.paragraphs[0].font.size = Pt(16)
            shapes[headerNumber].text_frame.paragraphs[0].font.name = 'Nexa Bold (Headings)'

        tables, charts = createTableAndChart(shapes)

        # Charts are filled in the order returned by createTableAndChart: the n-th
        # chart shows the n-th key. Indexing directly (instead of going through
        # per-count identity lookup tables) also works for more than four charts.
        for chartNum, chart_shape in enumerate(charts):
            weekly = endOfWeek[dfs[chartNum]]
            chart_data = CategoryChartData()
            chart_data.categories = weekly['End of Week']
            chart_data.add_series('Non Promo Volume', weekly['Non Promo Volume'])
            chart_data.add_series('Promo Volume', weekly['Promo Volume'])
            chart_shape.chart.replace_data(chart_data)

        # First table column: third key part of the key matching each row.
        table = tables[0].table
        for rowNum, row in enumerate(table.rows):
            cell = row.cells[0]
            cell.text = dfs[rowNum].split(' | ')[2]
            cell.text_frame.paragraphs[0].font.name = 'Nexa Bold'
            cell.text_frame.paragraphs[0].font.size = Pt(8)
            cell.text_frame.paragraphs[0].alignment = PP_ALIGN.CENTER


### Slide 16

In [76]:
def _scale_axis_around(axis, values):
    """Bound ``axis`` to 0.8x the smallest and 1.2x the largest entry of ``values``."""
    axis.minimum_scale = 0.8 * min(values)
    axis.maximum_scale = 1.2 * max(values)


def _embed_promo_sales(chart, chart_data, promo_sales):
    """Rewrite the chart's embedded workbook with an extra 'Promo Sales' column.

    The standard worksheet for ``chart_data`` is regenerated and a 'Promo Sales'
    header plus values are written into column index 4 before the workbook blob is
    stored on the chart. This relies on private python-pptx members
    (``_workbook_writer``, ``_workbook``), so it may need revisiting when the
    library is upgraded.
    """
    xlsx_file = BytesIO()
    with chart_data._workbook_writer._open_worksheet(xlsx_file) as (workbook, worksheet):
        chart_data._workbook_writer._populate_worksheet(workbook, worksheet)
        worksheet.write(0, 4, "Promo Sales")
        worksheet.write_column(1, 4, promo_sales, None)
    chart._workbook.update_from_xlsx_blob(xlsx_file.getvalue())


def valueUplift(prs, modified_valueUplift, numOfDuplication, position=0):
    """
    Update PowerPoint presentation with Value Uplift vs Discount Depth data.

    Each slide holds two scatter charts sharing the same y values (value uplift):
    the first plots them against discount depth, the second against promo price
    per unit.

    Parameters:
    prs (Presentation): PowerPoint presentation object to modify in place.
    modified_valueUplift (dict): Frames keyed by a ' | '-separated string with at least
        three parts (they replace 'Brand', 'Coop Alleanza' and 'Category/Sector' in the
        title, in that key order). Each frame is read for 'Discount Depth (%)',
        'Promo Price/Unit', 'Promo Sales' and the value uplift column.
    numOfDuplication (int): Upper bound on the number of slides to update.
    position (int, optional): Starting slide position in the presentation. Defaults to 0.

    Returns:
    None. The slides are updated in place.

    Note: reads the module-level ``data_source``, ``normalized``, ``decimals`` and
    ``currency`` values and the ``mround_numpy`` helper.
    """
    for key, slide_num in zip(modified_valueUplift, range(numOfDuplication)):
        slide = prs.slides[slide_num + position]
        df = modified_valueUplift[key]
        shapes = slide.shapes

        # Find and update the title, data source and header shapes.
        titleNumber = get_shape_number(shapes, "Value Uplift vs discount depth | By Event | Category/Sector | Brand | Coop Alleanza | P12M")
        datasourcenum = get_shape_number(shapes, "Data Source | Trade Panel")
        headerNumber = get_shape_number(shapes, 'Value Uplift vs discount depth (Replace With SO WHAT)')
        if titleNumber is not None:
            shapes[datasourcenum].text = data_source
            key_parts = key.split(' | ')
            shapes[titleNumber].text = shapes[titleNumber].text.replace('Category/Sector', key_parts[2]) \
                .replace('Coop Alleanza', key_parts[1]).replace('Brand', key_parts[0])
            shapes[titleNumber].text_frame.paragraphs[0].font.size = Pt(12)
            shapes[titleNumber].text_frame.paragraphs[0].font.name = 'Nexa Bold (Headings)'
            shapes[headerNumber].text_frame.paragraphs[0].font.size = Pt(16)
            shapes[headerNumber].text_frame.paragraphs[0].font.name = 'Nexa Bold (Headings)'

        tables, charts = createTableAndChart(slide.shapes)
        chart1 = charts[0].chart  # discount depth vs value uplift
        chart2 = charts[1].chart  # promo price per unit vs value uplift

        uplift_column = 'Value Uplift (v. base) Normalized' if normalized else 'Value Uplift (v. base)'
        y_values = df[uplift_column].tolist()
        promo_sales = df['Promo Sales'].to_list()

        # Snap x values to a grid: 0.05 steps for discount depth, 0.5 for price.
        x_values_discount = [mround_numpy(value, 0.05) for value in df['Discount Depth (%)'].tolist()]
        x_values_price = [mround_numpy(value, 0.5) for value in df['Promo Price/Unit'].tolist()]

        # First chart: Discount Depth vs Value Uplift.
        chart_data1 = XyChartData()
        series1 = chart_data1.add_series('Scatter')
        for x_value, y_value in zip(x_values_discount, y_values):
            series1.add_data_point(x_value, y_value)
        chart1.replace_data(chart_data1)

        _scale_axis_around(chart1.value_axis, y_values)
        _scale_axis_around(chart1.category_axis, x_values_discount)
        _embed_promo_sales(chart1, chart_data1, promo_sales)

        # Second chart: Promo Price/Unit vs Value Uplift.
        chart_data2 = XyChartData()
        series2 = chart_data2.add_series('Scatter')
        for x_value, y_value in zip(x_values_price, y_values):
            series2.add_data_point(x_value, y_value)
        chart2.replace_data(chart_data2)

        # Tick labels carry no currency symbol; the currency is shown in the axis
        # title instead, so the number format is the same for every currency setup.
        x_axis = chart2.category_axis
        x_axis.tick_labels.number_format = '#,##0.00' if decimals == 2 else '#,##0'

        currencywithoutspace = currency.strip()
        x_axis.axis_title.text_frame.text = f"Promo Price/Unit ({currencywithoutspace})"
        axis_title_font = x_axis.axis_title.text_frame.paragraphs[0].font
        axis_title_font.size = Pt(8)
        axis_title_font.name = 'Nexa Bold'

        _scale_axis_around(chart2.value_axis, y_values)
        _scale_axis_around(chart2.category_axis, x_values_price)
        _embed_promo_sales(chart2, chart_data2, promo_sales)


## Seasonality

In [None]:
def seasonality(prs, data, numOfDuplicates, dfGroup, position=0,slideby="Sector"):
    """
    Update PowerPoint presentation with Seasonality Index data.

    Parameters:
    prs (Presentation): PowerPoint presentation object to modify in place.
    data (dict): Frames keyed by the strings found in ``dfGroup``; each frame is read
        for 'year', 'MonthYear' and 'Sales index'. The 'year' column of each used
        frame is converted to int in place.
    numOfDuplicates (int): Kept for interface compatibility; the number of slides
        actually updated is the number of groups after splitting (see ``dfGroup``).
    dfGroup (list): Groups of ' | '-separated key strings. A group with more than six
        keys is split into consecutive chunks of at most six, one slide per chunk.
    position (int, optional): Starting slide position in the presentation. Defaults to 0.
    slideby (str, optional): Text inserted after "By" in the slide title.
        Defaults to "Sector".

    Returns:
    None. The slides are updated in place.

    Note: reads the module-level ``data_source`` for the data source text box.
    """
    # A slide shows at most six charts, so larger groups are spread over several slides.
    updated_dfGroup = []
    for group in dfGroup:
        if len(group) > 6:
            updated_dfGroup.extend(group[start:start + 6] for start in range(0, len(group), 6))
        else:
            updated_dfGroup.append(group)

    for slide_num, dfs in enumerate(updated_dfGroup):
        key_parts = dfs[0].split(' | ')
        retailer_ = key_parts[0]
        cat_ = key_parts[1]
        shapes = prs.slides[slide_num + position].shapes

        # Find and update the title, data source and header shapes.
        titleNumber = get_shape_number(shapes, 'Seasonality Index | By Sector | National | P3Y')
        datasourcenum = get_shape_number(shapes, "DATA SOURCE: Trade Panel/Retailer Data | Ending Dec 2023")
        headerNumber = get_shape_number(shapes, 'Seasonality Index (Replace with SO WHAT)')
        if titleNumber is not None:
            shapes[datasourcenum].text = data_source
            shapes[titleNumber].text = f'Seasonality Index | By {slideby} | {cat_} | {retailer_} | P3Y'
            shapes[titleNumber].text_frame.paragraphs[0].font.size = Pt(12)
            shapes[titleNumber].text_frame.paragraphs[0].font.name = 'Nexa Bold (Headings)'
            shapes[headerNumber].text_frame.paragraphs[0].font.size = Pt(16)
            shapes[headerNumber].text_frame.paragraphs[0].font.name = 'Nexa Bold (Headings)'

        tables, charts = createTableAndChart(shapes)

        # The n-th key normally goes to the n-th chart; on five-chart slides the
        # second and third charts are swapped.
        if len(charts) == 5:
            chart_order = [0, 2, 1, 3, 4]
        else:
            chart_order = range(len(charts))

        for chartNum, chart_index in enumerate(chart_order):
            chart = charts[chart_index].chart
            chart_df = data[dfs[chartNum]]
            chart_df['year'] = chart_df['year'].astype(int)

            chart_data = CategoryChartData()
            chart_data.categories = chart_df['MonthYear']
            chart_data.add_series('Sales index', chart_df['Sales index'])
            chart.replace_data(chart_data)
            # NOTE(review): python-pptx documents has_data_labels on plots, not on
            # series - confirm this assignment has the intended effect.
            chart.series[0].has_data_labels = True

            # Index 100 is the baseline: the axis crosses there on a fixed 0-200 scale.
            value_axis = chart.value_axis
            value_axis.crosses = XL_AXIS_CROSSES.CUSTOM
            value_axis.crosses_at = 100
            value_axis.minimum_scale = 0
            value_axis.maximum_scale = 200

            # Row positions of the yearly maxima. Chart points are addressed by
            # position, so the frame is renumbered first; comparing against the
            # original index labels would mislabel frames without a 0..n-1 index.
            by_position = chart_df.reset_index(drop=True)
            peak_positions = set(by_position.groupby('year')['Sales index'].idxmax())

            for point_pos, point in enumerate(chart.series[0].points):
                if point_pos not in peak_positions:
                    continue
                value = chart_df.iloc[point_pos]['Sales index']
                data_label = point.data_label
                data_label.has_text_frame = True
                data_label.text_frame.text = f"{value}%"
                data_label.position = XL_LABEL_POSITION.CENTER
                data_label.text_frame.paragraphs[0].runs[0].font.size = Pt(8)
            chart.replace_data(chart_data)

        # First table column: last key part of the key matching each row.
        table = tables[0].table
        for rowNum, row in enumerate(table.rows):
            cell = row.cells[0]
            cell.text = dfs[rowNum].split(' | ')[-1]
            cell.text_frame.paragraphs[0].font.name = 'Nexa Bold'
            cell.text_frame.paragraphs[0].font.size = Pt(8)
            cell.text_frame.paragraphs[0].alignment = PP_ALIGN.CENTER


In [None]:
def Promotional_Frequency(prs, data, numOfDuplicates, dfGroup, position=0):
    """
    Update PowerPoint presentation with Promotional Frequency Analysis data.

    Parameters:
    prs (Presentation): PowerPoint presentation object to modify in place.
    data (dict): Frames keyed by the strings found in ``dfGroup``; each frame is read
        for 'VSOD', 'Value Uplift (v. base) Normalized' and 'End of Week'. The helper
        columns 'Weekly VSOD', 'try' and 'New Uplift' are written onto each used frame.
    numOfDuplicates (int): Number of slides to update.
    dfGroup (list): One entry per slide; each entry is a sequence of ' | '-separated
        key strings, one per chart / table row on that slide.
    position (int, optional): Starting slide position in the presentation. Defaults to 0.

    Returns:
    None. The slides are updated in place.

    Note: reads the module-level ``data_source`` for the data source text box.
    """
    uplift_col = 'Value Uplift (v. base) Normalized'

    for slide_num in range(numOfDuplicates):
        dfs = dfGroup[slide_num]
        if len(dfs) == 0:
            # Nothing to render; without keys there is no title and no chart data.
            continue

        # The header is derived from the first key of the group and set once.
        key_parts = dfs[0].split(' | ')
        col_ = key_parts[0]
        client = key_parts[1]
        market = key_parts[-1]

        shapes = prs.slides[slide_num + position].shapes
        titleNumber = get_shape_number(shapes, 'Promotional Frequency Analysis | Economy | Bucegi | P12M')
        datasourcenum = get_shape_number(shapes, "DATA SOURCE: Trade Panel/Retailer Data | Ending Apr 2024")
        headerNumber = get_shape_number(shapes, 'Promotional Frequency Analysis (Replace with So What)')
        if titleNumber is not None:
            shapes[datasourcenum].text = data_source
            shapes[titleNumber].text = shapes[titleNumber].text.replace('Economy', col_).replace('Bucegi', client + " | " + market)
            shapes[titleNumber].text_frame.paragraphs[0].font.size = Pt(12)
            shapes[titleNumber].text_frame.paragraphs[0].font.name = 'Nexa Bold (Headings)'
            shapes[headerNumber].text_frame.paragraphs[0].font.size = Pt(16)
            shapes[headerNumber].text_frame.paragraphs[0].font.name = 'Nexa Bold (Headings)'

        tables, charts = createTableAndChart(shapes)

        # The n-th chart on the slide shows the n-th key of the group.
        for chartNum, chart_shape in enumerate(charts):
            chart = chart_shape.chart
            chart_key = dfs[chartNum]
            chart_df = data.get(chart_key)
            if chart_df is None:
                # Leave this chart untouched instead of failing on a None frame.
                print(f"Key '{chart_key}' not found in data!")
                continue

            # 'Weekly VSOD' marks weeks with VSOD above 0.2 and a non-blank uplift.
            # 'try' caps uplifts of 2 or more at 1.8 so they stay on the chart.
            # 'New Uplift' keeps the capped uplift only for marked weeks above 0.05.
            chart_df['Weekly VSOD'] = np.where((chart_df['VSOD']>.2)&(chart_df[uplift_col] != ''),1,None)
            chart_df['try'] = np.where((chart_df[uplift_col]>=2),1.8,chart_df[uplift_col])
            chart_df['New Uplift'] = np.where((chart_df['Weekly VSOD']==1)&(chart_df[uplift_col]>0.05),chart_df['try'],None)

            chart_data = CategoryChartData()
            chart_data.categories = chart_df['End of Week'].astype(str)
            chart_data.add_series('Weekly VSOD', chart_df['Weekly VSOD'])
            chart_data.add_series('New Uplift', chart_df['New Uplift'])
            chart.replace_data(chart_data)

            # Only the second series ('New Uplift') gets labels: capped points
            # (real uplift of 200% or more) show their true percentage.
            for series_idx, series in enumerate(chart.series):
                if series_idx != 1:
                    continue
                # Computed once per chart rather than once per point.
                uplift_pct = chart_df[uplift_col].astype(float).replace(np.nan,0) * 100
                for point_idx, point in enumerate(series.points):
                    value = uplift_pct.iloc[point_idx]
                    data_label = point.data_label
                    data_label.has_text_frame = False

                    if value >=200:
                        point.marker.format.fill.solid()
                        point.marker.format.fill.fore_color.rgb = RGBColor(230,229,229)
                        point.marker.format.line.color.rgb = RGBColor(230,229,229)
                        data_label.has_text_frame = True
                        data_label.text_frame.text = (str(round(value) ) +"%")
                        data_label.position = XL_LABEL_POSITION.CENTER
                        paragraph = point.data_label.text_frame.paragraphs[0]
                        run = paragraph.runs[0] if paragraph.runs else paragraph.add_run()
                        run.font.size = Pt(8)
                        run.font.color.rgb = RGBColor(0, 160, 151)

            chart.replace_data(chart_data)

        # First table column: third key part of the key matching each row.
        table = tables[0].table
        for rowNum, row in enumerate(table.rows):
            cell = row.cells[0]
            cell.text = dfs[rowNum].split(' | ')[2]
            cell.text_frame.paragraphs[0].font.name = 'Nexa Bold'
            cell.text_frame.paragraphs[0].font.size = Pt(8)
            cell.text_frame.paragraphs[0].alignment = PP_ALIGN.CENTER


# Cleaning Part

In [None]:
def splitListpromo(dfDictionary, markets, splitList):
    """
    Cut each market sequence into consecutive chunks whose sizes come from splitList.

    The position in splitList is shared by all markets: a market consumes sizes until
    a size would yield an empty chunk, and the next market resumes from that same size
    with its offsets counted from zero again. Once every size has been consumed, the
    remaining markets contribute nothing.

    Parameters:
    dfDictionary (dict): Not used by this function.
    markets (list): Sliceable sequences (one per market) to cut into chunks.
    splitList (list): Chunk sizes, consumed in order across all markets.

    Returns:
    list: The non-empty chunks, in the order they were cut.
    """
    chunks = []
    size_index = 0    # next size to consume from splitList
    market_start = 0  # index of the first size belonging to the current market
    size_count = len(splitList)

    for market in markets:
        while size_index < size_count:
            # Offsets are the running totals of the sizes this market already used.
            lower = sum(splitList[market_start:size_index])
            upper = lower + splitList[size_index]
            piece = market[lower:upper]

            if len(piece) == 0:
                # Market exhausted: the next market restarts its offsets here.
                market_start = size_index
                break

            chunks.append(piece)
            size_index += 1

    return chunks


In [None]:
def _normalize_brand_label(label):
    """Return 'Grand Total' for any label containing it; otherwise drop 'Total' and trim."""
    if 'Grand Total' in label:
        return 'Grand Total'
    return label.replace('Total', '').strip()


def _promote_parent_totals(df, level, ffill_level_first=False):
    """Fill the hierarchy column ``level`` of ``df`` in place.

    Zeros in ``level`` become missing, the parent column (``direct_parent[level]``)
    is forward-filled, and rows whose parent label contains 'Total' (other than
    ``categories[0]``) take the parent label as their level value. ``level`` itself
    is forward-filled either before or after that override, depending on
    ``ffill_level_first``.
    """
    parent = direct_parent[level]
    df[level] = df[level].replace(0, np.nan)
    df[parent] = df[parent].ffill()
    if ffill_level_first:
        df[level] = df[level].ffill()
    df[level] = df.apply(
        lambda row: row[parent] if 'Total' in row[parent] and row[parent] != categories[0] else row[level],
        axis=1,
    )
    if not ffill_level_first:
        df[level] = df[level].ffill()


def cleaningData(data):
    """
    Clean and preprocess data in a dictionary of DataFrames.

    Frames whose column labels contain a missing value are skipped. Non-empty frames
    whose key does not contain 'National' are cleaned according to the columns they
    carry (brand/item tables, brand tables, event tables, weekly tables, hierarchy
    tables). Every remaining missing value is replaced by 0, and frames that end up
    empty are left out of the result.

    Parameters:
    - data (dict): Dictionary containing DataFrames. The input frames are copied,
      not modified.

    Returns:
    - dict: Dictionary containing cleaned DataFrames, under the same keys.

    Note: reads the module-level ``prodORitem``, ``normalized``, ``start_date``,
    ``end_date``, ``direct_parent`` and ``categories`` values.
    """
    cleaned_data = {}

    for key in data:
        df = data[key].copy()

        # A missing column label means the header row was not detected; skip.
        if df.columns.isna().any():
            continue

        if df.shape[0] > 0 and 'National' not in key:
            item_col = f'{prodORitem}'

            if 'Top Brands' in df.columns and item_col in df.columns:
                # Brand/item table: brand labels only appear on the first row of a group.
                df['Top Brands'] = df['Top Brands'].ffill()
                df[item_col] = df[item_col].fillna('')
                df = df.fillna(0)
                df['Top Brands'] = df['Top Brands'].apply(_normalize_brand_label)
                df = df.reset_index(drop=True)

            elif 'Top Brands' in df.columns:
                df['Top Brands'] = df['Top Brands'].ffill()

                # Everything except the two index-vs-year-ago columns is zero-filled.
                # Those two stay missing so the "vs YA" columns can tell "no value"
                # apart from a real figure. The same mask applies whether or not
                # normalized uplift is used.
                uplift_iya = 'Value Uplift Normalized IYA' if normalized else 'Value Uplift IYA'
                fillable = ~df.columns.isin(['VSOD IYA', uplift_iya])
                df.loc[:, fillable] = df.loc[:, fillable].fillna(0)
                df['Promo Value Uplift vs YA'] = np.where(df[uplift_iya].isna(), None, df[uplift_iya] - 1)
                df['VSOD Evaluation vs YA'] = np.where(df['VSOD IYA'].isna(), None, df['VSOD IYA'] - 1)

                df['Top Brands'] = df['Top Brands'].apply(_normalize_brand_label)
                df = df[~df['Top Brands'].str.contains('Total', case=False)]
                df = df[df['Total Size'] == 0].reset_index(drop=True)

            elif 'End of Week' in df.columns and item_col in df.columns:
                uplift_col = 'Value Uplift (v. base) Normalized' if normalized else 'Value Uplift (v. base)'
                df[item_col] = df[item_col].ffill()
                df = df[df[uplift_col] >= 0].copy()
                if not normalized:
                    # NOTE(review): the date parsing and year filter only run on the
                    # non-normalized path; confirm the normalized path really
                    # receives 'End of Week' in a directly comparable form.
                    df['End of Week'] = pd.to_datetime(df['End of Week'], errors='coerce')
                    df = df[df['End of Week'].dt.year.isin([2023, 2024])]

                new_start = "2023-07-31"
                df = df[(df['End of Week'] >= new_start) & (df['End of Week'] <= end_date)]
                df = df[~df[item_col].str.contains('Total', case=False)].reset_index(drop=True)
                df = df[df['Promo Sales'] > 1000]
                # Drop events without an uplift and outliers of 10x or more.
                df = df.dropna(subset=[uplift_col])
                df = df[df[uplift_col] < 10]
                df = df.fillna(0)
                df = df.reset_index(drop=True)

            elif 'End of Week' in df.columns:
                df['End of Week'] = df['End of Week'].astype(str)
                df = df[~df['End of Week'].str.contains('Total', case=False)].reset_index(drop=True)
                df['End of Week'] = pd.to_datetime(df['End of Week'])
                df['End of Week'] = df['End of Week'].dt.strftime("%d-%b-%y")
                # Coarse filter on the formatted date text before the exact window below.
                df = df[(df['End of Week'].str.contains('-22|-23|-24|Jan-25')) & (df['End of Week'].notna())].copy()
                df['End of Week'] = pd.to_datetime(df['End of Week'])
                df = df[(df['End of Week'] >= start_date) & (df['End of Week'] <= end_date)]

            elif 'Grand Total' in df.columns:
                # Hierarchy table: the second column names the level being reported.
                level = df.columns[1]
                if level in ('Sector', 'Segment', 'SubSegment', 'SubCategory'):
                    # 'Sector' forward-fills the level before applying the parent
                    # totals; the other levels forward-fill afterwards.
                    _promote_parent_totals(df, level, ffill_level_first=(level == 'Sector'))
                df = df.reset_index(drop=True)

        df = df.fillna(0)

        if df.shape[0] > 0:
            cleaned_data[key] = df

    return cleaned_data


In [None]:
def cleaningdata_with_grand_total(data):
    """
    Clean and preprocess data in a dictionary of DataFrames, keeping 'Grand Total' rows.

    Frames that are empty, or whose key contains 'National', are left out of
    the result. Every other frame is cleaned by the first rule that matches
    its columns:

    1. 'Top Brands' and the product column (global ``prodORitem``): forward-fill
       the brand, blank out missing products, zero-fill the rest and normalise
       the brand labels.
    2. 'Top Brands' only: forward-fill the brand, derive
       'Promo Value Uplift vs YA' and 'VSOD Evaluation vs YA' as ``IYA - 1``
       (``None`` where the IYA is missing), normalise the brand labels and
       keep the rows whose 'Total Size' equals 0.
    3. 'End of Week' and the product column: keep dated 2023/2024 rows inside
       ``[start_date, end_date]``, drop product totals, keep rows with
       'Promo Sales' above 1000 and a non-missing value uplift.
    4. 'End of Week' only: drop total rows, keep the weeks inside
       ``[start_date, end_date]`` and drop rows with any missing value.
    5. A 'Grand Total' column: forward-fill 'Sector' and zero-fill the rest.

    Keys made of three ' | '-separated parts whose first part is in the
    global ``categories`` are stored with that first part moved to the end.

    Reads the notebook globals ``prodORitem``, ``normalized``, ``start_date``,
    ``end_date`` and ``categories``.

    Parameters:
    - data (dict): Dictionary containing DataFrames.

    Returns:
    - dict: Dictionary containing the cleaned, non-empty DataFrames.
    """

    def _brand_label(label):
        # Collapse any label mentioning 'Grand Total'; otherwise drop the
        # 'Total' marker so brand totals carry the plain brand name.
        if 'Grand Total' in label:
            return 'Grand Total'
        return label.replace('Total', '').strip()

    cleaned = {}

    # Iterate over each key-value pair in the input dictionary
    for key in data:
        df = data[key].copy()

        if df.shape[0] == 0 or 'National' in key:
            continue

        product_col = f'{prodORitem}'

        # Column assignments are used instead of chained
        # ``df[col].fillna(..., inplace=True)``: the chained form does not
        # update ``df`` under pandas Copy-on-Write, and ``fillna(method=...)``
        # is deprecated in favour of ``ffill()``.
        if 'Top Brands' in df.columns and product_col in df.columns:
            df['Top Brands'] = df['Top Brands'].ffill()
            df[product_col] = df[product_col].fillna('')
            df = df.fillna(0)
            df['Top Brands'] = df['Top Brands'].apply(_brand_label)

        elif 'Top Brands' in df.columns:
            df['Top Brands'] = df['Top Brands'].ffill()
            if normalized:
                # Zero-fill everything except the IYA columns so that a
                # missing IYA can still be reported as None below.
                fill_mask = ~df.columns.isin(['VSOD IYA', 'Value Uplift Normalized IYA'])
                df.loc[:, fill_mask] = df.loc[:, fill_mask].fillna(0)
                uplift_iya = df['Value Uplift Normalized IYA']
            else:
                # NOTE(review): unlike the normalized branch this mask is not
                # negated, so only the IYA columns are zero-filled and the
                # isna() checks below can no longer be true. Looks like a
                # missing '~'; behaviour kept as-is until confirmed.
                fill_mask = df.columns.isin(['VSOD IYA', 'Value Uplift IYA'])
                df.loc[:, fill_mask] = df.loc[:, fill_mask].fillna(0)
                uplift_iya = df['Value Uplift IYA']
            df['Promo Value Uplift vs YA'] = np.where(uplift_iya.isna(), None, uplift_iya - 1)

            df['VSOD Evaluation vs YA'] = np.where(df['VSOD IYA'].isna(), None, df['VSOD IYA'] - 1)
            df['Top Brands'] = df['Top Brands'].apply(_brand_label)
            df = df[df['Total Size'] == 0].reset_index(drop=True)

        elif 'End of Week' in df.columns and product_col in df.columns:
            df[product_col] = df[product_col].ffill()
            # NOTE(review): the year pattern is hard-coded; the date-range
            # filter below is what actually bounds the period.
            dated = (df['End of Week'].str.contains('2023|2024')) & (df['End of Week'].notna())
            # .copy() so the column assignment below targets an independent frame.
            df = df[dated].copy()
            df['End of Week'] = pd.to_datetime(df['End of Week'])
            df = df[(df['End of Week'] >= start_date) & (df['End of Week'] <= end_date)]
            df = df[~df[product_col].str.contains('Total', case=False)].reset_index(drop=True)

            df = df[df['Promo Sales'] > 1000]
            if normalized:
                df = df.dropna(subset=['Value Uplift (v. base) Normalized'])
            else:
                df = df.dropna(subset=['Value Uplift (v. base)'])
            df = df.fillna(0).reset_index(drop=True)

        elif 'End of Week' in df.columns:
            df['End of Week'] = df['End of Week'].astype(str)
            df = df[~df['End of Week'].str.contains('Total', case=False)].reset_index(drop=True)
            df['End of Week'] = pd.to_datetime(df['End of Week'])
            df['End of Week'] = df['End of Week'].dt.strftime("%d-%b-%y")
            # NOTE(review): hard-coded year suffixes; rows outside them are
            # dropped before the start_date/end_date filter is applied.
            in_years = (df['End of Week'].str.contains('-21|-22|-23|Jan-24')) & (df['End of Week'].notna())
            df = df[in_years].copy()
            df['End of Week'] = pd.to_datetime(df['End of Week'])
            df = df[(df['End of Week'] >= start_date) & (df['End of Week'] <= end_date)]
            df = df.dropna()

        elif 'Grand Total' in df.columns:
            df['Sector'] = df['Sector'].ffill()
            df = df.fillna(0)

        # Check if the key matches specific categories and modify the key accordingly
        key_parts = key.split(' | ')
        if key_parts[0] in categories and len(key_parts) == 3:
            target_key = key_parts[1] + ' | ' + key_parts[2] + ' | ' + key_parts[0]
        else:
            target_key = key

        if df.shape[0] > 0:
            cleaned[target_key] = df

    return cleaned


In [None]:
def cleaning13New(data, size_fix=1):
    """
    Compute, per brand, the share of promo value sold in small pack sizes.

    For every non-national key the frame is cleaned ('Top Brands'
    forward-filled, 'Grand Total' / 'Other' rows removed, the digits of
    'Total Size' extracted as an integer, rows with 'Value Share' <= 0.01
    dropped) and sorted by 'Value Share' descending. For each brand, the
    'Promo Value' of the rows whose size is at most
    ``max_total_size[product_key] * size_fix`` is divided by the 'Promo Value'
    of the '<brand> Total' row; that ratio is reported as 'Recruitment' and
    its complement as 'Consumption'.

    Reads the notebook globals ``areas`` and ``max_total_size``.

    Parameters:
    - data (dict): Dictionary containing raw DataFrames.
    - size_fix (int | float): Multiplier applied to the maximum total size.

    Returns:
    - dict: Dictionary of DataFrames with the columns 'Top Brands',
      'Recruitment', 'Consumption', 'Value Share' and 'SUM'; keys that yield
      no rows are omitted.
    """
    data_cleaned = {}

    for key, raw_df in data.items():
        # NOTE(review): ``'NATIONAL' in areas`` tests the global list rather
        # than the key, so every key is skipped whenever 'NATIONAL' is one of
        # the areas. Possibly meant ``'NATIONAL' in key``; kept as-is.
        if 'NATIONAL' in areas or 'National' in key:
            continue

        df = raw_df.copy()

        # Plain assignment instead of a chained inplace fillna(method=...),
        # which is deprecated and a no-op under pandas Copy-on-Write.
        df['Top Brands'] = df['Top Brands'].ffill()

        # Filter out rows where 'Top Brands' is 'Grand Total' or 'Other'
        df = df[(df['Top Brands'] != 'Grand Total') & (df['Top Brands'] != 'Other')].copy()
        # Keep only the leading run of digits of 'Total Size' (drops e.g. a 'GR' suffix)
        df['Total Size'] = df['Total Size'].str.extract(r'(\d+)', expand=False)
        df = df.fillna('0')
        df['Total Size'] = df['Total Size'].astype(int)
        df = df[df['Value Share'] > 0.01]

        # Sort data by 'Value Share' in descending order
        df = df.sort_values(by='Value Share', ascending=False).reset_index(drop=True)
        if df.empty:
            continue

        # The size limit depends only on the key, so look it up once.
        key_parts = key.split('|')
        product_key = key_parts[0] + '|' + key_parts[1]
        max_size = max_total_size.get(product_key, None)
        if max_size is None:
            # Without a size limit no brand can contribute any row.
            continue

        within_size = df['Total Size'] <= max_size * size_fix
        stripped_brands = df['Top Brands'].str.strip()

        new_data = []
        for i, brand in enumerate(df['Top Brands'].unique()):
            brand_df = df[(df['Top Brands'] == brand) & within_size]
            brand_total = df[stripped_brands == (brand + ' Total').strip()]['Promo Value'].values

            if not brand_df.empty and brand_total.size > 0 and brand_total[0] > 0:
                promo_sum = brand_df['Promo Value'].sum()
                brand_sum = promo_sum / brand_total[0]
                new_data.append({
                    'Top Brands': brand,
                    'Recruitment': brand_sum,
                    'Consumption': 1 - brand_sum,
                    # NOTE(review): ``i`` is the brand's position among the
                    # unique brands, not a row of that brand, so this reads the
                    # 'Value Share' of the i-th sorted row. Kept as-is; confirm
                    # which row was intended.
                    'Value Share': df['Value Share'][i],
                    'SUM': promo_sum,
                })

        # Create a new DataFrame with cleaned data
        new = pd.DataFrame(new_data).fillna(0)
        # Add cleaned data to the dictionary if it contains rows
        if new.shape[0] != 0:
            data_cleaned[key] = new

    return data_cleaned


In [None]:
def dict_to_pivot_general(data_dict, pivot_col, value_col, aggfunc='sum', fill_value=pd.NA):
    """
    Pivot every DataFrame of a dictionary and append 'Grand Total' figures.

    The first two columns of each frame are used as the row identifiers.
    Rows missing ``pivot_col`` or ``value_col`` are discarded, missing values
    in the second identifier column are replaced by the text "NaN", and the
    frame is pivoted so that each distinct ``pivot_col`` value becomes a
    column. A 'Grand Total' column (row-wise maximum of the pivoted columns)
    and a final 'Grand Total' row (column-wise maxima) are then added.

    Parameters:
    - data_dict (dict): Dictionary of DataFrames to pivot.
    - pivot_col (str): Column whose values become the new columns.
    - value_col (str): Column holding the values to aggregate.
    - aggfunc: Aggregation passed to ``pivot_table`` (default 'sum').
    - fill_value: Fill value passed to ``pivot_table`` (default ``pd.NA``).

    Returns:
    - dict: Dictionary with the same keys holding the pivoted DataFrames.
    """
    pivoted = {}
    for name, frame in data_dict.items():
        first_col, second_col = frame.columns[0], frame.columns[1]
        index_cols = [first_col, second_col]

        usable = frame.dropna(subset=[pivot_col, value_col], how='any')
        # A textual placeholder keeps rows with a missing second identifier
        # from being dropped by the pivot's grouping.
        usable[second_col] = usable[second_col].fillna("NaN")

        wide = usable.pivot_table(
            index=index_cols,
            columns=pivot_col,
            values=value_col,
            aggfunc=aggfunc,
            fill_value=fill_value,
        )
        wide = wide.reset_index()

        if isinstance(wide.columns, pd.MultiIndex):
            # Flatten to the innermost level of each column label.
            wide.columns = [label[-1] if isinstance(label, tuple) else label for label in wide.columns]

        # Everything after the identifier columns came out of the pivot.
        brand_cols = wide.columns[len(index_cols):]
        wide['Grand Total'] = wide[brand_cols].max(axis=1, skipna=True)

        # Summary row: label in the first identifier, blank in the second.
        totals = dict.fromkeys(index_cols, "")
        totals[first_col] = 'Grand Total'
        totals.update({brand: wide[brand].max(skipna=True) for brand in brand_cols})
        totals['Grand Total'] = wide['Grand Total'].max(skipna=True)

        pivoted[name] = pd.concat([wide, pd.DataFrame([totals])], ignore_index=True)

    return pivoted

In [None]:
def merging(VSODClient_Cleaned, VSODCleaned, col):
    """
    Left-merge the client frames with the matching total frames, key by key.

    In both frames, missing values of the second merge column are replaced
    by the value of the first merge column before merging. After the merge
    every remaining missing value is replaced by 0. Inputs are copied, not
    modified.

    Parameters:
    - VSODClient_Cleaned (dict): Dictionary containing cleaned VSOD client DataFrames.
    - VSODCleaned (dict): Dictionary containing cleaned VSOD DataFrames; must
      hold every key of ``VSODClient_Cleaned``.
    - col (list): The two column names to merge on.

    Returns:
    - dict: Dictionary containing the non-empty merged DataFrames.
    """
    merged_dict = {}
    for key, client_source in VSODClient_Cleaned.items():
        client = client_source.copy()
        total = VSODCleaned[key].copy()

        # Fall back to the first merge column wherever the second is missing.
        for frame in (client, total):
            missing_second = frame[col[1]].isna()
            frame[col[1]] = np.where(missing_second, frame[col[0]], frame[col[1]])

        combined = pd.merge(client, total, on=col, how='left')
        combined['Grand Total'] = combined['Grand Total'].fillna(0)
        combined = combined.fillna(0)

        if len(combined) > 0:
            merged_dict[key] = combined
    return merged_dict


In [None]:
def splitkeys(dic, lis, parent, clientlist):
    """
    Splits the keys of a dictionary into new keys based on unique values in a specified column.

    For every key, every string value of the ``parent`` column that does not
    end with "Total", and every client in ``clientlist``, an entry
    ``"<key> | <value> | <client>"`` is created. It holds the rows whose
    ``parent`` is ``value`` or ``"<value> Total"``, restricted to the frame's
    first two columns, "VSOD" and the client column. Clients whose required
    columns are missing are reported on stdout and skipped. Finally only the
    entries whose second-to-last key part is in ``lis`` are kept.

    Parameters:
    dic (dict): The input dictionary with DataFrames as values.
    lis (list): The values of the second-to-last key part to keep.
    parent (str): The column name used for splitting (e.g., 'Sector').
    clientlist (list): Client column names; one entry is produced per client.

    Returns:
    dict: A new dictionary with updated keys and filtered DataFrames.
    """
    splitvsod = {}
    # A single pass over the dictionary; iterating it twice in nested loops
    # only repeats the same work and the same messages.
    for key, frame in dic.items():
        # Work on a copy of the current DataFrame
        s = frame.copy()
        for value in s[parent].unique():
            if not isinstance(value, str) or value.endswith("Total"):
                continue

            new_key = f"{key} | {value}"
            filtered_df = s[s[parent].isin([value, f"{value} Total"])]

            for cli in clientlist:
                needed_col = [filtered_df.columns[0], filtered_df.columns[1], "VSOD", cli]
                missing_cols = [col for col in needed_col if col not in filtered_df.columns]

                if missing_cols:
                    print(f"Skipping {cli}: missing columns {missing_cols}")
                    continue

                splitvsod[new_key + " | " + cli] = filtered_df[needed_col]

    # Keep only the entries whose value part (second to last) was requested.
    return {
        k: v for k, v in splitvsod.items()
        if k.split(" | ")[-2] in lis
    }

In [None]:
def concatAttribute(dic, marketList):
    """
    This function takes a dictionary of DataFrames and a list of markets, and concatenates
    the DataFrames of each market after adding a 'SOURCE' column to each of them.

    A frame belongs to a market when the second ' | '-separated part of its
    key equals the market name; its 'SOURCE' is the first key part that
    differs from the market. Rows with 'Value Share' <= 0.01 and rows whose
    'Top Brands' contains 'Other' are dropped, and frames whose source is
    'National' are left out. The input DataFrames are not modified.

    Parameters:
    dic (dict): A dictionary where keys are strings in the format 'source | market', and
                values are DataFrames containing market data.
    marketList (list): A list of market names (strings).

    Returns:
    dict: A defaultdict where keys are market names and values are concatenated DataFrames
          with an added 'SOURCE' column. A market without any matching frame
          maps to an empty list.
    """
    # Initialize a defaultdict to store the resulting DataFrames
    marketDic = defaultdict(list)

    # Iterate through the list of markets
    for market in marketList:
        # Iterate through the items in the dictionary
        for key, value in dic.items():
            key_parts = key.split(' | ')
            # Check if the market name matches the key's market part
            if market != key_parts[1]:
                continue

            # First key part that is not the market, taken in key order so
            # the choice is deterministic for keys with more than two parts.
            source = [part for part in key_parts if part != market][0]

            # assign() returns a new frame, leaving the caller's data untouched.
            tagged = value.assign(SOURCE=source)
            tagged = tagged[tagged['Value Share'] > 0.01]
            tagged = tagged[~tagged['Top Brands'].str.contains('Other')].reset_index(drop=True)

            # Only include frames where 'SOURCE' is not 'National'
            if (tagged['SOURCE'] != 'National').all():
                marketDic[market].append(tagged)

        # Concatenate all DataFrames in the list for each market
        if len(marketDic[market]) != 0:
            marketDic[market] = pd.concat(marketDic[market])

    return marketDic

def fillingMissingBrands(dic):
    """
    This function fills in missing brands for each market and source combination in the
    provided dictionary of DataFrames.

    For every source that lacks some of the market's brands, one row per
    missing brand is appended (in the order the brands first appear in the
    market's frame), after which missing values are replaced by 0 and the
    index is reset. Values that are not DataFrames (for example the empty
    list `concatAttribute` leaves for a market without data) are left
    untouched.

    Parameters:
    dic (dict): A dictionary where keys are market names and values are DataFrames
                containing market data with 'Top Brands' and 'SOURCE' columns.

    Returns:
    dict: The input dictionary (updated in place) with missing brands filled in each DataFrame.
    """
    # Iterate through the dictionary items
    for key, value in dic.items():
        if not isinstance(value, pd.DataFrame):
            # Nothing to fill for placeholder values.
            continue

        # Get the unique list of top brands in the DataFrame
        brandList = value['Top Brands'].unique().tolist()

        # Iterate through the unique sources in the DataFrame
        for source in value['SOURCE'].unique():
            source_brands = value.loc[value['SOURCE'] == source, 'Top Brands']
            # Nothing to add when the source already covers every brand
            if source_brands.nunique() == len(brandList):
                continue

            # Walk brandList (not a set difference) so the appended rows
            # come out in a stable, reproducible order.
            present = set(source_brands.unique())
            missing = [brand for brand in brandList if brand not in present]

            # Create a DataFrame for the missing brands with the current source
            filler = pd.DataFrame({'Top Brands': missing, 'SOURCE': source}).explode('Top Brands')
            # Concatenate the missing brands DataFrame with the original DataFrame
            value = pd.concat([value, filler]).replace(np.nan, 0).reset_index(drop=True)

        # Update the dictionary with the filled DataFrame
        dic[key] = value

    return dic


In [None]:
def filter_data(modified_promotionProductsP12M):
    """
    Select, per key, the top promoted products by incremental value.

    Rows with an empty product (column named by the global ``prodORitem``)
    are dropped and the rest ranked by 'Promo Share' to compute a
    'cumulative promo share'. Rows are kept when 'Discount Depth (%)' and
    'VSOD' are both at least 0.05 and the cumulative promo share is at most
    0.8. The survivors are sorted by 'Incr Value' (descending), cut to the
    first 50 and given an 'index' column holding their 1-based rank as text.

    Parameters:
    - modified_promotionProductsP12M (dict): Dictionary of product-level DataFrames.

    Returns:
    - dict: Dictionary holding the filtered DataFrames; keys left without
      rows are omitted.
    """
    cleaned_data = {}
    for key in modified_promotionProductsP12M:
        df = modified_promotionProductsP12M[key]
        df = df[df[f'{prodORitem}'] != '']
        # sort_values returns a new frame, so adding a column below is safe.
        df = df.sort_values(by=['Promo Share'], ascending=False)
        df['cumulative promo share'] = df['Promo Share'].cumsum()

        # Filter and sort the DataFrame
        df = df[df['Discount Depth (%)'] >= 0.05]
        df = df[df['VSOD'] >= 0.05]
        df = df[df['cumulative promo share'] <= 0.8]
        df = df.sort_values(by='Incr Value', ascending=False).reset_index(drop=True)

        # .copy() so the rank column is written to an independent frame.
        df = df.head(50).copy()
        # One rank per row; str(df.index + 1) would instead put the repr of
        # the whole index object into every row.
        df['index'] = (df.index + 1).astype(str)
        df = df.reset_index(drop=True)

        if df.shape[0] > 0:
            cleaned_data[key] = df
    return cleaned_data
        
        
def filter_data_Top(modified_promotionProductsP12M):
    """
    Collect, per key, the 20 client-brand products with the highest 'Incr Value'.

    For each brand in the global ``client_brands`` the rows of that brand
    with a non-empty product (column named by the global ``prodORitem``) are
    taken, a per-brand 'cumulative promo share' is computed, and rows are
    kept when that share is at most 0.8 and both 'Discount Depth (%)' and
    'VSOD' are at least 0.05. The surviving rows of all brands are combined
    and trimmed to the 20 largest 'Incr Value'.

    Parameters:
    - modified_promotionProductsP12M (dict): Dictionary of product-level DataFrames.

    Returns:
    - dict: Dictionary holding the combined DataFrames; keys left without
      rows are omitted.
    """
    cleaned_data = {}
    for key in modified_promotionProductsP12M:
        source_df = modified_promotionProductsP12M[key]
        top_rows = pd.DataFrame()

        for client in client_brands:
            # Rows of the current client brand that name a product
            wanted = (source_df[f'{prodORitem}'] != '') & (source_df['Top Brands'] == client)
            client_rows = source_df[wanted].sort_values(by='Top Brands')

            client_rows = client_rows.assign(
                **{'Promo Share': pd.to_numeric(client_rows['Promo Share'], errors='coerce')}
            )
            client_rows['cumulative promo share'] = (
                client_rows.groupby('Top Brands')['Promo Share'].cumsum()
            )

            keep = (
                (client_rows['cumulative promo share'] <= 0.8)
                & (client_rows['Discount Depth (%)'] >= 0.05)
                & (client_rows['VSOD'] >= 0.05)
            )
            client_rows = client_rows[keep]
            if len(client_rows) == 0:
                continue

            # Re-trim after every brand so at most 20 rows are carried along.
            top_rows = pd.concat([top_rows, client_rows])
            top_rows = top_rows.sort_values(by='Incr Value', ascending=False)
            top_rows = top_rows.head(20).reset_index(drop=True)

        if len(top_rows) > 0:
            # Store the combined DataFrame for the current key
            cleaned_data[key] = top_rows.reset_index(drop=True)

    return cleaned_data
   
def filter_data_Bot(modified_promotionProductsP12M):
    """
    Collect, per key, the 20 client-brand products with the lowest 'Incr Value'.

    For each brand in the global ``client_brands`` the rows of that brand
    with a non-empty product (column named by the global ``prodORitem``) are
    taken, a per-brand 'cumulative promo share' is computed, and rows are
    kept when both 'Discount Depth (%)' and 'VSOD' are at least 0.05. The
    surviving rows of all brands are combined, trimmed to the last 20 of a
    descending 'Incr Value' sort and returned in ascending 'Incr Value' order.

    Parameters:
    - modified_promotionProductsP12M (dict): Dictionary of product-level DataFrames.

    Returns:
    - dict: Dictionary holding the combined DataFrames; keys left without
      rows are omitted.
    """
    cleaned_data = {}
    for key in modified_promotionProductsP12M:
        source_df = modified_promotionProductsP12M[key]
        bottom_rows = pd.DataFrame()

        for client in client_brands:
            # Rows of the current client brand that name a product
            wanted = (source_df[f'{prodORitem}'] != '') & (source_df['Top Brands'] == client)
            client_rows = source_df[wanted].sort_values(by='Top Brands')

            client_rows = client_rows.assign(
                **{'Promo Share': pd.to_numeric(client_rows['Promo Share'], errors='coerce')}
            )
            client_rows['cumulative promo share'] = (
                client_rows.groupby('Top Brands')['Promo Share'].cumsum()
            )

            keep = (client_rows['Discount Depth (%)'] >= 0.05) & (client_rows['VSOD'] >= 0.05)
            client_rows = client_rows[keep]
            if len(client_rows) == 0:
                continue

            # Re-trim after every brand so at most 20 rows are carried along.
            bottom_rows = pd.concat([bottom_rows, client_rows])
            bottom_rows = bottom_rows.sort_values(by='Incr Value', ascending=False)
            bottom_rows = bottom_rows.tail(20).reset_index(drop=True)
            bottom_rows = bottom_rows.sort_values(by='Incr Value', ascending=True)
            bottom_rows = bottom_rows.reset_index(drop=True)

        if len(bottom_rows) > 0:
            # Store the combined DataFrame for the current key
            cleaned_data[key] = bottom_rows.reset_index(drop=True)
    return cleaned_data
                        

In [None]:
def MonthYear_clean(data, column):
    """
    Add a yearly 'Sales index' to every monthly DataFrame of a dictionary.

    Rows whose ``column`` value contains 'Total' (case-insensitive) are
    removed, unless the value equals the first entry of the global
    ``categories``. A 'year' column is parsed from 'MonthYear' (format
    '%b-%y'), and 'Sales index' is 'Value Sales' divided by the mean
    'Value Sales' of the same year and ``column`` value, times 100, truncated
    to an integer (0 where the ratio is undefined).

    Parameters:
    - data (dict): Dictionary of DataFrames with 'MonthYear' and 'Value Sales' columns.
    - column (str): Name of the grouping column.

    Returns:
    - dict: Dictionary holding the processed, non-empty DataFrames.
    """
    month_year_data = {}
    for key, source in data.items():
        df = source.copy()

        # Total rows are dropped, except the one named after the first category
        is_total = df[column].str.contains('Total', case=False)
        droppable = is_total & (df[column] != categories[0])
        df = df[~droppable].reset_index(drop=True)

        month_start = pd.to_datetime(df['MonthYear'], format='%b-%y')
        df['year'] = month_start.dt.year
        df['year'] = df['year'].astype(int)

        group_mean = df.groupby(['year', column])['Value Sales'].transform('mean')
        group_mean = group_mean.reset_index(drop=True)
        # A zero mean becomes NaN so the division yields NaN rather than inf
        group_mean = group_mean.replace(0, float('nan'))

        # Calculate 'Sales index' and handle NaN values gracefully
        sales_ratio = df['Value Sales'] / group_mean * 100
        df['Sales index'] = sales_ratio.fillna(0).astype(int)

        if len(df) > 0:
            month_year_data[key] = df
    return month_year_data

def groupingkeys(data):
    """
    Regroup keys by their first two ' | '-separated parts.

    Parameters:
    - data (iterable): Iterable of lists of key strings.

    Returns:
    - list: One list per distinct two-part prefix, in order of first
      appearance, each holding the keys sharing that prefix in the order
      they were encountered.
    """
    buckets = {}
    for group in data:
        for item in group:
            # The prefix is made of the first two parts of the key
            head = " | ".join(item.split(" | ")[:2])
            buckets.setdefault(head, []).append(item)
    return list(buckets.values())