In [1]:
%run "..\general_functions\generalFunctions.ipynb"
%run "..\Innovation CBC Slide Duplicate\Innovation CBC Replacement Function.ipynb"

### Parameters

In [2]:
# --- Run configuration -------------------------------------------------------
# Input workbook, relative to the notebook's working directory.
filePath = r"Innovation CBC Datasets/Edgewell Mexico Inno Extract.xlsx"

# Free-text labels; not referenced by the other cells visible in this notebook
# (presumably consumed by the helper notebooks loaded via %run - confirm).
data_source = "DATA SOURCE: Consumer Test | June 2025"
wdtext = "WD assumption = 50 WD"

# Market and client names; `market` is embedded in the output file names.
market = 'Walmart'
client = 'Edgewell'

# Worksheet names for the performance and ranking tables.
sheet_name1 = 'Performance'
sheet_name2 = 'Ranking'

# Measure to report on, and the share column derived from it
# (e.g. 'Unit' -> 'New Unit Share', 'Volume' -> 'New Volume Share').
unitorvolume = 'Unit'
unitorvolshare = f"New {unitorvolume} Share"

### Reading Data in Excel

In [3]:
# Load the four worksheets used by the deck.
# The performance and ranking sheet names come from the parameters cell
# (sheet_name1 / sheet_name2) instead of being hard-coded a second time, so
# changing the parameter actually changes which sheet is read.
performance = pd.read_excel(filePath, sheet_name=sheet_name1).rename(
    # The two unnamed columns of the performance sheet get explicit names here.
    columns={'Unnamed: 2': 'Product', 'Unnamed: 3': 'ProductSize'}
)
rank = pd.read_excel(filePath, sheet_name=sheet_name2)
sourcing = pd.read_excel(filePath, sheet_name="Sourcing")
fair = pd.read_excel(filePath, sheet_name="Fair")

# Inner-join sourcing rows to fair rows where sourcing["Rescaled share diff"]
# equals fair["Fair Share"]; clashing column names from `fair` get an "_f" suffix.
# NOTE(review): the meaning of these key columns is not visible here - confirm
# that an equality join on them is intended.
merged = pd.merge(
    sourcing,
    fair,
    left_on="Rescaled share diff",
    right_on="Fair Share",
    how="inner",
    suffixes=('', '_f'),
)


### Data Cleaning

In [4]:
# One row per 'Grouping' value with its row count in `performance`.
numberOfSlides = performance['Grouping'].value_counts().reset_index()

# 'Count' = how many chunks of (at most) five rows each grouping needs:
# ceil(count / 5), never less than 1.  -(-n // 5) is integer ceiling division.
numberOfSlides['Count'] = (-(-numberOfSlides['count'] // 5)).clip(lower=1)

In [5]:
# Unique 'Sheets names' values for every 'Grouping' in `performance`.
columnName = performance.groupby('Grouping')['Sheets names'].unique().reset_index()

# Share column that must lead every frame (its name depends on `unitorvolume`).
col = unitorvolshare

# Map each grouping to its own frame: the share column followed by the `rank`
# columns listed in that grouping's 'Sheets names'.  `.copy()` makes each frame
# independent of `rank`, so adding columns below cannot trigger pandas'
# SettingWithCopyWarning or write through to `rank`.
dfDic = {
    grouping: rank[[col] + list(sheet_names)].copy()
    for grouping, sheet_names in zip(columnName['Grouping'], columnName['Sheets names'])
}

# Final frames, one per dictionary key.  Every frame has the same layout:
#   [share column, up to five product columns, "To color"]
# Previously only the non-split branch carried "To color", while the
# `value.shape[1] - 2` count in the following cell assumes both extra columns
# are present in every frame.
dfDicSplit = {}

for key, value in dfDic.items():
    productCount = value.shape[1] - 1  # columns after the leading share column
    if productCount > 5:
        # More than five product columns: cut them into chunks of five.
        # Keys keep the original '<grouping>_Num <offset>' naming (offset = 0, 5, 10, ...).
        for offset in range(0, productCount, 5):
            chunk = value.iloc[:, offset + 1:offset + 6].copy()
            chunk.insert(0, col, value[col])        # share column goes first
            chunk["To color"] = rank["To color"]    # same trailing column as the non-split frames
            dfDicSplit[key + '_Num ' + str(offset)] = chunk
    else:
        # Five or fewer product columns: use the frame as is, plus "To color".
        # The same object stays referenced from dfDic[key], as before.
        value["To color"] = rank["To color"]
        dfDicSplit[key] = value


In [6]:
# For every frame in dfDicSplit, store its column count minus two.
# The two subtracted columns correspond to the leading share column and the
# trailing "To color" column that the non-split branch above attaches.
# NOTE(review): this assumes every frame carries both of those columns - confirm
# for groupings that go through the split branch.
duplicationList = {
    name: len(frame.columns) - 2
    for name, frame in dfDicSplit.items()
}
duplicationList

{'Advanced Protection': 1,
 'Baby': 1,
 'Beautycare': 4,
 'Kids': 1,
 'Ozono': 3,
 'Silk Hydration': 3,
 'Sport': 4}

In [7]:
# Per 'Grouping': the 'Sheets names' values and the 'Product' values, each collected into a list.
grouped = performance.groupby('Grouping')[['Sheets names', 'Product']].agg(list)

# {grouping: {sheet name: product}} - the two lists of each row are paired up
# positionally (a repeated sheet name keeps only its last product).
result = grouped.apply(
    lambda row: dict(zip(row['Sheets names'], row['Product'])),
    axis=1,
).to_dict()

# Number of entries in each grouping's mapping, minus one, in `result` order.
sum_list = [len(products) - 1 for products in result.values()]


### Duplication Stage

In [8]:
# Inputs for slideDuplication (defined in a helper notebook loaded via %run).
# NOTE(review): 0, 7 and startIndex=8 look like slide positions in the base
# deck - confirm against slideDuplication.
startIndex = 8

# One entry per section: either a single number or one number per grouping.
index = [
    0,
    list(duplicationList.values()),
    7,
    [startIndex + productCount for productCount in sum_list],
]

# Sections 0 and 2 are duplicated once per distinct grouping, sections 1 and 3 once.
duplication = [performance.Grouping.nunique(), 1] * 2

section_names = [
    "Innovation Summary",
    "Innovation ranking",
    "Innovation potential",
    "Innovation sourcing",
]

# Base template and the duplicated working copy, both in the current working directory.
path = f"{os.getcwd()}//Inno CBC base Oct 2024.pptx"
new_pre = f"{os.getcwd()}//slide duplicated.pptx"

In [9]:
# Sanity check of the duplication plan: the three lists should have the same
# length (one entry per section); then show their contents and the total of
# `duplication`.
for planPart in (index, duplication, section_names):
    print(len(planPart))
for planPart in (index, duplication, section_names):
    print(planPart)
print(sum(duplication))


4
4
4
[0, [1, 1, 4, 1, 3, 3, 4], 7, [8, 8, 11, 8, 10, 10, 11]]
[7, 1, 7, 1]
['Innovation Summary', 'Innovation ranking', 'Innovation potential', 'Innovation sourcing']
16


In [10]:
## Shouldn't be hashed out (commented out) on the first run.
# Builds the working deck at `new_pre` from the base deck at `path`, driven by
# `index`, `duplication` and `section_names`.
# NOTE(review): slideDuplication is defined in a helper notebook; presumably it
# can be commented out on later runs once `new_pre` already exists - confirm.
slideDuplication(index,duplication,section_names,path,new_pre)

In [11]:
# Open the duplicated deck stored at `new_pre`; the fill functions below write into `prs`
# and it is saved to the final output path afterwards.
prs = Presentation(new_pre)

In [12]:
def calculate_position(end):
    """Return the combined size of the first `end` sections of the duplication plan.

    Reads the module-level `index` and `duplication` lists. Each section i
    contributes duplication[i] multiplied by the number of entries in index[i]
    (1 when index[i] is a plain int, len(index[i]) otherwise). The result is
    0 when `end` is 0.
    """
    position = 0
    for section in range(end):
        entry = index[section]
        entryCount = 1 if isinstance(entry, int) else len(entry)
        position += duplication[section] * entryCount
    return position

### Summary

### Innovation Summary (Slide 1)

In [14]:
# `p` is the cursor over the sections of the duplication plan; it starts at the
# first section and is advanced after each section has been filled.
p = 0

# Number of dfDicSplit frames times the duplication factor of section 0.
numOfDuplicatesSummarySlide = duplication[0] * len(duplicationList)

Innovation_Summary(
    prs,
    performance,
    grouped,
    numOfDuplicates=numOfDuplicatesSummarySlide,
    position=calculate_position(p),
)
p = p + 1

### Innovation Ranking (Slide 2)

In [15]:
# Number of dfDicSplit frames times the duplication factor of section 1.
numOfDuplicatesFirstSlide = duplication[1] * len(duplicationList)

innovationRanking(
    prs,
    dfDicSplit,
    performance,
    unitorvolshare,
    client,
    numOfDuplicates=numOfDuplicatesFirstSlide,
    position=calculate_position(p),
)
p = p + 1  # move the cursor to the next section

Advanced Protection
Baby
Beautycare
Kids
Ozono
Silk Hydration
Sport


### Innovation Potential (Slide 3)

In [16]:
# Fill the section the cursor `p` currently points at, using that section's own
# duplication factor and its starting position in the deck.
innovationPotential(
    prs,
    performance,
    wdtext,
    unitorvolume,
    numOfDuplicates=duplication[p],
    position=calculate_position(p),
)
p = p + 1  # move the cursor to the next section

### Innovation Sourcing (Slide 4)

In [17]:
# Number of groupings (entries of sum_list) times duplication[2].
# NOTE(review): the position below is computed from the cursor `p`, while this
# count reads duplication[2] by literal index - confirm that index 2 (rather
# than the current section `p`) is intended.
numOfDuplicatesThirdSlide = duplication[2] * len(sum_list)

innovationSourcing(
    prs,
    performance,
    merged,
    result,
    sum_list,
    numOfDuplicates=numOfDuplicatesThirdSlide,
    position=calculate_position(p),
)

In [18]:
from datetime import date

# Today's date in ISO form (YYYY-MM-DD) becomes part of the output file name.
today = date.today().isoformat()
outputPath = f"{os.getcwd()}\\Innovation CBC {market} output {today}.pptx"

# Write the filled deck to disk, then open that file in PowerPoint through COM.
prs.save(outputPath)
app = win32.Dispatch("PowerPoint.Application")
presentation = app.Presentations.Open(outputPath)

In [44]:
# Every section is taken to hold max(duplication) slides.
# NOTE(review): this assumes all four sections have the same number of slides -
# confirm for inputs where that does not hold.
summary_slide_count = ranking_slide_count = potential_slide_count = sourcing_slide_count = max(duplication)

print(f"Slide counts - Summary: {summary_slide_count}, Ranking: {ranking_slide_count}, Potential: {potential_slide_count}, Sourcing: {sourcing_slide_count}")


app = win32.Dispatch("PowerPoint.Application")
presentation = app.Presentations.Open(outputPath)

# Sections sit back to back in the saved deck, in this order.
section_counts = (
    summary_slide_count,
    ranking_slide_count,
    potential_slide_count,
    sourcing_slide_count,
)
# 1-based number of the first slide of each section.
section_starts = [1 + sum(section_counts[:k]) for k in range(len(section_counts))]
max_slides_per_section = max(section_counts)

# Interleave the sections: round r takes the r-th slide of every section that
# still has one, in section order (summary, ranking, potential, sourcing).
new_order = [
    first_slide + round_num
    for round_num in range(max_slides_per_section)
    for first_slide, slide_count in zip(section_starts, section_counts)
    if round_num < slide_count
]

print(f"New slide order (1-based): {new_order}")
def reorder_slides_in_place(presentation, new_order):
    """Rebuild the deck so its slides follow `new_order`.

    For each 1-based slide number in `new_order`, the slide is copied and
    pasted at the end of the deck; numbers greater than the original slide
    count are skipped. Afterwards the original slides (positions 1 through the
    original count) are deleted, leaving only the pasted copies in the
    requested order.

    Parameters:
        presentation: object exposing a `Slides` collection with `Count`,
            call-style item access and `Paste`, whose slides provide `Copy`
            and `Delete`.
        new_order: iterable of 1-based slide numbers.

    Returns:
        The same `presentation` object.
    """
    slides = presentation.Slides
    original_count = slides.Count

    pasted = []
    for source_number in new_order:
        if source_number > original_count:
            continue  # no such slide in the original deck
        slides(source_number).Copy()
        time.sleep(0.5)  # pause between Copy and Paste
        pasted.append(slides.Paste(slides.Count + 1))

    # Remove the originals back to front so the remaining positions stay valid.
    remaining = original_count
    while remaining > 0:
        slides(remaining).Delete()
        remaining -= 1

    print(f"Reordered {len(pasted)} slides successfully")
    return presentation

# Apply the interleaved order to the deck that is open in PowerPoint.
presentation = reorder_slides_in_place(presentation, new_order)

# The reordered deck is saved under its own name, next to the first output file.
reordered_outputPath = f"{os.getcwd()}\\Innovation CBC {market} reordered output {today}.pptx"
def remove_all_sections(presentation):
    """Delete every section of `presentation` while keeping its slides.

    Sections are deleted from the last one down to the first, so the indices
    of the sections still to be deleted do not shift.

    This is best effort: if anything raises while the sections are read or
    deleted, the error is printed and the presentation is still returned.
    Callers assign the result back (`presentation = remove_all_sections(...)`),
    so returning None on failure would discard their handle and make the next
    statement fail with an unrelated AttributeError.

    Parameters:
        presentation: object exposing `SectionProperties` with `Count` and
            `Delete(index, deleteSlides)`.

    Returns:
        The same `presentation` object, whether or not the removal succeeded.
    """
    try:
        sections = presentation.SectionProperties
        section_count = sections.Count
        print(f"Removing {section_count} sections...")
        for i in range(section_count, 0, -1):
            sections.Delete(i, False)  # False = keep slides
        print("All sections removed.")
    except Exception as e:
        print(f"Error removing sections: {e}")
    return presentation

# Drop the sections currently in the deck before naming new ones.
presentation = remove_all_sections(presentation)

# Add Section names: one section per dfDicSplit key, each starting four slides
# after the previous one (slides 1, 5, 9, ...).
# NOTE(review): the step of 4 presumably matches the four interleaved slide
# types per grouping - confirm if the number of sections ever changes.
for i, key in enumerate(dfDicSplit.keys()):
    print(i, key, sep="\n")
    first_slide_of_section = 4 * i + 1
    presentation.SectionProperties.AddBeforeSlide(first_slide_of_section, key)

presentation.SaveAs(reordered_outputPath)


print(f"Reordered presentation saved as: {reordered_outputPath}")
print("Slide reordering complete! All formatting preserved.")

Slide counts - Summary: 7, Ranking: 7, Potential: 7, Sourcing: 7
[1]
[1, 8, 15, 22, 2]
[1, 8, 15, 22, 2, 9, 16, 23, 3]
[1, 8, 15, 22, 2, 9, 16, 23, 3, 10, 17, 24, 4]
[1, 8, 15, 22, 2, 9, 16, 23, 3, 10, 17, 24, 4, 11, 18, 25, 5]
[1, 8, 15, 22, 2, 9, 16, 23, 3, 10, 17, 24, 4, 11, 18, 25, 5, 12, 19, 26, 6]
[1, 8, 15, 22, 2, 9, 16, 23, 3, 10, 17, 24, 4, 11, 18, 25, 5, 12, 19, 26, 6, 13, 20, 27, 7]
New slide order (1-based): [1, 8, 15, 22, 2, 9, 16, 23, 3, 10, 17, 24, 4, 11, 18, 25, 5, 12, 19, 26, 6, 13, 20, 27, 7, 14, 21, 28]
Reordered 28 slides successfully
Removing 4 sections...
All sections removed.
0
Advanced Protection
1
Baby
2
Beautycare
3
Kids
4
Ozono
5
Silk Hydration
6
Sport
Reordered presentation saved as: c:\Users\Salma Hany\Documents\Slide-Automate\Innovation CBC Slide Duplicate\Innovation CBC Walmart reordered output 2025-08-19.pptx
Slide reordering complete! All formatting preserved.
