#### The purpose of this code is to automate reading data from multiple spreadsheets (each representing a specific division), then cleaning and aggregating project content category data and purchase order (PO) data on a divisional basis. This helps maintain an organized record and simplifies the subsequent purchase order creation process.

In [3]:
import pandas as pd
import numpy as np
import time
import warnings
import datetime

# Session setup: warning filter, run timer, display options and month tag.
warnings.filterwarnings(action='ignore')  # hides every warning from here on
startTime = time.time()  # wall-clock start, read again when the run time is printed

# Display-only preferences (local settings): floats shown without decimals,
# and up to 100 columns printed.
pd.options.display.float_format = '{:.0f}'.format
pd.options.display.max_columns = 100

# "YYYY_MM" tag for the month in which the script is run.
current_date = datetime.datetime.now()
date = f"{current_date:%Y_%m}"

# Shared per-division loading and filtering step.
def process_division(filename, division_name, month=None):
    """Return one division's unique (project, content category) rows.

    Reads the 'Summary' sheet of ``../dbs/<filename>``, uses the row at
    position 1 of the parsed sheet as the column headers (with the cell at
    position 10 labelled "Purchase_Month"), keeps the rows whose
    "Purchase_Month" equals *month*, and tags them with *division_name*.

    Args:
        filename: Workbook file name inside the ``../dbs`` directory.
        division_name: Value written to the "Division" column of every row.
        month: Value compared against "Purchase_Month". When omitted, the
            module-level ``date`` is used.

    Returns:
        A DataFrame with the columns "Project Number", "Content Category"
        and "Division", holding one row per distinct
        (Project Number, Content Category) pair.

    Raises:
        KeyError: If the header row lacks "Project Number" or
            "Content Category".
    """
    from pathlib import Path  # local import keeps the function self-contained

    if month is None:
        month = date

    # pathlib picks the right separator, so the path is not Windows-only.
    workbook = Path('..') / 'dbs' / filename
    raw = pd.read_excel(workbook, sheet_name='Summary')

    # Build the header list separately instead of writing the label into the
    # data frame, so the raw sheet's column dtypes are never touched.
    header = raw.iloc[1].tolist()
    header[10] = "Purchase_Month"

    # Explicit copy: the new column is added to an independent frame rather
    # than to a slice of the raw sheet.
    df = raw.iloc[2:].copy()
    df.columns = header
    df['Division'] = division_name

    df = df[df['Purchase_Month'] == month]
    df = df[['Project Number', 'Content Category', 'Division']]
    return df.drop_duplicates(subset=['Project Number', 'Content Category'])

# Workbook file name -> division label, in processing order.
# NOTE(review): "3_file" and "4_file" have no ".xlsx" suffix, unlike the
# other entries — confirm these match the file names on disk.
divisions = dict((
    ("1_file.xlsx", "1db"),
    ("2_file.xlsx", "2db"),
    ("3_file", "3db"),
    ("4_file", "4db"),
    ("5_file.xlsx", "5db"),
    ("6_file.xlsx", "6db"),
    ("7_file.xlsx", "7db"),
    ("8_file.xlsx", "8db"),
    ("9_file.xlsx", "9db"),
    ("10_file.xlsx", "10db"),
    ("11_file.xlsx", "11db"),
))

# Load every division workbook and stack the filtered rows into one frame.
data_frames = [
    process_division(filename, division_name)
    for filename, division_name in divisions.items()
]
total = pd.concat(data_frames, axis=0)

# One row per (division, project) with its categories joined into a single
# comma-separated string. Missing categories are skipped and the rest are
# converted to str, so a blank or numeric cell does not make the join raise
# TypeError.
total_x = pd.pivot_table(
    total,
    index=['Division', 'Project Number'],
    values='Content Category',
    aggfunc=lambda x: ', '.join(str(item) for item in x if pd.notna(item)),
)

total_x = total_x.reset_index()
total_x.columns = ['Division_name', 'Project_number', 'Category_to_select']
total_x = total_x.sort_values('Division_name', ascending=True)

# The timer is read before the export, so the reported time excludes the
# Excel write below.
executionTime = round((time.time() - startTime), 2)
print(f'\nExecution time in seconds: {executionTime} seconds')

# Output workbook is named after the current month tag, e.g. Proj_numbers(2024_01).xlsx.
total_x.to_excel(f'Proj_numbers({date}).xlsx', index=False)



Execution time in seconds: 30.83 seconds
