In [37]:
from datetime import datetime, date, time
from openpyxl import load_workbook
from openpyxl import Workbook
from openpyxl.drawing.image import Image
from openpyxl.utils.dataframe import dataframe_to_rows
from openpyxl.styles import Font, Border, Side
import numpy as np
import pandas as pd
import os
import re

## Data ingestion

In [38]:
# Location and name of the report workbook to ingest.
file_path = '../data'
# NOTE(review): the name has two dots before the extension - confirm it matches the file on disk.
file_name = 'FR Report 01 Nov 24 12h00..xlsx'

# Take one snapshot of "now" so the folder date and the fallback time below
# always come from the same instant.
run_timestamp = pd.Timestamp.now()

# Prefer the HHhMM stamp embedded in the report's file name; fall back to the
# current time when the name carries none.
time_match = re.search(r'(\d{2})h(\d{2})', file_name)
if time_match:
    hour, minute = time_match.groups()
    time_str = f'{hour}h{minute}'
else:
    time_str = run_timestamp.strftime("%Hh%M")

# Per-run output folder, created under file_path (no second hard-coded copy of the data directory).
file_path_frequency = f'{file_path}/frequency-reports-{run_timestamp.strftime("%Y-%m-%d")} {time_str}'
os.makedirs(file_path_frequency, exist_ok=True)

In [39]:
# Build the workbook's full path from file_path and file_name, then load it into a DataFrame.
report_path = os.path.join(file_path, file_name)
df = pd.read_excel(report_path)

FileNotFoundError: [Errno 2] No such file or directory: '../data/FR Report 29 Oct 24 14h59.xlsx'

In [28]:
# Display the column labels of the loaded report.
df.columns

Index(['Account', 'Waybill', 'Waybill Date', 'Service', 'Reference', 'Shipper',
       'Consignee', 'Orig Hub', 'Orig Place', 'Dest Hub', 'Dest Place',
       'Pieces', 'Chrg Mass', 'Due Date', 'POD Recipient', 'POD Date',
       'POD Time', 'Booking Date', 'Last Event', 'Last Event Hub',
       'Last Event Date', 'Last Event Time', 'SLA Transit Days',
       'Delivery Agent', 'POD Image Present'],
      dtype='object')

## Data cleaning

In [29]:
def safe_to_datetime(x):
    """Convert a single cell value to a pandas datetime, returning NaT when it cannot be parsed.

    Parameters
    ----------
    x : object
        A scalar value: a missing value, a datetime-like object
        (``pd.Timestamp``, ``np.datetime64``, ``datetime``), a
        ``datetime.time``, or anything else ``pd.to_datetime`` may accept.

    Returns
    -------
    pandas.Timestamp or pandas.NaT
        ``NaT`` for missing values and for values of other types that
        ``pd.to_datetime`` rejects. A bare ``time`` is combined with
        today's date before conversion.

    Notes
    -----
    Only ordinary exceptions are swallowed on the fallback path;
    ``KeyboardInterrupt`` and ``SystemExit`` propagate so a long-running
    ``apply`` can still be interrupted.
    """
    if pd.isna(x):
        return pd.NaT
    if isinstance(x, (pd.Timestamp, np.datetime64, datetime)):
        return pd.to_datetime(x)
    if isinstance(x, time):
        # A time-of-day has no date component; anchor it to today's date.
        return pd.to_datetime(datetime.combine(date.today(), x))
    try:
        return pd.to_datetime(x)
    except Exception:
        # Best-effort conversion: an unparseable value becomes NaT instead of aborting the run.
        return pd.NaT

In [30]:
# Columns to normalise to pandas datetimes.
date_columns = ['Due Date', 'Waybill Date', 'Last Event Date']

# Run each listed column through safe_to_datetime and write the result back onto df.
for col in date_columns:
    parsed = df[col].apply(safe_to_datetime)
    df[col] = parsed

## Data output

In [23]:
# Function to create an Excel file from scratch with a logo and heading
def _write_branded_sheet(ws, df, logo_path, header_row=13):
    """Lay out one report sheet: logo, 'FREQUENCY REPORT' heading, then df as a table.

    Parameters
    ----------
    ws : openpyxl worksheet
        Sheet to fill; it is modified in place.
    df : pandas.DataFrame
        Data written as a table (column names first, then the rows, no index).
    logo_path : str
        Path of the image file to place on the sheet.
    header_row : int, default 13
        Worksheet row that receives the column names; data rows follow below it.
    """
    # Merge the logo area, then place the picture with its top-left corner at B2.
    ws.merge_cells('A1:J11')
    logo = Image(logo_path)  # a separate Image object is created for every sheet
    logo.width = 555   # width in pixels
    logo.height = 184  # height in pixels
    logo.anchor = 'B2'
    ws.add_image(logo)

    # Heading lives in the merged block K6:M6.
    ws.merge_cells('K6:M6')
    ws['K6'] = 'FREQUENCY REPORT'
    ws['K6'].font = Font(size=14, bold=True)

    # Thin black box border used for the table's header cells.
    thin = Side(border_style="thin", color="000000")
    border = Border(top=thin, left=thin, right=thin, bottom=thin)

    # Write the table; only the header row gets bold text and a border.
    for r_idx, row in enumerate(dataframe_to_rows(df, index=False, header=True), start=header_row):
        for c_idx, value in enumerate(row, 1):
            cell = ws.cell(row=r_idx, column=c_idx, value=value)
            if r_idx == header_row:
                cell.font = Font(bold=True)
                cell.border = border


def create_excel_with_logo(df_not_pod, df_pod, account, file_path_frequency, logo_path):
    """Create '<file_path_frequency>/<account>.xlsx' with two identically laid-out sheets.

    Parameters
    ----------
    df_not_pod : pandas.DataFrame
        Rows written to the 'Current deliveries' sheet.
    df_pod : pandas.DataFrame
        Rows written to the 'Completed deliveries' sheet.
    account : object
        Used (via string formatting) as the workbook's file name.
    file_path_frequency : str
        Directory the workbook is saved into.
    logo_path : str
        Path of the logo image placed on both sheets.
    """
    wb = Workbook()

    # First sheet: the workbook's default sheet, renamed.
    ws = wb.active
    ws.title = 'Current deliveries'
    _write_branded_sheet(ws, df_not_pod, logo_path)

    # Second sheet: same logo, heading and table layout.
    ws_pod = wb.create_sheet('Completed deliveries')
    _write_branded_sheet(ws_pod, df_pod, logo_path)

    # Save the workbook to the file path
    wb.save(f'{file_path_frequency}/{account}.xlsx')

# Example usage
# Drop missing account values first: a NaN account compares unequal to every
# row, so it would only ever produce a header-only 'nan.xlsx' workbook.
account_list = df['Account'].dropna().unique()
logo_path = '../assets/logo.png'  # Specify the path to your logo image
# 'Last Event' values whose rows go to the 'Completed deliveries' sheet.
pod_events = ["POD Details Captured", "POD Image Scanned"]
for account in account_list:
    # Split the DataFrame by Account
    df_account = df[df['Account'] == account]
    df_account = df_account.sort_values(by=['Last Event', 'Waybill Date'], ascending=[True, False])

    # Split the DataFrame by Last Event (mask computed once, used for both halves)
    is_pod = df_account['Last Event'].isin(pod_events)
    df_not_pod = df_account[~is_pod]
    df_pod = df_account[is_pod]

    # Call the function to create the Excel file with logo and heading
    create_excel_with_logo(df_not_pod, df_pod, account, file_path_frequency, logo_path)



In [31]:
# Function to append tables to a premade Excel template
def _append_table_below(ws, df, gap=2):
    """Write df (column names, then rows, no index) beneath the sheet's existing content.

    Parameters
    ----------
    ws : openpyxl worksheet
        Sheet to append to; it is modified in place.
    df : pandas.DataFrame
        Data to write.
    gap : int, default 2
        Offset added to ``ws.max_row`` to get the header row, which leaves
        ``gap - 1`` empty rows after the template content.
    """
    start_row = ws.max_row + gap

    # Thin black box border used for the table's header cells.
    thin = Side(border_style="thin", color="000000")
    border = Border(top=thin, left=thin, right=thin, bottom=thin)

    for r_idx, row in enumerate(dataframe_to_rows(df, index=False, header=True), start=start_row):
        for c_idx, value in enumerate(row, 1):
            cell = ws.cell(row=r_idx, column=c_idx, value=value)
            # Only the header row gets bold text and a border.
            if r_idx == start_row:
                cell.font = Font(bold=True)
                cell.border = border


def append_to_template(df_not_pod, df_pod, account, file_path_frequency, template_path):
    """Fill a copy of the Excel template and save it as '<file_path_frequency>/<account>.xlsx'.

    The template must contain sheets named 'Current deliveries' and
    'Completed deliveries'; looking up a missing sheet raises ``KeyError``.

    Parameters
    ----------
    df_not_pod : pandas.DataFrame
        Rows appended to the 'Current deliveries' sheet.
    df_pod : pandas.DataFrame
        Rows appended to the 'Completed deliveries' sheet.
    account : object
        Used (via string formatting) as the output workbook's file name.
    file_path_frequency : str
        Directory the workbook is saved into.
    template_path : str
        Path of the template workbook to load; the template file itself is not overwritten
        unless the output path happens to be the same file.
    """
    # Load the template workbook
    wb = load_workbook(template_path)

    # Append each table below whatever the template already has on its sheet.
    _append_table_below(wb['Current deliveries'], df_not_pod)
    _append_table_below(wb['Completed deliveries'], df_pod)

    # Save the modified workbook to a new file
    wb.save(f'{file_path_frequency}/{account}.xlsx')

# Example usage
# Drop missing account values first: a NaN account compares unequal to every
# row, so it would only ever produce a header-only 'nan.xlsx' workbook.
account_list = df['Account'].dropna().unique()
template_path = '../assets/Frequency Report Template.xlsx'  # Specify the path to your Excel template
# 'Last Event' values whose rows go to the 'Completed deliveries' sheet.
pod_events = ["POD Details Captured", "POD Image Scanned"]
for account in account_list:
    # Split the DataFrame by Account
    df_account = df[df['Account'] == account]
    df_account = df_account.sort_values(by=['Last Event', 'Waybill Date'], ascending=[True, False])

    # Split the DataFrame by Last Event (mask computed once, used for both halves)
    is_pod = df_account['Last Event'].isin(pod_events)
    df_not_pod = df_account[~is_pod]
    df_pod = df_account[is_pod]

    # Call the function to append data to the template
    append_to_template(df_not_pod, df_pod, account, file_path_frequency, template_path)


FileNotFoundError: [Errno 2] No such file or directory: '../data/template/template.xlsx'