In [1]:
from datetime import datetime, date, time
import numpy as np
import pandas as pd
import os
import re

## Data ingestion

In [2]:
# Location and name of the raw report workbook to process.
file_path = '../data'
file_name = 'FR Report 22 Oct 24 08h00.xlsx'

# Start from the current wall-clock time, then prefer the "HHhMM" stamp
# embedded in the file name whenever the name carries one.
time_str = pd.Timestamp.now().strftime("%Hh%M")
time_match = re.search(r'(\d{2})h(\d{2})', file_name)
if time_match is not None:
    hour, minute = time_match.groups()
    time_str = f'{hour}h{minute}'

# Output folders: frequency reports get one folder per run (today's date plus
# the report time); booking reports share a single folder.
run_date = pd.Timestamp.now().strftime("%Y-%m-%d")
file_path_frequency = f'../data/frequency-reports-{run_date} {time_str}'
file_path_booking = '../data/booking-reports'

# Create both output folders up front; existing folders are left as they are.
for output_dir in (file_path_frequency, file_path_booking):
    os.makedirs(output_dir, exist_ok=True)

In [3]:
# Read the raw report workbook from the input folder into a DataFrame.
report_source = os.path.join(file_path, file_name)
df = pd.read_excel(report_source)

In [4]:
# Show the column labels of the loaded report.
df.columns

Index(['Account', 'Waybill Date', 'Waybill', 'Service', 'Shipper', 'Consignee',
       'Orig Hub', 'Orig Place', 'Dest Hub', 'Dest Place', 'Pieces',
       'Chrg Mass', 'Booking Date', 'POD Date', 'POD Time', 'POD Recipient',
       'POD Image Present', 'SLA Transit Days', 'Due Date', 'Last Event',
       'Last Event Hub', 'Last Event Date', 'Last Event Time',
       'Delivery Agent'],
      dtype='object')

## Data cleaning

In [5]:
def safe_to_datetime(x):
    """Convert one cell value to a pandas Timestamp, or ``pd.NaT`` on failure.

    Intended to be applied element-wise to a column (``Series.apply``), so
    ``x`` is expected to be a scalar.

    Parameters
    ----------
    x : scalar
        A missing value, a ``pd.Timestamp`` / ``np.datetime64`` /
        ``datetime``, a ``datetime.time``, or anything else
        ``pd.to_datetime`` may be able to parse (e.g. a date string).

    Returns
    -------
    pd.Timestamp or pd.NaT
        ``pd.NaT`` when ``x`` is missing or cannot be converted. A bare
        ``datetime.time`` is combined with today's date before conversion.
    """
    if pd.isna(x):
        return pd.NaT

    if isinstance(x, time):
        # A time-of-day carries no date; anchor it to today's date.
        x = datetime.combine(date.today(), x)

    try:
        return pd.to_datetime(x)
    except Exception:
        # Every conversion failure (unparseable text, out-of-bounds dates, ...)
        # maps to NaT, whatever the input type. `Exception` rather than a bare
        # `except` so KeyboardInterrupt / SystemExit still propagate.
        return pd.NaT

In [6]:
# Columns that are normalised to datetimes, one value at a time, through
# safe_to_datetime (values that cannot be converted become NaT).
date_columns = ['Due Date', 'Waybill Date', 'Last Event Date']
for col in date_columns:
    converted = df[col].map(safe_to_datetime)
    df[col] = converted

## Data manipulation

In [7]:
# Build rules based on other columns

## Data output

### Frequency reports

In [8]:
# 'Last Event' values that send a row to the "Completed deliveries" sheet;
# every other value goes to "Current deliveries".
pod_events = ["POD Details Captured", "POD Image Scanned"]

# Write one workbook per account into the frequency-report folder.
account_list = df["Account"].unique()
for account in account_list:
    # `unique()` keeps NaN, but `== NaN` never matches anything, so rows with
    # a missing account need an explicit null test; otherwise they would be
    # left out of every report and "nan.xlsx" would be written empty.
    if pd.isna(account):
        account_rows = df["Account"].isna()
    else:
        account_rows = df["Account"] == account

    # Rows for this account, ordered by last event, newest waybill first
    df_account = df[account_rows].sort_values(
        by=["Last Event", "Waybill Date"], ascending=[True, False]
    )

    # Split once on the POD events; the two sheets are exact complements
    is_pod = df_account["Last Event"].isin(pod_events)
    df_not_pod = df_account[~is_pod]
    df_pod = df_account[is_pod]

    # Save to Excel file with multiple sheets
    with pd.ExcelWriter(
        f"{file_path_frequency}/{account}.xlsx",
        engine="openpyxl",
    ) as writer:
        df_not_pod.to_excel(writer, sheet_name="Current deliveries", index=False)
        df_pod.to_excel(writer, sheet_name="Completed deliveries", index=False)

### Booking reports

In [9]:
# Sort the DataFrame by 'Booking Date' (ascending) so every sheet keeps that order
df_booking_date = df.sort_values(by='Booking Date', ascending=True)


def _booking_sheet_title(hub, used_titles):
    """Return a valid, unique Excel sheet title for one 'Dest Hub' group key.

    Excel sheet titles must be strings of at most 31 characters, may not
    contain any of ``\\ / * ? : [ ]`` and are unique case-insensitively.
    A missing hub is labelled 'Unknown'. ``used_titles`` is a set of
    lower-cased titles already handed out; it is updated in place.
    """
    title = 'Unknown' if pd.isna(hub) else str(hub)
    title = re.sub(r'[\\/*?:\[\]]', '_', title)[:31] or 'Unknown'

    # Disambiguate collisions created by truncation or character replacement.
    base, counter = title, 1
    while title.lower() in used_titles:
        counter += 1
        suffix = f'_{counter}'
        title = f'{base[:31 - len(suffix)]}{suffix}'
    used_titles.add(title.lower())
    return title


# Create one workbook for today's booking report, one sheet per 'Dest Hub'
used_sheet_titles = set()
with pd.ExcelWriter(
    f'{file_path_booking}/booking-report-{pd.Timestamp.now().strftime("%Y-%m-%d")}.xlsx',
    engine='openpyxl',
) as writer:
    # dropna=False keeps rows whose 'Dest Hub' is missing; by default groupby
    # would leave them out of the report without any warning.
    for category, group in df_booking_date.groupby('Dest Hub', dropna=False):
        group.to_excel(
            writer,
            sheet_name=_booking_sheet_title(category, used_sheet_titles),
            index=False,
        )