In [5]:
from datetime import datetime, date, time
import numpy as np
import pandas as pd
import os

## Data ingestion

In [6]:
# Root folder that holds the input workbook and receives the report folders.
file_path = '../data'

# Timestamp of this run; it tags both output folders and the booking workbook.
# It is stored under its own name so the imported `datetime` class is not
# overwritten: `safe_to_datetime` passes `datetime` to isinstance(), which
# only accepts classes.
run_started_at = datetime.now()
# NOTE(review): the ':' characters in this stamp are not valid in Windows
# file names - confirm this notebook only runs on macOS/Linux.
formatted_datetime = run_started_at.strftime("%Y-%m-%d %H:%M:%S")

# One output folder per report type, both located under `file_path`.
file_path_frequency = f'{file_path}/frequency-reports {formatted_datetime}'
file_path_booking = f'{file_path}/booking-reports {formatted_datetime}'

# exist_ok=True: re-running the cell within the same second must not fail.
os.makedirs(file_path_frequency, exist_ok=True)
os.makedirs(file_path_booking, exist_ok=True)

In [7]:
# Read the FR report workbook from the data folder into a DataFrame.
source_workbook = f'{file_path}/FR Report 16 Oct 14h00.-2.xlsx'
df = pd.read_excel(source_workbook)

In [8]:
# Show the column labels of the loaded report.
df.columns

Index(['Waybill Date', 'Waybill', 'Account', 'Reference', 'Service', 'Shipper',
       'Consignee', 'Orig Hub', 'Orig Place', 'Dest Hub', 'Dest Place',
       'Pieces', 'Chrg Mass', 'Booking Date', 'POD Date', 'POD Time',
       'POD Recipient', 'POD Image Present', 'SLA Transit Days', 'Due Date',
       'Last Event', 'Last Event Date', 'Delivery Agent'],
      dtype='object')

## Data cleaning

In [9]:
def safe_to_datetime(x):
    """Convert a single cell value to a pandas timestamp, or ``pd.NaT``.

    Parameters
    ----------
    x : object
        A scalar cell value: missing, already datetime-like, a bare
        ``datetime.time``, or anything else ``pd.to_datetime`` may parse.

    Returns
    -------
    pandas.Timestamp or pandas.NaT
        ``pd.NaT`` for missing values and for values that cannot be parsed.
    """
    # Resolve the datetime classes through a private module alias so the type
    # checks keep working even if the global name `datetime` has been rebound
    # to something other than the class (isinstance() raises TypeError when it
    # is handed a non-type).
    import datetime as _dt

    if pd.isna(x):
        return pd.NaT
    if isinstance(x, (pd.Timestamp, np.datetime64, _dt.datetime)):
        return pd.to_datetime(x)
    if isinstance(x, _dt.time):
        # A bare time carries no date; it is anchored to today's date, so the
        # result depends on the day the notebook is run.
        return pd.to_datetime(_dt.datetime.combine(_dt.date.today(), x))
    try:
        return pd.to_datetime(x)
    except Exception:
        # Best effort: unparseable values become NaT. Catching `Exception`
        # instead of using a bare `except` lets KeyboardInterrupt and
        # SystemExit propagate.
        return pd.NaT

In [10]:
# Columns whose cells are passed through safe_to_datetime.
date_columns = ['Due Date', 'Waybill Date', 'Last Event Date']

# Convert one column at a time, cell by cell, and store the result back
# under the same column label.
for col in date_columns:
    parsed_column = df[col].apply(safe_to_datetime)
    df[col] = parsed_column

## Data manipulation

In [11]:
# TODO: build rules based on other columns (this step is still empty).

## Data output

### Frequency Reports

In [12]:
# Last Event values that place a waybill on the 'Completed deliveries' sheet.
pod_events = ["POD Details Captured", "POD Image Scanned"]

# Kept in case other cells read it; may contain NaN when rows have no account.
account_list = df['Account'].unique()

# groupby splits the frame in a single pass instead of re-filtering the whole
# frame once per account. It also skips rows whose Account is missing, so no
# empty 'nan.xlsx' workbook is produced. sort=False keeps first-appearance
# order and avoids comparing account values of mixed types.
for account, df_account in df.groupby('Account', sort=False):
    df_account = df_account.sort_values(by=['Last Event', 'Waybill Date'], ascending=[True, False])

    # Compute the completed/current split once and reuse it for both sheets.
    is_pod = df_account['Last Event'].isin(pod_events)
    df_not_pod = df_account[~is_pod]
    df_pod = df_account[is_pod]

    # A path separator inside the account value would be read as a sub-folder
    # of the report folder, so both kinds are replaced before building the
    # file name. str() also covers non-text (e.g. numeric) account values.
    file_stem = str(account).replace('/', '_').replace('\\', '_')

    # Save to Excel file with one sheet per delivery state
    with pd.ExcelWriter(f'{file_path_frequency}/{file_stem}.xlsx') as writer:
        df_not_pod.to_excel(writer, sheet_name='Current deliveries', index=False)
        df_pod.to_excel(writer, sheet_name='Completed deliveries', index=False)

### Booking Reports

In [13]:
location_hub_list = df['Dest Hub'].unique()

# Characters Excel does not allow in a worksheet name, each mapped to '_'.
_invalid_sheet_chars = str.maketrans({ch: '_' for ch in '[]:*?/\\'})


def _sheet_name_for(hub):
    """Return a worksheet name for `hub` that Excel accepts."""
    # str() covers non-text hub values; Excel caps sheet names at 31 characters.
    # NOTE(review): hubs that differ only in replaced characters or beyond the
    # 31st character would end up on the same sheet - confirm hub names are
    # short and distinct.
    return str(hub).translate(_invalid_sheet_chars)[:31]


# Sort the DataFrame by 'Booking Date' column
df_booking = df.sort_values(by='Booking Date', ascending=True)

# Remove rows where 'Booking Date' is not populated
df_booking = df_booking.dropna(subset=['Booking Date'])

# Remove rows where 'Last Event' has value of POD Image Scanned or POD Details Captured
df_booking = df_booking[~df_booking['Last Event'].isin(['POD Image Scanned', 'POD Details Captured'])]

# Create an Excel writer object
booking_report_path = f'{file_path_booking}/Booking Report '+formatted_datetime+'.xlsx'
with pd.ExcelWriter(booking_report_path, engine='openpyxl') as writer:
    sheets_written = 0
    # Group the DataFrame by 'Dest Hub' and write each group to a separate sheet.
    # groupby skips rows whose 'Dest Hub' is missing.
    for category, group in df_booking.groupby('Dest Hub'):
        group.to_excel(writer, sheet_name=_sheet_name_for(category), index=False)
        sheets_written += 1

    if sheets_written == 0:
        # The openpyxl writer cannot save a workbook that has no sheets, so a
        # header-only placeholder sheet keeps the cell from failing when there
        # is nothing to report.
        df_booking.iloc[0:0].to_excel(writer, sheet_name='No bookings', index=False)