In [402]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
from datetime import datetime
# pd.options.display.float_format = '{:.2f}'.format
# pd.set_option('display.float_format', '{:.6f}'.format)

import warnings
import os
# Silence every warning category for the rest of the session.
# NOTE(review): this also hides library deprecation warnings - consider narrowing the filter.
warnings.simplefilter("ignore")

# Allow up to 500 columns and 500 rows to be rendered when a frame is displayed.
pd.options.display.max_columns = 500
pd.options.display.max_rows = 500

# Mapping

In [469]:
# City name as produced by the sheet parsers -> supplier code used in the import file.
# get_dataframe skips every city that has no entry here.
supplier_code_map = {
    'Toronto': 'CAN-TOR-ON',
    'Hamilton': 'CAN-HAM-ON',
    'Sarnia': 'CAN-SAR-ON',
    'London': 'CAN-LON-ON',
    # TODO: supplier codes for Calgary and Edmonton are still unknown.
    # 'Calgary': '',
    # 'Edmonton': '',
    'Kamloops': 'CAN-KAM-BC',
    'Vancouver': 'CAN-BUR-BC',  # TODO: confirm this is the right code for Vancouver
    'Winnipeg': 'CAN-WIN-MB',
    'Thunder Bay': 'CAN-TBY-ON',
}

# Grade label as produced by the sheet parsers -> inventory item code.
inventory_item_map = {
    'Regular': 'RUL87',
    'Premium': 'PUL91',
    'Diesel': 'ULSD',
    'Dyd Diesel': 'ULSDD',
    'Winter Diesel': 'WULSD',
}

# Inventory item code -> supplier item category, written grouped by category.
supplier_item_map = {
    inventory_item: supplier_item
    for supplier_item, inventory_items in (
        ('GAS', ('RUL87', 'PUL91')),
        ('DIESEL', ('ULSD', 'WULSD')),
        ('DYED', ('ULSDD',)),
    )
    for inventory_item in inventory_items
}

## Load csv

In [415]:
# Existing import file: its columns define the layout of the parsed rows, and its rows are
# the baseline that the merge section updates.
df_csv = pd.read_csv('Supplier Cost Import 01.26.25.csv')

## East

In [404]:
# Read the East price sheet, then discard rows and columns that are entirely empty.
df_east = (
    pd.read_excel("East.xlsx", engine='openpyxl')
    .dropna(how='all')
    .dropna(how='all', axis=1)
)

In [410]:
import pandas as pd
from datetime import timedelta

def get_effective_date(data):
    """Return the sheet's effective date, shifted by one minute, as 'M/D/YY H:MM'.

    The 'Unnamed: 1' column of ``data`` is searched for cells containing
    'Effective'; the text after 'Effective date: ' in the first such cell is
    parsed as a timestamp. Month, day and hour are written without zero
    padding (for example '1/25/25 0:01').

    An IndexError is raised when no cell contains 'Effective'.
    """
    labels = data['Unnamed: 1']
    has_marker = labels.str.contains('Effective').eq(True)  # NaN cells count as "no match"
    date_texts = labels[has_marker].str.split('Effective date: ').str[-1]

    first_stamp = pd.to_datetime(date_texts).values[0]
    stamp = pd.Timestamp(first_stamp) + pd.Timedelta(minutes=1)

    # Built by hand because month, day and hour must not be zero-padded.
    date_part = f"{stamp.month}/{stamp.day}/{stamp.strftime('%y')}"
    time_part = f"{stamp.hour}:{stamp.strftime('%M')}"
    return f"{date_part} {time_part}"

In [409]:
# Effective timestamp read from the East sheet (already shifted by one minute and formatted).
# get_dataframe reads this module-level name, so it has to be set before the East rows are built.
formatted_datetime = get_effective_date(df_east)

In [411]:
def parse_east(data, update_datetime):
    """Extract per-city base prices from the East price sheet.

    The first five rows of ``data`` are skipped. A row whose first cell is
    filled and whose remaining cells are all empty is treated as a city
    header. Among the next nine rows (bounded by the end of the sheet), the
    first one whose first cell reads ``Base Price`` supplies the prices, and
    the row directly above it supplies the grade name of each column.

    Parameters
    ----------
    data : pandas.DataFrame
        Sheet contents; cells are addressed purely by position.
    update_datetime : object
        Not used by the parser; kept so existing calls keep working.

    Returns
    -------
    list of dict
        One ``{"city": ..., "grades": {grade: price}}`` entry per city header
        for which a ``Base Price`` row was found. The ``Diesel`` price is also
        stored under ``Winter Diesel``.

    NOTE(review): the look-ahead does not stop at the next city header, so a
    city without its own ``Base Price`` row could pick up the following
    city's prices - confirm the sheet layout rules this out.
    """
    df = data.iloc[5:]
    n_rows = len(df)

    results = []
    i = 0
    while i < n_rows:
        row = df.iloc[i]

        # Cells are read with .iloc: integer keys such as row[0] on a
        # label-indexed Series rely on a deprecated positional fallback.
        if pd.notna(row.iloc[0]) and row.iloc[1:].isna().all():
            city = row.iloc[0]
            # Look ahead for this city's 'Base Price' row.
            for j in range(i + 1, min(i + 10, n_rows)):
                if str(df.iloc[j, 0]).strip() == "Base Price":
                    grades = df.iloc[j - 1]  # header row with the grade names
                    prices = df.iloc[j]

                    grade_prices = {}
                    for k in range(1, len(grades)):
                        grade = grades.iloc[k]
                        if pd.notna(grade):
                            grade_prices[grade] = prices.iloc[k]
                            if grade == 'Diesel':
                                # Winter diesel is priced the same as diesel.
                                grade_prices['Winter Diesel'] = prices.iloc[k]
                    results.append({"city": city, "grades": grade_prices})
                    i = j  # resume scanning after the 'Base Price' row
                    break
        i += 1
    return results

In [412]:
# Per-city grade prices from the East sheet (parse_east does not use the timestamp argument).
results = parse_east(df_east, formatted_datetime)

In [424]:
def get_dataframe(results, columns, effective_datetime=None):
    """Turn parsed city/grade prices into rows laid out like the import file.

    Parameters
    ----------
    results : list of dict
        Entries shaped like ``{"city": ..., "grades": {grade: price}}``.
    columns : sequence
        Column labels of the returned frame. Each row carries five values in
        this order: supplier code, supplier item, inventory item, effective
        datetime, price.
    effective_datetime : str, optional
        Timestamp written on every row. When omitted, the notebook-level
        ``formatted_datetime`` is used, which is what the function always did
        before this parameter existed.

    Returns
    -------
    pandas.DataFrame
        One row per (city, grade) pair. Cities missing from
        ``supplier_code_map`` are skipped.

    Raises
    ------
    KeyError
        If a grade is missing from ``inventory_item_map`` (or its item code
        from ``supplier_item_map``).
    """
    if effective_datetime is None:
        # Backward-compatible default: whatever the last get_effective_date cell stored.
        effective_datetime = formatted_datetime

    # Collect the rows first and build the frame once, rather than enlarging it
    # with df.loc[num] on every iteration.
    rows = []
    for result in results:
        if result['city'] not in supplier_code_map:
            continue
        supplier_code = supplier_code_map[result['city']]
        for grade, price in result['grades'].items():
            inventory_item = inventory_item_map[grade]
            rows.append([
                supplier_code,
                supplier_item_map[inventory_item],
                inventory_item,
                effective_datetime,
                price,
            ])
    return pd.DataFrame(rows, columns=columns)

In [427]:
# East prices as import rows, using the same column labels as the existing import file.
df_east_cleaned = get_dataframe(results, df_csv.columns)

## Rest

In [430]:
# Read the Rest price sheet, then discard rows and columns that are entirely empty.
df_rest = (
    pd.read_excel("Rest.xlsx", engine='openpyxl')
    .dropna(how='all')
    .dropna(how='all', axis=1)
)

In [431]:
# Effective timestamp read from the Rest sheet. This overwrites the East value stored under the
# same name, and get_dataframe reads that name when it builds rows.
formatted_datetime = get_effective_date(df_rest)

In [448]:
def parse_rest(data, update_datetime):
    """Extract per-city prices from the Rest price sheet.

    The first five and last six rows of ``data`` are dropped. A row whose
    first cell is filled and whose second cell is empty is treated as a city
    header; the city name is the text before the first comma. The header is
    followed by one skipped row and then up to three grade rows, each holding
    the grade name in the first cell and the price in the second.

    Parameters
    ----------
    data : pandas.DataFrame
        Sheet contents; cells are addressed purely by position.
    update_datetime : object
        Not used by the parser; kept so existing calls keep working.

    Returns
    -------
    list of dict
        One ``{"grades": {grade: price}, "city": ...}`` entry per city header.
        The ``Diesel`` price is also stored under ``Dyd Diesel`` and
        ``Winter Diesel``.
    """
    df = data.iloc[5:-6]
    n_rows = len(df)

    results = []
    for row_num in range(n_rows):
        row = df.iloc[row_num]

        # Cells are read with .iloc: integer keys such as row[0] on a
        # label-indexed Series rely on a deprecated positional fallback.
        if not (pd.notna(row.iloc[0]) and pd.isna(row.iloc[1])):
            continue

        city = row.iloc[0].split(',')[0]
        grades = {}
        # Bound the look-ahead by the trimmed frame actually being indexed.
        # The previous bound was the length of the notebook-level df_rest, which
        # is longer than df and let a header near the end run past the last row.
        for j in range(row_num + 2, min(row_num + 5, n_rows)):
            grade = df.iloc[j, 0]
            price = df.iloc[j, 1]
            grades[grade] = price
            if grade == 'Diesel':
                # Dyed and winter diesel are priced the same as diesel.
                grades['Dyd Diesel'] = price
                grades['Winter Diesel'] = price
        results.append({'grades': grades, 'city': city})
    return results

In [449]:
# Per-city grade prices from the Rest sheet; this replaces the East results held in the same name
# (parse_rest does not use the timestamp argument).
results = parse_rest(df_rest, formatted_datetime)

In [450]:
# Rest prices as import rows, using the same column labels as the existing import file.
df_rest_cleaned = get_dataframe(results, df_csv.columns)

## Merge

In [462]:
# Stack both regions and order by cost within each supplier/item pair, so that the
# drop_duplicates(keep='last') in the next cell retains the highest price.
# Missing costs are sorted first: with the default (last) a blank price would win
# over a real one for the same supplier/item pair.
df_concat = (
    pd.concat([df_east_cleaned, df_rest_cleaned])
    .sort_values(
        by=['Supplier Code', 'Inventory Item', 'Cost'],
        ascending=True,
        na_position='first',
    )
)

In [463]:
# Keep one row per supplier/item pair. The frame was sorted ascending by Cost in the previous
# cell, so keep='last' retains the last-sorted (highest-cost) row of each pair.
df_concat =df_concat.drop_duplicates(subset = ['Supplier Code', 'Inventory Item'], keep = 'last')

In [387]:
# Attach the freshly parsed cost and timestamp (suffixed '_new') to every row of the existing
# import file. validate='many_to_one' makes pandas raise if the parsed side repeats a
# supplier/item key: a left join would then add rows, and df_merge would no longer line up
# with df_csv for the index-aligned assignments in the following cells.
df_merge = pd.merge(
    left=df_csv,
    right=df_concat[['Supplier Code', 'Inventory Item', 'Cost', 'Effective DateTime']],
    how='left',
    on=['Supplier Code', 'Inventory Item'],
    suffixes=('', '_new'),
    validate='many_to_one',
)

In [464]:
# Work on a copy so the loaded import file (df_csv) stays untouched.
df_update = df_csv.copy()

In [465]:
# Prefer the newly parsed cost; where it is missing, keep the cost already in the file.
# The two Series are matched on their index labels.
df_update['Cost'] = df_merge['Cost_new'].combine_first(df_update['Cost'])

In [466]:
# Same rule for the timestamp: use the parsed value where present, otherwise keep the existing one.
df_update['Effective DateTime'] = df_merge['Effective DateTime_new'].combine_first(df_update['Effective DateTime'])

In [467]:
# Display the updated import table for a visual check.
df_update

Unnamed: 0,Supplier Code,Supplier Item,Inventory Item,Effective DateTime,Cost
0,CAN-BUR-BC,GAS,PUL91,7/19/25 0:01,1.303
1,CAN-BUR-BC,GAS,RUL87,7/19/25 0:01,1.128
2,CAN-BUR-BC,DYED,ULSDD,7/19/25 0:01,1.352
3,CAN-BUR-BC,DIESEL,ULSD,7/19/25 0:01,1.352
4,CAN-BUR-BC,DIESEL,WULSD,7/19/25 0:01,1.352
5,CAN-KAM-BC,GAS,PUL91,7/19/25 0:01,1.243
6,CAN-KAM-BC,GAS,RUL87,7/19/25 0:01,1.053
7,CAN-KAM-BC,DYED,ULSDD,7/19/25 0:01,1.235
8,CAN-KAM-BC,DIESEL,ULSD,7/19/25 0:01,1.235
9,CAN-KAM-BC,DIESEL,WULSD,7/19/25 0:01,1.235


# End