In [18]:
import pandas as pd
import boto3
from io import BytesIO
import os
from datetime import datetime, timedelta

pd.set_option('display.max_columns', None)

Pushing all local files to S3

In [19]:
# S3 client used by the upload loop below and by the "Upload Inputs to S3" cell.
client = boto3.client("s3")
# Source files (UNC network-share paths) that the upload loop pushes to S3.
# The loop derives each S3 key from the file's base name, so base names must be
# unique across this list. Trailing comments name the dataset where known.
names = [
    r"\\ant\dept-eu\Amazon-Flex-Europe\EU-OE\LPnS\LPnS_Model\LPnS_Wave_Optimizer_V4.xlsm",                               # LPNS Wave Plan 
    r"\\ant.amazon.com\dept-eu\Amazon-Flex-Europe\Data\OE\AMZL\UTR Model\UTRChangeLog.xlsx",                             # UTR Buffers
    r"\\ant\dept-eu\TBA\UK\Business Analyses\CentralOPS\Scheduling\UK\FlexData\SPR Planner3.xlsx",                       # SPRs
    r"\\ant\dept-eu\TBA\UK\Business Analyses\CentralOPS\Scheduling\UK\FlexData\UK_Flex_Schedule_Ahead_Percentage.xlsm",  # SA Table
    r"\\ant\dept-eu\Amazon-Flex-Europe\EU-OE\LPnS\ETL\UK_AMZL_Flex_SCMS_Attributes_21d.txt",                             # SCMS
    r"\\ant\dept-eu\Amazon-Flex-Europe\EU-OE\LPnS\ETL\UK_CVP_Plan_14d.txt",                                              # CVP Data + 2W SnOP Forecast
    r"\\ant\dept-eu\Amazon-Flex-Europe\EU-OE\LPnS\ETL\UK_CVP_Plan_All.txt",                                              # CVP Data + SnOP Forecast
    r"\\ant\dept-eu\Amazon-Flex-Europe\EU-OE\LPnS\ETL\UK_AMZL_Flex_SCMS_Attributes_120d.txt",    
    r"\\ant\dept-eu\Amazon-Flex-Europe\EU-OE\LPnS\ETL\UK_AMZL_Route_Summary_SPR.txt",                                    # SPR Historicals
    r"\\ant\dept-eu\Amazon-Flex-Europe\EU-OE\LPnS\ETL\UK_AMZL_Route_Planning_Agg_Pivot.txt",
    r"\\ant\dept-eu\Amazon-Flex-Europe\EU-OE\LPnS\ETL\UK_AMZL_RoBL_Data_For_UTR_Mod.txt",
    r"\\ant\dept-eu\Amazon-Flex-Europe\EU-OE\LPnS\ETL\UK_AMZL_Provider_Demand.txt",
    r"\\ant\dept-eu\Amazon-Flex-Europe\EU-OE\LPnS\ETL\UK_Siphon_Data_Pivot.txt",
    r"\\ant\dept-eu\Amazon-Flex-Europe\EU-OE\LPnS\ETL\UK_AMZL_Routing_DSP_to_Flex.txt",
    r"\\ant\dept-eu\Amazon-Flex-Europe\EU-OE\LPnS\ETL\UK_AMZL_Routing_DSP_to_Flex_21d.txt",
    r"\\ant\dept-eu\Amazon-Flex-Europe\EU-OE\LPnS\ETL\UK_AMZL_Date_Station_Cycle.txt",
    r"\\ant\dept-eu\Amazon-Flex-Europe\EU-OE\LPnS\ETL\UK_AMZL_Flex_Fill_At_Sequence.txt",
    r"\\ant\dept-eu\Amazon-Flex-Europe\EU-OE\LPnS\ETL\UK_AMZL_DEA_PM_Data_Grp_Pivot.txt",                                # DEA Data
    r"\\ant\dept-eu\Amazon-Flex-Europe\EU-OE\LPnS\ETL\UK_AMZL_RoBL_Data_For_RLD_Mod.txt",                                # RoBL Data for UTR + Reporting
    r"\\ant\dept-eu\Amazon-Flex-Europe\EU-OE\LPnS\ETL\UK_Flex_Block_Actuals_by_Cycle.txt",                               # Historical Blocks
    r"\\ant\dept-eu\Amazon-Flex-Europe\EU-OE\LPnS\ETL\UK_AMZL_CO_Volume_Forecast.txt",                                   # CO 48H Forecast ND
    r"\\ant\dept-eu\EUCentralOPS\Volume-Management\Control Tower\Forecast\EU_SameDay\logs\SD_forecaster_daily_logs.csv"  # CO 48H Forecast SD
]


# Upload every file in `names` to the bucket root, keyed by its base file name.
#   * .txt (tab-separated) and .csv (comma-separated) are converted to Parquet.
#   * Excel workbooks are uploaded byte-for-byte and keep their extension.
#   * Any other extension raises, instead of silently uploading an empty body
#     under whatever extension the previous iteration happened to leave behind.
for file in names:
    fmt = os.path.splitext(file)[1].lower()
    name = os.path.splitext(os.path.basename(file))[0]

    if fmt in (".txt", ".csv"):
        separator = "\t" if fmt == ".txt" else ","
        df = pd.read_csv(file, sep=separator)
        buffer = BytesIO()
        df.to_parquet(buffer, index=False)
        body = buffer.getvalue()
        save_fmt = ".parquet"
    elif fmt in (".xlsx", ".xlsm", ".xls"):
        with open(file, "rb") as f:
            body = f.read()
        save_fmt = fmt
    else:
        raise ValueError(f"Unsupported file type '{fmt}' for {file}")

    client.put_object(
        Bucket="uk-flex-scheduling",
        Key=f"{name}{save_fmt}",
        Body=body,
        ContentType="application/octet-stream",
    )

    print(f"✅ Uploaded {name}")

✅ Uploaded LPnS_Wave_Optimizer_V4
✅ Uploaded UTRChangeLog
✅ Uploaded SPR Planner3
✅ Uploaded UK_Flex_Schedule_Ahead_Percentage
✅ Uploaded UK_AMZL_Flex_SCMS_Attributes_21d
✅ Uploaded UK_CVP_Plan_14d
✅ Uploaded UK_CVP_Plan_All
✅ Uploaded UK_AMZL_Flex_SCMS_Attributes_120d
✅ Uploaded UK_AMZL_Route_Summary_SPR
✅ Uploaded UK_AMZL_Route_Planning_Agg_Pivot
✅ Uploaded UK_AMZL_RoBL_Data_For_UTR_Mod
✅ Uploaded UK_AMZL_Provider_Demand
✅ Uploaded UK_Siphon_Data_Pivot
✅ Uploaded UK_AMZL_Routing_DSP_to_Flex
✅ Uploaded UK_AMZL_Routing_DSP_to_Flex_21d
✅ Uploaded UK_AMZL_Date_Station_Cycle
✅ Uploaded UK_AMZL_Flex_Fill_At_Sequence
✅ Uploaded UK_AMZL_DEA_PM_Data_Grp_Pivot
✅ Uploaded UK_AMZL_RoBL_Data_For_RLD_Mod
✅ Uploaded UK_Flex_Block_Actuals_by_Cycle
✅ Uploaded UK_AMZL_CO_Volume_Forecast
✅ Uploaded SD_forecaster_daily_logs


S3 Downloader

In [46]:
# Module-level S3 client; get_s3_object below reads it as a global.
s3_client = boto3.client("s3")

# Bucket name passed to get_s3_object by the cells that follow.
BUCKET = "uk-flex-scheduling" 

def get_s3_object(bucket, prefix, header=0, sheet_name=0):
    """Load the most recently modified S3 object under ``prefix`` as a DataFrame.

    Every key that starts with ``prefix`` is listed (all result pages, since a
    single ``list_objects_v2`` call returns at most 1,000 keys), directory
    markers (keys ending in ``/``) are ignored, and the entry with the latest
    ``LastModified`` is downloaded and parsed according to its extension.

    Note that ``prefix`` is a prefix match, not an exact key: pass the full
    key (including extension) when several objects share a common stem.

    Args:
        bucket: Name of the S3 bucket.
        prefix: Key prefix to search under.
        header: Header row passed to ``pd.read_excel`` (Excel files only).
        sheet_name: Sheet passed to ``pd.read_excel`` (Excel files only).

    Returns:
        The parsed DataFrame (``.parquet``, ``.csv``, Excel, or tab-separated
        ``.txt``).

    Raises:
        ValueError: If nothing is listed under the prefix, if only directory
            markers are listed, or if the newest object has an unsupported
            extension.
        Exception: Any error from the S3 client or pandas is printed and
            re-raised unchanged.
    """
    try:
        paginator = s3_client.get_paginator("list_objects_v2")
        listed = [
            entry
            for page in paginator.paginate(Bucket=bucket, Prefix=prefix)
            for entry in page.get("Contents", [])
        ]
        if not listed:
            raise ValueError("No objects found in the prefix")

        # Directory markers carry no data; skip them rather than failing when
        # one of them happens to be the newest entry.
        files = [entry for entry in listed if not entry["Key"].endswith("/")]
        if not files:
            raise ValueError(f"No files found in directory: {prefix}")

        key = max(files, key=lambda entry: entry["LastModified"])["Key"]
        data = s3_client.get_object(Bucket=bucket, Key=key)["Body"].read()

        lowered = key.lower()
        if lowered.endswith(".parquet"):
            return pd.read_parquet(BytesIO(data))
        if lowered.endswith(".csv"):
            return pd.read_csv(BytesIO(data))
        if lowered.endswith((".xlsx", ".xls", ".xlsm")):
            return pd.read_excel(BytesIO(data), header=header, sheet_name=sheet_name)
        if lowered.endswith(".txt"):
            return pd.read_csv(BytesIO(data), sep="\t", encoding="utf-8")

        # Previously this fell through and returned None, which only surfaced
        # later as an unrelated error in the calling cell.
        raise ValueError(f"Unsupported file type for key: {key}")

    except Exception as e:
        print(f"Error reading from S3: {str(e)}")
        raise

Get SNOP FCST out

In [84]:

# Columns carried forward into the combined forecast table.
cols = ["joinkey", "week", "ofd_date", "station", "cycle", "value", "flex_share_cvp", "flex_spr_cvp", "source"]

# Load the 14-day CVP plan, normalise dtypes, tag the rows as SNOP forecast,
# and rename to the shared column names before building the join key.
# NOTE(review): astype(..., errors="ignore") returns the column unchanged when
# the cast fails, so flex_spr / flex_routes_available may stay float.
df = (
    get_s3_object(BUCKET, "UK_CVP_Plan_14d.parquet")
    .assign(
        ofd_volume=lambda raw: raw["ofd_volume"].astype(int),
        week=lambda raw: pd.to_datetime(raw["ofd_date"]).dt.isocalendar().week,
        flex_spr=lambda raw: raw["flex_spr"].astype(int, errors="ignore"),
        flex_routes_available=lambda raw: raw["flex_routes_available"].astype(int, errors="ignore"),
        source="snop_fcst",
    )
    .rename(columns={
        "flex_share": "flex_share_cvp",
        "flex_spr": "flex_spr_cvp",
        "ofd_volume": "value",
    })
    .assign(
        joinkey=lambda plan: plan["ofd_date"].astype(str) + plan["station"] + plan["cycle"]
    )
)

snop_fcst = df[cols]
snop_fcst.head()

Unnamed: 0,joinkey,week,ofd_date,station,cycle,value,flex_share_cvp,flex_spr_cvp,source
0,2025-05-18VCL2CYCLE_SD_B,20,2025-05-18,VCL2,CYCLE_SD_B,673,1.0,20.0,snop_fcst
1,2025-05-21DWR1CYCLE_SD_A,21,2025-05-21,DWR1,CYCLE_SD_A,2909,1.0,48.0,snop_fcst
2,2025-06-09DWN2CYCLE_SD_C,24,2025-06-09,DWN2,CYCLE_SD_C,2796,1.0,46.0,snop_fcst
3,2025-06-14DCR2CYCLE_SD_D,24,2025-06-14,DCR2,CYCLE_SD_D,1478,1.0,20.0,snop_fcst
4,2025-05-29DRM2CYCLE_1,22,2025-05-29,DRM2,CYCLE_1,69634,0.03,48.0,snop_fcst


Get CO ND Forecast out

In [85]:

# Columns carried forward into the combined forecast table.
cols = ["joinkey", "ofd_date", "station", "cycle", "value", "source"]

df = get_s3_object(BUCKET, "UK_AMZL_CO_Volume_Forecast.parquet").assign(
    published=lambda raw: pd.to_datetime(raw["published"])
)

# Keep only the most recently published row per date / station / wave group.
latest_rows = df.groupby(["ofddate", "stationcode", "wavegroupname"])["published"].idxmax()

df = (
    df.loc[latest_rows]
    .assign(source="co_fcst")
    .rename(columns={
        "ofddate": "ofd_date",
        "stationcode": "station",
        "wavegroupname": "cycle",
        "volume": "value",
    })
    .assign(joinkey=lambda fc: fc["ofd_date"] + fc["station"] + fc["cycle"])
)

co_nd_fcst = df[cols]
co_nd_fcst.head()

Unnamed: 0,joinkey,ofd_date,station,cycle,value,source
61,2025-05-28DAB1CYCLE_1,2025-05-28,DAB1,CYCLE_1,29495,co_fcst
1,2025-05-28DBH3CYCLE_1,2025-05-28,DBH3,CYCLE_1,52414,co_fcst
99,2025-05-28DBI2CYCLE_1,2025-05-28,DBI2,CYCLE_1,47396,co_fcst
51,2025-05-28DBI4CYCLE_1,2025-05-28,DBI4,CYCLE_1,31809,co_fcst
39,2025-05-28DBI5CYCLE_1,2025-05-28,DBI5,CYCLE_1,37002,co_fcst


Get CO SD Forecast out

In [86]:

# Columns carried forward into the combined forecast table.
cols = ["joinkey", "ofd_date", "station", "cycle", "value", "source"]

# Only today's and tomorrow's same-day forecasts are kept.
today = datetime.today().date()
tomorrow = today + timedelta(days=1)
target_dates = [today.strftime("%Y-%m-%d"), tomorrow.strftime("%Y-%m-%d")]

# Rename columns first ("Expected OFD " carries a trailing space in the source file)
df = get_s3_object(BUCKET, "SD_forecaster_daily_logs.parquet").rename(columns={
    "Node": "station",
    "OFD_DATE": "ofd_date",
    "Expected OFD ": "value"
})

# Filter for UK and the target dates in one step, then take an explicit copy so
# the column assignments below write to an independent frame rather than to a
# slice of the filtered one (which triggers SettingWithCopyWarning).
keep = (df["Country"] == "UK") & df["ofd_date"].isin(target_dates)
df = df.loc[keep].copy()

# Cycle naming
df["cycle"] = "CYCLE_SD_" + df["Cycle"]

# Joining key
df["joinkey"] = df["ofd_date"] + df["station"] + df["cycle"]

df["source"] = "co_fcst"
co_sd_fcst = df[cols].copy()


Join all the tables together

In [87]:
# Stack the three forecast sources into one long table.
forecasts = pd.concat(
    [snop_fcst, co_nd_fcst, co_sd_fcst],
    axis=0,
    ignore_index=True,
    sort=False
)

# Separate column for SNOP FCST (2W, for weekly SA): value where the row is a
# SNOP forecast, NaN otherwise.
cond_1 = forecasts["source"] == "snop_fcst"
forecasts["snop_fcst"] = forecasts["value"].where(cond_1)

# Separate column for CO FCST (48H, for D-2 SA)
cond_2 = forecasts["source"] == "co_fcst"
forecasts["co_fcst"] = forecasts["value"].where(cond_2)

# Ensuring each row has its Flex Share and SPR: drop the sparse columns that
# only SNOP rows carried and look them up again by joinkey for every row.
# NOTE(review): if snop_fcst holds duplicate joinkeys this merge multiplies rows.
forecasts = forecasts.drop(["flex_share_cvp", "flex_spr_cvp"], axis=1)
forecasts = forecasts.merge(
    snop_fcst[["joinkey", "flex_share_cvp", "flex_spr_cvp"]],
    how="left",
    on="joinkey"
)

# The merge returns a new frame with a fresh index, so the mask is rebuilt from
# the merged frame instead of reusing the pre-merge one.
cond_2 = forecasts["source"] == "co_fcst"
forecasts[cond_2].head()


Unnamed: 0,joinkey,week,ofd_date,station,cycle,value,source,snop_fcst,co_fcst,flex_share_cvp,flex_spr_cvp
6477,2025-05-28DAB1CYCLE_1,,2025-05-28,DAB1,CYCLE_1,29495,co_fcst,,29495.0,0.04,48.0
6478,2025-05-28DBH3CYCLE_1,,2025-05-28,DBH3,CYCLE_1,52414,co_fcst,,52414.0,0.035,48.0
6479,2025-05-28DBI2CYCLE_1,,2025-05-28,DBI2,CYCLE_1,47396,co_fcst,,47396.0,0.03,48.0
6480,2025-05-28DBI4CYCLE_1,,2025-05-28,DBI4,CYCLE_1,31809,co_fcst,,31809.0,0.035,45.0
6481,2025-05-28DBI5CYCLE_1,,2025-05-28,DBI5,CYCLE_1,37002,co_fcst,,37002.0,0.035,48.0


SCMS Data

In [121]:

# Columns kept in the SCMS table that the final join starts from.
cols = ["joinkey", "week", "day", "ofd_date", "station", "cycle", "wave_capacity", "wave_start_time", "wave_frequency", "wave_end_time", "valid_from", "wave_max"]

# Upper bound applied to the number of waves per station/cycle.
# NOTE(review): presumably matches the 15 D-/C- slots of the wave plan sheet — confirm.
MAX_WAVES = 15

df = get_s3_object(BUCKET, "UK_AMZL_Flex_SCMS_Attributes_21d.parquet").rename(columns={
    "start_time": "wave_start_time",
    "end_time": "wave_end_time",
    "max_wave_capacity": "wave_capacity"
})

# Parse the wave window once; the parsed values feed both the normalised
# "HH:MM" strings and the window length.
wave_start = pd.to_datetime(df["wave_start_time"], format="%H:%M")
wave_end = pd.to_datetime(df["wave_end_time"], format="%H:%M")

df["week"] = pd.to_datetime(df["ofd_date"]).dt.isocalendar().week
df["wave_start_time"] = wave_start.dt.strftime("%H:%M")
df["wave_frequency"] = pd.to_timedelta(df["wave_frequency"]).dt.total_seconds() / 60  # minutes
df["wave_end_time"] = wave_end.dt.strftime("%H:%M")
df["valid_from"] = pd.to_datetime(df["valid_from"], format="%Y-%m-%d").dt.strftime("%Y-%m-%d")

# Length of the wave window in whole minutes.
df["wave_minutes"] = ((wave_end - wave_start).dt.total_seconds() / 60).astype(int)

# Number of whole waves that fit in the window. Floor division keeps the count
# whole even when the integer cast is skipped: astype(..., errors="ignore")
# returns the input unchanged if any frequency is missing, which previously let
# fractional counts through.
df["max_wave_count"] = (df["wave_minutes"] // df["wave_frequency"]).astype(int, errors="ignore")
df["wave_max"] = df["max_wave_count"].clip(upper=MAX_WAVES)

df["joinkey"] = df["ofd_date"] + df["station"] + df["cycle"]
df = df[cols]

scms_data = df.copy()

scms_data.head()

Unnamed: 0,joinkey,week,day,ofd_date,station,cycle,wave_capacity,wave_start_time,wave_frequency,wave_end_time,valid_from,wave_max
0,2025-06-10DBN5CYCLE_1,24,Tue,2025-06-10,DBN5,CYCLE_1,12.0,13:30,15.0,19:15,2025-04-05,15.0
1,2025-05-27DCE1CYCLE_SD_A,22,Tue,2025-05-27,DCE1,CYCLE_SD_A,26.0,07:00,15.0,10:00,2025-04-02,12.0
2,2025-05-27DXP1CYCLE_1,22,Tue,2025-05-27,DXP1,CYCLE_1,27.0,13:30,15.0,17:30,2025-04-05,15.0
3,2025-06-02DBS3CYCLE_1,23,Mon,2025-06-02,DBS3,CYCLE_1,24.0,13:30,15.0,17:30,2025-04-05,15.0
4,2025-05-29DNE2CYCLE_SD_A,22,Thu,2025-05-29,DNE2,CYCLE_SD_A,40.0,07:00,15.0,10:00,2025-04-02,12.0


UTR Data

In [96]:

# Load the daily UTR buffer sheet and align its column names with the other tables.
df = (
    get_s3_object(BUCKET, "UTRChangeLog.xlsx", sheet_name="DailyRange")
    .rename(columns={
        "JoinKey": "joinkey",
        "Buffer": "utr_buffer",
        "OFDDate": "ofd_date",
        "Station": "station",
        "Cycle": "cycle",
    })
    .assign(ofd_date=lambda log: pd.to_datetime(log["ofd_date"], format="%Y-%m-%d"))
    .assign(joinkey=lambda log: log["ofd_date"].astype(str) + log["station"] + log["cycle"])
)

# Keep today through today + 21 days (both ends inclusive).
window_start = pd.Timestamp.today().normalize()
window_end = window_start + pd.Timedelta(days=21)
df = df[df["ofd_date"].between(window_start, window_end)]

utr_data = df.copy()

utr_data.head()


Unnamed: 0,joinkey,ofd_date,station,cycle,utr_buffer
147,2025-05-28DAB1CYCLE_1,2025-05-28,DAB1,CYCLE_1,0
148,2025-05-29DAB1CYCLE_1,2025-05-29,DAB1,CYCLE_1,0
149,2025-05-30DAB1CYCLE_1,2025-05-30,DAB1,CYCLE_1,0
150,2025-05-31DAB1CYCLE_1,2025-05-31,DAB1,CYCLE_1,0
151,2025-06-01DAB1CYCLE_1,2025-06-01,DAB1,CYCLE_1,0


Calculated SPR Data

In [97]:

# Only the station/cycle key and the calculated SPR are needed from the planner sheet.
cols = ["cycle", "station", "Calc SPR"]

df = (
    get_s3_object(BUCKET, "SPR Planner3.xlsx", header=1, sheet_name="Data")
    .loc[:, cols]
    .rename(columns={"Calc SPR": "calc_spr"})
)

spr_data = df.copy()

spr_data.head()

  warn(msg)


Unnamed: 0,cycle,station,calc_spr
0,CYCLE_1,DAB1,48
1,CYCLE_1,DBH3,48
2,CYCLE_1,DBI2,48
3,CYCLE_1,DBI4,45
4,CYCLE_1,DBI5,48


SA% Table

In [98]:
# Workbook header -> column name used by the rest of the notebook.
sa_column_names = {
    "DOW": "day",
    "Station": "station",
    "Cycle": "cycle",
    "SA% EOA": "sa%_eoa",
    "SA% WK": "sa%_week",
    "SA% D3": "sa%_d3",
    "SA% D2": "sa%_d2",
    "SA% D1": "sa%_d1",
    "SA% D0": "sa%_d0",
}

# Load the schedule-ahead table; missing cells are filled with 0.
df = (
    get_s3_object(BUCKET, "UK_Flex_Schedule_Ahead_Percentage.xlsm")
    .rename(columns=sa_column_names)
    .fillna(0)
)
sa_table = df.copy()

sa_table.head()

Unnamed: 0,day,station,cycle,sa%_eoa,sa%_week,sa%_d3,sa%_d2,sa%_d1,sa%_d0
0,Sun,DAB1,CYCLE_1,0.9,0.45,0.5,0.7,0.85,0.0
1,Mon,DAB1,CYCLE_1,0.9,0.45,0.5,0.7,0.85,0.0
2,Tue,DAB1,CYCLE_1,0.9,0.4,0.45,0.6,0.75,0.0
3,Wed,DAB1,CYCLE_1,0.9,0.45,0.5,0.7,0.85,0.0
4,Thu,DAB1,CYCLE_1,0.9,0.45,0.5,0.7,0.85,0.0


Get current Wave Plan

In [99]:

# Key columns plus the fifteen "D-n" and fifteen "C-n" slots of the wave plan sheet.
wave_slots = range(1, 16)
columns = [
    "Date", "Station", "Cycle",
    *(f"D-{slot}" for slot in wave_slots),
    *(f"C-{slot}" for slot in wave_slots),
]

df = (
    get_s3_object(BUCKET, "LPnS_Wave_Optimizer_V4.xlsm", header=1, sheet_name="WavePlanSD")[columns]
    # Make the headers identifier-friendly: "D-1" -> "D_1", spaces -> "_".
    .rename(columns=lambda col: col.strip().replace(" ", "_").replace("-", "_"))
    .rename(columns={
        "Date": "ofd_date",
        "Station": "station",
        "Cycle": "cycle",
    })
    .assign(joinkey=lambda plan: plan["ofd_date"].astype(str) + plan["station"] + plan["cycle"])
    .fillna(0)
)

wave_plan = df.copy()

wave_plan.head()



Unnamed: 0,ofd_date,station,cycle,D_1,D_2,D_3,D_4,D_5,D_6,D_7,D_8,D_9,D_10,D_11,D_12,D_13,D_14,D_15,C_1,C_2,C_3,C_4,C_5,C_6,C_7,C_8,C_9,C_10,C_11,C_12,C_13,C_14,C_15,joinkey
0,2025-06-01,DAB1,CYCLE_1,3.5,3.5,3.5,3.5,3.5,3.5,3.5,3.5,3.5,0.0,0.0,0.0,0.0,0.0,0.0,15,15,15.0,15.0,15.0,15.0,15.0,15.0,15.0,0.0,0.0,0.0,0.0,0.0,0.0,2025-06-01DAB1CYCLE_1
1,2025-06-02,DAB1,CYCLE_1,3.5,3.5,3.5,3.5,3.5,3.5,3.5,3.5,3.5,0.0,0.0,0.0,0.0,0.0,0.0,15,15,15.0,15.0,15.0,15.0,15.0,15.0,15.0,0.0,0.0,0.0,0.0,0.0,0.0,2025-06-02DAB1CYCLE_1
2,2025-06-03,DAB1,CYCLE_1,3.5,3.5,3.5,3.5,3.5,3.5,3.5,3.5,3.5,0.0,0.0,0.0,0.0,0.0,0.0,15,15,15.0,15.0,15.0,15.0,15.0,15.0,15.0,0.0,0.0,0.0,0.0,0.0,0.0,2025-06-03DAB1CYCLE_1
3,2025-06-04,DAB1,CYCLE_1,3.5,3.5,3.5,3.5,3.5,3.5,3.5,3.5,3.5,0.0,0.0,0.0,0.0,0.0,0.0,15,15,15.0,15.0,15.0,15.0,15.0,15.0,15.0,0.0,0.0,0.0,0.0,0.0,0.0,2025-06-04DAB1CYCLE_1
4,2025-06-05,DAB1,CYCLE_1,3.5,3.5,3.5,3.5,3.5,3.5,3.5,3.5,3.5,0.0,0.0,0.0,0.0,0.0,0.0,15,15,15.0,15.0,15.0,15.0,15.0,15.0,15.0,0.0,0.0,0.0,0.0,0.0,0.0,2025-06-05DAB1CYCLE_1


Join it all together

In [132]:

# Get all columns in their original order (first occurrence wins, duplicates dropped)
cols = list(dict.fromkeys([
    *scms_data.columns,
    *forecasts.columns,
    *utr_data.columns,
    *spr_data.columns,
    *sa_table.columns,
    *wave_plan.columns,
]))

# Start with SCMS data and left-join every other input onto it. The left frame
# keeps its column names on a clash (suffix ''), so the suffixed right-hand
# duplicates are dropped by the `df[cols]` selection below.
df = (
    scms_data
    .merge(forecasts, on="joinkey", how="left", suffixes=("", "_a"))                    # SCMS + Forecasts
    .merge(utr_data, on="joinkey", how="left", suffixes=("", "_b"))                     # UTR buffers
    .merge(spr_data, on=["cycle", "station"], how="left", suffixes=("", "_c"))          # SPR Plan
    .merge(sa_table, on=["day", "station", "cycle"], how="left", suffixes=("", "_d"))   # SA Table
    .merge(wave_plan, on="joinkey", how="left", suffixes=("", "_e"))                    # Wave Plan
)

# Drop unnecessary cols, sort by Date/Cycle/Station
df = df[cols].drop("joinkey", axis=1)
df = df.sort_values(["ofd_date", "cycle", "station"], ascending=False)

# Only include current week and next week. Next week's ISO number is taken from
# the date seven days ahead: `cw + 1` yields 53/54 at year end and would never
# match week 1 of the new year.
now = datetime.now()
cw = now.isocalendar().week
nw = (now + timedelta(days=7)).isocalendar().week
relevant_weeks = df["week"].isin([cw, nw])

df = df[relevant_weeks]

df.head()

Unnamed: 0,week,day,ofd_date,station,cycle,wave_capacity,wave_start_time,wave_frequency,wave_end_time,valid_from,wave_max,value,source,snop_fcst,co_fcst,flex_share_cvp,flex_spr_cvp,utr_buffer,calc_spr,sa%_eoa,sa%_week,sa%_d3,sa%_d2,sa%_d1,sa%_d0,D_1,D_2,D_3,D_4,D_5,D_6,D_7,D_8,D_9,D_10,D_11,D_12,D_13,D_14,D_15,C_1,C_2,C_3,C_4,C_5,C_6,C_7,C_8,C_9,C_10,C_11,C_12,C_13,C_14,C_15
3943,23,Sun,2025-06-08,DXN1,CYCLE_SD_D,20.0,19:30,15.0,20:00,2025-05-11,2.0,469.0,snop_fcst,469.0,,1.0,19.0,20.0,19.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3667,23,Sun,2025-06-08,DXM5,CYCLE_SD_D,26.0,19:15,15.0,20:00,2025-04-20,3.0,1651.0,snop_fcst,1651.0,,1.0,21.0,5.0,23.0,0.0,0.45,0.5,0.7,0.8,0.9,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1789,23,Sun,2025-06-08,DNG2,CYCLE_SD_D,28.0,19:00,15.0,20:00,2025-06-04,4.0,750.0,snop_fcst,750.0,,1.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3987,23,Sun,2025-06-08,DLS2,CYCLE_SD_D,20.0,19:15,15.0,20:00,2025-04-02,3.0,1236.0,snop_fcst,1236.0,,1.0,21.0,20.0,21.0,0.0,0.45,0.5,0.7,0.8,0.925,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3632,23,Sun,2025-06-08,DIG1,CYCLE_SD_D,40.0,19:30,15.0,20:00,2025-04-23,2.0,727.0,snop_fcst,727.0,,1.0,16.0,25.0,15.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


Upload Inputs to S3

In [133]:
# Serialise the joined inputs to Parquet in memory and push them to the bucket.
upload_key = "scheduling_inputs.parquet"

buffer = BytesIO()
df.to_parquet(buffer)

client.put_object(
    Bucket=BUCKET,
    Key=upload_key,
    Body=buffer.getvalue(),
    ContentType="application/octet-stream",
)

print(f"✅ Uploaded {upload_key}")


✅ Uploaded scheduling_inputs.parquet


In [134]:
# Read the uploaded file back and show rows that picked up a wave plan (D_1 present).
df = get_s3_object(BUCKET, "scheduling_inputs.parquet")

df[df["D_1"].notna()].head()

Unnamed: 0,week,day,ofd_date,station,cycle,wave_capacity,wave_start_time,wave_frequency,wave_end_time,valid_from,wave_max,value,source,snop_fcst,co_fcst,flex_share_cvp,flex_spr_cvp,utr_buffer,calc_spr,sa%_eoa,sa%_week,sa%_d3,sa%_d2,sa%_d1,sa%_d0,D_1,D_2,D_3,D_4,D_5,D_6,D_7,D_8,D_9,D_10,D_11,D_12,D_13,D_14,D_15,C_1,C_2,C_3,C_4,C_5,C_6,C_7,C_8,C_9,C_10,C_11,C_12,C_13,C_14,C_15
2595,23,Sat,2025-06-07,DXN1,CYCLE_SD_D,20.0,19:30,15.0,20:00,2025-05-11,2.0,450.0,snop_fcst,450.0,,1.0,19.0,20.0,19.0,,,,,,,2.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,19.0,19.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1773,23,Sat,2025-06-07,DXM5,CYCLE_SD_D,26.0,19:15,15.0,20:00,2025-04-20,3.0,1540.0,snop_fcst,1540.0,,1.0,23.0,5.0,23.0,0.0,0.45,0.5,0.7,0.8,0.9,2.5,2.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,23.0,26.0,26.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
425,23,Sat,2025-06-07,DLS2,CYCLE_SD_D,20.0,19:15,15.0,20:00,2025-04-02,3.0,1180.0,snop_fcst,1180.0,,1.0,21.0,20.0,21.0,0.0,0.45,0.5,0.7,0.8,0.925,2.5,2.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,18.0,20.0,20.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1100,23,Sat,2025-06-07,DIG1,CYCLE_SD_D,40.0,19:30,15.0,20:00,2025-04-23,2.0,794.0,snop_fcst,794.0,,1.0,15.0,25.0,15.0,,,,,,,2.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,38.0,38.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3565,23,Sat,2025-06-07,DHA2,CYCLE_SD_D,26.0,19:00,15.0,20:00,2025-04-02,4.0,1668.0,snop_fcst,1668.0,,1.0,22.0,40.0,22.0,0.0,0.45,0.5,0.675,0.775,0.9,2.5,2.5,2.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,23.0,23.0,26.0,26.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [124]:
# Show the column index of the current `df`.
# NOTE(review): the saved output of this cell lists 'joinkey', which the join
# cell drops before upload — the output looks stale; re-run to confirm.
df.columns

Index(['joinkey', 'week', 'day', 'ofd_date', 'station', 'cycle',
       'wave_capacity', 'wave_start_time', 'wave_frequency', 'wave_end_time',
       'valid_from', 'wave_max', 'value', 'source', 'snop_fcst', 'co_fcst',
       'flex_share_cvp', 'flex_spr_cvp', 'utr_buffer', 'calc_spr', 'sa%_eoa',
       'sa%_week', 'sa%_d3', 'sa%_d2', 'sa%_d1', 'sa%_d0', 'D_1', 'D_2', 'D_3',
       'D_4', 'D_5', 'D_6', 'D_7', 'D_8', 'D_9', 'D_10', 'D_11', 'D_12',
       'D_13', 'D_14', 'D_15', 'C_1', 'C_2', 'C_3', 'C_4', 'C_5', 'C_6', 'C_7',
       'C_8', 'C_9', 'C_10', 'C_11', 'C_12', 'C_13', 'C_14', 'C_15'],
      dtype='object')

In [125]:
bucket = "uk-flex-scheduling"

# Debug variant of the SA% table load: prints the raw workbook columns before renaming.
print("Starting SA Table download")
raw_sa = get_s3_object(bucket, "UK_Flex_Schedule_Ahead_Percentage.xlsm")
print(raw_sa.head())

sa_headers = {
    "DOW": "day",
    "Station": "station",
    "Cycle": "cycle",
    "SA% EOA": "sa%_eoa",
    "SA% WK": "sa%_week",
    "SA% D3": "sa%_d3",
    "SA% D2": "sa%_d2",
    "SA% D1": "sa%_d1",
    "SA% D0": "sa%_d0",
}

# Rename to the notebook's column names and fill missing cells with 0.
df = raw_sa.rename(columns=sa_headers).fillna(0)
sa_table = df.copy()

sa_table.head()

Starting SA Table download
   DOW Station    Cycle  SA% EOA  SA% WK  SA% D3  SA% D2  SA% D1  SA% D0
0  Sun    DAB1  CYCLE_1      0.9    0.45    0.50     0.7    0.85     NaN
1  Mon    DAB1  CYCLE_1      0.9    0.45    0.50     0.7    0.85     NaN
2  Tue    DAB1  CYCLE_1      0.9    0.40    0.45     0.6    0.75     NaN
3  Wed    DAB1  CYCLE_1      0.9    0.45    0.50     0.7    0.85     NaN
4  Thu    DAB1  CYCLE_1      0.9    0.45    0.50     0.7    0.85     NaN


Unnamed: 0,day,station,cycle,sa%_eoa,sa%_week,sa%_d3,sa%_d2,sa%_d1,sa%_d0
0,Sun,DAB1,CYCLE_1,0.9,0.45,0.5,0.7,0.85,0.0
1,Mon,DAB1,CYCLE_1,0.9,0.45,0.5,0.7,0.85,0.0
2,Tue,DAB1,CYCLE_1,0.9,0.4,0.45,0.6,0.75,0.0
3,Wed,DAB1,CYCLE_1,0.9,0.45,0.5,0.7,0.85,0.0
4,Thu,DAB1,CYCLE_1,0.9,0.45,0.5,0.7,0.85,0.0


In [None]:
# Scratch cell: build the SA table through the packaged InputsProcessor.
# NOTE(review): the saved run failed with "ModuleNotFoundError: No module named
# 'utils'" — presumably the kernel was not started from the project root; confirm.
from utils.inputs_processing import InputsProcessor
# NOTE(review): wildcard import. `scheduling_bucket` and `local_files` used below
# are not defined in any visible cell, so they presumably come from src.config —
# prefer importing them by name.
from src.config import *

processor= InputsProcessor(
    bucket=scheduling_bucket,
    local_files=local_files
)

# Preview the first rows of the frame returned by process_sa_table().
df = processor.process_sa_table()

df.head()

ModuleNotFoundError: No module named 'utils'