In [5]:
import os
import re


from supabase import create_client, Client
supabase: Client = create_client(
    os.getenv("SUPABASE_URL"),
    os.getenv("SUPABASE_KEY")
)


In [15]:
import pandas as pd
from datetime import datetime

all_data = []
batch_size = 1000
offset = 0

while True:
       response = supabase.table("last_year_outbound").select("*").range(offset, offset + batch_size - 1).execute()
       batch = response.data
       if not batch:
           break
       all_data.extend(batch)
       if len(batch) < batch_size:
           break
       offset += batch_size

df = pd.DataFrame(all_data)



In [16]:
df.shape

(35692, 75)

In [17]:
# Data cleaning and preprocessing

# 1. Remove rows where "Container No." is null or blank
df = df[df["Container No."].notnull() & (df["Container No."].astype(str).str.strip() != "")]

# 2. Remove all special characters and strip leading/trailing spaces from specified columns
def remove_special_chars(val):
    if pd.isnull(val):
        return val
    # Remove special characters and then strip leading/trailing spaces
    return re.sub(r'[^A-Za-z0-9 ]+', '', str(val)).strip()

for col in ["Container No.", "Release Number", "Import invoice"]:
    if col in df.columns:
        df[col] = df[col].apply(remove_special_chars)

# 3. Add new column "Power" as 'Piece' * 'Wattage'
if "Piece" in df.columns and "Wattage" in df.columns:
    df["Power"] = pd.to_numeric(df["Piece"], errors='coerce') * pd.to_numeric(df["Wattage"], errors='coerce')
else:
    df["Power"] = None

# 4. Add new column "MegaWattage" as (Power / 10^6)
df["MegaWattage"] = df["Power"] / 1_000_000

# 5. Create a new column "Ref1" as concat of "Container No." and "Release Number"
if "Container No." in df.columns and "Release Number" in df.columns:
    df["Ref1"] = df["Release Number"].astype(str) + df["Container No."].astype(str) 
else:
    df["Ref1"] = None

# 6. Create a new column "Ref2" as concat of "Container No." and "Release Number"
if "Container No." in df.columns and "Release Number" in df.columns:
    df["Ref2"] = df["Release Number"].astype(str) + df["Container No."].astype(str) +df["Wattage"].astype(str) 
else:
    df["Ref2"] = None

# 7. Add Status column based on Inbound and Outbound dates

today = datetime.now().date()

def get_status(row):
    if pd.isna(row['Outbound date']):
        if pd.isna(row['Inbound date']):
            return "On Sea"
        else:
            return "In-Stock"
    else:
        # Convert Outbound date to date object for comparison
        outbound_date = pd.to_datetime(row['Outbound date']).date() if pd.notna(row['Outbound date']) else None
        if outbound_date and outbound_date > today:
            return "In-Stock"
        else:
            return "Outbounded"

df['Current_Status'] = df.apply(get_status, axis=1)

# 8. Add Outbound status column based on Outbound date
def get_outbound_class(row):
    if pd.isna(row['Outbound date']) or row['Outbound date'] == pd.Timestamp(0):
        return "not-outbounded"
    elif pd.to_datetime(row['Outbound date']).date() > today:
        return "outbound-planned"
    else:
        return "outbounded"

df['Outbound_status'] = df.apply(get_outbound_class, axis=1)

# 9. Add Release Status column based on Release date and Current status
def get_release_status(row):
    if not pd.isna(row['Release date']) or row.get('Current_status') == "Outbounded":
        return "Released"
    else:
        return "Not released"

df['Release_Status'] = df.apply(get_release_status, axis=1)


# 10. Add Delivery Status column based on Ref1 count
ref_counts = df['Ref1'].map(df['Ref1'].value_counts())
df['Delivery_Status'] = ref_counts.apply(lambda x: "Partial_delivery" if x > 1 else "Full_delivery")


# Remove 'Total Wattage' and 'MW' columns
df = df.drop(['Total Wattage', 'MW'], axis=1)

# Display the transformed DataFrame
df


Unnamed: 0,id,Name JA partner,WH location,Type of WH (bonded/non),Container No.,Product type,Product reference,Port of Loading,Port of destination,Inbound ref.,...,Comments,created_at,Power,MegaWattage,Ref1,Ref2,Current_Status,Outbound_status,Release_Status,Delivery_Status
0,178461,Abreu Logistics,Alverca,Non bonded fixed,MSKU9118793,,JAM72S10-405/MR,,,MSKU9118793,...,,2025-04-21T18:13:45.07389+00:00,131220.0,0.13122,NoneMSKU9118793,NoneMSKU9118793405,Outbounded,outbounded,Not released,Full_delivery
1,178462,Abreu Logistics,Alverca,Non bonded fixed,MSKU9118793,,JAM72S10-405/MR,,,MSKU9118793,...,,2025-04-21T18:13:45.07389+00:00,109350.0,0.10935,W2305443MSKU9118793,W2305443MSKU9118793405,Outbounded,outbounded,Released,Full_delivery
2,178463,Abreu Logistics,Alverca C,Non bonded fixed,MRSU4078934,,JAM72S20-455/MR,,,MRSU4078934,...,,2025-04-21T18:13:45.07389+00:00,310310.0,0.31031,2311300007MRSU4078934,2311300007MRSU4078934455,Outbounded,outbounded,Released,Full_delivery
3,178464,Abreu Logistics,Palmela 3,Non bonded fixed,TGBU6914673,,JAM72S20-455/MR,,,TGBU6914673,...,,2025-04-21T18:13:45.07389+00:00,310310.0,0.31031,W2301066TGBU6914673,W2301066TGBU6914673455,Outbounded,outbounded,Released,Full_delivery
4,178465,Abreu Logistics,Palmela 3,Non bonded fixed,OOLU9647103,,JAM72S20-460/MR,,,OOLU9647103,...,,2025-04-21T18:13:45.07389+00:00,313720.0,0.31372,W2212502OOLU9647103,W2212502OOLU9647103460,Outbounded,outbounded,Released,Full_delivery
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
35687,214148,Real Logistics,TER,Bonded floating,TGBU8018676,Modules,JAM72D42-630/LB,Shanghai,Gdansk,,...,028810WROWAR,2025-04-21T18:13:45.07389+00:00,312480.0,0.31248,2411120009TGBU8018676,2411120009TGBU8018676630,Outbounded,outbounded,Released,Full_delivery
35688,214149,Real Logistics,TER,Bonded floating,CCLU7974539,Modules,JAM72D42-630/LB,Shanghai,Gdansk,,...,028810WROWAR,2025-04-21T18:13:45.07389+00:00,312480.0,0.31248,2411120009CCLU7974539,2411120009CCLU7974539630,Outbounded,outbounded,Released,Full_delivery
35689,214150,Real Logistics,GDA,Bonded floating,OOLU9159541,Modules,JAM54D41-425/GB,Qingdao,Gdansk,,...,028910WROWAR,2025-04-21T18:13:45.07389+00:00,397800.0,0.39780,2411220012OOLU9159541,2411220012OOLU9159541425,Outbounded,outbounded,Released,Full_delivery
35690,214151,Seacon,,,EMCU8848530,,JAM66D45-610/LB,,,,...,,2025-04-21T18:13:45.07389+00:00,439200.0,0.43920,2412090050EMCU8848530,2412090050EMCU8848530610,Outbounded,outbounded,Released,Full_delivery


In [18]:
# Display as a table
df.shape

(35692, 81)

In [24]:
#convert df to cvs
df.to_csv("output.csv", index=False)

# # Convert DataFrame to list of dicts
# records = df.to_dict(orient="records")

# batch_size = 500

# for i in range(0, len(records), batch_size):
#     batch = records[i:i+batch_size]
#     try:
#         response = supabase.table("archive_data").insert(batch).execute()
#         # If response.data is None or empty, print error info
#         if not response.data:
#             print(f"Error inserting batch {i//batch_size + 1}: {response.__dict__}")
#         else:
#             print(f"Batch {i//batch_size + 1} inserted successfully.")
#     except Exception as e:
#         print(f"Exception inserting batch {i//batch_size + 1}: {e}")