In [1]:
import pandas as pd

In [6]:
def process_products(sf_file, robo_file, start_date, end_date, output_file):
    """
    Compare products from Salesforce and Robo files, filter by date range, and merge the results.
    """
    # Load files
    sf = pd.read_excel(sf_file)
    sf.columns = sf.columns.str.strip()

    if robo_file.endswith(".csv"):
        robo = pd.read_csv(robo_file)
    else:
        robo = pd.read_excel(robo_file)
    robo.columns = robo.columns.str.strip()

    # Parse dates
    sf["Product Date"] = pd.to_datetime(sf["Product Date"], format="%m/%d/%Y, %I:%M %p", errors="coerce")
    robo["ProductDate"] = pd.to_datetime(robo["ProductDate"], format="%Y-%m-%d", errors="coerce")

    # Convert date range to datetime
    start_date = pd.to_datetime(start_date)
    end_date = pd.to_datetime(end_date)

    # Filter by date range
    sf = sf[(sf["Product Date"] >= start_date) & (sf["Product Date"] <= end_date)]
    robo = robo[(robo["ProductDate"] >= start_date) & (robo["ProductDate"] <= end_date)]

    # Print shapes for debug
    print("Shape of sf:", sf.shape)
    print("Shape of robo:", robo.shape)

    # Clean IDs
    robo["ProductId"] = robo["ProductId"].apply(lambda x: str(int(float(x))) if pd.notnull(x) else None)
    sf["Roboticket ID"] = sf["Roboticket ID"].apply(lambda x: str(int(float(x))) if pd.notnull(x) else None)

    # Merge on Roboticket ID <-> ProductId
    merged = pd.merge(
        sf,
        robo,
        how="outer",
        left_on="Roboticket ID",
        right_on="ProductId",
        indicator=True,
        suffixes=("_SF", "_Robo")
    )

    # Select and rename relevant columns
    merged = merged.rename(columns={
        "ProductId": "ProductId_Robo",
        "Product Date": "Product Date SF",
        "ProductDate": "Product Date Robo"
    })

    columns_to_output = [
        "ProductId_Robo",
        "Roboticket ID",
        "Product Name",
        "ProductName",
        "Product Date SF",
        "Product Date Robo",
        "ProductType",
        "Product Type",
        "StadiumId",
        "Stadium ID",
        "_merge"
    ]

    columns_to_output = [col for col in columns_to_output if col in merged.columns]
    merged = merged[columns_to_output]

    # Rename merge labels
    merged["_merge"] = merged["_merge"].replace({
        "left_only": "exists_in_sf",
        "right_only": "exists_in_robo"
    })

    # Split subsets
    only_in_sf = merged[merged["_merge"] == "exists_in_sf"]
    only_in_robo = merged[merged["_merge"] == "exists_in_robo"]
    both = merged[merged["_merge"] == "both"]

    # Write to Excel
    with pd.ExcelWriter(output_file, engine="openpyxl") as writer:
        both.to_excel(writer, sheet_name="Both", index=False)
        only_in_sf.to_excel(writer, sheet_name="Exists_in_SF", index=False)
        only_in_robo.to_excel(writer, sheet_name="Exists_in_Robo", index=False)

    print(f"Results saved to {output_file}")

    return {
        "sf": sf,
        "robo": robo,
        "merged": merged,
        "only_in_sf": only_in_sf,
        "only_in_robo": only_in_robo,
        "both": both
    }

In [8]:
# Example usage
sf_file = "New Products Report-2025-07-06-05-20-17.xlsx"
robo_file = "products2425.xlsx"
start_date = "2024-01-01"
end_date = "2025-12-31"
output_file = "Processed_Products.xlsx"

results = process_products(sf_file, robo_file, start_date, end_date, output_file)

# Access the results
sf_filtered = results["sf"]
robo_filtered = results["robo"]
merged = results["merged"]
only_in_robo = results["only_in_robo"]
only_in_sf = results["only_in_sf"]
both = results["both"]

print("Shape of merged:", merged.shape)


Shape of sf: (242, 5)
Shape of robo: (227, 6)
Results saved to Processed_Products.xlsx
Shape of merged: (242, 11)


  merged["_merge"] = merged["_merge"].replace({


In [5]:
print('Shape of only_in_robo:', only_in_robo.shape)
only_in_robo.to_excel('Only_in_Robo.xlsx', index=False)
only_in_robo

Shape of only_in_robo: (153, 11)


Unnamed: 0,ProductId_Robo,Roboticket ID,Product Name,ProductName,Product Date SF,Product Date Robo,ProductType,Product Type,StadiumId,Stadium ID,_merge
1,1029,,,גוש מעודד,NaT,2024-07-28,Voucher,,1.0,,exists_in_robo
2,1060,,,הנחת מנהל,NaT,2024-09-09,Voucher,,1.0,,exists_in_robo
4,1093,,,משה מיצרי,NaT,2024-08-18,Voucher,,1.0,,exists_in_robo
5,1126,,,עובד בנק יהב,NaT,2024-08-07,Voucher,,1.0,,exists_in_robo
6,1159,,,ברטרים,NaT,2024-08-08,Voucher,,1.0,,exists_in_robo
...,...,...,...,...,...,...,...,...,...,...,...
235,865,,,חבילה לסופיה 13-15 לינואר: צדויטה אולימפיה,NaT,2025-01-13,Merchndise,,,,exists_in_robo
236,895,,,המרת שינוי לתעריף מבוגר קטגוריה C,NaT,2024-07-21,Voucher,,1.0,,exists_in_robo
239,928,,,one family,NaT,2024-09-19,Voucher,,1.0,,exists_in_robo
240,994,,,שמביק,NaT,2024-08-01,Voucher,,1.0,,exists_in_robo


In [37]:
print('Shape of only_in_sf:', only_in_sf.shape)
only_in_sf

Shape of only_in_sf: (15, 11)


Unnamed: 0,ProductId_Robo,Roboticket ID,Product Name,ProductName,Product Date SF,Product Date Robo,ProductType,Product Type,StadiumId,Stadium ID,_merge
50,,200126,הפועל ירושלים נגד בני הרצליה,,2024-01-03 16:00:00,NaT,,Match,,1.0,exists_in_sf
51,,200127,הפועל ירושלים נגד הפועל אילת,,2024-04-14 17:00:00,NaT,,Match,,1.0,exists_in_sf
52,,200130,הפועל ירושלים נגד הפועל חולון,,2024-02-19 16:00:00,NaT,,Match,,1.0,exists_in_sf
53,,200131,הפועל ירושלים נגד הפועל חיפה,,2024-01-01 16:00:00,NaT,,Match,,1.0,exists_in_sf
54,,200132,הפועל ירושלים נגד הפועל עפולה,,2024-03-31 17:00:00,NaT,,Match,,1.0,exists_in_sf
55,,200133,הפועל ירושלים נגד הפועל תל אביב,,2024-03-10 17:00:00,NaT,,Match,,1.0,exists_in_sf
56,,200134,הפועל ירושלים נגד מכבי עירוני רמת גן,,2024-04-23 17:00:00,NaT,,Match,,1.0,exists_in_sf
57,,200135,הפועל ירושלים נגד מכבי תל אביב,,2024-02-05 16:00:00,NaT,,Match,,1.0,exists_in_sf
58,,200136,הפועל ירושלים נגד עירוני נס ציונה,,2024-02-04 16:00:00,NaT,,Match,,1.0,exists_in_sf
59,,200137,הפועל ירושלים נגד עירוני קריית אתא,,2024-02-25 16:00:00,NaT,,Match,,1.0,exists_in_sf
