In [1]:
import pandas as pd

### Check season 15/16:

In [2]:
# --- Read the SF report and the Robo tickets file for this season ---
sf_df = pd.read_excel('New Assets Report - Gil-2025-07-06-23-49-50.xlsx')
robo_df = pd.read_excel('tickets_season_15.xlsx')

# --- Convert "Product Date" to datetimes; unparseable values become NaT ---
robo_df["Product Date"] = pd.to_datetime(robo_df["Product Date"], errors="coerce")
sf_df["Product Date"] = pd.to_datetime(sf_df["Product Date"], errors="coerce")

# --- Keep only SF rows inside the season window (both bounds inclusive) ---
# Only the SF frame is filtered; the Robo frame is used as loaded.
start_date = "2015-09-01"
end_date = "2016-06-30"
sf_df = sf_df.loc[
    lambda frame: (frame["Product Date"] >= start_date)
    & (frame["Product Date"] <= end_date)
]

# Sanity check: how many rows each source contributes
print("SF rows after filtering:", len(sf_df))
print("Robo rows:", len(robo_df))

SF rows after filtering: 73544
Robo rows: 73544


In [3]:
# --- Helpers ---
def strip_tz(series):
    """Parse ``series`` as datetimes and return it without timezone info.

    Parameters
    ----------
    series : pandas.Series
        Values convertible by ``pd.to_datetime``; entries that cannot be
        parsed become ``NaT`` (``errors="coerce"``).

    Returns
    -------
    pandas.Series
        The parsed datetimes. When the parsed result is timezone-aware the
        timezone is removed with ``tz_localize(None)``; otherwise the parsed
        result is returned unchanged.
    """
    series = pd.to_datetime(series, errors="coerce")
    # Inspect the dtype directly: ``pd.api.types.is_datetime64tz_dtype`` is
    # deprecated and emits a warning on every call in recent pandas versions.
    if isinstance(series.dtype, pd.DatetimeTZDtype):
        return series.dt.tz_localize(None)
    return series

def uniq_sorted_list(s):
    """Return the distinct non-null values of ``s`` as an ascending list."""
    distinct = pd.Series(s).dropna().unique().tolist()
    distinct.sort()
    return distinct

# --- Normalize dates (remove tz if present) ---
# Both frames need tz-naive "Product Date" values so their keys compare equal.
sf_df["Product Date"]   = strip_tz(sf_df["Product Date"])
robo_df["Product Date"] = strip_tz(robo_df["Product Date"])

keys = ["Asset Name", "Product Date"]

# Every groupby below uses dropna=False: by default pandas drops rows whose
# group key is missing (for example a "Product Date" that is NaT), which would
# silently leave those rows out of the comparison.

# --- Row counts ---
robo_counts = (
    robo_df.groupby(keys, dropna=False).size().reset_index(name="Robo_Count")
)
sf_counts = (
    sf_df.groupby(keys, dropna=False).size().reset_index(name="SF_Count")
)

# --- Price counts + lists ---
# Per key: number of distinct prices and the sorted list of those prices.
robo_prices = (
    robo_df.groupby(keys, dropna=False)
           .agg(Robo_Price_Count=("Price", "nunique"),
                Robo_Prices=("Price", uniq_sorted_list))
           .reset_index()
)
sf_prices = (
    sf_df.groupby(keys, dropna=False)
         .agg(SF_Price_Count=("Price", "nunique"),
              SF_Prices=("Price", uniq_sorted_list))
         .reset_index()
)

# --- Merge everything ---
# Outer joins keep keys that exist in only one of the two sources.
compare_counts = (
    robo_counts.merge(sf_counts, on=keys, how="outer")
               .merge(robo_prices, on=keys, how="outer")
               .merge(sf_prices, on=keys, how="outer")
)

# Fill NaNs (counts → 0; price lists → [])
for col in ["Robo_Count","SF_Count","Robo_Price_Count","SF_Price_Count"]:
    compare_counts[col] = compare_counts[col].fillna(0).astype(int)

for col in ["Robo_Prices","SF_Prices"]:
    compare_counts[col] = compare_counts[col].apply(lambda v: v if isinstance(v, list) else [])

# --- Price match and row-count diff ---
# Sets are compared, so list order and int-vs-float representation do not matter.
compare_counts["Price_Match"] = compare_counts.apply(
    lambda r: set(r["Robo_Prices"]) == set(r["SF_Prices"]), axis=1
)
# Positive Difference: more Robo rows than SF rows for that key.
compare_counts["Difference"] = compare_counts["Robo_Count"] - compare_counts["SF_Count"]

# --- Sort & select columns (side-by-side) ---
compare_counts = compare_counts.sort_values(by=["Product Date","Asset Name"])[[
    "Asset Name", "Product Date",
    "Robo_Count", "SF_Count",
    "Robo_Price_Count", "SF_Price_Count",
    "Robo_Prices", "SF_Prices",
    "Price_Match",
    "Difference"
]].reset_index(drop=True)

compare_counts


  if pd.api.types.is_datetime64tz_dtype(series):
  if pd.api.types.is_datetime64tz_dtype(series):


Unnamed: 0,Asset Name,Product Date,Robo_Count,SF_Count,Robo_Price_Count,SF_Price_Count,Robo_Prices,SF_Prices,Price_Match,Difference
0,הפועל ירושלים נגד הפועל חולון,2015-10-03,6,6,2,2,"[100, 150]","[100, 150]",True,0
1,הפועל ירושלים נגד הפועל חולון,2015-10-04,6,6,1,1,[70],[70],True,0
2,הפועל ירושלים נגד הפועל חולון,2015-10-06,4,4,3,3,"[0, 40, 70]","[0, 40, 70]",True,0
3,הפועל ירושלים נגד נפטונאס קלייפדה,2015-10-06,1,1,1,1,[0],[0],True,0
4,הפועל ירושלים נגד הפועל חולון,2015-10-07,827,827,1,1,[0],[0],True,0
...,...,...,...,...,...,...,...,...,...,...
376,הפועל ירושלים נגד הפועל אילת,2016-06-05,450,450,7,7,"[0, 120, 150, 200, 220, 300, 350]","[0, 120, 150, 200, 220, 300, 350]",True,0
377,הפועל ירושלים נגד הפועל אילת,2016-06-06,171,171,7,7,"[0, 120, 150, 200, 220, 300, 350]","[0, 120, 150, 200, 220, 300, 350]",True,0
378,הפועל ירושלים נגד מכבי ראשון לציון,2016-06-07,4484,4484,7,7,"[0, 120, 150, 200, 220, 300, 350]","[0, 120, 150, 200, 220, 300, 350]",True,0
379,הפועל ירושלים נגד מכבי ראשון לציון,2016-06-08,912,912,8,8,"[0, 120, 150, 200, 220, 300, 350, 500]","[0, 120, 150, 200, 220, 300, 350, 500]",True,0


### Check season 16/17:

In [4]:
# --- Read the SF report and the Robo tickets file for this season ---
sf_df = pd.read_excel('New Assets Report - Gil-2025-07-07-00-00-01.xlsx')
robo_df = pd.read_excel('tickets_season_16.xlsx')

# --- Convert "Product Date" to datetimes; unparseable values become NaT ---
robo_df["Product Date"] = pd.to_datetime(robo_df["Product Date"], errors="coerce")
sf_df["Product Date"] = pd.to_datetime(sf_df["Product Date"], errors="coerce")

# --- Keep only SF rows inside the season window (both bounds inclusive) ---
# Only the SF frame is filtered; the Robo frame is used as loaded.
start_date = "2016-09-01"
end_date = "2017-06-30"
sf_df = sf_df.loc[
    lambda frame: (frame["Product Date"] >= start_date)
    & (frame["Product Date"] <= end_date)
]

# Sanity check: how many rows each source contributes
print("SF rows after filtering:", len(sf_df))
print("Robo rows:", len(robo_df))

SF rows after filtering: 76551
Robo rows: 76551


In [6]:
# --- Helpers ---
def strip_tz(series):
    """Parse ``series`` as datetimes and return it without timezone info.

    Parameters
    ----------
    series : pandas.Series
        Values convertible by ``pd.to_datetime``; entries that cannot be
        parsed become ``NaT`` (``errors="coerce"``).

    Returns
    -------
    pandas.Series
        The parsed datetimes. When the parsed result is timezone-aware the
        timezone is removed with ``tz_localize(None)``; otherwise the parsed
        result is returned unchanged.
    """
    series = pd.to_datetime(series, errors="coerce")
    # Inspect the dtype directly: ``pd.api.types.is_datetime64tz_dtype`` is
    # deprecated and emits a warning on every call in recent pandas versions.
    if isinstance(series.dtype, pd.DatetimeTZDtype):
        return series.dt.tz_localize(None)
    return series

def uniq_sorted_list(s):
    """Return the distinct non-null values of ``s`` as an ascending list."""
    distinct = pd.Series(s).dropna().unique().tolist()
    distinct.sort()
    return distinct

# --- Normalize dates (remove tz if present) ---
# Both frames need tz-naive "Product Date" values so their keys compare equal.
sf_df["Product Date"]   = strip_tz(sf_df["Product Date"])
robo_df["Product Date"] = strip_tz(robo_df["Product Date"])

keys = ["Asset Name", "Product Date"]

# Every groupby below uses dropna=False: by default pandas drops rows whose
# group key is missing (for example a "Product Date" that is NaT), which would
# silently leave those rows out of the comparison.

# --- Row counts ---
robo_counts = (
    robo_df.groupby(keys, dropna=False).size().reset_index(name="Robo_Count")
)
sf_counts = (
    sf_df.groupby(keys, dropna=False).size().reset_index(name="SF_Count")
)

# --- Price counts + lists ---
# Per key: number of distinct prices and the sorted list of those prices.
robo_prices = (
    robo_df.groupby(keys, dropna=False)
           .agg(Robo_Price_Count=("Price", "nunique"),
                Robo_Prices=("Price", uniq_sorted_list))
           .reset_index()
)
sf_prices = (
    sf_df.groupby(keys, dropna=False)
         .agg(SF_Price_Count=("Price", "nunique"),
              SF_Prices=("Price", uniq_sorted_list))
         .reset_index()
)

# --- Merge everything ---
# Outer joins keep keys that exist in only one of the two sources.
compare_counts = (
    robo_counts.merge(sf_counts, on=keys, how="outer")
               .merge(robo_prices, on=keys, how="outer")
               .merge(sf_prices, on=keys, how="outer")
)

# Fill NaNs (counts → 0; price lists → [])
for col in ["Robo_Count","SF_Count","Robo_Price_Count","SF_Price_Count"]:
    compare_counts[col] = compare_counts[col].fillna(0).astype(int)

for col in ["Robo_Prices","SF_Prices"]:
    compare_counts[col] = compare_counts[col].apply(lambda v: v if isinstance(v, list) else [])

# --- Price match and row-count diff ---
# Sets are compared, so list order and int-vs-float representation do not matter.
compare_counts["Price_Match"] = compare_counts.apply(
    lambda r: set(r["Robo_Prices"]) == set(r["SF_Prices"]), axis=1
)
# Positive Difference: more Robo rows than SF rows for that key.
compare_counts["Difference"] = compare_counts["Robo_Count"] - compare_counts["SF_Count"]

# --- Sort & select columns (side-by-side) ---
compare_counts = compare_counts.sort_values(by=["Product Date","Asset Name"])[[
    "Asset Name", "Product Date",
    "Robo_Count", "SF_Count",
    "Robo_Price_Count", "SF_Price_Count",
    "Robo_Prices", "SF_Prices",
    "Price_Match",
    "Difference"
]].reset_index(drop=True)

compare_counts


  if pd.api.types.is_datetime64tz_dtype(series):
  if pd.api.types.is_datetime64tz_dtype(series):


Unnamed: 0,Asset Name,Product Date,Robo_Count,SF_Count,Robo_Price_Count,SF_Price_Count,Robo_Prices,SF_Prices,Price_Match,Difference
0,הפועל ירושלים נגד בני הרצליה,2016-09-29,692,692,1,1,[0],[0.0],True,0
1,הפועל ירושלים נגד בני הרצליה,2016-10-03,34,34,1,1,[0],[0.0],True,0
2,הפועל ירושלים נגד עירוני נהריה,2016-10-03,1,1,1,1,[77],[77.0],True,0
3,הפועל ירושלים נגד בני הרצליה,2016-10-05,21,21,1,1,[0],[0.0],True,0
4,הפועל ירושלים נגד בני הרצליה,2016-10-06,543,543,1,1,[0],[0.0],True,0
...,...,...,...,...,...,...,...,...,...,...
442,הפועל ירושלים נגד עירוני נהריה,2017-06-02,1293,1293,9,9,"[0, 40, 50, 70, 80, 110, 130, 160, 210]","[0.0, 40.0, 50.0, 70.0, 80.0, 110.0, 130.0, 16...",True,0
443,הפועל ירושלים נגד עירוני נהריה,2017-06-05,460,460,7,7,"[50, 80, 110, 130, 160, 210, 260]","[50.0, 80.0, 110.0, 130.0, 160.0, 210.0, 260.0]",True,0
444,הפועל ירושלים נגד עירוני נהריה,2017-06-06,977,977,8,8,"[0, 50, 80, 110, 130, 160, 210, 260]","[0.0, 50.0, 80.0, 110.0, 130.0, 160.0, 210.0, ...",True,0
445,הפועל ירושלים נגד עירוני נהריה,2017-06-07,998,998,10,10,"[0, 40, 50, 70, 80, 110, 130, 160, 210, 1000]","[0.0, 40.0, 50.0, 70.0, 80.0, 110.0, 130.0, 16...",True,0


### Check season 17/18:

In [7]:
# --- Read the SF report and the Robo tickets file for this season ---
sf_df = pd.read_excel('New Assets Report - Gil-2025-07-07-00-02-19.xlsx')
robo_df = pd.read_excel('tickets_season_17.xlsx')

# --- Convert "Product Date" to datetimes; unparseable values become NaT ---
robo_df["Product Date"] = pd.to_datetime(robo_df["Product Date"], errors="coerce")
sf_df["Product Date"] = pd.to_datetime(sf_df["Product Date"], errors="coerce")

# --- Keep only SF rows inside the season window (both bounds inclusive) ---
# Only the SF frame is filtered; the Robo frame is used as loaded.
start_date = "2017-09-01"
end_date = "2018-06-30"
sf_df = sf_df.loc[
    lambda frame: (frame["Product Date"] >= start_date)
    & (frame["Product Date"] <= end_date)
]

# Sanity check: how many rows each source contributes
print("SF rows after filtering:", len(sf_df))
print("Robo rows:", len(robo_df))

SF rows after filtering: 35946
Robo rows: 35946


In [8]:
# --- Helpers ---
def strip_tz(series):
    """Parse ``series`` as datetimes and return it without timezone info.

    Parameters
    ----------
    series : pandas.Series
        Values convertible by ``pd.to_datetime``; entries that cannot be
        parsed become ``NaT`` (``errors="coerce"``).

    Returns
    -------
    pandas.Series
        The parsed datetimes. When the parsed result is timezone-aware the
        timezone is removed with ``tz_localize(None)``; otherwise the parsed
        result is returned unchanged.
    """
    series = pd.to_datetime(series, errors="coerce")
    # Inspect the dtype directly: ``pd.api.types.is_datetime64tz_dtype`` is
    # deprecated and emits a warning on every call in recent pandas versions.
    if isinstance(series.dtype, pd.DatetimeTZDtype):
        return series.dt.tz_localize(None)
    return series

def uniq_sorted_list(s):
    """Return the distinct non-null values of ``s`` as an ascending list."""
    distinct = pd.Series(s).dropna().unique().tolist()
    distinct.sort()
    return distinct

# --- Normalize dates (remove tz if present) ---
# Both frames need tz-naive "Product Date" values so their keys compare equal.
sf_df["Product Date"]   = strip_tz(sf_df["Product Date"])
robo_df["Product Date"] = strip_tz(robo_df["Product Date"])

keys = ["Asset Name", "Product Date"]

# Every groupby below uses dropna=False: by default pandas drops rows whose
# group key is missing (for example a "Product Date" that is NaT), which would
# silently leave those rows out of the comparison.

# --- Row counts ---
robo_counts = (
    robo_df.groupby(keys, dropna=False).size().reset_index(name="Robo_Count")
)
sf_counts = (
    sf_df.groupby(keys, dropna=False).size().reset_index(name="SF_Count")
)

# --- Price counts + lists ---
# Per key: number of distinct prices and the sorted list of those prices.
robo_prices = (
    robo_df.groupby(keys, dropna=False)
           .agg(Robo_Price_Count=("Price", "nunique"),
                Robo_Prices=("Price", uniq_sorted_list))
           .reset_index()
)
sf_prices = (
    sf_df.groupby(keys, dropna=False)
         .agg(SF_Price_Count=("Price", "nunique"),
              SF_Prices=("Price", uniq_sorted_list))
         .reset_index()
)

# --- Merge everything ---
# Outer joins keep keys that exist in only one of the two sources.
compare_counts = (
    robo_counts.merge(sf_counts, on=keys, how="outer")
               .merge(robo_prices, on=keys, how="outer")
               .merge(sf_prices, on=keys, how="outer")
)

# Fill NaNs (counts → 0; price lists → [])
for col in ["Robo_Count","SF_Count","Robo_Price_Count","SF_Price_Count"]:
    compare_counts[col] = compare_counts[col].fillna(0).astype(int)

for col in ["Robo_Prices","SF_Prices"]:
    compare_counts[col] = compare_counts[col].apply(lambda v: v if isinstance(v, list) else [])

# --- Price match and row-count diff ---
# Sets are compared, so list order and int-vs-float representation do not matter.
compare_counts["Price_Match"] = compare_counts.apply(
    lambda r: set(r["Robo_Prices"]) == set(r["SF_Prices"]), axis=1
)
# Positive Difference: more Robo rows than SF rows for that key.
compare_counts["Difference"] = compare_counts["Robo_Count"] - compare_counts["SF_Count"]

# --- Sort & select columns (side-by-side) ---
compare_counts = compare_counts.sort_values(by=["Product Date","Asset Name"])[[
    "Asset Name", "Product Date",
    "Robo_Count", "SF_Count",
    "Robo_Price_Count", "SF_Price_Count",
    "Robo_Prices", "SF_Prices",
    "Price_Match",
    "Difference"
]].reset_index(drop=True)

compare_counts

  if pd.api.types.is_datetime64tz_dtype(series):
  if pd.api.types.is_datetime64tz_dtype(series):


Unnamed: 0,Asset Name,Product Date,Robo_Count,SF_Count,Robo_Price_Count,SF_Price_Count,Robo_Prices,SF_Prices,Price_Match,Difference
0,הפועל ירושלים נגד בודוצ'נוסט,2017-09-27,2,2,1,1,[100],[100.0],True,0
1,הפועל ירושלים נגד מכבי ראשון לציון,2017-09-27,2,2,1,1,[90],[90.0],True,0
2,הפועל ירושלים נגד בודוצ'נוסט,2017-10-02,2,2,1,1,[170],[170.0],True,0
3,הפועל ירושלים נגד בודוצ'נוסט,2017-10-03,2,2,2,2,"[70, 100]","[70.0, 100.0]",True,0
4,הפועל ירושלים נגד מכבי ראשון לציון,2017-10-03,45,45,2,2,"[60, 90]","[60.0, 90.0]",True,0
...,...,...,...,...,...,...,...,...,...,...
258,הפועל ירושלים נגד הפועל גלבוע גליל,2018-05-30,286,286,8,8,"[0, 40, 60, 70, 90, 130, 150, 180]","[0.0, 40.0, 60.0, 70.0, 90.0, 130.0, 150.0, 18...",True,0
259,הפועל ירושלים נגד הפועל גלבוע גליל,2018-05-31,10,10,4,4,"[0, 60, 90, 180]","[0.0, 60.0, 90.0, 180.0]",True,0
260,הפועל ירושלים נגד הפועל גלבוע גליל,2018-06-01,31,31,5,5,"[40, 60, 70, 90, 150]","[40.0, 60.0, 70.0, 90.0, 150.0]",True,0
261,הפועל ירושלים נגד הפועל גלבוע גליל,2018-06-02,334,334,8,8,"[0, 40, 60, 70, 90, 150, 180, 250]","[0.0, 40.0, 60.0, 70.0, 90.0, 150.0, 180.0, 25...",True,0


### Check season 18/19:

In [9]:
# --- Read the SF report and the Robo tickets file for this season ---
sf_df = pd.read_excel('New Assets Report - Gil-2025-07-07-00-03-56.xlsx')
robo_df = pd.read_excel('tickets_season_18.xlsx')

# --- Convert "Product Date" to datetimes; unparseable values become NaT ---
robo_df["Product Date"] = pd.to_datetime(robo_df["Product Date"], errors="coerce")
sf_df["Product Date"] = pd.to_datetime(sf_df["Product Date"], errors="coerce")

# --- Keep only SF rows inside the season window (both bounds inclusive) ---
# Only the SF frame is filtered; the Robo frame is used as loaded.
start_date = "2018-09-01"
end_date = "2019-06-30"
sf_df = sf_df.loc[
    lambda frame: (frame["Product Date"] >= start_date)
    & (frame["Product Date"] <= end_date)
]

# Sanity check: how many rows each source contributes
print("SF rows after filtering:", len(sf_df))
print("Robo rows:", len(robo_df))

SF rows after filtering: 49246
Robo rows: 49246


In [10]:
# --- Helpers ---
def strip_tz(series):
    """Parse ``series`` as datetimes and return it without timezone info.

    Parameters
    ----------
    series : pandas.Series
        Values convertible by ``pd.to_datetime``; entries that cannot be
        parsed become ``NaT`` (``errors="coerce"``).

    Returns
    -------
    pandas.Series
        The parsed datetimes. When the parsed result is timezone-aware the
        timezone is removed with ``tz_localize(None)``; otherwise the parsed
        result is returned unchanged.
    """
    series = pd.to_datetime(series, errors="coerce")
    # Inspect the dtype directly: ``pd.api.types.is_datetime64tz_dtype`` is
    # deprecated and emits a warning on every call in recent pandas versions.
    if isinstance(series.dtype, pd.DatetimeTZDtype):
        return series.dt.tz_localize(None)
    return series

def uniq_sorted_list(s):
    """Return the distinct non-null values of ``s`` as an ascending list."""
    distinct = pd.Series(s).dropna().unique().tolist()
    distinct.sort()
    return distinct

# --- Normalize dates (remove tz if present) ---
# Both frames need tz-naive "Product Date" values so their keys compare equal.
sf_df["Product Date"]   = strip_tz(sf_df["Product Date"])
robo_df["Product Date"] = strip_tz(robo_df["Product Date"])

keys = ["Asset Name", "Product Date"]

# Every groupby below uses dropna=False: by default pandas drops rows whose
# group key is missing (for example a "Product Date" that is NaT), which would
# silently leave those rows out of the comparison.

# --- Row counts ---
robo_counts = (
    robo_df.groupby(keys, dropna=False).size().reset_index(name="Robo_Count")
)
sf_counts = (
    sf_df.groupby(keys, dropna=False).size().reset_index(name="SF_Count")
)

# --- Price counts + lists ---
# Per key: number of distinct prices and the sorted list of those prices.
robo_prices = (
    robo_df.groupby(keys, dropna=False)
           .agg(Robo_Price_Count=("Price", "nunique"),
                Robo_Prices=("Price", uniq_sorted_list))
           .reset_index()
)
sf_prices = (
    sf_df.groupby(keys, dropna=False)
         .agg(SF_Price_Count=("Price", "nunique"),
              SF_Prices=("Price", uniq_sorted_list))
         .reset_index()
)

# --- Merge everything ---
# Outer joins keep keys that exist in only one of the two sources.
compare_counts = (
    robo_counts.merge(sf_counts, on=keys, how="outer")
               .merge(robo_prices, on=keys, how="outer")
               .merge(sf_prices, on=keys, how="outer")
)

# Fill NaNs (counts → 0; price lists → [])
for col in ["Robo_Count","SF_Count","Robo_Price_Count","SF_Price_Count"]:
    compare_counts[col] = compare_counts[col].fillna(0).astype(int)

for col in ["Robo_Prices","SF_Prices"]:
    compare_counts[col] = compare_counts[col].apply(lambda v: v if isinstance(v, list) else [])

# --- Price match and row-count diff ---
# Sets are compared, so list order and int-vs-float representation do not matter.
compare_counts["Price_Match"] = compare_counts.apply(
    lambda r: set(r["Robo_Prices"]) == set(r["SF_Prices"]), axis=1
)
# Positive Difference: more Robo rows than SF rows for that key.
compare_counts["Difference"] = compare_counts["Robo_Count"] - compare_counts["SF_Count"]

# --- Sort & select columns (side-by-side) ---
compare_counts = compare_counts.sort_values(by=["Product Date","Asset Name"])[[
    "Asset Name", "Product Date",
    "Robo_Count", "SF_Count",
    "Robo_Price_Count", "SF_Price_Count",
    "Robo_Prices", "SF_Prices",
    "Price_Match",
    "Difference"
]].reset_index(drop=True)

compare_counts


  if pd.api.types.is_datetime64tz_dtype(series):
  if pd.api.types.is_datetime64tz_dtype(series):


Unnamed: 0,Asset Name,Product Date,Robo_Count,SF_Count,Robo_Price_Count,SF_Price_Count,Robo_Prices,SF_Prices,Price_Match,Difference
0,הפועל ירושלים נגד מכבי ראשון לציון,2018-10-06,2,2,1,1,[180],[180.0],True,0
1,הפועל ירושלים נגד לייטקבליס,2018-10-07,2,2,1,1,[80],[80.0],True,0
2,הפועל ירושלים נגד מכבי ראשון לציון,2018-10-07,11,11,5,5,"[0, 45, 110, 180, 400]","[0.0, 45.0, 110.0, 180.0, 400.0]",True,0
3,הפועל ירושלים נגד לייטקבליס,2018-10-08,5,5,1,1,[180],[180.0],True,0
4,הפועל ירושלים נגד מכבי ראשון לציון,2018-10-08,261,261,5,5,"[0, 45, 80, 110, 180]","[0.0, 45.0, 80.0, 110.0, 180.0]",True,0
...,...,...,...,...,...,...,...,...,...,...
368,הפועל ירושלים נגד הפועל באר שבע,2019-05-25,8,8,3,3,"[50, 90, 130]","[50.0, 90.0, 130.0]",True,0
369,הפועל ירושלים נגד הפועל באר שבע,2019-05-26,50,50,10,10,"[0, 25, 40, 50, 60, 70, 80, 90, 100, 250]","[0.0, 25.0, 40.0, 50.0, 60.0, 70.0, 80.0, 90.0...",True,0
370,הפועל ירושלים נגד הפועל באר שבע,2019-05-27,127,127,11,11,"[0, 25, 40, 50, 60, 70, 80, 90, 130, 180, 400]","[0.0, 25.0, 40.0, 50.0, 60.0, 70.0, 80.0, 90.0...",True,0
371,הפועל ירושלים נגד הפועל באר שבע,2019-05-28,101,101,10,10,"[0, 25, 40, 50, 70, 80, 90, 110, 130, 180]","[0.0, 25.0, 40.0, 50.0, 70.0, 80.0, 90.0, 110....",True,0


### Check season 19/20:

In [11]:
# --- Read the SF report and the Robo tickets file for this season ---
sf_df = pd.read_excel('New Assets Report - Gil-2025-07-07-00-05-38.xlsx')
robo_df = pd.read_excel('tickets_season_19.xlsx')

# --- Convert "Product Date" to datetimes; unparseable values become NaT ---
robo_df["Product Date"] = pd.to_datetime(robo_df["Product Date"], errors="coerce")
sf_df["Product Date"] = pd.to_datetime(sf_df["Product Date"], errors="coerce")

# --- Keep only SF rows inside the season window (both bounds inclusive) ---
# Only the SF frame is filtered; the Robo frame is used as loaded.
start_date = "2019-09-01"
end_date = "2020-06-30"
sf_df = sf_df.loc[
    lambda frame: (frame["Product Date"] >= start_date)
    & (frame["Product Date"] <= end_date)
]

# Sanity check: how many rows each source contributes
print("SF rows after filtering:", len(sf_df))
print("Robo rows:", len(robo_df))

SF rows after filtering: 40441
Robo rows: 40441


In [12]:
# --- Helpers ---
def strip_tz(series):
    """Parse ``series`` as datetimes and return it without timezone info.

    Parameters
    ----------
    series : pandas.Series
        Values convertible by ``pd.to_datetime``; entries that cannot be
        parsed become ``NaT`` (``errors="coerce"``).

    Returns
    -------
    pandas.Series
        The parsed datetimes. When the parsed result is timezone-aware the
        timezone is removed with ``tz_localize(None)``; otherwise the parsed
        result is returned unchanged.
    """
    series = pd.to_datetime(series, errors="coerce")
    # Inspect the dtype directly: ``pd.api.types.is_datetime64tz_dtype`` is
    # deprecated and emits a warning on every call in recent pandas versions.
    if isinstance(series.dtype, pd.DatetimeTZDtype):
        return series.dt.tz_localize(None)
    return series

def uniq_sorted_list(s):
    """Return the distinct non-null values of ``s`` as an ascending list."""
    distinct = pd.Series(s).dropna().unique().tolist()
    distinct.sort()
    return distinct

# --- Normalize dates (remove tz if present) ---
# Both frames need tz-naive "Product Date" values so their keys compare equal.
sf_df["Product Date"]   = strip_tz(sf_df["Product Date"])
robo_df["Product Date"] = strip_tz(robo_df["Product Date"])

keys = ["Asset Name", "Product Date"]

# Every groupby below uses dropna=False: by default pandas drops rows whose
# group key is missing (for example a "Product Date" that is NaT), which would
# silently leave those rows out of the comparison.

# --- Row counts ---
robo_counts = (
    robo_df.groupby(keys, dropna=False).size().reset_index(name="Robo_Count")
)
sf_counts = (
    sf_df.groupby(keys, dropna=False).size().reset_index(name="SF_Count")
)

# --- Price counts + lists ---
# Per key: number of distinct prices and the sorted list of those prices.
robo_prices = (
    robo_df.groupby(keys, dropna=False)
           .agg(Robo_Price_Count=("Price", "nunique"),
                Robo_Prices=("Price", uniq_sorted_list))
           .reset_index()
)
sf_prices = (
    sf_df.groupby(keys, dropna=False)
         .agg(SF_Price_Count=("Price", "nunique"),
              SF_Prices=("Price", uniq_sorted_list))
         .reset_index()
)

# --- Merge everything ---
# Outer joins keep keys that exist in only one of the two sources.
compare_counts = (
    robo_counts.merge(sf_counts, on=keys, how="outer")
               .merge(robo_prices, on=keys, how="outer")
               .merge(sf_prices, on=keys, how="outer")
)

# Fill NaNs (counts → 0; price lists → [])
for col in ["Robo_Count","SF_Count","Robo_Price_Count","SF_Price_Count"]:
    compare_counts[col] = compare_counts[col].fillna(0).astype(int)

for col in ["Robo_Prices","SF_Prices"]:
    compare_counts[col] = compare_counts[col].apply(lambda v: v if isinstance(v, list) else [])

# --- Price match and row-count diff ---
# Sets are compared, so list order and int-vs-float representation do not matter.
compare_counts["Price_Match"] = compare_counts.apply(
    lambda r: set(r["Robo_Prices"]) == set(r["SF_Prices"]), axis=1
)
# Positive Difference: more Robo rows than SF rows for that key.
compare_counts["Difference"] = compare_counts["Robo_Count"] - compare_counts["SF_Count"]

# --- Sort & select columns (side-by-side) ---
compare_counts = compare_counts.sort_values(by=["Product Date","Asset Name"])[[
    "Asset Name", "Product Date",
    "Robo_Count", "SF_Count",
    "Robo_Price_Count", "SF_Price_Count",
    "Robo_Prices", "SF_Prices",
    "Price_Match",
    "Difference"
]].reset_index(drop=True)

compare_counts


  if pd.api.types.is_datetime64tz_dtype(series):
  if pd.api.types.is_datetime64tz_dtype(series):


Unnamed: 0,Asset Name,Product Date,Robo_Count,SF_Count,Robo_Price_Count,SF_Price_Count,Robo_Prices,SF_Prices,Price_Match,Difference
0,הפועל ירושלים נגד עירוני נהריה,2019-10-03,19,19,2,2,"[0, 180]","[0.0, 180.0]",True,0
1,הפועל ירושלים נגד אאק אתונה,2019-10-04,1,1,1,1,[130],[130.0],True,0
2,הפועל ירושלים נגד עירוני נהריה,2019-10-04,5,5,2,2,"[40, 70]","[40.0, 70.0]",True,0
3,הפועל ירושלים נגד אאק אתונה,2019-10-05,3,3,1,1,[130],[130.0],True,0
4,הפועל ירושלים נגד עירוני נהריה,2019-10-05,9,9,2,2,"[40, 70]","[40.0, 70.0]",True,0
...,...,...,...,...,...,...,...,...,...,...
231,הפועל ירושלים נגד פרישטרי,2020-03-04,2167,2167,11,11,"[0, 20, 40, 60, 70, 90, 100, 110, 130, 190, 250]","[0.0, 20.0, 40.0, 60.0, 70.0, 90.0, 100.0, 110...",True,0
232,הפועל ירושלים נגד מכבי תל אביב,2020-03-05,206,206,15,15,"[0, 45, 75, 100, 125, 150, 240, 270, 300, 375,...","[0.0, 45.0, 75.0, 100.0, 125.0, 150.0, 240.0, ...",True,0
233,הפועל ירושלים נגד מכבי תל אביב,2020-03-06,197,197,6,6,"[0, 60, 125, 150, 270, 300]","[0.0, 60.0, 125.0, 150.0, 270.0, 300.0]",True,0
234,הפועל ירושלים נגד מכבי תל אביב,2020-03-07,32,32,3,3,"[0, 125, 300]","[0.0, 125.0, 300.0]",True,0


### Check season 21/22:

In [13]:
# --- Read the SF report and the Robo tickets file for this season ---
sf_df = pd.read_excel('New Assets Report - Gil-2025-07-07-00-07-31.xlsx')
robo_df = pd.read_excel('tickets_season_21.xlsx')

# --- Convert "Product Date" to datetimes; unparseable values become NaT ---
robo_df["Product Date"] = pd.to_datetime(robo_df["Product Date"], errors="coerce")
sf_df["Product Date"] = pd.to_datetime(sf_df["Product Date"], errors="coerce")

# --- Keep only SF rows inside the season window (both bounds inclusive) ---
# Only the SF frame is filtered; the Robo frame is used as loaded.
start_date = "2021-09-01"
end_date = "2022-06-30"
sf_df = sf_df.loc[
    lambda frame: (frame["Product Date"] >= start_date)
    & (frame["Product Date"] <= end_date)
]

# Sanity check: how many rows each source contributes
print("SF rows after filtering:", len(sf_df))
print("Robo rows:", len(robo_df))

SF rows after filtering: 60610
Robo rows: 60610


In [14]:
# --- Helpers ---
def strip_tz(series):
    """Parse ``series`` as datetimes and return it without timezone info.

    Parameters
    ----------
    series : pandas.Series
        Values convertible by ``pd.to_datetime``; entries that cannot be
        parsed become ``NaT`` (``errors="coerce"``).

    Returns
    -------
    pandas.Series
        The parsed datetimes. When the parsed result is timezone-aware the
        timezone is removed with ``tz_localize(None)``; otherwise the parsed
        result is returned unchanged.
    """
    series = pd.to_datetime(series, errors="coerce")
    # Inspect the dtype directly: ``pd.api.types.is_datetime64tz_dtype`` is
    # deprecated and emits a warning on every call in recent pandas versions.
    if isinstance(series.dtype, pd.DatetimeTZDtype):
        return series.dt.tz_localize(None)
    return series

def uniq_sorted_list(s):
    """Return the distinct non-null values of ``s`` as an ascending list."""
    distinct = pd.Series(s).dropna().unique().tolist()
    distinct.sort()
    return distinct

# --- Normalize dates (remove tz if present) ---
# Both frames need tz-naive "Product Date" values so their keys compare equal.
sf_df["Product Date"]   = strip_tz(sf_df["Product Date"])
robo_df["Product Date"] = strip_tz(robo_df["Product Date"])

keys = ["Asset Name", "Product Date"]

# Every groupby below uses dropna=False: by default pandas drops rows whose
# group key is missing (for example a "Product Date" that is NaT), which would
# silently leave those rows out of the comparison.

# --- Row counts ---
robo_counts = (
    robo_df.groupby(keys, dropna=False).size().reset_index(name="Robo_Count")
)
sf_counts = (
    sf_df.groupby(keys, dropna=False).size().reset_index(name="SF_Count")
)

# --- Price counts + lists ---
# Per key: number of distinct prices and the sorted list of those prices.
robo_prices = (
    robo_df.groupby(keys, dropna=False)
           .agg(Robo_Price_Count=("Price", "nunique"),
                Robo_Prices=("Price", uniq_sorted_list))
           .reset_index()
)
sf_prices = (
    sf_df.groupby(keys, dropna=False)
         .agg(SF_Price_Count=("Price", "nunique"),
              SF_Prices=("Price", uniq_sorted_list))
         .reset_index()
)

# --- Merge everything ---
# Outer joins keep keys that exist in only one of the two sources.
compare_counts = (
    robo_counts.merge(sf_counts, on=keys, how="outer")
               .merge(robo_prices, on=keys, how="outer")
               .merge(sf_prices, on=keys, how="outer")
)

# Fill NaNs (counts → 0; price lists → [])
for col in ["Robo_Count","SF_Count","Robo_Price_Count","SF_Price_Count"]:
    compare_counts[col] = compare_counts[col].fillna(0).astype(int)

for col in ["Robo_Prices","SF_Prices"]:
    compare_counts[col] = compare_counts[col].apply(lambda v: v if isinstance(v, list) else [])

# --- Price match and row-count diff ---
# Sets are compared, so list order and int-vs-float representation do not matter.
compare_counts["Price_Match"] = compare_counts.apply(
    lambda r: set(r["Robo_Prices"]) == set(r["SF_Prices"]), axis=1
)
# Positive Difference: more Robo rows than SF rows for that key.
compare_counts["Difference"] = compare_counts["Robo_Count"] - compare_counts["SF_Count"]

# --- Sort & select columns (side-by-side) ---
compare_counts = compare_counts.sort_values(by=["Product Date","Asset Name"])[[
    "Asset Name", "Product Date",
    "Robo_Count", "SF_Count",
    "Robo_Price_Count", "SF_Price_Count",
    "Robo_Prices", "SF_Prices",
    "Price_Match",
    "Difference"
]].reset_index(drop=True)

compare_counts


  if pd.api.types.is_datetime64tz_dtype(series):
  if pd.api.types.is_datetime64tz_dtype(series):


Unnamed: 0,Asset Name,Product Date,Robo_Count,SF_Count,Robo_Price_Count,SF_Price_Count,Robo_Prices,SF_Prices,Price_Match,Difference
0,הפועל ירושלים נגד פינאר קרשיאקה,2021-09-30,4,4,2,2,"[110, 150]","[110.0, 150.0]",True,0
1,הפועל ירושלים נגד פינאר קרשיאקה,2021-10-01,3,3,2,2,"[80, 110]","[80.0, 110.0]",True,0
2,הפועל ירושלים נגד פינאר קרשיאקה,2021-10-02,10,10,3,3,"[50, 80, 130]","[50.0, 80.0, 130.0]",True,0
3,הפועל ירושלים נגד פינאר קרשיאקה,2021-10-03,962,962,6,6,"[0, 50, 80, 110, 130, 150]","[0.0, 50.0, 80.0, 110.0, 130.0, 150.0]",True,0
4,הפועל ירושלים נגד פינאר קרשיאקה,2021-10-04,419,419,6,6,"[0, 50, 80, 110, 130, 150]","[0.0, 50.0, 80.0, 110.0, 130.0, 150.0]",True,0
...,...,...,...,...,...,...,...,...,...,...
174,הפועל ירושלים נגד הפועל חולון,2022-05-26,1873,1873,10,10,"[0, 45, 50, 65, 75, 100, 150, 200, 250, 300]","[0.0, 45.0, 50.0, 65.0, 75.0, 100.0, 150.0, 20...",True,0
175,הפועל ירושלים נגד הפועל חולון,2022-05-27,756,756,8,8,"[0, 45, 50, 75, 100, 150, 200, 300]","[0.0, 45.0, 50.0, 75.0, 100.0, 150.0, 200.0, 3...",True,0
176,הפועל ירושלים נגד הפועל חולון,2022-05-31,2766,2766,10,10,"[0, 25, 40, 45, 50, 75, 100, 150, 200, 300]","[0.0, 25.0, 40.0, 45.0, 50.0, 75.0, 100.0, 150...",True,0
177,הפועל ירושלים נגד הפועל חולון,2022-06-01,3013,3013,13,13,"[0, 25, 40, 45, 50, 75, 100, 120, 150, 200, 30...","[0.0, 25.0, 40.0, 45.0, 50.0, 75.0, 100.0, 120...",True,0


### Check season 22/23:

In [15]:
# --- Read the SF report and the Robo tickets file for this season ---
sf_df = pd.read_excel('New Assets Report - Gil-2025-07-07-00-09-39.xlsx')
robo_df = pd.read_excel('tickets_season_22.xlsx')

# --- Convert "Product Date" to datetimes; unparseable values become NaT ---
robo_df["Product Date"] = pd.to_datetime(robo_df["Product Date"], errors="coerce")
sf_df["Product Date"] = pd.to_datetime(sf_df["Product Date"], errors="coerce")

# --- Keep only SF rows inside the season window (both bounds inclusive) ---
# Only the SF frame is filtered; the Robo frame is used as loaded.
start_date = "2022-09-01"
end_date = "2023-06-30"
sf_df = sf_df.loc[
    lambda frame: (frame["Product Date"] >= start_date)
    & (frame["Product Date"] <= end_date)
]

# Sanity check: how many rows each source contributes
print("SF rows after filtering:", len(sf_df))
print("Robo rows:", len(robo_df))

SF rows after filtering: 77232
Robo rows: 77232


In [16]:
# --- Helpers ---
def strip_tz(series):
    """Parse ``series`` as datetimes and return it without timezone info.

    Parameters
    ----------
    series : pandas.Series
        Values convertible by ``pd.to_datetime``; entries that cannot be
        parsed become ``NaT`` (``errors="coerce"``).

    Returns
    -------
    pandas.Series
        The parsed datetimes. When the parsed result is timezone-aware the
        timezone is removed with ``tz_localize(None)``; otherwise the parsed
        result is returned unchanged.
    """
    series = pd.to_datetime(series, errors="coerce")
    # Inspect the dtype directly: ``pd.api.types.is_datetime64tz_dtype`` is
    # deprecated and emits a warning on every call in recent pandas versions.
    if isinstance(series.dtype, pd.DatetimeTZDtype):
        return series.dt.tz_localize(None)
    return series

def uniq_sorted_list(s):
    """Return the distinct non-null values of ``s`` as an ascending list."""
    distinct = pd.Series(s).dropna().unique().tolist()
    distinct.sort()
    return distinct

# --- Normalize dates (remove tz if present) ---
# Both frames need tz-naive "Product Date" values so their keys compare equal.
sf_df["Product Date"]   = strip_tz(sf_df["Product Date"])
robo_df["Product Date"] = strip_tz(robo_df["Product Date"])

keys = ["Asset Name", "Product Date"]

# Every groupby below uses dropna=False: by default pandas drops rows whose
# group key is missing (for example a "Product Date" that is NaT), which would
# silently leave those rows out of the comparison.

# --- Row counts ---
robo_counts = (
    robo_df.groupby(keys, dropna=False).size().reset_index(name="Robo_Count")
)
sf_counts = (
    sf_df.groupby(keys, dropna=False).size().reset_index(name="SF_Count")
)

# --- Price counts + lists ---
# Per key: number of distinct prices and the sorted list of those prices.
robo_prices = (
    robo_df.groupby(keys, dropna=False)
           .agg(Robo_Price_Count=("Price", "nunique"),
                Robo_Prices=("Price", uniq_sorted_list))
           .reset_index()
)
sf_prices = (
    sf_df.groupby(keys, dropna=False)
         .agg(SF_Price_Count=("Price", "nunique"),
              SF_Prices=("Price", uniq_sorted_list))
         .reset_index()
)

# --- Merge everything ---
# Outer joins keep keys that exist in only one of the two sources.
compare_counts = (
    robo_counts.merge(sf_counts, on=keys, how="outer")
               .merge(robo_prices, on=keys, how="outer")
               .merge(sf_prices, on=keys, how="outer")
)

# Fill NaNs (counts → 0; price lists → [])
for col in ["Robo_Count","SF_Count","Robo_Price_Count","SF_Price_Count"]:
    compare_counts[col] = compare_counts[col].fillna(0).astype(int)

for col in ["Robo_Prices","SF_Prices"]:
    compare_counts[col] = compare_counts[col].apply(lambda v: v if isinstance(v, list) else [])

# --- Price match and row-count diff ---
# Sets are compared, so list order and int-vs-float representation do not matter.
compare_counts["Price_Match"] = compare_counts.apply(
    lambda r: set(r["Robo_Prices"]) == set(r["SF_Prices"]), axis=1
)
# Positive Difference: more Robo rows than SF rows for that key.
compare_counts["Difference"] = compare_counts["Robo_Count"] - compare_counts["SF_Count"]

# --- Sort & select columns (side-by-side) ---
compare_counts = compare_counts.sort_values(by=["Product Date","Asset Name"])[[
    "Asset Name", "Product Date",
    "Robo_Count", "SF_Count",
    "Robo_Price_Count", "SF_Price_Count",
    "Robo_Prices", "SF_Prices",
    "Price_Match",
    "Difference"
]].reset_index(drop=True)

compare_counts


  if pd.api.types.is_datetime64tz_dtype(series):
  if pd.api.types.is_datetime64tz_dtype(series):


Unnamed: 0,Asset Name,Product Date,Robo_Count,SF_Count,Robo_Price_Count,SF_Price_Count,Robo_Prices,SF_Prices,Price_Match,Difference
0,הפועל ירושלים נגד דרושפאקה,2022-09-28,2,2,1,1,[90],[90.0],True,0
1,הפועל ירושלים נגד דרושפאקה,2022-09-29,35,35,5,5,"[0, 45, 60, 90, 120]","[0.0, 45.0, 60.0, 90.0, 120.0]",True,0
2,הפועל ירושלים נגד דרושפאקה,2022-09-30,77,77,6,6,"[0, 45, 60, 80, 90, 120]","[0.0, 45.0, 60.0, 80.0, 90.0, 120.0]",True,0
3,הפועל ירושלים נגד דרושפאקה,2022-10-01,30,30,5,5,"[0, 45, 60, 90, 120]","[0.0, 45.0, 60.0, 90.0, 120.0]",True,0
4,הפועל ירושלים נגד דרושפאקה,2022-10-02,690,690,7,7,"[0, 45, 60, 80, 90, 120, 280]","[0.0, 45.0, 60.0, 80.0, 90.0, 120.0, 280.0]",True,0
...,...,...,...,...,...,...,...,...,...,...
165,הפועל ירושלים נגד בני הרצליה,2023-05-21,41,41,6,6,"[50, 60, 75, 80, 90, 100]","[50.0, 60.0, 75.0, 80.0, 90.0, 100.0]",True,0
166,הפועל ירושלים נגד בני הרצליה,2023-05-22,938,938,8,8,"[0, 50, 60, 75, 80, 90, 100, 140]","[0.0, 50.0, 60.0, 75.0, 80.0, 90.0, 100.0, 140.0]",True,0
167,הפועל ירושלים נגד בני הרצליה,2023-05-23,603,603,10,10,"[0, 40, 50, 60, 75, 80, 90, 100, 230, 500]","[0.0, 40.0, 50.0, 60.0, 75.0, 80.0, 90.0, 100....",True,0
168,הפועל ירושלים נגד בני הרצליה,2023-05-24,2334,2334,12,12,"[0, 30, 35, 40, 45, 50, 60, 75, 80, 90, 100, 280]","[0.0, 30.0, 35.0, 40.0, 45.0, 50.0, 60.0, 75.0...",True,0


### Check season 23/24:

In [17]:
# --- Read the SF report and the Robo tickets file for this season ---
sf_df = pd.read_excel('New Assets Report - Gil-2025-07-07-00-11-48.xlsx')
robo_df = pd.read_excel('tickets_season_23.xlsx')

# --- Convert "Product Date" to datetimes; unparseable values become NaT ---
robo_df["Product Date"] = pd.to_datetime(robo_df["Product Date"], errors="coerce")
sf_df["Product Date"] = pd.to_datetime(sf_df["Product Date"], errors="coerce")

# --- Keep only SF rows inside the season window (both bounds inclusive) ---
# Only the SF frame is filtered; the Robo frame is used as loaded.
start_date = "2023-09-01"
end_date = "2024-06-30"
sf_df = sf_df.loc[
    lambda frame: (frame["Product Date"] >= start_date)
    & (frame["Product Date"] <= end_date)
]

# Sanity check: how many rows each source contributes
print("SF rows after filtering:", len(sf_df))
print("Robo rows:", len(robo_df))

SF rows after filtering: 44492
Robo rows: 44526


In [18]:
# Row gap between the two sources (Robo minus SF); the label states 34 is expected.
row_gap = len(robo_df) - len(sf_df)
print("Supposed to be 34 missing: ", row_gap)

Supposed to be 34 missing:  34


In [19]:
# --- Helpers ---
def strip_tz(series):
    """Parse ``series`` as datetimes and return it without timezone info.

    Parameters
    ----------
    series : pandas.Series
        Values convertible by ``pd.to_datetime``; entries that cannot be
        parsed become ``NaT`` (``errors="coerce"``).

    Returns
    -------
    pandas.Series
        The parsed datetimes. When the parsed result is timezone-aware the
        timezone is removed with ``tz_localize(None)``; otherwise the parsed
        result is returned unchanged.
    """
    series = pd.to_datetime(series, errors="coerce")
    # Inspect the dtype directly: ``pd.api.types.is_datetime64tz_dtype`` is
    # deprecated and emits a warning on every call in recent pandas versions.
    if isinstance(series.dtype, pd.DatetimeTZDtype):
        return series.dt.tz_localize(None)
    return series

def uniq_sorted_list(s):
    """Return the distinct non-null values of ``s`` as an ascending list."""
    distinct = pd.Series(s).dropna().unique().tolist()
    distinct.sort()
    return distinct

# --- Normalize dates (remove tz if present) ---
# Both frames need tz-naive "Product Date" values so their keys compare equal.
sf_df["Product Date"]   = strip_tz(sf_df["Product Date"])
robo_df["Product Date"] = strip_tz(robo_df["Product Date"])

keys = ["Asset Name", "Product Date"]

# Every groupby below uses dropna=False: by default pandas drops rows whose
# group key is missing (for example a "Product Date" that is NaT), which would
# silently leave those rows out of the comparison.

# --- Row counts ---
robo_counts = (
    robo_df.groupby(keys, dropna=False).size().reset_index(name="Robo_Count")
)
sf_counts = (
    sf_df.groupby(keys, dropna=False).size().reset_index(name="SF_Count")
)

# --- Price counts + lists ---
# Per key: number of distinct prices and the sorted list of those prices.
robo_prices = (
    robo_df.groupby(keys, dropna=False)
           .agg(Robo_Price_Count=("Price", "nunique"),
                Robo_Prices=("Price", uniq_sorted_list))
           .reset_index()
)
sf_prices = (
    sf_df.groupby(keys, dropna=False)
         .agg(SF_Price_Count=("Price", "nunique"),
              SF_Prices=("Price", uniq_sorted_list))
         .reset_index()
)

# --- Merge everything ---
# Outer joins keep keys that exist in only one of the two sources.
compare_counts = (
    robo_counts.merge(sf_counts, on=keys, how="outer")
               .merge(robo_prices, on=keys, how="outer")
               .merge(sf_prices, on=keys, how="outer")
)

# Fill NaNs (counts → 0; price lists → [])
for col in ["Robo_Count","SF_Count","Robo_Price_Count","SF_Price_Count"]:
    compare_counts[col] = compare_counts[col].fillna(0).astype(int)

for col in ["Robo_Prices","SF_Prices"]:
    compare_counts[col] = compare_counts[col].apply(lambda v: v if isinstance(v, list) else [])

# --- Price match and row-count diff ---
# Sets are compared, so list order and int-vs-float representation do not matter.
compare_counts["Price_Match"] = compare_counts.apply(
    lambda r: set(r["Robo_Prices"]) == set(r["SF_Prices"]), axis=1
)
# Positive Difference: more Robo rows than SF rows for that key.
compare_counts["Difference"] = compare_counts["Robo_Count"] - compare_counts["SF_Count"]

# --- Sort & select columns (side-by-side) ---
compare_counts = compare_counts.sort_values(by=["Product Date","Asset Name"])[[
    "Asset Name", "Product Date",
    "Robo_Count", "SF_Count",
    "Robo_Price_Count", "SF_Price_Count",
    "Robo_Prices", "SF_Prices",
    "Price_Match",
    "Difference"
]].reset_index(drop=True)

compare_counts


  if pd.api.types.is_datetime64tz_dtype(series):
  if pd.api.types.is_datetime64tz_dtype(series):


Unnamed: 0,Asset Name,Product Date,Robo_Count,SF_Count,Robo_Price_Count,SF_Price_Count,Robo_Prices,SF_Prices,Price_Match,Difference
0,הפועל ירושלים נגד הפועל גליל עליון,2023-12-12,439,439,1,1,[0],[0.0],True,0
1,הפועל ירושלים נגד הפועל גליל עליון,2023-12-13,78,78,1,1,[0],[0.0],True,0
2,הפועל ירושלים נגד הפועל גליל עליון,2023-12-14,235,235,1,1,[0],[0.0],True,0
3,הפועל ירושלים נגד הפועל גליל עליון,2023-12-15,38,38,3,3,"[0, 45, 75]","[0.0, 45.0, 75.0]",True,0
4,הפועל ירושלים נגד הפועל גליל עליון,2023-12-16,309,309,3,3,"[0, 45, 50]","[0.0, 45.0, 50.0]",True,0
...,...,...,...,...,...,...,...,...,...,...
112,הפועל ירושלים נגד הפועל חולון,2024-06-02,523,523,9,9,"[0, 50, 75, 85, 90, 100, 130, 600, 700]","[0.0, 50.0, 75.0, 85.0, 90.0, 100.0, 130.0, 60...",True,0
113,חצי גמר משחק 2: הפועל ירושלים נגד מכבי תל אביב,2024-06-04,4171,4171,17,17,"[0, 25, 50, 60, 70, 85, 90, 100, 110, 140, 170...","[0.0, 25.0, 50.0, 60.0, 70.0, 85.0, 90.0, 100....",True,0
114,חצי גמר משחק 2: הפועל ירושלים נגד מכבי תל אביב,2024-06-05,1056,1056,18,18,"[0, 35, 45, 50, 55, 60, 70, 85, 90, 100, 110, ...","[0.0, 35.0, 45.0, 50.0, 55.0, 60.0, 70.0, 85.0...",True,0
115,חצי גמר משחק 2: הפועל ירושלים נגד מכבי תל אביב,2024-06-06,1685,1685,19,19,"[0, 30, 50, 55, 60, 70, 85, 90, 100, 110, 140,...","[0.0, 30.0, 50.0, 55.0, 60.0, 70.0, 85.0, 90.0...",True,0
