In [1]:
# pip install holidays python-dateutil
import pandas as pd
import holidays
from datetime import timedelta
from dateutil.easter import easter as easter_western, EASTER_ORTHODOX

def build_serbia_holiday_flags_strict(start_date: str, end_date: str, add_windows=True, window_days=2) -> pd.DataFrame:
    """
    Uses the 'holidays' library for Serbia (SRB) to generate public holiday flags with observed rules.
    Adds Orthodox Easter related days via dateutil (demand-relevant).
    """
    print("ENTER function", start_date, end_date) 
    # 1) Date index
    idx = pd.date_range(start=start_date, end=end_date, freq="D", tz=None)
    df = pd.DataFrame({"date": idx})

    # 2) Official public holidays (with observed rules)
    srb_holidays = holidays.country_holidays("SRB", years=sorted(df["date"].dt.year.unique()))

    # Public holiday flag (official/observed)
    df["public_holiday"] = df["date"].dt.date.map(lambda d: 1 if d in srb_holidays else 0)

    # Optional: separate columns for selected named holidays
    names_map = {
        "New Year": "new_year",
        "Statehood Day of the Republic of Serbia": "statehood_day",
        "Labour Day": "labor_day",
        "Armistice Day": "armistice_day",
        "Orthodox Christmas Day": "orthodox_christmas",
    }
    for n_readable, col in names_map.items():
        df[col] = df["date"].dt.date.map(lambda d: 1 if str(srb_holidays.get(d, "")).startswith(n_readable) else 0)

    # 3) Orthodox Easter-related flags (not all are statutory, but useful for demand)
    def orthodox_easter(year: int):
        # dateutil returns datetime.date for Orthodox Easter
        return easter_western(year, method=EASTER_ORTHODOX)

    df["year"] = df["date"].dt.year
    df["orthodox_easter_sunday"] = 0
    df["orthodox_easter_monday"] = 0
    df["orthodox_good_friday"] = 0
    df["orthodox_holy_saturday"] = 0

    for y in df["year"].unique():
        e = orthodox_easter(int(y))  # datetime.date
        mask_y = df["year"] == y
        df.loc[mask_y & (df["date"].dt.date == e), "orthodox_easter_sunday"] = 1
        df.loc[mask_y & (df["date"].dt.date == (e + timedelta(days=1))), "orthodox_easter_monday"] = 1
        df.loc[mask_y & (df["date"].dt.date == (e - timedelta(days=2))), "orthodox_good_friday"] = 1
        df.loc[mask_y & (df["date"].dt.date == (e - timedelta(days=1))), "orthodox_holy_saturday"] = 1

    # 4) Optional +/- windows around key flags
    if add_windows:
        for col in [
            "public_holiday",
            "orthodox_easter_sunday",
            "orthodox_easter_monday",
            "orthodox_good_friday",
            "orthodox_holy_saturday",
        ]:
            wincol = f"{col}_window"
            flag = df[col].values.astype(int)
            win = pd.Series(0, index=df.index)
            idxs = df.index[flag == 1]
            for i in idxs:
                start_i = max(0, i - window_days)
                end_i = min(len(df) - 1, i + window_days)
                win.iloc[start_i : end_i + 1] = 1
            df[wincol] = win.values.astype(int)

    # 5) Calendar features
    df["dow"] = df["date"].dt.weekday
    df["weekofyear"] = df["date"].dt.isocalendar().week.astype(int)
    df["month"] = df["date"].dt.month
    df["quarter"] = df["date"].dt.quarter

    # 6) Seasonal proxies (optional)
    df["cold_flu_season"] = ((df["month"] >= 10) | (df["month"] <= 3)).astype(int)
    df["pollen_season"] = df["month"].isin([4, 5, 6, 8, 9]).astype(int)
    print("EXIT function OK")
    return df.drop(columns=["year"])


In [2]:
import pandas as pd

# 1) Load your sales to get the date rangeE:/pharma_forcast/data/salesweekly_old.csv
sales = pd.read_csv("E:/pharma_forcast/data/salesdaily.csv", parse_dates=["datum"])  # adjust filename/column as needed
start = sales["datum"].min().date().isoformat()
end = sales["datum"].max().date().isoformat()

# 2) Build daily holiday flags for that range
hol_daily = build_serbia_holiday_flags_strict(start, end, add_windows=True, window_days=1)

hol_daily.head()

ENTER function 2014-01-02 2019-10-08
EXIT function OK


Unnamed: 0,date,public_holiday,new_year,statehood_day,labor_day,armistice_day,orthodox_christmas,orthodox_easter_sunday,orthodox_easter_monday,orthodox_good_friday,...,orthodox_easter_sunday_window,orthodox_easter_monday_window,orthodox_good_friday_window,orthodox_holy_saturday_window,dow,weekofyear,month,quarter,cold_flu_season,pollen_season
0,2014-01-02,1,0,0,0,0,0,0,0,0,...,0,0,0,0,3,1,1,1,1,0
1,2014-01-03,0,0,0,0,0,0,0,0,0,...,0,0,0,0,4,1,1,1,1,0
2,2014-01-04,0,0,0,0,0,0,0,0,0,...,0,0,0,0,5,1,1,1,1,0
3,2014-01-05,0,0,0,0,0,0,0,0,0,...,0,0,0,0,6,1,1,1,1,0
4,2014-01-06,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,2,1,1,1,0


In [3]:
hol_daily.columns    

Index(['date', 'public_holiday', 'new_year', 'statehood_day', 'labor_day',
       'armistice_day', 'orthodox_christmas', 'orthodox_easter_sunday',
       'orthodox_easter_monday', 'orthodox_good_friday',
       'orthodox_holy_saturday', 'public_holiday_window',
       'orthodox_easter_sunday_window', 'orthodox_easter_monday_window',
       'orthodox_good_friday_window', 'orthodox_holy_saturday_window', 'dow',
       'weekofyear', 'month', 'quarter', 'cold_flu_season', 'pollen_season'],
      dtype='object')

In [4]:
import pandas as pd

# Load sales daily
sales = pd.read_csv("E:/pharma_forcast/data/salesdaily.csv")

# Convert datum to datetime (this works if it's like "2014-01-02" or "20140102")
sales["datum"] = pd.to_datetime(sales["datum"], errors="coerce")

# Ensure hol_daily["date"] is datetime and rename to 'datum' for merging
hol_daily["datum"] = pd.to_datetime(hol_daily["date"])
hol_daily = hol_daily.drop(columns=["date"])

# Merge
merged = sales.merge(hol_daily, on="datum", how="left")

merged.head()

Unnamed: 0,datum,M01AB,M01AE,N02BA,N02BE,N05B,N05C,R03,R06,Year,...,orthodox_easter_sunday_window,orthodox_easter_monday_window,orthodox_good_friday_window,orthodox_holy_saturday_window,dow,weekofyear,month,quarter,cold_flu_season,pollen_season
0,2014-01-02,0.0,3.67,3.4,32.4,7.0,0.0,0.0,2.0,2014,...,0,0,0,0,3,1,1,1,1,0
1,2014-01-03,8.0,4.0,4.4,50.6,16.0,0.0,20.0,4.0,2014,...,0,0,0,0,4,1,1,1,1,0
2,2014-01-04,2.0,1.0,6.5,61.85,10.0,0.0,9.0,1.0,2014,...,0,0,0,0,5,1,1,1,1,0
3,2014-01-05,4.0,3.0,7.0,41.1,8.0,0.0,3.0,0.0,2014,...,0,0,0,0,6,1,1,1,1,0
4,2014-01-06,5.0,1.0,4.5,21.7,16.0,2.0,6.0,2.0,2014,...,0,0,0,0,0,2,1,1,1,0


In [5]:
merged.to_csv("holidays.csv", index=False)