In [1]:

import pandas as pd, matplotlib.pyplot as plt, matplotlib.patches as patches, re, calendar
from collections import defaultdict
from matplotlib.dates import DayLocator, DateFormatter
from pathlib import Path

# =========================
# Settings
# =========================
# Workbook to read; only its first sheet is loaded.
EXCEL_PATH = "SM_courses_information.xlsx"

# Folder that receives one PNG per plotted month.
OUT_DIR = Path("gantt_out")

# (year, month) pairs to render, one chart each.
MONTHS_TO_PLOT = [
    (2025, 9),
    (2025, 10),
    (2025, 11),
    (2025, 12),
    (2026, 1),
]

# Background colours for the per-course bands; plot_month cycles through
# them when there are more rows than entries.
PALETTE = [
    "#CDB4DB", "#B5C7F3", "#A2CFFE",
    "#89C2D9", "#61A5C2", "#48BFE3",
    "#56CFE1", "#80ED99", "#B5E48C",
]



# Colour of the vertical event lines (and point markers) per event kind.
COLORS = dict(
    online="#646464",      # online class dates
    inperson="#003caa",    # in-person class dates
    assignment="#7B0450",  # assignment dates (line + triangle marker)
    exam="#EF5391",        # exam dates (line + circle marker)
)

# Matches a date written either as DD.MM.YYYY or as YYYY-MM-DD.
DATE_RE = re.compile(r"(\d{2}\.\d{2}\.\d{4}|\d{4}-\d{2}-\d{2})")

# =========================
# Helpers
# =========================
def _extract_dates(cell):
    """Return the distinct dates found in one spreadsheet cell, sorted.

    The cell is converted to text. Every ``DD.MM.YYYY`` / ``YYYY-MM-DD``
    occurrence is parsed; if the text contains none, it is split on commas
    and each piece is handed to ``pd.to_datetime`` as a best effort.
    Dotted tokens are parsed day-first. Pieces that cannot be parsed, or
    that parse to ``NaT``, are skipped.

    Args:
        cell: Raw cell value (string, timestamp, NaN, None, ...).

    Returns:
        A sorted list of midnight-normalized ``pd.Timestamp`` values with
        duplicates removed; empty when the cell holds no usable date.
    """
    if pd.isna(cell):
        return []
    text = str(cell).strip()
    if text in {"", "-", "—", "–", "None", "nan", "NaT"}:
        return []

    tokens = DATE_RE.findall(text) or [part.strip() for part in text.split(",")]
    found = set()
    for token in tokens:
        if not token:
            # An empty piece (e.g. from a trailing comma) would parse to NaT.
            continue
        try:
            stamp = pd.to_datetime(token, dayfirst="." in token)
        except (ValueError, TypeError, OverflowError):
            # Free text that is not a date: ignore this piece only.
            continue
        if pd.isna(stamp):
            # "nan"/"NaT"-like pieces parse without error but carry no date;
            # keeping them would make an empty course look non-empty.
            continue
        found.add(stamp.normalize())
    return sorted(found)

def _read_excel_records(path: str) -> list:
    """Load the first sheet of a workbook and group its dates by course.

    Column headers are stripped of surrounding whitespace. Blank
    ``Course_Name`` cells inherit the closest name above them (forward
    fill); rows that still have no name, such as rows above the first named
    course, are skipped instead of being collected under a course literally
    called "nan". Any of the four date columns may be missing, in which case
    it is treated as empty.

    Args:
        path: Location of the Excel workbook.

    Returns:
        One dict per course that has at least one date, in order of first
        appearance, with the keys ``Course``, ``OnlineDates``,
        ``InPersonDates``, ``Assignments`` and ``Exams``; each date entry
        is a sorted list of timestamps.
    """
    # Context manager so the workbook handle is released even if reading fails.
    with pd.ExcelFile(path) as xls:
        df = pd.read_excel(xls, sheet_name=xls.sheet_names[0])
    df.columns = [str(c).strip() for c in df.columns]

    if "Course_Name" not in df.columns:
        # Without course names no row can be attributed to a course.
        return []
    course_names = df["Course_Name"].ffill()

    # (output key, spreadsheet column) in the order the output dict lists them.
    fields = (
        ("OnlineDates", "Online_Classes_Dates"),
        ("InPersonDates", "InPerson_Classes_Dates"),
        ("Assignments", "Assignments_Dates"),
        ("Exams", "EXAM"),
    )
    cell_columns = [
        df[col] if col in df.columns else [None] * len(df)
        for _, col in fields
    ]

    grouped = defaultdict(lambda: {key: set() for key, _ in fields})
    for raw_name, *cells in zip(course_names, *cell_columns):
        if pd.isna(raw_name):
            continue
        name = str(raw_name).strip()
        if not name:
            continue
        bucket = grouped[name]
        for (key, _), cell in zip(fields, cells):
            bucket[key].update(_extract_dates(cell))

    return [
        {"Course": name, **{key: sorted(bucket[key]) for key, _ in fields}}
        for name, bucket in grouped.items()
        if any(bucket.values())
    ]

def _month_bounds(year, month):
    """Return the first and last day of the given month as timestamps."""
    prefix = f"{year}-{month:02d}"
    month_start = pd.Timestamp(prefix + "-01")
    # monthrange yields (weekday of day 1, number of days in the month).
    _, n_days = calendar.monthrange(year, month)
    month_end = pd.Timestamp(f"{prefix}-{n_days}")
    return month_start, month_end

def _date_formatter():
    """Return a tick formatter that labels dates as ``day.month.``.

    The unpadded directives ``%-d`` / ``%-m`` are a platform-specific
    ``strftime`` extension. ``DateFormatter`` does not apply the format
    string until ticks are rendered, so wrapping its constructor in ``try``
    cannot detect an unsupported directive. The format is therefore probed
    with ``strftime`` here, and the zero-padded ``%d.%m.`` form is used when
    the unpadded one is rejected or not rendered as expected.
    """
    from datetime import date

    unpadded = "%-d.%-m."
    try:
        supported = date(2000, 1, 2).strftime(unpadded) == "2.1."
    except ValueError:
        supported = False
    return DateFormatter(unpadded if supported else "%d.%m.")

# =========================
# Graph
# =========================
def plot_month(records, year, month, out_file):
    """Render one calendar month of course activity as a Gantt-style PNG.

    Every course with at least one date inside the month gets its own row,
    ordered by course name: a translucent band from its earliest to its
    latest in-month date, plus one vertical line per event. Exams get an
    additional circle and assignments a triangle at the row centre. A month
    without any events still produces an (empty) chart.

    Args:
        records: Iterable of dicts with the keys ``Course``,
            ``OnlineDates``, ``InPersonDates``, ``Assignments`` and
            ``Exams`` (date lists comparable with ``pd.Timestamp``).
        year: Calendar year of the month to draw.
        month: Calendar month (1-12) to draw.
        out_file: Destination image path (``Path`` or string); missing
            parent folders are created.
    """
    out_file = Path(out_file)
    start, end = _month_bounds(year, month)
    bar_h = 0.7
    half = bar_h / 2

    # (record key, COLORS key, line width, point marker, marker size),
    # listed in draw order so later layers paint over earlier ones.
    layers = (
        ("OnlineDates", "online", 3, None, None),
        ("Exams", "exam", 4, "o", 10),
        ("Assignments", "assignment", 4, "^", 12),
        ("InPersonDates", "inperson", 4, None, None),
    )

    fig, ax = plt.subplots(figsize=(20, 9))
    try:
        labels = []
        for rec in sorted(records, key=lambda r: r["Course"]):
            # Per layer, the dates of this course that fall inside the month.
            visible = [
                [d for d in rec[key] if start <= d <= end]
                for key, *_ in layers
            ]
            markers = [d for dates in visible for d in dates]
            if not markers:
                continue

            y = len(labels)  # row index; also selects the band colour
            labels.append(rec["Course"])
            block_start, block_end = min(markers), max(markers)
            ax.add_patch(patches.Rectangle(
                (block_start, y - half), block_end - block_start, bar_h,
                linewidth=0, facecolor=PALETTE[y % len(PALETTE)], alpha=0.5))

            for dates, (_key, kind, width, marker, size) in zip(visible, layers):
                colour = COLORS[kind]
                for x in dates:
                    ax.vlines(x, y - half, y + half, colors=colour, linewidth=width)
                    if marker is not None:
                        ax.plot(x, y, marker=marker, markersize=size, color=colour,
                                markeredgecolor=colour, markeredgewidth=0.6)

        ax.set_xlim(start, end)
        ax.set_ylim(-1, max(1, len(labels)))
        ax.set_yticks(range(len(labels)))
        ax.set_yticklabels(labels)
        ax.set_title(f"Gantt — {start.strftime('%B %Y')}")
        ax.set_xlabel("Date")
        ax.set_ylabel("Course")
        ax.xaxis.set_major_locator(DayLocator(interval=1))
        ax.xaxis.set_major_formatter(_date_formatter())
        ax.tick_params(axis='x', labelrotation=45)
        ax.grid(True, axis="x", linestyle="--", alpha=0.35)

        # Work on this figure explicitly rather than pyplot's "current" one.
        fig.tight_layout()
        out_file.parent.mkdir(parents=True, exist_ok=True)
        fig.savefig(out_file, dpi=200)
    finally:
        # pyplot keeps every open figure registered; always release it, even
        # when drawing or saving fails part-way through.
        plt.close(fig)



def main():
    """Read the course workbook and write one chart per configured month."""
    course_records = _read_excel_records(EXCEL_PATH)
    for year, month in MONTHS_TO_PLOT:
        target = OUT_DIR / f"gantt_{year}-{month:02d}.png"
        plot_month(course_records, year, month, target)
    print(f"✅ Done: {OUT_DIR.resolve()}")


if __name__ == "__main__":
    main()


✅ Done: /Users/baskaklimek/Documents/gantt/gantt_out
