In [10]:
# -*- coding: utf-8 -*-
"""
Assignment pipeline using min-cost flow.

Features:
- Ordered and weighted preference modes.
- Input validation and deterministic behavior.
- Optional interpretation of weighted prefs as "higher is better".
- Optional inclusion of an "__NA__" bucket when capacity is insufficient.
- CSV outputs plus quick satisfaction statistics.
- IPython display fallback to plain text when not available.
"""
from __future__ import annotations

from typing import Dict, Iterable, List, Optional, Tuple

import pandas as pd

# Notebook-wide pandas display configuration, applied in the listed order.
# Row, column and cell-text limits are lifted (None); the display width is 0.
for _option_name, _option_value in (
    ("display.max_rows", None),
    ("display.max_columns", None),
    ("display.width", 0),
    ("display.max_colwidth", None),
):
    pd.set_option(_option_name, _option_value)

from hugarian_method.hugarian_method import *

if __name__ == "__main__":
    # Run both assignment variants, report their costs and output files, show
    # the resulting tables, then print quick satisfaction statistics.
    #
    # resolve_data_dir, run_both and satisfaction_stats are not defined in this
    # cell; they are expected to come from the wildcard import above.

    # Imported explicitly so the warning path at the bottom of this block does
    # not depend on the wildcard import happening to re-export ``warnings``.
    import warnings

    DATA_DIR = resolve_data_dir()
    results = run_both(
        data_dir=DATA_DIR,
        rank_cost=None,
        unweighted_penalty=None,
        weighted_penalty=10.0,
        unassigned_label="__NA__",
        write_outputs=True,
        higher_is_better=False,  # set True if weights mean "preference" (higher = better)
    )
    print("Cost (unweighted):", results["cost_unweighted"])
    print("Cost (weighted):  ", results["cost_weighted"])

    # File names only (not full paths), listed as: unweighted student/project,
    # then weighted student/project. The " , " separator reproduces the
    # original console output exactly.
    written_names = [
        str(results[paths_key][out_key].name)
        for paths_key in ("paths_unweighted", "paths_weighted")
        for out_key in ("student_out", "project_out")
    ]
    print("Written:", " , ".join(written_names))

    # IPython is optional; fall back to plain text if it cannot be imported.
    try:
        from IPython.display import display  # type: ignore
    except ImportError:
        display = None  # type: ignore

    def _show(df: pd.DataFrame, title: str) -> None:
        """
        Print a titled banner, then show ``df``.

        Uses IPython's ``display`` when it was importable; otherwise prints the
        frame as plain text without the index column.
        """
        print(f"\n=== {title} ===")
        if display:
            display(df)  # type: ignore
        else:
            print(df.to_string(index=False))

    _show(results["students_unweighted"], "Unweighted — Students")
    _show(results["projects_unweighted"], "Unweighted — Projects")
    _show(results["students_weighted"], "Weighted — Students")
    _show(results["projects_weighted"], "Weighted — Projects")

    # Quick satisfaction stats. This is best-effort: any failure is reported as
    # a RuntimeWarning instead of aborting the cell. Both variants share one
    # ``try``, so a failure on the unweighted stats also skips the weighted ones.
    try:
        print("\n=== Stats — Unweighted ===")
        print(satisfaction_stats(results["students_unweighted"]).to_string(index=False))
        print("\n=== Stats — Weighted ===")
        print(satisfaction_stats(results["students_weighted"]).to_string(index=False))
    except Exception as exc:
        warnings.warn(f"Could not compute stats: {exc}", RuntimeWarning)


Cost (unweighted): 14.0
Cost (weighted):   5.563000000000001
Written: assignment_student_unweighted.csv , assignment_project_unweighted.csv , assignment_student_weighted.csv , assignment_project_weighted.csv

=== Unweighted — Students ===


Unnamed: 0,student,project_id,choice_rank,choice_weight,project_label,initial_choice
0,Aaron HUMBERT,P1,1,0.1,Data Cleaning,1
1,Adam FOURNIER,P5,1,0.1,Time Series Forecasting,1
2,Agathe DUPUIS,P10,1,0.1,Computer Vision,1
3,Alice MARTIN,P1,2,0.133,Data Cleaning,2
4,Amandine RENARD,P9,1,0.1,Web Analytics,1
5,Ambre FABRE,P4,1,0.1,NLP Chatbot,1
6,Anaïs LÉFÈVRE,P4,1,0.1,NLP Chatbot,1
7,Antoine PERROT,P2,1,0.1,Recommender System,1
8,Arthur DAVID,P7,1,0.1,Reinforcement Learning,1
9,Aya MARTINEZ,P10,1,0.1,Computer Vision,1



=== Unweighted — Projects ===


Unnamed: 0,project_label,project_id,effectif,students
0,Data Cleaning,P1,6,Aaron HUMBERT;Alice MARTIN;Eliott CHARPENTIER;Lina MOREL;Lucas BERNARD;Lucie NOËL
1,Recommender System,P2,6,Antoine PERROT;Emma DUBOIS;Ethan GIRARD;Hugo THOMAS;Marius MATHIEU;Nina BARBIER
2,Image Classification,P3,6,Iris BLANCHARD;Louis DURAND;Léa RICHARD;Manon LEROY;Maël ANDRÉ;Valentin CHARLES
3,NLP Chatbot,P4,6,Ambre FABRE;Anaïs LÉFÈVRE;Chloé ROBERT;Gabriel PETIT;Maxime MULLER;Romane GONZALEZ
4,Time Series Forecasting,P5,6,Adam FOURNIER;Camille SIMON;Jeanne BERTRAND;Nathan MOREAU;Noé ROUX;Zoé VINCENT
5,Anomaly Detection,P6,6,Mila DUPONT;Nino OLIVIER;Pauline GAUTHIER;Raphaël MERCIER;Salomé COUSIN;Timéo PIRES
6,Reinforcement Learning,P7,6,Arthur DAVID;Inès GARCIA;Jules LAURENT;Sarah LEFEBVRE;Tom MICHEL;Émile REY
7,Optimization Engine,P8,5,Justine LOPES;Marion DUMAS;Victor LAMBERT;Yanis RENAUD;Élise BONNET
8,Web Analytics,P9,3,Amandine RENARD;Noa NAVARRO;Sacha FRANÇOIS
9,Computer Vision,P10,3,Agathe DUPUIS;Aya MARTINEZ;Oscar BOYER



=== Weighted — Students ===


Unnamed: 0,student,project_id,choice_rank,choice_weight,project_label,initial_choice
0,Adam FOURNIER,P5,1,0.1,Time Series Forecasting,1:0.100
1,Agathe DUPUIS,P10,1,0.1,Computer Vision,1:0.100
2,Alice MARTIN,P1,2,0.133,Data Cleaning,2:0.133
3,Amandine RENARD,P9,1,0.1,Web Analytics,1:0.100
4,Ambre FABRE,P4,1,0.1,NLP Chatbot,1:0.100
5,Anaïs LÉFÈVRE,P4,1,0.1,NLP Chatbot,1:0.100
6,Arthur DAVID,P7,1,0.1,Reinforcement Learning,1:0.100
7,Aya MARTINEZ,P10,1,0.1,Computer Vision,1:0.100
8,Baptiste LEGRAND,P11,1,0.1,Graph Mining,1:0.100
9,Camille SIMON,P5,2,0.133,Time Series Forecasting,2:0.133



=== Weighted — Projects ===


Unnamed: 0,project_label,project_id,effectif,students
0,Data Cleaning,P1,4,Alice MARTIN;Lina MOREL;Lucas BERNARD;Lucie NOËL
1,Recommender System,P2,1,Emma DUBOIS
2,Image Classification,P3,3,Louis DURAND;Léa RICHARD;Manon LEROY
3,NLP Chatbot,P4,6,Ambre FABRE;Anaïs LÉFÈVRE;Hugo THOMAS;Maxime MULLER;Maël ANDRÉ;Valentin CHARLES
4,Time Series Forecasting,P5,6,Adam FOURNIER;Camille SIMON;Jeanne BERTRAND;Nathan MOREAU;Noé ROUX;Zoé VINCENT
5,Anomaly Detection,P6,9,Chloé ROBERT;Ethan GIRARD;Gabriel PETIT;Mila DUPONT;Nino OLIVIER;Pauline GAUTHIER;Raphaël MERCIER;Salomé COUSIN;Timéo PIRES
6,Reinforcement Learning,P7,6,Arthur DAVID;Inès GARCIA;Jules LAURENT;Marion DUMAS;Sarah LEFEBVRE;Tom MICHEL
7,Optimization Engine,P8,5,Justine LOPES;Victor LAMBERT;Yanis RENAUD;Élise BONNET;Émile REY
8,Web Analytics,P9,3,Amandine RENARD;Noa NAVARRO;Sacha FRANÇOIS
9,Computer Vision,P10,3,Agathe DUPUIS;Aya MARTINEZ;Oscar BOYER



=== Stats — Unweighted ===
 n  assigned  unassigned  median_rank   p_top1  p_top3
62        62           0          1.0 0.806452     1.0

=== Stats — Weighted ===
 n  assigned  unassigned  median_rank   p_top1   p_top3
55        55           0          1.0 0.727273 0.945455


In [11]:
# -*- coding: utf-8 -*-
# Encoding declaration to ensure UTF-8 compatibility across environments.

if __name__ == "__main__":
    # Rebuild both min-cost flow networks (unweighted and weighted), solve them,
    # export each model, render one picture per variant, and print where the
    # artifacts were written.
    #
    # The following names are not defined in this cell and must already be
    # available in the current namespace (e.g. imported from the assignment
    # pipeline by an earlier cell):
    # - resolve_data_dir
    # - load_projects_df
    # - load_choices_df
    # - build_graph_unweighted
    # - build_graph_weighted
    # - solve_min_cost
    # - export_graph_models
    # - visualize_assignment_graph

    # Imported locally so the ``Path`` annotations below resolve without
    # relying on another module re-exporting the name.
    from pathlib import Path

    # Resolve data directory locally (do not rely on another module's global).
    DATA_DIR = resolve_data_dir()
    EXPORT_DIR = (DATA_DIR / "exports").resolve()  # Where artifacts are stored.

    def _pretty_path(path: Path, base: Path) -> str:
        """
        Return ``path`` relative to ``base`` if possible; otherwise just the
        file name. This avoids leaking absolute directories in console output.
        """
        try:
            return str(path.relative_to(base))
        except ValueError:
            # relative_to raises ValueError when ``path`` is not under ``base``.
            return path.name

    def _solve_and_export(graph, meta, prefix: str, image_name: str, title: str):
        """
        Solve one network, export it, and render its assignment picture.

        Calls, in this order, ``solve_min_cost(graph)``,
        ``export_graph_models(...)`` with the given file ``prefix``, and
        ``visualize_assignment_graph(...)`` targeting ``EXPORT_DIR / image_name``.

        Returns a ``(flow, cost, export_paths, viz_path)`` tuple holding exactly
        what those three calls returned.
        """
        flow, cost = solve_min_cost(graph)
        export_paths = export_graph_models(
            graph,
            meta,
            flow,
            EXPORT_DIR,
            prefix=prefix,
        )
        viz_path = visualize_assignment_graph(
            flow,
            meta,
            EXPORT_DIR / image_name,
            title=title,
        )
        return flow, cost, export_paths, viz_path

    def _print_exports(heading: str, export_paths, viz_path) -> None:
        """Print one variant's exported files, shown relative to DATA_DIR."""
        print(f"\n{heading}")
        for key, path in export_paths.items():
            print(f"  {key}: {_pretty_path(path, DATA_DIR)}")
        print("  viz:", _pretty_path(viz_path, DATA_DIR))

    # Load the project catalog and the student choices from CSV files; the
    # choices loader also receives the list of known project ids.
    prj_df = load_projects_df(DATA_DIR / "projects.csv")
    ch_df = load_choices_df(
        DATA_DIR / "student-choices.csv",
        prj_df["id"].tolist(),
    )

    # --- Unweighted: rebuild graph/flow for export ---
    g_unw, meta_unw = build_graph_unweighted(
        ch_df,
        prj_df,
        rank_cost=None,  # None: let the builder pick its default rank costs.
        penalty=None,  # None: let the builder pick its default penalty.
        unassigned_label="__NA__",  # Label of the "unassigned" bucket.
    )
    # The total cost is computed but not used in this cell.
    flow_unw, _cost_unw, paths_unw, vis_unw = _solve_and_export(
        g_unw,
        meta_unw,
        prefix="unweighted_model",
        image_name="unweighted_assignment.png",
        title="Assignment (Unweighted)",
    )

    # --- Weighted: rebuild graph/flow for export ---
    g_w, meta_w = build_graph_weighted(
        ch_df,
        prj_df,
        penalty=10.0,  # Same penalty value the weighted run uses elsewhere in this notebook.
        unassigned_label="__NA__",  # Same bucket label as the unweighted graph.
    )
    flow_w, _cost_w, paths_w, vis_w = _solve_and_export(
        g_w,
        meta_w,
        prefix="weighted_model",
        image_name="weighted_assignment.png",
        title="Assignment (Weighted)",
    )

    # Print a concise summary of exported file locations (relative to DATA_DIR).
    _print_exports("Exports (unweighted):", paths_unw, vis_unw)
    _print_exports("Exports (weighted):", paths_w, vis_w)



Exports (unweighted):
  graphml: exports\unweighted_model.graphml
  gexf: exports\unweighted_model.gexf
  gpickle: exports\unweighted_model.gpickle
  json: exports\unweighted_model.json
  csv: exports\unweighted_model_flow_edges.csv
  viz: exports\unweighted_assignment.png

Exports (weighted):
  graphml: exports\weighted_model.graphml
  gexf: exports\weighted_model.gexf
  gpickle: exports\weighted_model.gpickle
  json: exports\weighted_model.json
  csv: exports\weighted_model_flow_edges.csv
  viz: exports\weighted_assignment.png


In [12]:
# -*- coding: utf-8 -*-
"""
Minimal batch for the 3 projects / 3 students sample.

- Reads ONLY: data/3_sample/3_projects.csv and
              data/3_sample/3_student-choices.csv
- Runs both variants (unweighted + weighted) WITHOUT renaming/copying files
- Writes outputs into data/3_sample/
- Exports models and visualizations into data/3_sample/exports
"""

if __name__ == "__main__":
    # Launch the 3x3 sample batch (see the docstring at the top of this cell
    # for the files it reads and writes).
    # run_sample_3x3 is not defined in this cell: it must already be present in
    # the namespace — presumably via the notebook's earlier wildcard import;
    # TODO confirm. On a fresh kernel, run that import cell first.
    run_sample_3x3()



[3x3] Done.
 - Cost (unweighted): 0.0
 - Cost (weighted)  : 0.8999999999999999
 - Files written:
   • assignment_student_unweighted.csv
   • assignment_project_unweighted.csv
   • assignment_student_weighted.csv
   • assignment_project_weighted.csv
   • Exports in: exports
