In [None]:
import pandas as pd

from collections import defaultdict

import gurobipy as gp
from gurobipy import Model, GRB, quicksum, Env

import re
from math import ceil

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
df = pd.read_csv('/content/drive/MyDrive/IEOR4004 Project 2/games.csv')

In [None]:
# check the dataframe
df.head()

Unnamed: 0,Date,Visitor,PTS,Home,PTS.1,Attend.,LOG,Arena,Notes
0,"Sat, Nov 01, 2025",Golden State Warriors,,Boston Celtics,,19000,7:30 PM,TD Garden,
1,"Sat, Nov 01, 2025",Los Angeles Lakers,,New York Knicks,,19400,7:30 PM,Madison Square Garden,
2,"Sat, Nov 01, 2025",Denver Nuggets,,Brooklyn Nets,,17500,7:30 PM,Barclays Center,
3,"Sat, Nov 01, 2025",Phoenix Suns,,Philadelphia 76ers,,19650,7:30 PM,Wells Fargo Center,
4,"Sat, Nov 01, 2025",Houston Rockets,,Toronto Raptors,,19600,7:30 PM,Scotiabank Arena,


In [None]:
# Standardize the column names
needed_map = {
    "Date": "date",
    "Visitor": "away_team",
    "Home": "home_team",
}
df = df.rename(columns=needed_map)

In [None]:
# Check how many times are there in a day
df['LOG'].unique()

array(['7:30 PM'], dtype=object)

In [None]:
# Convert the textual dates to plain `datetime.date` objects; unparseable
# values raise instead of silently becoming NaT.
df["date"] = pd.to_datetime(df["date"], errors="raise").dt.date

# Team names: force to str and trim surrounding whitespace so that the same
# team always maps to the same key.
for team_col in ("home_team", "away_team"):
    df[team_col] = df[team_col].astype(str).str.strip()

# Every team that appears as host or guest, in alphabetical order.
teams = sorted(set(df["home_team"]).union(df["away_team"]))

In [None]:
# Collect, in a single pass over the games, the four per-team summaries:
#   (a) home_dates[i]        - sorted distinct dates on which team i hosted
#   (b) home_vs_count[i][j]  - number of times team i hosted team j
#   (c) away_vs_count[i][j]  - number of times team i visited team j
#                              (i.e. the number of times j hosted i)
#   (d) away_dates[i]        - sorted distinct dates on which team i was the guest
home_date_sets = {t: set() for t in teams}
away_date_sets = {t: set() for t in teams}
home_vs_count = {t: defaultdict(int) for t in teams}
away_vs_count = {t: defaultdict(int) for t in teams}

for day, host, guest in zip(df["date"], df["home_team"], df["away_team"]):
    home_date_sets[host].add(day)
    away_date_sets[guest].add(day)
    # A row listing the same team on both sides is not counted as a pairing.
    if host != guest:
        home_vs_count[host][guest] += 1
        away_vs_count[guest][host] += 1

home_dates = {t: sorted(home_date_sets[t]) for t in teams}
away_dates = {t: sorted(away_date_sets[t]) for t in teams}


## Part 1

Print information for each team

In [None]:
def print_team_summary(i):
    """Print the Part 1 summary (a)-(d) for team ``i``.

    Reads the notebook-level dictionaries ``home_dates``, ``home_vs_count``,
    ``away_vs_count`` and ``away_dates``; prints "None" for any empty section.
    """
    def _date_line(days):
        # Comma-separated dates, or the literal "None" when the list is empty.
        return "    " + (", ".join(str(day) for day in days) if days else "None")

    hosted = home_vs_count[i]
    visited = away_vs_count[i]

    print(f"\nTEAM: {i}")

    # (a)
    print("(a) Home dates:")
    print(_date_line(home_dates[i]))

    # (b)
    print("(b) # times HOSTED each opponent:")
    if not hosted:
        print("    None")
    for j in sorted(hosted):
        print(f"    vs {j} at HOME: {hosted[j]}")

    # (c)
    print("(c) # times VISITED each opponent:")
    if not visited:
        print("    None")
    for j in sorted(visited):
        print(f"    at {j} (AWAY): {visited[j]}")

    # (d)
    print("(d) Away dates:")
    print(_date_line(away_dates[i]))

print("===== Q1 Summaries =====")
for team in teams:
    print_team_summary(team)


===== Q1 Summaries =====

TEAM: Atlanta Hawks
(a) Home dates:
    2025-11-03, 2025-11-07, 2025-11-15, 2025-11-17, 2025-11-19, 2025-11-23, 2025-11-27, 2025-11-28, 2025-11-29, 2025-12-25
(b) # times HOSTED each opponent:
    vs Boston Celtics at HOME: 1
    vs Chicago Bulls at HOME: 1
    vs Dallas Mavericks at HOME: 1
    vs Golden State Warriors at HOME: 1
    vs Los Angeles Lakers at HOME: 1
    vs Miami Heat at HOME: 1
    vs Milwaukee Bucks at HOME: 1
    vs New York Knicks at HOME: 1
    vs Phoenix Suns at HOME: 1
    vs Toronto Raptors at HOME: 1
(c) # times VISITED each opponent:
    at Brooklyn Nets (AWAY): 1
    at Chicago Bulls (AWAY): 1
    at Cleveland Cavaliers (AWAY): 1
    at Denver Nuggets (AWAY): 1
    at Houston Rockets (AWAY): 1
    at Philadelphia 76ers (AWAY): 1
(d) Away dates:
    2025-11-01, 2025-11-05, 2025-11-11, 2025-11-13, 2025-11-21, 2025-12-01

TEAM: Boston Celtics
(a) Home dates:
    2025-11-01, 2025-11-07, 2025-11-13, 2025-11-19, 2025-11-23, 2025-11-28
(b)

In [None]:
teams

['Atlanta Hawks',
 'Boston Celtics',
 'Brooklyn Nets',
 'Chicago Bulls',
 'Cleveland Cavaliers',
 'Dallas Mavericks',
 'Denver Nuggets',
 'Golden State Warriors',
 'Houston Rockets',
 'Los Angeles Lakers',
 'Miami Heat',
 'Milwaukee Bucks',
 'New York Knicks',
 'Philadelphia 76ers',
 'Phoenix Suns',
 'Toronto Raptors']

Sets

T = set of all teams
(e.g., from Python: teams = sorted(set(df["home_team"]) | set(df["away_team"])))

D = set of all dates
(e.g., from Python: dates = sorted(df["date"].unique()))

Indices:

i in T : team index (home role)

j in T : team index (away role)

d in D : date index

i != j

H_date[i,d] = 1 if team i plays at home on date d, 0 otherwise.
(Derived from home_dates[i].)

A_date[i,d] = 1 if team i plays away on date d, 0 otherwise.
(Derived from away_dates[i].)

H_pair[i,j] = total number of times team i plays at home vs team j.
(Derived from home_vs_count[i][j].)

A_pair[i,j] = total number of times team i plays away at team j’s home.
(Derived from away_vs_count[i][j].)

x[i,j,d] in {0, 1}

Interpretation:

x[i,j,d] = 1 if on date d, team i plays at home vs team j (away).

x[i,j,d] = 0 otherwise.

In [None]:
# Dates from the dataframe
dates = sorted(df["date"].unique())

In [None]:
!pip install gurobipy

Collecting gurobipy
  Downloading gurobipy-13.0.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (16 kB)
Downloading gurobipy-13.0.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (14.8 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m14.8/14.8 MB[0m [31m52.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: gurobipy
Successfully installed gurobipy-13.0.0


In [None]:
from google.colab import userdata

# Create an environment with your WLS license.
# The credentials are read from Colab's "Secrets" panel instead of being
# hardcoded: add secrets named WLSACCESSID, WLSSECRET and LICENSEID and grant
# this notebook access to them. Any key that was previously committed in
# plain text should be revoked and regenerated in the Gurobi licence portal.
params = {
    "WLSACCESSID": userdata.get("WLSACCESSID"),
    "WLSSECRET": userdata.get("WLSSECRET"),
    # Secrets are stored as strings; the licence id is passed as an integer.
    "LICENSEID": int(userdata.get("LICENSEID")),
}

env = Env(params=params)

Set parameter WLSAccessID
Set parameter WLSSecret
Set parameter LicenseID to value 2709572
Academic license 2709572 - for non-commercial use only - registered to mg___@columbia.edu


## Part 2

In [None]:
# --------------------------------------------------
# Time zone index per team (the models only use differences between indices)
# Eastern = 0, Central = 1, Mountain = 2, Pacific = 3
# --------------------------------------------------
tz_team = {
    # Eastern
    'Atlanta Hawks': 0,
    'Boston Celtics': 0,
    'Brooklyn Nets': 0,
    'New York Knicks': 0,
    'Philadelphia 76ers': 0,
    'Toronto Raptors': 0,
    'Miami Heat': 0,
    'Cleveland Cavaliers': 0,
    # Central
    'Chicago Bulls': 1,
    'Milwaukee Bucks': 1,
    'Dallas Mavericks': 1,
    'Houston Rockets': 1,
    # Mountain
    'Denver Nuggets': 2,
    'Phoenix Suns': 2,
    # Pacific
    'Golden State Warriors': 3,
    'Los Angeles Lakers': 3,
}

# Calendar indicators, all initialised to 0:
#   H_date[(i, d)] = 1 if team i hosts a game on date d
#   A_date[(i, d)] = 1 if team i is the guest on date d
H_date = dict.fromkeys(((t, day) for t in teams for day in dates), 0)
A_date = dict.fromkeys(((t, day) for t in teams for day in dates), 0)

# Pairing counters, all initialised to 0:
#   H_pair[(i, j)] = number of times i hosts j
#   A_pair[(i, j)] = number of times i visits j
H_pair = {(a, b): 0 for a in teams for b in teams if a != b}
A_pair = {(a, b): 0 for a in teams for b in teams if a != b}

# One pass over the games fills both the indicators and the counters.
for home, away, d in zip(df["home_team"], df["away_team"], df["date"]):
    if home in teams:
        H_date[(home, d)] = 1
    if away in teams:
        A_date[(away, d)] = 1
    if home in teams and away in teams and home != away:
        H_pair[(home, away)] += 1
        A_pair[(away, home)] += 1

# For each team, the sorted dates on which it plays (home or away).
game_dates = {
    t: sorted(
        day for day in dates
        if H_date[(t, day)] == 1 or A_date[(t, day)] == 1
    )
    for t in teams
}

# Model and decision variables
#
# Part 2 is a pure feasibility problem: it only contains the schedule
# constraints (e)-(h). The three-game travel rule is NOT part of this model
# (it is introduced in Part 3), so the model name and the status messages
# below deliberately do not mention it.

m = gp.Model("schedule_part2_feasibility", env=env)

# x[i,j,d] = 1 if on date d, team i is home vs team j (away)
x = m.addVars(
    [(i, j, d) for i in teams for j in teams for d in dates if i != j],
    vtype=GRB.BINARY,
    name="x"
)

# Constraints

# (e) Home schedule fixed: sum_j x[i,j,d] = H_date[i,d]
for i in teams:
    for d in dates:
        m.addConstr(
            gp.quicksum(x[i, j, d] for j in teams if i != j) == H_date[(i, d)],
            name=f"home_schedule_{i}_{d}"
        )

# (f) Away schedule fixed: sum_j x[j,i,d] = A_date[i,d]
for i in teams:
    for d in dates:
        m.addConstr(
            gp.quicksum(x[j, i, d] for j in teams if i != j) == A_date[(i, d)],
            name=f"away_schedule_{i}_{d}"
        )

# (g) Home pair counts: sum_d x[i,j,d] = H_pair[i,j]
for i in teams:
    for j in teams:
        if i == j:
            continue
        m.addConstr(
            gp.quicksum(x[i, j, d] for d in dates) == H_pair[(i, j)],
            name=f"home_pair_{i}_vs_{j}"
        )

# (h) Away pair counts: sum_d x[j,i,d] = A_pair[i,j]
# A_pair[(i, j)] is incremented together with H_pair[(j, i)], so these rows
# restate (g); they are kept so the model mirrors the written formulation.
for i in teams:
    for j in teams:
        if i == j:
            continue
        m.addConstr(
            gp.quicksum(x[j, i, d] for d in dates) == A_pair[(i, j)],
            name=f"away_pair_{i}_at_{j}"
        )


# Objective: constant, i.e. any feasible point is optimal.

m.setObjective(0.0, GRB.MINIMIZE)

# Solve

m.optimize()

# Print schedule

if m.Status in (GRB.OPTIMAL, GRB.SUBOPTIMAL):
    print("A feasible schedule satisfying constraints (e)-(h) was found.")

    # --- Schedule output ---
    # Loop order (date, home, away) is kept so row labels match earlier runs.
    schedule_rows = []
    for d in dates:
        for i in teams:
            for j in teams:
                if i == j:
                    continue
                key = (i, j, d)
                if key in x and x[key].X > 0.5:
                    schedule_rows.append({
                        "date": d,
                        "home_team": i,
                        "away_team": j,
                        "home_tz": tz_team[i],
                        "away_tz": tz_team[j]
                    })

    # Passing `columns` keeps sort_values from raising KeyError if no game
    # was selected (empty row list).
    schedule_df = pd.DataFrame(
        schedule_rows,
        columns=["date", "home_team", "away_team", "home_tz", "away_tz"]
    ).sort_values(by=["date", "home_team"])
    print("\n=== Schedule (home/away per date) ===")
    print(schedule_df)

else:
    print("No feasible schedule found for constraints (e)-(h). Model status:", m.Status)


Gurobi Optimizer version 13.0.0 build v13.0.0rc1 (linux64 - "Ubuntu 22.04.4 LTS")

CPU model: AMD EPYC 7B12, instruction set [SSE2|AVX|AVX2]
Thread count: 1 physical cores, 2 logical processors, using up to 2 threads

Academic license 2709572 - for non-commercial use only - registered to mg___@columbia.edu
Optimize a model with 992 rows, 3840 columns and 15360 nonzeros (Min)
Model fingerprint: 0xb3f76766
Model has 0 linear objective coefficients
Variable types: 0 continuous, 3840 integer (3840 binary)
Coefficient statistics:
  Matrix range     [1e+00, 1e+00]
  Objective range  [0e+00, 0e+00]
  Bounds range     [1e+00, 1e+00]
  RHS range        [1e+00, 1e+00]
Presolve removed 684 rows and 3398 columns
Presolve time: 0.01s
Presolved: 308 rows, 442 columns, 1386 nonzeros
Variable types: 0 continuous, 442 integer (442 binary)
Found heuristic solution: objective 0.0000000

Explored 0 nodes (0 simplex iterations) in 0.03 seconds (0.01 work units)
Thread count was 2 (of 2 available processors

### Check if the produced schedule from part 2 is different from the original schedule

In [None]:
orig_simple = df[['date', 'home_team', 'away_team']].copy()
orig_simple['date'] = pd.to_datetime(orig_simple['date']).dt.date


In [None]:
new_simple = schedule_df[['date', 'home_team', 'away_team']].copy()

In [None]:
orig_sorted = orig_simple.sort_values(['date', 'home_team', 'away_team']).reset_index(drop=True)
new_sorted = new_simple.sort_values(['date', 'home_team', 'away_team']).reset_index(drop=True)

In [None]:
orig_sorted.equals(new_sorted)


False

In [None]:
orig_sorted

Unnamed: 0,date,home_team,away_team
0,2025-11-01,Boston Celtics,Golden State Warriors
1,2025-11-01,Brooklyn Nets,Denver Nuggets
2,2025-11-01,Chicago Bulls,Atlanta Hawks
3,2025-11-01,Miami Heat,Dallas Mavericks
4,2025-11-01,Milwaukee Bucks,Cleveland Cavaliers
...,...,...,...
123,2025-12-25,Denver Nuggets,Brooklyn Nets
124,2025-12-25,Golden State Warriors,Boston Celtics
125,2025-12-25,Houston Rockets,Toronto Raptors
126,2025-12-25,Los Angeles Lakers,New York Knicks


In [None]:
new_sorted

Unnamed: 0,date,home_team,away_team
0,2025-11-01,Boston Celtics,Phoenix Suns
1,2025-11-01,Brooklyn Nets,Denver Nuggets
2,2025-11-01,Chicago Bulls,Atlanta Hawks
3,2025-11-01,Miami Heat,Houston Rockets
4,2025-11-01,Milwaukee Bucks,Dallas Mavericks
...,...,...,...
123,2025-12-25,Denver Nuggets,Miami Heat
124,2025-12-25,Golden State Warriors,New York Knicks
125,2025-12-25,Houston Rockets,Toronto Raptors
126,2025-12-25,Los Angeles Lakers,Brooklyn Nets


In [None]:
# Convert it to csv file
new_sorted.to_csv("new_schedule(part 2).csv", index=False)

## Part 3

### Hard Constraint

For this section, we add the following rule as a hard constraint: no team may play three consecutive matches for which the sum of the absolute time-zone differences between consecutive matches, |tz2 - tz1| + |tz3 - tz2|, is 4 or more.
We either find a schedule that satisfies this rule or conclude that no such schedule exists.

In [None]:
# Part 3 (hard version): schedule constraints (e)-(h) plus the three-game
# travel rule imposed as a hard constraint.

m = gp.Model("schedule_with_triple_travel_hard", env=env)

# x[i,j,d] = 1 if on date d, team i is home vs team j (away)
matchup_keys = [(i, j, d) for i in teams for j in teams for d in dates if i != j]
x = m.addVars(matchup_keys, vtype=GRB.BINARY, name="x")

# (e) Home schedule fixed: sum_j x[i,j,d] = H_date[i,d]
for team in teams:
    rivals = [t for t in teams if t != team]
    for day in dates:
        hosted_today = gp.quicksum(x[team, r, day] for r in rivals)
        m.addConstr(hosted_today == H_date[(team, day)],
                    name=f"home_schedule_{team}_{day}")

# (f) Away schedule fixed: sum_j x[j,i,d] = A_date[i,d]
for team in teams:
    rivals = [t for t in teams if t != team]
    for day in dates:
        visiting_today = gp.quicksum(x[r, team, day] for r in rivals)
        m.addConstr(visiting_today == A_date[(team, day)],
                    name=f"away_schedule_{team}_{day}")

# (g) Home pair counts: sum_d x[i,j,d] = H_pair[i,j]
for host in teams:
    for guest in teams:
        if host != guest:
            m.addConstr(
                gp.quicksum(x[host, guest, day] for day in dates) == H_pair[(host, guest)],
                name=f"home_pair_{host}_vs_{guest}"
            )

# (h) Away pair counts: sum_d x[j,i,d] = A_pair[i,j]
for guest in teams:
    for host in teams:
        if guest != host:
            m.addConstr(
                gp.quicksum(x[host, guest, day] for day in dates) == A_pair[(guest, host)],
                name=f"away_pair_{guest}_at_{host}"
            )

# --------------------------------------------------
# Hard 3-game time-zone constraint (part 3)
#   For every team and every window of three consecutive games:
#       |tz2 - tz1| + |tz3 - tz2| <= 3
#   game_dates[team] holds the sorted dates on which the team plays.
# --------------------------------------------------
for team in teams:
    played = game_dates[team]
    rivals = [t for t in teams if t != team]

    # range() is empty when the team has fewer than three games.
    for k in range(len(played) - 2):
        # Arena time zone on each of the three dates as a linear expression:
        # the team's own zone if it hosts, otherwise the host's zone.
        tz_d1, tz_d2, tz_d3 = [
            tz_team[team] * gp.quicksum(x[team, r, day] for r in rivals)
            + gp.quicksum(tz_team[r] * x[r, team, day] for r in rivals)
            for day in played[k:k + 3]
        ]

        # Continuous upper bounds on the two absolute jumps.
        diff1 = m.addVar(lb=0.0, vtype=GRB.CONTINUOUS, name=f"diff1_{team}_{k}")
        diff2 = m.addVar(lb=0.0, vtype=GRB.CONTINUOUS, name=f"diff2_{team}_{k}")

        # diff1 >= |tz_d2 - tz_d1| and diff2 >= |tz_d3 - tz_d2|
        for label, bound, later, earlier in (
            ("diff1", diff1, tz_d2, tz_d1),
            ("diff2", diff2, tz_d3, tz_d2),
        ):
            m.addConstr(bound >= later - earlier, name=f"{label}_pos_{team}_{k}")
            m.addConstr(bound >= -(later - earlier), name=f"{label}_neg_{team}_{k}")

        # No window may have a total jump of 4 or more.
        m.addConstr(diff1 + diff2 <= 3, name=f"triple_hard_{team}_{k}")


# Constant objective: this is a feasibility check.
m.setObjective(0.0, GRB.MINIMIZE)

m.optimize()

# Report the schedule if one exists.
if m.Status in (GRB.OPTIMAL, GRB.SUBOPTIMAL):
    print("A feasible schedule satisfying the hard 3-game constraint was found.")

    # One row per selected (home, away, date) triple, scanned date-first.
    schedule_rows = [
        {
            "date": d,
            "home_team": i,
            "away_team": j,
            "home_tz": tz_team[i],
            "away_tz": tz_team[j]
        }
        for d in dates
        for i in teams
        for j in teams
        if i != j and (i, j, d) in x and x[i, j, d].X > 0.5
    ]

    schedule_df = pd.DataFrame(schedule_rows).sort_values(by=["date", "home_team"])
    print("\n=== Schedule (home/away per date) ===")
    print(schedule_df)

else:
    print("No feasible schedule found with the hard 3-game constraint. Model status:", m.Status)


Gurobi Optimizer version 13.0.0 build v13.0.0rc1 (linux64 - "Ubuntu 22.04.4 LTS")

CPU model: AMD EPYC 7B12, instruction set [SSE2|AVX|AVX2]
Thread count: 1 physical cores, 2 logical processors, using up to 2 threads

Academic license 2709572 - for non-commercial use only - registered to mg___@columbia.edu
Optimize a model with 2112 rows, 4288 columns and 43584 nonzeros (Min)
Model fingerprint: 0x4bdc49f2
Model has 0 linear objective coefficients
Variable types: 448 continuous, 3840 integer (3840 binary)
Coefficient statistics:
  Matrix range     [1e+00, 3e+00]
  Objective range  [0e+00, 0e+00]
  Bounds range     [1e+00, 1e+00]
  RHS range        [1e+00, 3e+00]
Presolve removed 1014 rows and 3489 columns
Presolve time: 0.01s

Explored 0 nodes (0 simplex iterations) in 0.03 seconds (0.01 work units)
Thread count was 1 (of 2 available processors)

Solution count 0

Model is infeasible
Best objective -, best bound -, gap -
No feasible schedule found with the hard 3-game constraint. Model 

### Soft Penalty
The model with the hard three-game constraint is infeasible, so we instead formulate the rule as a soft penalty: a violating triple is allowed but counted, and the objective minimizes the number of violations of the third rule.

In [None]:
# --------------------------------------------------
# 1. Define model and decision variables
# --------------------------------------------------
m = gp.Model("schedule_with_triple_travel", env=env)

# x[i,j,d] = 1 if on date d, team i is home vs team j (away)
matchup_keys = [(i, j, d) for i in teams for j in teams for d in dates if i != j]
x = m.addVars(matchup_keys, vtype=GRB.BINARY, name="x")

# --------------------------------------------------
# 2. Core schedule constraints (e)–(h)
# --------------------------------------------------

# (e) Home schedule fixed: sum_j x[i,j,d] = H_date[i,d]
for team in teams:
    rivals = [t for t in teams if t != team]
    for day in dates:
        m.addConstr(
            gp.quicksum(x[team, r, day] for r in rivals) == H_date[(team, day)],
            name=f"home_schedule_{team}_{day}"
        )

# (f) Away schedule fixed: sum_j x[j,i,d] = A_date[i,d]
for team in teams:
    rivals = [t for t in teams if t != team]
    for day in dates:
        m.addConstr(
            gp.quicksum(x[r, team, day] for r in rivals) == A_date[(team, day)],
            name=f"away_schedule_{team}_{day}"
        )

# (g) Home pair counts: sum_d x[i,j,d] = H_pair[i,j]
for host in teams:
    for guest in teams:
        if host != guest:
            m.addConstr(
                gp.quicksum(x[host, guest, day] for day in dates) == H_pair[(host, guest)],
                name=f"home_pair_{host}_vs_{guest}"
            )

# (h) Away pair counts: sum_d x[j,i,d] = A_pair[i,j]
for guest in teams:
    for host in teams:
        if guest != host:
            m.addConstr(
                gp.quicksum(x[host, guest, day] for day in dates) == A_pair[(guest, host)],
                name=f"away_pair_{guest}_at_{host}"
            )

# --------------------------------------------------
# 3. Soft 3-game penalty ONLY (no 2-game constraint)
# --------------------------------------------------

viol = {}
diff1_vars = {}
diff2_vars = {}
# Big-M slack added on top of the threshold of 3 when a triple is flagged.
# Each jump is at most 3 (zones 0..3), so two jumps sum to at most 6 = 3 + M.
M = 3

for team in teams:
    played = game_dates[team]
    rivals = [t for t in teams if t != team]

    # range() is empty when the team has fewer than three games.
    for k in range(len(played) - 2):
        # Binary flag: 1 lets this window's total jump exceed 3.
        viol[team, k] = m.addVar(vtype=GRB.BINARY, name=f"viol_{team}_{k}")

        # Continuous upper bounds on the two absolute jumps.
        diff1 = m.addVar(lb=0.0, vtype=GRB.CONTINUOUS, name=f"diff1_{team}_{k}")
        diff2 = m.addVar(lb=0.0, vtype=GRB.CONTINUOUS, name=f"diff2_{team}_{k}")
        diff1_vars[(team, k)] = diff1
        diff2_vars[(team, k)] = diff2

        # Arena time zone on each of the three dates as a linear expression:
        # the team's own zone if it hosts, otherwise the host's zone.
        tz_d1, tz_d2, tz_d3 = [
            tz_team[team] * gp.quicksum(x[team, r, day] for r in rivals)
            + gp.quicksum(tz_team[r] * x[r, team, day] for r in rivals)
            for day in played[k:k + 3]
        ]

        # diff1 >= |tz_d2 - tz_d1| and diff2 >= |tz_d3 - tz_d2|
        for bound, later, earlier in ((diff1, tz_d2, tz_d1), (diff2, tz_d3, tz_d2)):
            m.addConstr(bound >= later - earlier)
            m.addConstr(bound >= -(later - earlier))

        # Soft 3-game constraint:
        #   viol = 0 -> diff1 + diff2 <= 3      (good triple)
        #   viol = 1 -> diff1 + diff2 <= 3 + M  (allowed but penalized)
        m.addConstr(diff1 + diff2 <= 3 + M * viol[team, k],
                    name=f"triple_travel_soft_{team}_{k}")

# --------------------------------------------------
# 5. Objective: minimize number of "bad triples"
# --------------------------------------------------

objective = gp.quicksum(viol.values()) if viol else 0.0
m.setObjective(objective, GRB.MINIMIZE)

# --------------------------------------------------
# 6. Solve
# --------------------------------------------------

m.optimize()

# --------------------------------------------------
# 7. Print schedule and triple violations
# --------------------------------------------------

if m.Status in (GRB.OPTIMAL, GRB.SUBOPTIMAL):
    print("A feasible schedule was found.")

    # --- Schedule output ---
    schedule_rows = []
    for d in dates:
        for i in teams:
            for j in teams:
                if i == j:
                    continue
                key = (i, j, d)
                if key in x and x[key].X > 0.5:
                    schedule_rows.append({
                        "date": d,
                        "home_team": i,
                        "away_team": j,
                        "home_tz": tz_team[i],
                        "away_tz": tz_team[j]
                    })
    schedule_df = pd.DataFrame(schedule_rows).sort_values(by=["date", "home_team"])
    print("\n=== Schedule (home/away per date) ===")
    print(schedule_df)

    # --- Triple violations report ---

    # Defined once, before the loop (it used to be redefined per iteration).
    def get_arena_team_and_tz(team_i, date_d):
        """Return (arena_team, tz) for team_i's game on date_d in the solution.

        arena_team is team_i itself when it hosts, otherwise the hosting
        opponent. Returns (None, None) if no selected game involves team_i
        on that date.
        """
        # Home game: the arena is team_i's own.
        for opp in teams:
            if opp == team_i:
                continue
            if (team_i, opp, date_d) in x and x[team_i, opp, date_d].X > 0.5:
                return team_i, tz_team[team_i]

        # Away game: the arena belongs to the hosting opponent.
        for opp in teams:
            if opp == team_i:
                continue
            if (opp, team_i, date_d) in x and x[opp, team_i, date_d].X > 0.5:
                return opp, tz_team[opp]

        return None, None

    viol_columns = [
        "team", "triple_index_for_team",
        "date_1", "arena_1", "tz_1",
        "date_2", "arena_2", "tz_2",
        "date_3", "arena_3", "tz_3",
        "jump_1_2", "jump_2_3", "sum_jumps",
    ]
    violating_rows = []

    for (team, k), v in viol.items():
        if v.X <= 0.5:
            continue  # only triples with viol=1

        games = game_dates[team]
        d1 = games[k]
        d2 = games[k + 1]
        d3 = games[k + 2]

        arena1, tz1 = get_arena_team_and_tz(team, d1)
        arena2, tz2 = get_arena_team_and_tz(team, d2)
        arena3, tz3 = get_arena_team_and_tz(team, d3)

        if arena1 is None or arena2 is None or arena3 is None:
            continue

        diff12 = abs(tz2 - tz1)
        diff23 = abs(tz3 - tz2)
        total_diff = diff12 + diff23

        violating_rows.append({
            "team": team,
            "triple_index_for_team": k,
            "date_1": d1,
            "arena_1": arena1,
            "tz_1": tz1,
            "date_2": d2,
            "arena_2": arena2,
            "tz_2": tz2,
            "date_3": d3,
            "arena_3": arena3,
            "tz_3": tz3,
            "jump_1_2": diff12,
            "jump_2_3": diff23,
            "sum_jumps": total_diff
        })

    # Passing `columns` guarantees the sort keys exist even when no triple is
    # flagged; without it, sort_values raises KeyError on an empty frame.
    viol_df = pd.DataFrame(violating_rows, columns=viol_columns).sort_values(by=["team", "date_1"])
    print("\nNumber of violating triples found in solution:", len(viol_df))
    if len(viol_df) > 0:
        print("\n=== Violating triples ===")
        print(viol_df)

else:
    print("No feasible schedule found. Model status:", m.Status)


Gurobi Optimizer version 13.0.0 build v13.0.0rc1 (linux64 - "Ubuntu 22.04.4 LTS")

CPU model: AMD EPYC 7B12, instruction set [SSE2|AVX|AVX2]
Thread count: 1 physical cores, 2 logical processors, using up to 2 threads

Academic license 2709572 - for non-commercial use only - registered to mg___@columbia.edu
Optimize a model with 2112 rows, 4512 columns and 43808 nonzeros (Min)
Model fingerprint: 0xa522a451
Model has 224 linear objective coefficients
Variable types: 448 continuous, 4064 integer (4064 binary)
Coefficient statistics:
  Matrix range     [1e+00, 3e+00]
  Objective range  [1e+00, 1e+00]
  Bounds range     [1e+00, 1e+00]
  RHS range        [1e+00, 3e+00]
Presolve removed 1621 rows and 3944 columns
Presolve time: 0.03s
Presolved: 491 rows, 568 columns, 2404 nonzeros
Variable types: 38 continuous, 530 integer (508 binary)
Found heuristic solution: objective 33.0000000
Found heuristic solution: objective 32.0000000
Found heuristic solution: objective 31.0000000
Found heuristic so

In [None]:
schedule_df

Unnamed: 0,date,home_team,away_team,home_tz,away_tz
0,2025-11-01,Boston Celtics,Phoenix Suns,0,2
1,2025-11-01,Brooklyn Nets,Denver Nuggets,0,2
2,2025-11-01,Chicago Bulls,Golden State Warriors,1,3
3,2025-11-01,Miami Heat,Los Angeles Lakers,0,3
4,2025-11-01,Milwaukee Bucks,Cleveland Cavaliers,1,0
...,...,...,...,...,...
123,2025-12-25,Denver Nuggets,Miami Heat,2,0
124,2025-12-25,Golden State Warriors,Philadelphia 76ers,3,0
125,2025-12-25,Houston Rockets,Boston Celtics,1,0
126,2025-12-25,Los Angeles Lakers,Brooklyn Nets,3,0


In [None]:
# --- Schedule output ---
# Rebuild the schedule from the solution currently held in `x`, scanning
# dates first, then home team, then away team.
schedule_rows = [
    {
        "date": d,
        "home_team": i,
        "away_team": j,
        "home_tz": tz_team[i],
        "away_tz": tz_team[j]
    }
    for d in dates
    for i in teams
    for j in teams
    if i != j and (i, j, d) in x and x[i, j, d].X > 0.5
]

schedule_df = pd.DataFrame(schedule_rows).sort_values(by=["date", "home_team"])
print("\n=== Schedule (home/away per date) ===")
print(schedule_df)

# Save a clean schedule: date, home_team, away_team
export_columns = ["date", "home_team", "away_team"]
schedule_df_simple = schedule_df[export_columns].copy()
schedule_df_simple.to_csv("optimized_schedule.csv", index=False)
print("\nSaved optimized schedule as optimized_schedule.csv")



=== Schedule (home/away per date) ===
           date              home_team              away_team  home_tz  \
0    2025-11-01         Boston Celtics           Phoenix Suns        0   
1    2025-11-01          Brooklyn Nets         Denver Nuggets        0   
2    2025-11-01          Chicago Bulls  Golden State Warriors        1   
3    2025-11-01             Miami Heat     Los Angeles Lakers        0   
4    2025-11-01        Milwaukee Bucks    Cleveland Cavaliers        1   
..          ...                    ...                    ...      ...   
123  2025-12-25         Denver Nuggets             Miami Heat        2   
124  2025-12-25  Golden State Warriors     Philadelphia 76ers        3   
125  2025-12-25        Houston Rockets         Boston Celtics        1   
126  2025-12-25     Los Angeles Lakers          Brooklyn Nets        3   
127  2025-12-25           Phoenix Suns        New York Knicks        2   

     away_tz  
0          2  
1          2  
2          3  
3          3

In [None]:
# --- Build new compact schedule ---
# Original games reduced to the same five columns as `schedule_df`.
df_reform = df[["date", "home_team", "away_team"]].copy()

# Attach the time-zone index of each side.
for side in ("home", "away"):
    df_reform[f"{side}_tz"] = df_reform[f"{side}_team"].map(tz_team)

# Same ordering as the optimized schedule, with a fresh 0..n-1 index.
df_reform = df_reform.sort_values(["date", "home_team"]).reset_index(drop=True)

df_reform.head()


Unnamed: 0,date,home_team,away_team,home_tz,away_tz
0,2025-11-01,Boston Celtics,Golden State Warriors,0,3
1,2025-11-01,Brooklyn Nets,Denver Nuggets,0,2
2,2025-11-01,Chicago Bulls,Atlanta Hawks,1,0
3,2025-11-01,Miami Heat,Dallas Mavericks,0,1
4,2025-11-01,Milwaukee Bucks,Cleveland Cavaliers,1,0


Check if the produced schedule is different from the original dataset

In [None]:
schedule_df.equals(df_reform)

False

Check how many violations the original schedule had

In [None]:
# Make sure dates are sorted correctly
# NOTE(review): this cell rebinds the notebook-level names `df` (now sorted by
# date) and `teams` (now in tz_team key order rather than alphabetical).
df = df.sort_values("date").reset_index(drop=True)

# --------------------------------------------------
# 1. Time zone mapping and team list
# --------------------------------------------------

teams = list(tz_team)

# --------------------------------------------------
# 2. Build per-team game sequences with arena time zones
# --------------------------------------------------

team_games = {t: [] for t in teams}

for d, home, away in zip(df["date"], df["home_team"], df["away_team"]):
    # Both the host and the guest play in the *home* team's arena.
    for participant in (home, away):
        if participant in teams:
            team_games[participant].append({
                "date": d,
                "arena_team": home,
                "tz": tz_team[home]
            })

# Order each team's games chronologically.
for games in team_games.values():
    games.sort(key=lambda g: g["date"])

# --------------------------------------------------
# 3. Check 3-consecutive rule:
#    a window is a violation when abs(tz2 - tz1) + abs(tz3 - tz2) >= 4
# --------------------------------------------------

three_violations = []

for team in teams:
    games = team_games[team]
    # Sliding window over consecutive triples; k is the index of the first game.
    for k, (g1, g2, g3) in enumerate(zip(games, games[1:], games[2:])):
        diff12 = abs(g2["tz"] - g1["tz"])
        diff23 = abs(g3["tz"] - g2["tz"])
        total = diff12 + diff23

        if total < 4:
            continue

        three_violations.append({
            "team": team,
            "triple_index_for_team": k,
            "date_1": g1["date"],
            "arena_1": g1["arena_team"],
            "tz_1": g1["tz"],
            "date_2": g2["date"],
            "arena_2": g2["arena_team"],
            "tz_2": g2["tz"],
            "date_3": g3["date"],
            "arena_3": g3["arena_team"],
            "tz_3": g3["tz"],
            "jump_1_2": diff12,
            "jump_2_3": diff23,
            "sum_jumps": total
        })

three_viol_df = pd.DataFrame(three_violations)

print("Number of 3-consecutive violations:", len(three_viol_df))

if three_viol_df.empty:
    print("No 3-consecutive violations found.")
else:
    three_viol_df = three_viol_df.sort_values(by=["team", "date_1"]).reset_index(drop=True)
    display(three_viol_df)

# OPTIONAL: Save results
three_viol_df.to_csv("three_consecutive_violations.csv", index=False)
print("Saved as three_consecutive_violations.csv")


Number of 3-consecutive violations: 22


Unnamed: 0,team,triple_index_for_team,date_1,arena_1,tz_1,date_2,arena_2,tz_2,date_3,arena_3,tz_3,jump_1_2,jump_2_3,sum_jumps
0,Boston Celtics,0,2025-11-01,Boston Celtics,0,2025-11-03,Los Angeles Lakers,3,2025-11-05,Denver Nuggets,2,3,1,4
1,Brooklyn Nets,6,2025-11-15,New York Knicks,0,2025-11-17,Los Angeles Lakers,3,2025-11-19,Phoenix Suns,2,3,1,4
2,Cleveland Cavaliers,3,2025-11-07,Cleveland Cavaliers,0,2025-11-11,Los Angeles Lakers,3,2025-11-13,Phoenix Suns,2,3,1,4
3,Cleveland Cavaliers,10,2025-11-23,Cleveland Cavaliers,0,2025-11-27,Golden State Warriors,3,2025-11-28,Cleveland Cavaliers,0,3,3,6
4,Denver Nuggets,0,2025-11-01,Brooklyn Nets,0,2025-11-03,Golden State Warriors,3,2025-11-05,Denver Nuggets,2,3,1,4
5,Golden State Warriors,2,2025-11-05,Golden State Warriors,3,2025-11-07,Atlanta Hawks,0,2025-11-11,Golden State Warriors,3,3,3,6
6,Golden State Warriors,3,2025-11-07,Atlanta Hawks,0,2025-11-11,Golden State Warriors,3,2025-11-13,Miami Heat,0,3,3,6
7,Golden State Warriors,4,2025-11-11,Golden State Warriors,3,2025-11-13,Miami Heat,0,2025-11-15,Golden State Warriors,3,3,3,6
8,Golden State Warriors,10,2025-11-23,Dallas Mavericks,1,2025-11-27,Golden State Warriors,3,2025-11-28,Chicago Bulls,1,2,2,4
9,Los Angeles Lakers,2,2025-11-05,Los Angeles Lakers,3,2025-11-07,Dallas Mavericks,1,2025-11-11,Los Angeles Lakers,3,2,2,4


Saved as three_consecutive_violations.csv


The original schedule has 22 violations, while our new schedule has 16, so the optimized schedule is an improvement.

### Check if time zones have improved

In [None]:
def build_team_games_from_schedule(schedule_df, tz_team):
    """
    Group a schedule into one date-ordered list of games per team.

    schedule_df: DataFrame with columns date, home_team, away_team
    tz_team: dict mapping team -> time zone index

    Returns a dict team -> list of dicts with keys date, arena_team, tz,
    with one entry per game the team appears in (as home or away). Only teams
    that are keys of tz_team are tracked. The arena and time zone recorded
    for a game are always those of the home team.
    """
    # Keys follow tz_team's insertion order; every known team starts empty.
    team_games = {team: [] for team in tz_team}

    for _, game in schedule_df.iterrows():
        game_date = game["date"]
        host = game["home_team"]
        guest = game["away_team"]

        # Both participants play in the host's arena, so they log the same
        # venue; each gets its own record object.
        for participant in (host, guest):
            if participant in team_games:
                team_games[participant].append({
                    "date": game_date,
                    "arena_team": host,
                    "tz": tz_team[host]
                })

    # Chronological order per team (stable sort keeps ties in input order).
    return {
        team: sorted(records, key=lambda rec: rec["date"])
        for team, records in team_games.items()
    }


def compute_tz_metrics(team_games, violation_threshold=4):
    """
    Summarize time-zone jumps across every team's game sequence.

    team_games: dict team -> list of dicts, each with a "tz" key; each list
        is taken in the order given as that team's consecutive games.
    violation_threshold: a window of three consecutive games counts as a
        violation when its two jumps sum to at least this value (default 4).

    Returns a dict with:
        avg_pair_jump: mean |tz difference| between consecutive games,
            pooled over all teams (0.0 if there are no pairs)
        avg_triple_sum: mean of (jump 1->2 + jump 2->3) over all windows of
            three consecutive games, pooled over all teams (0.0 if none)
        num_triple_violations: number of windows whose jump sum is
            >= violation_threshold
    """
    total_pair_jump = 0
    num_pairs = 0

    total_triple_sum = 0
    num_triples = 0

    num_triple_violations = 0

    for games in team_games.values():
        # A team with fewer than two games contributes no jumps at all.
        if len(games) < 2:
            continue

        zones = [game["tz"] for game in games]

        # Absolute jump between each pair of consecutive games.
        jumps = [abs(later - earlier) for earlier, later in zip(zones, zones[1:])]
        total_pair_jump += sum(jumps)
        num_pairs += len(jumps)

        # Each three-game window is covered by two adjacent jumps.
        for first_jump, second_jump in zip(jumps, jumps[1:]):
            window_sum = first_jump + second_jump
            total_triple_sum += window_sum
            num_triples += 1

            if window_sum >= violation_threshold:
                num_triple_violations += 1

    avg_pair_jump = total_pair_jump / num_pairs if num_pairs > 0 else 0.0
    avg_triple_sum = total_triple_sum / num_triples if num_triples > 0 else 0.0

    return {
        "avg_pair_jump": avg_pair_jump,
        "avg_triple_sum": avg_triple_sum,
        "num_triple_violations": num_triple_violations,
    }


In [None]:
# Keep the original schedule in date order with a fresh 0..n-1 index.
df = df.sort_values("date").reset_index(drop=True)

# Original schedule: per-team game sequences, then the summary metrics.
orig_team_games = build_team_games_from_schedule(
    df[["date", "home_team", "away_team"]],
    tz_team
)
orig_metrics = compute_tz_metrics(orig_team_games)

# Optimized schedule: same pipeline applied to the solver's output.
opt_team_games = build_team_games_from_schedule(
    schedule_df[["date", "home_team", "away_team"]],
    tz_team
)
opt_metrics = compute_tz_metrics(opt_team_games)

print("=== Time-zone metrics comparison ===")
# One pass per schedule; each heading string carries its own leading newline.
for heading, metrics in (
    ("Original schedule:", orig_metrics),
    ("\nOptimized schedule:", opt_metrics),
):
    print(heading)
    for k, v in metrics.items():
        print(f"  {k}: {v}")

# Negative values mean the optimized schedule scores lower on that metric.
print("\nDifferences (optimized - original):")
for k, orig_value in orig_metrics.items():
    diff = opt_metrics[k] - orig_value
    print(f"  {k}: {diff}")


=== Time-zone metrics comparison ===
Original schedule:
  avg_pair_jump: 0.8333333333333334
  avg_triple_sum: 1.5803571428571428
  num_triple_violations: 22

Optimized schedule:
  avg_pair_jump: 0.8
  avg_triple_sum: 1.5267857142857142
  num_triple_violations: 16

Differences (optimized - original):
  avg_pair_jump: -0.033333333333333326
  avg_triple_sum: -0.0535714285714286
  num_triple_violations: -6
