In [1]:
import pandas as pd
import numpy as np

In [2]:
# Read the raw company and student tables from the working directory.
df_companies = pd.read_csv('Companies.csv')
df_students = pd.read_csv('Students.csv')

# Companies: remove the columns that must not end up in the numeric matrix
# built from this frame. Reassigning (instead of inplace=True) keeps the
# cell free of in-place mutation.
df_companies = df_companies.drop(
    columns=['Company', 'Project_Title', 'Hardware', 'Software']
)

# Students: the order of the three steps matters and mirrors the original.
# 1) drop the columns listed here,
# 2) keep only the columns located to the left of 'I1' ('I1' itself and
#    everything after it are discarded),
# 3) drop the remaining unwanted columns; drop() raises KeyError if any of
#    them is missing from what is left after step 2.
df_students = df_students.drop(
    columns=['EID', 'GPA', 'Partner_Importance', 'Partner_EID']
)
col_index = df_students.columns.get_loc('I1')
df_students = df_students.iloc[:, :col_index]
df_students = df_students.drop(
    columns=['Hardware, Software, or Both', 'Honors', 'SP']
)

In [3]:
# Turn both frames into integer matrices with the same pipeline:
# skip the first column, replace missing values with 0, cast to int.
# NOTE(review): the first column is presumably a name/identifier - confirm.
np_companies, np_students = (
    frame.iloc[:, 1:].fillna(0).astype(int).to_numpy()
    for frame in (df_companies, df_students)
)

In [5]:
# affinity_matrix[t, i] is the dot product of company row t with student row i,
# so the result has one row per company and one column per student.
affinity_matrix = np_companies @ np_students.T

# Disable NumPy's summarisation ("...") so the complete matrix is printed.
# This changes the print options for the rest of the kernel session.
np.set_printoptions(threshold=np.inf)
print(affinity_matrix)

[[229 297 292 244 277 307 299 267 250 335 259 288 279 234 313 264 275 294
  236 180 294 194 324 295 321 239 358 295 269 310 285 408 260 198 208 330
  314 305 173 279 267 289 372 241 282 264 321 292 319 318 279 296 341 240
  254 331 211 326 253 324 335 346 279 215 305 349 296 322 296 227 282 314
  239 255 283 392 243 168 247 323 329 303 276 322 307 302 307 357 246 294
  147 295 187 220 245 291 207 313 326]
 [288 385 369 293 345 387 365 339 314 437 320 352 328 280 366 325 347 357
  289 236 361 250 394 360 405 284 430 353 316 377 371 499 326 254 255 401
  375 371 210 333 324 371 446 306 367 329 416 352 396 381 327 332 397 318
  293 407 263 396 294 395 406 412 338 248 365 406 370 389 369 300 343 405
  305 315 360 469 288 201 297 412 390 376 336 406 366 382 381 439 298 336
  202 355 240 272 313 335 250 366 402]
 [283 408 377 330 374 384 365 343 311 433 316 338 350 283 371 312 363 371
  304 222 370 235 414 376 404 310 446 357 326 399 377 515 330 242 266 420
  390 368 212 349 339 386 457 333 

In [4]:
from ortools.sat.python import cp_model
import numpy as np


# Inputs: np_companies (one row per team/company) and np_students (one row per
# student) are the integer matrices built in an earlier cell; both must have
# the same number of columns for the dot product below.
n_students = np_students.shape[0]
n_teams = np_companies.shape[0]
n_skills = np_students.shape[1]  # not used by the model below; kept for inspection

# Inclusive team-size limits enforced by the model.
MIN_TEAM_SIZE = 3
MAX_TEAM_SIZE = 5
# Upper end of the domain of every goodness variable. It must be at least as
# large as the best achievable total, otherwise the model becomes infeasible.
GOODNESS_UPPER_BOUND = 1000000

model = cp_model.CpModel()

# Decision variables: assignment[(i, t)] is True if student i is assigned to team t.
assignment = {}
for i in range(n_students):
    for t in range(n_teams):
        assignment[(i, t)] = model.NewBoolVar(f"assign_{i}_{t}")

# Each student is assigned to exactly one team.
for i in range(n_students):
    model.Add(sum(assignment[(i, t)] for t in range(n_teams)) == 1)

# Team size constraints: each team must have between MIN_TEAM_SIZE and
# MAX_TEAM_SIZE students (inclusive).
for t in range(n_teams):
    team_size = sum(assignment[(i, t)] for i in range(n_students))
    model.Add(team_size >= MIN_TEAM_SIZE)
    model.Add(team_size <= MAX_TEAM_SIZE)

# Goodness of a team = sum of the affinities of the students assigned to it.
# affinity_matrix[t][i] is the dot product of company row t and student row i,
# precomputed once instead of per (team, student) pair inside the loop.
team_goodness = {}
affinity_matrix = np.dot(np_companies, np_students.T)
for t in range(n_teams):
    team_goodness[t] = model.NewIntVar(0, GOODNESS_UPPER_BOUND, f"team_goodness_{t}")
    # int(...) converts the NumPy scalar to a plain Python int coefficient.
    terms = [assignment[(i, t)] * int(affinity_matrix[t][i]) for i in range(n_students)]
    model.Add(team_goodness[t] == sum(terms))

# Total goodness across all teams.
total_goodness = model.NewIntVar(0, GOODNESS_UPPER_BOUND, "total_goodness")
model.Add(total_goodness == sum(team_goodness[t] for t in range(n_teams)))

# Objective: maximize the total goodness.
model.Maximize(total_goodness)

# Solve the model with search logging routed through print and a
# 10-minute wall-clock limit.
solver = cp_model.CpSolver()
solver.parameters.log_search_progress = True
solver.log_callback = print
solver.parameters.max_time_in_seconds = 60*10
status = solver.Solve(model)

# FEASIBLE means a solution was found but not proven optimal
# (for example because the time limit was reached).
if status in (cp_model.OPTIMAL, cp_model.FEASIBLE):
    print("Total Goodness:", solver.Value(total_goodness))
    for t in range(n_teams):
        team_students = [i for i in range(n_students) if solver.Value(assignment[(i, t)]) == 1]
        print(f"Team {t}: Students: {team_students}, Goodness: {solver.Value(team_goodness[t])}")
else:
    print("No solution found.")


Starting CP-SAT solver v9.11.4210
Parameters: max_time_in_seconds: 600 log_search_progress: true
Setting number of workers to 32

Initial optimization model '': (model_fingerprint: 0xb903e07444929100)
#Variables: 2'601 (#ints: 1 in objective)
  - 2'574 Booleans in [0,1]
  - 27 in [0,1000000]
#kLinearN: 178 (#terms: 10'349)

Starting presolve at 0.00s
  4.66e-04s  0.00e+00d  [DetectDominanceRelations] 
  4.25e-03s  0.00e+00d  [operations_research::sat::CpModelPresolver::PresolveToFixPoint] #num_loops=2 #num_dual_strengthening=1 
  1.85e-05s  0.00e+00d  [operations_research::sat::CpModelPresolver::ExtractEncodingFromLinear] #potential_supersets=99 
[Symmetry] Graph for symmetry has 2'752 nodes and 7'722 arcs.
[Symmetry] Symmetry computation done. time: 0.0002605 dtime: 0.00061967
  5.21e-04s  0.00e+00d  [operations_research::sat::CpModelPresolver::DetectDuplicateConstraintsWithDifferentEnforcements] 
  8.55e-03s  8.04e-03d  [operations_research::sat::CpModelPresolver::Probe] #probed=5'1

In [5]:
from ortools.sat.python import cp_model
import numpy as np

# Inputs: np_companies (one row per team/company) and np_students (one row per
# student) are the integer matrices built in an earlier cell; both must have
# the same number of columns for the dot product below.
n_students = np_students.shape[0]
n_teams = np_companies.shape[0]
n_skills = np_students.shape[1]  # not used by the model below; kept for inspection

# Inclusive team-size limits enforced by the model.
MIN_TEAM_SIZE = 3
MAX_TEAM_SIZE = 5
# Upper end of the domain of every goodness variable. It must be at least as
# large as the best achievable team goodness, otherwise the model becomes
# infeasible.
GOODNESS_UPPER_BOUND = 1000000

model = cp_model.CpModel()

# Decision variables: assignment[(i, t)] is True if student i is assigned to team t.
assignment = {}
for i in range(n_students):
    for t in range(n_teams):
        assignment[(i, t)] = model.NewBoolVar(f"assign_{i}_{t}")

# Each student is assigned to exactly one team.
for i in range(n_students):
    model.Add(sum(assignment[(i, t)] for t in range(n_teams)) == 1)

# Team size constraints: each team must have between MIN_TEAM_SIZE and
# MAX_TEAM_SIZE students (inclusive).
for t in range(n_teams):
    team_size = sum(assignment[(i, t)] for i in range(n_students))
    model.Add(team_size >= MIN_TEAM_SIZE)
    model.Add(team_size <= MAX_TEAM_SIZE)

# Goodness of a team = sum of the affinities of the students assigned to it.
# affinity_matrix[t][i] is the dot product of company row t and student row i,
# precomputed once instead of per (team, student) pair inside the loop.
team_goodness = {}
affinity_matrix = np.dot(np_companies, np_students.T)
for t in range(n_teams):
    team_goodness[t] = model.NewIntVar(0, GOODNESS_UPPER_BOUND, f"team_goodness_{t}")
    # int(...) converts the NumPy scalar to a plain Python int coefficient.
    terms = [assignment[(i, t)] * int(affinity_matrix[t][i]) for i in range(n_students)]
    model.Add(team_goodness[t] == sum(terms))

# min_goodness is constrained to be no larger than any team's goodness;
# maximizing it pushes it up to the goodness of the worst-off team.
min_goodness = model.NewIntVar(0, GOODNESS_UPPER_BOUND, "min_goodness")
for t in range(n_teams):
    model.Add(team_goodness[t] >= min_goodness)

# Objective: maximize the minimum team goodness.
model.Maximize(min_goodness)

# Solve the model with search logging routed through print and a
# 15-minute wall-clock limit.
solver = cp_model.CpSolver()
solver.parameters.log_search_progress = True
solver.log_callback = print
solver.parameters.max_time_in_seconds = 60*15
status = solver.Solve(model)

# FEASIBLE means a solution was found but not proven optimal
# (for example because the time limit was reached).
if status in (cp_model.OPTIMAL, cp_model.FEASIBLE):
    print("Minimum Goodness:", solver.Value(min_goodness))
    for t in range(n_teams):
        team_students = [i for i in range(n_students) if solver.Value(assignment[(i, t)]) == 1]
        print(f"Team {t}: Students: {team_students}, Goodness: {solver.Value(team_goodness[t])}")
else:
    print("No solution found.")


Starting CP-SAT solver v9.11.4210
Parameters: max_time_in_seconds: 900 log_search_progress: true
Setting number of workers to 32

Initial optimization model '': (model_fingerprint: 0xea2c0b7d08a46b73)
#Variables: 2'601 (#ints: 1 in objective)
  - 2'574 Booleans in [0,1]
  - 27 in [0,1000000]
#kLinear2: 26
#kLinearN: 177 (#terms: 10'322)

Starting presolve at 0.00s
  5.37e-04s  0.00e+00d  [DetectDominanceRelations] 
  4.56e-03s  0.00e+00d  [operations_research::sat::CpModelPresolver::PresolveToFixPoint] #num_loops=3 #num_dual_strengthening=1 
  2.75e-05s  0.00e+00d  [operations_research::sat::CpModelPresolver::ExtractEncodingFromLinear] #potential_supersets=99 
[Symmetry] Graph for symmetry has 5'353 nodes and 12'897 arcs.
[Symmetry] Symmetry computation done. time: 0.0008879 dtime: 0.00176828
  6.69e-05s  0.00e+00d  [operations_research::sat::CpModelPresolver::DetectDuplicateConstraintsWithDifferentEnforcements] 
  2.09e-02s  1.54e-02d  [operations_research::sat::CpModelPresolver::Pro

In [9]:
import sys
def custom_log(msg):
    """Write ``msg`` to standard output exactly as given and flush at once.

    No newline is appended, so callers that want line-separated output must
    include the line break in ``msg`` themselves.

    NOTE(review): no visible cell references this function; the solver cells
    use ``print`` as their log callback.
    """
    stream = sys.stdout  # looked up at call time so redirection is honoured
    stream.write(msg)
    stream.flush()

In [None]:
# Sanity check: show (rows, columns) of the student matrix, then of the
# company matrix, one per line.
print(np_students.shape, np_companies.shape, sep="\n")