In [1]:
import pandas as pd
from sklearn import linear_model
from pulp import LpMaximize, LpProblem, LpVariable

### Read data

In [2]:
df = pd.read_csv("stint_data.csv")
df

Unnamed: 0,game_id,h_team,a_team,minutes,h_goals,a_goals,home1,home2,home3,home4,away1,away2,away3,away4
0,1,USA,Japan,4.252969,4,9,USA_p4,USA_p1,USA_p3,USA_p6,Japan_p12,Japan_p4,Japan_p8,Japan_p6
1,1,USA,Japan,5.688809,6,11,USA_p1,USA_p7,USA_p6,USA_p5,Japan_p7,Japan_p10,Japan_p8,Japan_p12
2,1,USA,Japan,1.149557,0,1,USA_p8,USA_p1,USA_p4,USA_p5,Japan_p5,Japan_p3,Japan_p7,Japan_p10
3,1,USA,Japan,3.511617,7,5,USA_p2,USA_p7,USA_p8,USA_p6,Japan_p2,Japan_p4,Japan_p10,Japan_p12
4,1,USA,Japan,2.163139,7,5,USA_p9,USA_p5,USA_p8,USA_p7,Japan_p8,Japan_p6,Japan_p10,Japan_p1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7443,660,Chile,Argentina,2.583097,6,2,Chile_p1,Chile_p7,Chile_p12,Chile_p2,Argentina_p12,Argentina_p10,Argentina_p2,Argentina_p1
7444,660,Chile,Argentina,1.039052,4,3,Chile_p8,Chile_p2,Chile_p5,Chile_p12,Argentina_p10,Argentina_p6,Argentina_p11,Argentina_p3
7445,660,Chile,Argentina,1.599023,3,0,Chile_p5,Chile_p7,Chile_p4,Chile_p1,Argentina_p4,Argentina_p2,Argentina_p1,Argentina_p3
7446,660,Chile,Argentina,3.841087,6,13,Chile_p8,Chile_p6,Chile_p2,Chile_p11,Argentina_p3,Argentina_p9,Argentina_p10,Argentina_p8


### Filter stints that involved Canada 

In [3]:
df = df[(df['h_team'] == "Canada") | (df['a_team'] == "Canada")].copy().reset_index(drop=True)
df

Unnamed: 0,game_id,h_team,a_team,minutes,h_goals,a_goals,home1,home2,home3,home4,away1,away2,away3,away4
0,3,USA,Canada,0.633517,0,0,USA_p9,USA_p4,USA_p2,USA_p12,Canada_p7,Canada_p4,Canada_p10,Canada_p6
1,3,USA,Canada,2.089695,4,3,USA_p9,USA_p12,USA_p10,USA_p8,Canada_p11,Canada_p6,Canada_p10,Canada_p5
2,3,USA,Canada,0.701476,1,1,USA_p5,USA_p7,USA_p4,USA_p11,Canada_p2,Canada_p6,Canada_p5,Canada_p9
3,3,USA,Canada,2.718043,0,3,USA_p4,USA_p11,USA_p10,USA_p2,Canada_p7,Canada_p11,Canada_p12,Canada_p6
4,3,USA,Canada,6.470463,6,3,USA_p6,USA_p8,USA_p10,USA_p11,Canada_p8,Canada_p10,Canada_p1,Canada_p2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1258,653,Chile,Canada,0.217044,0,0,Chile_p3,Chile_p6,Chile_p7,Chile_p12,Canada_p3,Canada_p7,Canada_p9,Canada_p2
1259,653,Chile,Canada,5.690802,6,11,Chile_p12,Chile_p8,Chile_p1,Chile_p4,Canada_p1,Canada_p11,Canada_p12,Canada_p9
1260,653,Chile,Canada,2.080188,1,3,Chile_p2,Chile_p6,Chile_p5,Chile_p11,Canada_p8,Canada_p2,Canada_p11,Canada_p3
1261,653,Chile,Canada,1.147784,1,0,Chile_p12,Chile_p3,Chile_p2,Chile_p5,Canada_p11,Canada_p12,Canada_p8,Canada_p10


### Plus-minus per stint

In [4]:
df["plus-minus"] = df["h_goals"] - df["a_goals"]
df = df.drop(columns=["h_goals", "a_goals"])
df

Unnamed: 0,game_id,h_team,a_team,minutes,home1,home2,home3,home4,away1,away2,away3,away4,plus-minus
0,3,USA,Canada,0.633517,USA_p9,USA_p4,USA_p2,USA_p12,Canada_p7,Canada_p4,Canada_p10,Canada_p6,0
1,3,USA,Canada,2.089695,USA_p9,USA_p12,USA_p10,USA_p8,Canada_p11,Canada_p6,Canada_p10,Canada_p5,1
2,3,USA,Canada,0.701476,USA_p5,USA_p7,USA_p4,USA_p11,Canada_p2,Canada_p6,Canada_p5,Canada_p9,0
3,3,USA,Canada,2.718043,USA_p4,USA_p11,USA_p10,USA_p2,Canada_p7,Canada_p11,Canada_p12,Canada_p6,-3
4,3,USA,Canada,6.470463,USA_p6,USA_p8,USA_p10,USA_p11,Canada_p8,Canada_p10,Canada_p1,Canada_p2,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1258,653,Chile,Canada,0.217044,Chile_p3,Chile_p6,Chile_p7,Chile_p12,Canada_p3,Canada_p7,Canada_p9,Canada_p2,0
1259,653,Chile,Canada,5.690802,Chile_p12,Chile_p8,Chile_p1,Chile_p4,Canada_p1,Canada_p11,Canada_p12,Canada_p9,-5
1260,653,Chile,Canada,2.080188,Chile_p2,Chile_p6,Chile_p5,Chile_p11,Canada_p8,Canada_p2,Canada_p11,Canada_p3,-2
1261,653,Chile,Canada,1.147784,Chile_p12,Chile_p3,Chile_p2,Chile_p5,Canada_p11,Canada_p12,Canada_p8,Canada_p10,1


### Identifying players that played on each stint

In [5]:
all_players = pd.unique(
    df[['home1', 'home2', 'home3', 'home4', 'away1', 'away2', 'away3', 'away4']].values.ravel()
)
all_players = sorted(all_players.tolist())

canadian_players = sorted([player for player in all_players if "Canada" in player])
canadian_players

df[canadian_players] = 0

for col in canadian_players:
    
    player_playing = (df["home1"].str.contains(col)) | \
        (df["home2"].str.contains(col)) | \
        (df["home3"].str.contains(col)) | \
        (df["home4"].str.contains(col)) | \
        (df["away1"].str.contains(col)) | \
        (df["away2"].str.contains(col)) | \
        (df["away3"].str.contains(col)) | \
        (df["away4"].str.contains(col))
    
    df.loc[player_playing, [col]] = 1
    
df = df.drop(columns=["home1", "home2", "home3", "home4", "away1", "away2", "away3", "away4"])

df

Unnamed: 0,game_id,h_team,a_team,minutes,plus-minus,Canada_p1,Canada_p10,Canada_p11,Canada_p12,Canada_p2,Canada_p3,Canada_p4,Canada_p5,Canada_p6,Canada_p7,Canada_p8,Canada_p9
0,3,USA,Canada,0.633517,0,1,1,0,0,0,0,1,0,1,1,0,0
1,3,USA,Canada,2.089695,1,1,1,1,0,0,0,0,1,1,0,0,0
2,3,USA,Canada,0.701476,0,0,0,0,0,1,0,0,1,1,0,0,1
3,3,USA,Canada,2.718043,-3,1,0,1,1,0,0,0,0,1,1,0,0
4,3,USA,Canada,6.470463,3,1,1,0,0,1,0,0,0,0,0,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1258,653,Chile,Canada,0.217044,0,0,0,0,0,1,1,0,0,0,1,0,1
1259,653,Chile,Canada,5.690802,-5,1,0,1,1,0,0,0,0,0,0,0,1
1260,653,Chile,Canada,2.080188,-2,1,0,1,0,1,1,0,0,0,0,1,0
1261,653,Chile,Canada,1.147784,1,1,1,1,1,0,0,0,0,0,0,1,0


### Canada plus-minus per 100-minutes

In [6]:
df["canada-plus-minus-per-100-minutes"] = df["plus-minus"] / df["minutes"] * 100
df.loc[df["a_team"] == "Canada", "canada-plus-minus-per-100-minutes"] = -df["canada-plus-minus-per-100-minutes"]

df = df.drop(columns=["h_team", "a_team", "plus-minus", "minutes", "game_id"])
df

Unnamed: 0,Canada_p1,Canada_p10,Canada_p11,Canada_p12,Canada_p2,Canada_p3,Canada_p4,Canada_p5,Canada_p6,Canada_p7,Canada_p8,Canada_p9,canada-plus-minus-per-100-minutes
0,1,1,0,0,0,0,1,0,1,1,0,0,-0.000000
1,1,1,1,0,0,0,0,1,1,0,0,0,-47.853864
2,0,0,0,0,1,0,0,1,1,0,0,1,-0.000000
3,1,0,1,1,0,0,0,0,1,1,0,0,110.373549
4,1,1,0,0,1,0,0,0,0,0,1,0,-46.364532
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1258,0,0,0,0,1,1,0,0,0,1,0,1,-0.000000
1259,1,0,1,1,0,0,0,0,0,0,0,1,87.861082
1260,1,0,1,0,1,1,0,0,0,0,1,0,96.145155
1261,1,1,1,1,0,0,0,0,0,0,1,0,-87.124437


### Calculate APMs

In [7]:
X = df.drop(columns=["canada-plus-minus-per-100-minutes"])
y = df["canada-plus-minus-per-100-minutes"]

lm = linear_model.LinearRegression()
model = lm.fit(X,y)

APMs_list = list(zip(model.coef_, model.feature_names_in_))
can_players = [i[1] for i in APMs_list]
can_APMs = [i[0] for i in APMs_list]

df_APMs = pd.DataFrame({"player": can_players, "APM": can_APMs})

df_APMs.sort_values("APM", ascending=False, inplace=True)
df_APMs

Unnamed: 0,player,APM
11,Canada_p9,38.988314
2,Canada_p11,31.156465
0,Canada_p1,19.281929
10,Canada_p8,18.040159
4,Canada_p2,12.153952
9,Canada_p7,11.516379
7,Canada_p5,11.458567
5,Canada_p3,2.862473
8,Canada_p6,-12.642596
1,Canada_p10,-22.254517


### APMs per fatigue level

In [8]:
relative_performances = [1.25, 1.0, 0.75, 0.5]

df_APMs = df_APMs[df_APMs["APM"] > 0].copy()
for performance in relative_performances:
    
    df_APMs[f"APM_{performance}"] = performance * df_APMs["APM"]
    
df_APMs.drop(columns=["APM"], inplace=True)

df_APMs

Unnamed: 0,player,APM_1.25,APM_1.0,APM_0.75,APM_0.5
11,Canada_p9,48.735392,38.988314,29.241235,19.494157
2,Canada_p11,38.945582,31.156465,23.367349,15.578233
0,Canada_p1,24.102412,19.281929,14.461447,9.640965
10,Canada_p8,22.550199,18.040159,13.530119,9.02008
4,Canada_p2,15.19244,12.153952,9.115464,6.076976
9,Canada_p7,14.395473,11.516379,8.637284,5.758189
7,Canada_p5,14.323209,11.458567,8.593925,5.729284
5,Canada_p3,3.578091,2.862473,2.146854,1.431236


### Read Players Scores

In [9]:
df_players = pd.read_csv("player_data.csv")
df_players

Unnamed: 0,player,rating
0,USA_p1,3.0
1,USA_p2,3.0
2,USA_p3,3.5
3,USA_p4,0.0
4,USA_p5,2.0
...,...,...
139,Chile_p8,2.0
140,Chile_p9,0.5
141,Chile_p10,0.5
142,Chile_p11,0.0


### Filter Canadian players

In [10]:
df_players = df_players[df_players["player"].str.contains("Canada")]
df_players

Unnamed: 0,player,rating
36,Canada_p1,3.0
37,Canada_p2,0.0
38,Canada_p3,2.0
39,Canada_p4,2.0
40,Canada_p5,3.5
41,Canada_p6,1.5
42,Canada_p7,3.0
43,Canada_p8,3.5
44,Canada_p9,2.5
45,Canada_p10,1.0


### Simulation DataFrame

In [11]:
df_simulation = df_APMs.copy()
df_simulation["current_APM"] = df_simulation["APM_1.25"]
df_simulation[["minutes_played", "is_playing"]] = 0
df_simulation = df_simulation.merge(df_players, how='inner', on=["player"])
df_simulation

Unnamed: 0,player,APM_1.25,APM_1.0,APM_0.75,APM_0.5,current_APM,minutes_played,is_playing,rating
0,Canada_p9,48.735392,38.988314,29.241235,19.494157,48.735392,0,0,2.5
1,Canada_p11,38.945582,31.156465,23.367349,15.578233,38.945582,0,0,2.0
2,Canada_p1,24.102412,19.281929,14.461447,9.640965,24.102412,0,0,3.0
3,Canada_p8,22.550199,18.040159,13.530119,9.02008,22.550199,0,0,3.5
4,Canada_p2,15.19244,12.153952,9.115464,6.076976,15.19244,0,0,0.0
5,Canada_p7,14.395473,11.516379,8.637284,5.758189,14.395473,0,0,3.0
6,Canada_p5,14.323209,11.458567,8.593925,5.729284,14.323209,0,0,3.5
7,Canada_p3,3.578091,2.862473,2.146854,1.431236,3.578091,0,0,2.0


### Simulation functions

In [12]:
def choose_players(df_simulation):

    # Create the model
    model = LpProblem(name="Maximize-APM", sense=LpMaximize)

    # Define variables
    variables = {
        player: {
            "decision_variable": LpVariable(name=player, cat="Binary"),
            "rating": df_simulation.loc[df_simulation["player"] == player, ["rating"]].iloc[0][0],
            "current_APM": df_simulation.loc[df_simulation["player"] == player, ["current_APM"]].iloc[0][0]
        } for player in df_simulation["player"].unique()
    }

    # Define objective function
    expression = 0
    for player in variables.keys():
        expression += variables[player]["decision_variable"] * variables[player]["current_APM"]

    # Define constraints
    constraint_1 = 0
    for player in variables.keys():
        constraint_1 += variables[player]["decision_variable"]

    constraint_1 = constraint_1 == 4

    constraint_2 = 0
    for player in variables.keys():
        constraint_2 += variables[player]["decision_variable"] * variables[player]["rating"]

    constraint_2 = constraint_2 <= 8

    model += expression
    model += constraint_1
    model += constraint_2

    status = model.solve()

    if status != 1:
        return "ERROR"

    chosen_players = {var.name for var in model.variables() if var.value() == 1}

    return chosen_players

def update_APM(row):
    
    if row["minutes_played"] == 6:
        return row["APM_1.0"]
    
    if row["minutes_played"] == 12:
        return row["APM_0.75"]
    
    if row["minutes_played"] == 24:
        return row["APM_0.5"]
    
    return row["current_APM"]

### Simulation

In [13]:
minutes_passed = 0
lineup_changes = {}
current_lineup = None

while minutes_passed < 120:
    
    chosen_players = choose_players(df_simulation)
    
    if chosen_players != current_lineup:
        lineup_changes[minutes_passed] = chosen_players
        
    current_lineup = chosen_players
    
    df_simulation["is_playing"] = 0
    df_simulation.loc[df_simulation["player"].isin(chosen_players), ["is_playing"]] = 1
        
    minutes_passed += 1
    
    df_simulation.loc[df_simulation["player"].isin(chosen_players), ["minutes_played"]] += 1
    
    df_simulation["current_APM"] = df_simulation.apply(update_APM, axis=1)
    
df_simulation

Unnamed: 0,player,APM_1.25,APM_1.0,APM_0.75,APM_0.5,current_APM,minutes_played,is_playing,rating
0,Canada_p9,48.735392,38.988314,29.241235,19.494157,19.494157,120,1,2.5
1,Canada_p11,38.945582,31.156465,23.367349,15.578233,15.578233,120,1,2.0
2,Canada_p1,24.102412,19.281929,14.461447,9.640965,9.640965,72,1,3.0
3,Canada_p8,22.550199,18.040159,13.530119,9.02008,9.02008,24,0,3.5
4,Canada_p2,15.19244,12.153952,9.115464,6.076976,6.076976,120,1,0.0
5,Canada_p7,14.395473,11.516379,8.637284,5.758189,8.637284,12,0,3.0
6,Canada_p5,14.323209,11.458567,8.593925,5.729284,8.593925,12,0,3.5
7,Canada_p3,3.578091,2.862473,2.146854,1.431236,3.578091,0,0,2.0


### Substitutions per minute

In [14]:
lineup_changes

{0: {'Canada_p1', 'Canada_p11', 'Canada_p2', 'Canada_p9'},
 6: {'Canada_p11', 'Canada_p2', 'Canada_p8', 'Canada_p9'},
 12: {'Canada_p1', 'Canada_p11', 'Canada_p2', 'Canada_p9'},
 18: {'Canada_p11', 'Canada_p2', 'Canada_p8', 'Canada_p9'},
 24: {'Canada_p1', 'Canada_p11', 'Canada_p2', 'Canada_p9'},
 36: {'Canada_p11', 'Canada_p2', 'Canada_p7', 'Canada_p9'},
 42: {'Canada_p11', 'Canada_p2', 'Canada_p5', 'Canada_p9'},
 48: {'Canada_p11', 'Canada_p2', 'Canada_p8', 'Canada_p9'},
 60: {'Canada_p11', 'Canada_p2', 'Canada_p7', 'Canada_p9'},
 66: {'Canada_p11', 'Canada_p2', 'Canada_p5', 'Canada_p9'},
 72: {'Canada_p1', 'Canada_p11', 'Canada_p2', 'Canada_p9'}}