In [142]:
import json
import pandas as pd
import gurobipy as gp
from datetime import datetime, timedelta

In [143]:
# Baselines that calcOPS divides a player's OBP and SLG by
# (presumably league-wide averages — TODO confirm the source season).
OBP_AVG = 0.327
SLG_AVG = 0.365

# Baseline fielding percentage per position, keyed by the position number as a string.
# calcF divides a player's FPCT at that position by this value.
# Keys '2'..'9' presumably follow standard scorekeeping numbering
# (2 = catcher ... 9 = right field) — TODO confirm.
FPCT_AVG = {
    '2': 0.992,
    '3': 0.993,
    '4': 0.977, 
    '5': 0.929,
    '6': 0.964,
    '7': 0.977,
    '8': 0.988,
    '9': 0.976
}

# Per-position weights that calcV applies to the batting score (B) and the
# fielding score (F) before adding them.
POSITION_WEIGHT = {
    '2': {"batting": 0.95, "fielding": 1.05},
    '3': {"batting": 1.21, "fielding": 0.83},
    '4': {"batting": 0.97, "fielding": 1.03},
    '5': {"batting": 0.96, "fielding": 1.04},
    '6': {"batting": 0.94, "fielding": 1.06},
    '7': {"batting": 1.02, "fielding": 0.98},
    '8': {"batting": 1.04, "fielding": 0.96},
    '9': {"batting": 1.17, "fielding": 0.85},
}

# Multiplier applied to every objective term of a game, keyed by the opponent's team name.
OPPONENT_WEIGHT = {
    "中信兄弟": 1.15,
    "統一7-ELEVEn獅": 1.11,
    "樂天桃猿": 0.96, 
    "富邦悍將": 0.93,
    "味全龍": 0.85
}

# Path prefixes: "<dataRoot><team name>.json" holds per-player statistics and
# "<playerRoot><team name>.csv" holds the dated roster lists.
dataRoot = "../clean/clean_all_"
playerRoot = "../球員異動/"

# Team name (as used in file names and game definitions) -> short key used to
# index allData and playerList.
teamNameMap = {
    "中信兄弟": "brothers", 
    "味全龍": "dragons",
    "富邦悍將": "guardians", 
    "統一7-ELEVEn獅": "lions",
    "樂天桃猿": "monkeys"
}

### Import Data

In [144]:
class Player:
    """One player's record as loaded from a team's JSON file.

    Attributes:
        name: The player's name; used to match entries in the roster lists.
        data: Nested statistics dict. The scoring functions in this notebook read
            its "batting" and "fielding" sections.
    """

    def __init__(self, name, data):
        self.name = name
        self.data = data

In [145]:
# Read every team's statistics file (one JSON object per line) and wrap each
# line in a Player, stored under the team's short key from teamNameMap.
allData = {}
for teamName, teamKey in teamNameMap.items():
    with open(dataRoot + teamName + ".json", encoding="utf-8") as handle:
        allData[teamKey] = [
            Player(record["name"], record["data"])
            for record in map(json.loads, handle)
        ]

# Convenience aliases, one per team.
brothers = allData["brothers"]
dragons = allData["dragons"]
guardians = allData["guardians"]
lions = allData["lions"]
monkeys = allData["monkeys"]

In [146]:
# playerList[team key][snapshot date] -> list of names found in that CSV column.
# Each CSV column header is parsed as a "%Y-%m-%d" date; empty (NaN) cells are dropped.
playerList = {}

for teamName, teamKey in teamNameMap.items():
    columns = pd.read_csv(playerRoot + teamName + ".csv", header=0, index_col=0).to_dict()
    playerList[teamKey] = {
        datetime.strptime(header, "%Y-%m-%d").date(): [
            cell for cell in cells.values() if not pd.isna(cell)
        ]
        for header, cells in columns.items()
    }

In [147]:
def createDate(mm, dd, year=2022):
    """Build a ``datetime.date`` from a month and day.

    Args:
        mm: Month number (1-12).
        dd: Day of the month.
        year: Calendar year; defaults to 2022 so existing two-argument calls
            keep their meaning.

    Returns:
        The corresponding ``datetime.date``.

    Raises:
        ValueError: If the combination is not a valid calendar date.
    """
    return datetime(year, mm, dd).date()

### Self-defined Variables

In [148]:
# Team whose lineups are optimized.
aTeam = "味全龍"
# NOTE(review): no visible cell reads this global — calcB assigns its own local
# month key from each game's date. Confirm whether it is still needed.
aMonth = "May"

# Games to plan, one row each: (month, day, ballpark, opponent, opposing pitcher).
aGame = [
    {"date": createDate(mm, dd), "field": field, "oppo": oppo, "pitcher": pitcher}
    for mm, dd, field, oppo, pitcher in (
        (4, 12, "洲際", "中信兄弟", "鄭凱文"),
        (4, 14, "澄清湖", "統一7-ELEVEn獅", "胡智為"),
        (4, 15, "天母", "樂天桃猿", "陳冠宇"),
        (4, 16, "天母", "樂天桃猿", "霸林爵"),
        (4, 17, "天母", "樂天桃猿", "王溢正"),
    )
]
# Currently disabled inputs:
# aStar = ["許基宏", "江坤宇"]
# aPotential = ["岳東華"]
# aInjured = ["周思齊"]

### Calculate Parameters

In [149]:
# Game-day 27-man roster with pitchers removed
def findPlayerList(team, date):
    """Return the names on ``team``'s roster for ``date`` that also appear in allData.

    The roster snapshots of ``team`` are scanned in their stored order. The scan
    stops at the first snapshot dated before ``date``; among the snapshots seen
    up to that point, the one closest to ``date`` (and less than 100 days after
    it) is used.
    NOTE(review): this only finds a snapshot when the stored order reaches the
    relevant dates before any earlier one (e.g. newest first) — confirm the
    column order of the roster CSVs.

    Args:
        team: Team name, a key of teamNameMap.
        date: ``datetime.date`` of the game.

    Returns:
        Roster names, in roster order, restricted to players present in
        ``allData`` for that team (presumably position players only, which is
        how pitchers are excluded — confirm against the data files).

    Raises:
        KeyError: If ``team`` is unknown, or no snapshot qualifies and there is
            no snapshot dated exactly ``date``.
    """
    teamKey = teamNameMap[team]
    # Use the requested team's snapshots; previously the global aTeam was read
    # here, which broke lookups for any other team.
    rosterByDate = playerList[teamKey]

    minDelta = timedelta(days=100)
    playerDay = date
    for key in rosterByDate.keys():
        if date > key:
            break
        if key - date < minDelta:
            minDelta = key - date
            playerDay = key

    # Set membership keeps the filter linear in the roster size.
    knownNames = {data.name for data in allData[teamKey]}
    return [name for name in rosterByDate[playerDay] if name in knownNames]
        

In [150]:
# A_{ij}
def calcA(player, pos):
    """Eligibility indicator: 1 if the player has a fielding entry for ``pos``, else 0.

    ``pos`` is converted with ``str`` before the lookup in
    ``player.data["fielding"]["pos"]``.
    """
    positionsPlayed = player.data["fielding"]["pos"]
    return 1 if str(pos) in positionsPlayed else 0

In [151]:
# F_{ij}
def calcF(player, pos):
    """Fielding score: the player's FPCT at ``pos`` divided by ``FPCT_AVG`` for that position.

    Returns 0 when the player has no fielding entry for ``pos``.
    """
    posKey = str(pos)
    fieldingByPos = player.data["fielding"]["pos"]
    if posKey not in fieldingByPos:
        return 0
    return float(fieldingByPos[posKey]["FPCT"]) / FPCT_AVG[posKey]

In [152]:
# Helper for B_i
def calcOPS(OBP, SLG):
    """Combine OBP and SLG, each divided by its baseline (OBP_AVG, SLG_AVG), minus 1."""
    relativeOBP = OBP / OBP_AVG
    relativeSLG = SLG / SLG_AVG
    return relativeOBP + relativeSLG - 1

# Keys of player.data["batting"]["month"], indexed by calendar month - 1.
# "Apr", "May" and "Jun" are the spellings this notebook already relied on; the
# other months presumably follow the same three-letter pattern — TODO confirm.
_MONTH_KEYS = ("Jan", "Feb", "Mar", "Apr", "May", "Jun",
               "Jul", "Aug", "Sep", "Oct", "Nov", "Dec")


def _splitOPS(split, fallbackOPS):
    """Return ``(OPS, PA)`` for one batting split.

    Falls back to ``(fallbackOPS, 0)`` when the split is missing or has zero
    at-bats, since TB / AB would be undefined. A zero PA weight means the
    fallback value contributes nothing to the weighted sum in calcB.
    """
    if split is None or float(split["AB"]) == 0:
        return fallbackOPS, 0
    OPS = calcOPS(float(split["OBP"]), float(split["TB"]) / float(split["AB"]))
    return OPS, int(split["PA"])


def calcB(game, player):
    """Batting score B_i of ``player`` for ``game``.

    Sums OPS x PA over four sources — the season line, the game's month, the
    game's ballpark and the match-up against the opposing pitcher — and divides
    the total by 400. Splits that are missing or have no at-bats get a PA
    weight of 0.

    Args:
        game: Dict with "date" (a date), "field", "oppo" and "pitcher".
        player: Object whose ``data["batting"]`` holds "season" and, optionally,
            "month", "field" and "vsP" splits.

    Returns:
        The score as a float.
    """
    batting = player.data["batting"]
    # A table lookup covers every calendar month; the former Apr/May/Jun-only
    # chain left the key undefined for other months.
    monthKey = _MONTH_KEYS[game["date"].month - 1]
    aOppo = game["oppo"]
    aPitcher = game["pitcher"]
    aField = game["field"]

    # NOTE(review): the season term reads the stored "OPS+" value, while the
    # split terms come from calcOPS — confirm both are on the same scale.
    OPSseason = float(batting["season"]["OPS+"])
    PAseason = int(batting["season"]["PA"])

    OPSmonth, PAmonth = _splitOPS(batting.get("month", {}).get(monthKey), OPSseason)
    OPSfield, PAfield = _splitOPS(batting.get("field", {}).get(aField), OPSseason)

    # vsP data is nested as ["vsP"]["data"][opponent][pitcher].
    vsOpponent = batting["vsP"]["data"].get(aOppo, {}) if "vsP" in batting else {}
    OPSvsp, PAvsp = _splitOPS(vsOpponent.get(aPitcher), OPSseason)

    # NOTE(review): fixed divisor of 400 rather than the total PA — confirm intent.
    B = (OPSseason * PAseason + OPSmonth * PAmonth + OPSfield * PAfield + OPSvsp * PAvsp) / 400
    return B

# V_{ij}
def calcV(game, player, pos):
    """Value of fielding ``player`` at ``pos`` in ``game``.

    Weighted sum of the batting score (calcB) and fielding score (calcF) using
    POSITION_WEIGHT for ``pos``, multiplied by the eligibility flag from calcA
    so ineligible pairings are worth zero.
    """
    eligible = calcA(player, pos)
    battingScore = calcB(game, player)
    fieldingScore = calcF(player, pos)
    weights = POSITION_WEIGHT[str(pos)]
    weighted = weights["batting"] * battingScore + weights["fielding"] * fieldingScore
    return weighted * eligible

### Optimization

#### Stage 1

In [153]:
# Stage 1: choose, for every game, which player fields each position 2-9.
model = gp.Model("model")

# Roster per game, looked up once and reused for variables, objective and constraints.
rosters = [findPlayerList(aTeam, game["date"]) for game in aGame]

# Name -> Player for the optimized team; the first record wins on duplicate names.
playerByName = {}
for candidate in allData[teamNameMap[aTeam]]:
    playerByName.setdefault(candidate.name, candidate)

# decision variables: x[g][name][j - 2] is 1 when `name` fields position j in game g
x = []
for g, roster in enumerate(rosters):
    x.append({
        name: [
            model.addVar(lb=0, vtype="B", name="-".join(("x" + str(g), name, str(j))))
            for j in range(2, 10)
        ]
        for name in roster
    })

# objective function: opponent-weighted V_{ij} summed over all games, players and positions
model.setObjective(
    gp.quicksum(
        OPPONENT_WEIGHT[game["oppo"]] * calcV(game, playerByName[name], j) * x[g][name][j - 2]
        for g, game in enumerate(aGame)
        for name in rosters[g]
        for j in range(2, 10)
    ),
    gp.GRB.MAXIMIZE,
)

# constraints
for g, roster in enumerate(rosters):
    for name in roster:
        player = playerByName[name]
        slots = x[g][name]

        # constraints 1 and 2 are currently disabled:
        # model.addConstr(calcF(player, 8) * slots[8 - 2] >= calcF(player, 7) * slots[7 - 2])
        # model.addConstr(calcF(player, 8) * slots[8 - 2] >= calcF(player, 9) * slots[9 - 2])

        # constraint 4: a player fields at most one position per game
        model.addConstr(gp.quicksum(slots) <= 1)

        # constraint 5: a player may only be placed where calcA marks them eligible
        for j in range(2, 10):
            model.addConstr(slots[j - 2] <= calcA(player, j))

    # constraint 3: each position is taken by at most one player per game
    for j in range(2, 10):
        model.addConstr(gp.quicksum(x[g][name][j - 2] for name in roster) <= 1)

model.optimize()

Gurobi Optimizer version 9.5.1 build v9.5.1rc2 (win64)
Thread count: 4 physical cores, 8 logical processors, using up to 8 threads
Optimize a model with 715 rows, 600 columns and 1800 nonzeros
Model fingerprint: 0x19ba8f0a
Variable types: 0 continuous, 600 integer (600 binary)
Coefficient statistics:
  Matrix range     [1e+00, 1e+00]
  Objective range  [3e+00, 2e+02]
  Bounds range     [1e+00, 1e+00]
  RHS range        [1e+00, 1e+00]
Found heuristic solution: objective 2585.7755498
Presolve removed 700 rows and 578 columns
Presolve time: 0.01s
Presolved: 15 rows, 22 columns, 44 nonzeros
Found heuristic solution: objective 3016.2327097
Variable types: 0 continuous, 22 integer (22 binary)
Found heuristic solution: objective 3016.8730658

Root relaxation: objective 3.052514e+03, 10 iterations, 0.00 seconds (0.00 work units)

    Nodes    |    Current Node    |     Objective Bounds      |     Work
 Expl Unexpl |  Obj  Depth IntInf | Incumbent    BestBd   Gap | It/Node Time

*    0     0   

In [154]:
# Print the name of every selected assignment. A binary variable's solution
# value is a float that may differ slightly from exactly 0 or 1, so compare
# against 0.5 instead of testing for inequality with 0.0.
for var in model.getVars():
    if var.x > 0.5:
        print(var.varName)

x0-吉力吉撈．鞏冠-2
x0-李凱威-4
x0-劉基鴻-3
x0-拿莫．伊漾-6
x0-吳東融-5
x0-郭天信-9
x0-張祐銘-7
x0-陳品捷-8
x1-吉力吉撈．鞏冠-2
x1-李凱威-4
x1-劉基鴻-3
x1-拿莫．伊漾-6
x1-吳東融-5
x1-郭天信-9
x1-張祐銘-7
x1-陳品捷-8
x2-吉力吉撈．鞏冠-2
x2-李凱威-4
x2-劉基鴻-3
x2-拿莫．伊漾-6
x2-吳東融-5
x2-郭天信-9
x2-張祐銘-7
x2-陳品捷-8
x3-吉力吉撈．鞏冠-2
x3-李凱威-4
x3-劉基鴻-3
x3-拿莫．伊漾-6
x3-吳東融-5
x3-郭天信-9
x3-張祐銘-7
x3-陳品捷-8
x4-吉力吉撈．鞏冠-2
x4-李凱威-4
x4-劉基鴻-3
x4-拿莫．伊漾-6
x4-吳東融-5
x4-郭天信-9
x4-張祐銘-7
x4-陳品捷-8


#### Stage 2

In [155]:
# playerName[g] lists the players selected in game g, ordered by position index
# (0 -> position 2, ..., 7 -> position 9). Stage 2 indexes this list by position,
# which assumes every position was filled.
playerName = []
for g in range(len(aGame)):
    singleGame = []
    for j in range(8):
        for name, slots in x[g].items():
            # Solution values are floats; an exact `== 1` test would silently
            # drop a selected player whose value came back as e.g. 0.9999999.
            if slots[j].x > 0.5:
                singleGame.append(name)
    playerName.append(singleGame)

In [156]:
# NOTE(review): this repeats the stage-1 variable construction on the already
# optimized `model` and rebinds `x` to the new, unsolved variables. No later
# cell in the visible notebook reads `x`; confirm whether this cell is needed.
x = []
for g in range(len(aGame)):
    todayPlayerList = findPlayerList(aTeam, aGame[g]["date"])
    x.append({
        name: [
            model.addVar(lb=0, vtype="B", name="-".join(("x" + str(g), name, str(j))))
            for j in range(2, 10)
        ]
        for name in todayPlayerList
    })

In [157]:
# Stage 2: for each game, assign the eight players picked in stage 1 to
# batting-order slots 1-9 so that the summed per-slot value is maximal.

# Minimum plate appearances in a batting-order slot before that slot's own
# numbers are used; below it the player's season wOBA is used instead.
_MIN_ORDER_PA = 30


def _battingOrderValue(player, order):
    """Offensive value of ``player`` when batting in slot ``order`` (1-9).

    Uses the weighted-event formula on the player's "baorder" split for that
    slot when it has at least ``_MIN_ORDER_PA`` plate appearances; otherwise
    returns the season wOBA.
    """
    batting = player.data["batting"]
    orderStats = batting.get("baorder", {}).get(str(order))
    if orderStats is not None:
        PA = int(orderStats["PA"])
        if PA >= _MIN_ORDER_PA:
            doubles = int(orderStats["2B"])
            triples = int(orderStats["3B"])
            homers = int(orderStats["HR"])
            singles = int(orderStats["H"]) - doubles - triples - homers
            # NOTE(review): only element [1] of the "IBB" field is read — confirm
            # the field's format (a multi-digit count would be truncated if this
            # indexes into a string).
            return (int(orderStats["IBB"][1]) * 0.72 + int(orderStats["HBP"]) * 0.75
                    + singles * 0.9
                    + doubles * 1.24 + triples * 1.56 + homers * 1.95) / PA
    # float() keeps the objective coefficient numeric even if the data stores
    # wOBA as text, as other season fields are read in this notebook.
    return float(batting["season"]["wOBA"])


# Name -> Player for the optimized team; the first record wins on duplicate names.
_playerByName = {}
for _candidate in allData[teamNameMap[aTeam]]:
    _playerByName.setdefault(_candidate.name, _candidate)

for g in range(len(aGame)):
    model2 = gp.Model("model2")

    # allO[j - 2][k - 1]: value of the player at position j batting in slot k.
    # y[j - 2][k - 1]: 1 when that player bats in slot k.
    allO = []
    y = []
    for j in range(2, 10):
        thisPlayer = _playerByName[playerName[g][j - 2]]
        allO.append([_battingOrderValue(thisPlayer, k) for k in range(1, 10)])
        y.append([
            model2.addVar(lb=0, vtype="B", name="y" + str(j) + "-" + str(k))
            for k in range(1, 10)
        ])

    model2.setObjective(
        gp.quicksum(
            allO[j - 2][k - 1] * y[j - 2][k - 1]
            for j in range(2, 10)
            for k in range(1, 10)
        ),
        gp.GRB.MAXIMIZE
    )

    # each batting-order slot holds at most one player
    for k in range(1, 10):
        model2.addConstr(gp.quicksum(y[j - 2][k - 1] for j in range(2, 10)) <= 1)
    # each player takes at most one slot
    for j in range(2, 10):
        model2.addConstr(gp.quicksum(y[j - 2][k - 1] for k in range(1, 10)) <= 1)

    model2.optimize()
    for var in model2.getVars():
        # Solution values are floats; threshold at 0.5 rather than comparing with 0.0.
        if var.x > 0.5:
            print(var.varName)

Gurobi Optimizer version 9.5.1 build v9.5.1rc2 (win64)
Thread count: 4 physical cores, 8 logical processors, using up to 8 threads
Optimize a model with 17 rows, 72 columns and 144 nonzeros
Model fingerprint: 0xdac0a5a6
Variable types: 0 continuous, 72 integer (72 binary)
Coefficient statistics:
  Matrix range     [1e+00, 1e+00]
  Objective range  [2e-01, 4e-01]
  Bounds range     [1e+00, 1e+00]
  RHS range        [1e+00, 1e+00]
Found heuristic solution: objective 2.1923478
Presolve time: 0.00s
Presolved: 17 rows, 72 columns, 144 nonzeros
Variable types: 0 continuous, 72 integer (72 binary)
Found heuristic solution: objective 2.3716631

Root relaxation: objective 2.507926e+00, 22 iterations, 0.00 seconds (0.00 work units)

    Nodes    |    Current Node    |     Objective Bounds      |     Work
 Expl Unexpl |  Obj  Depth IntInf | Incumbent    BestBd   Gap | It/Node Time

*    0     0               0       2.5079259    2.50793  0.00%     -    0s

Explored 1 nodes (22 simplex iterations)