In [1]:
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import BayesianRidge
pd.options.mode.chained_assignment = None  # default='warn'
from mip import Model, xsum, maximize, BINARY

## Build a model to create a metric for judging team based on team level information ##

In [2]:
teams = pd.read_csv("teams.csv")
teams = teams[(teams.yearID >=1961) & (teams.yearID <= 2001)]

## Extracting features for teams ##

In [3]:
teams["BB"] = teams["BB"]/teams["G"]
teams["singles"] = (teams["H"] - teams["X2B"] - teams["X3B"] - teams["HR"])/teams["G"]
teams["doubles"] =  teams["X2B"]/teams["G"]
teams["triples"] = teams["X3B"]/teams["G"]
teams["HR"] = teams["HR"]/teams["G"]
teams["R"] = teams["R"]/teams["G"]

In [4]:
teams.shape

(1026, 52)

## Model building ##

In [5]:
team_features = teams[["BB","singles","doubles","triples","HR"]]
team_runs = teams["R"]
model = BayesianRidge()
model.fit(team_features, team_runs)
#reg = LinearRegression().fit(team_features, team_runs)
#reg.score(team_features,team_runs)

BayesianRidge()

In [6]:
batting=pd.read_csv("Batting.csv")

In [7]:
def extract_pa_per_game(df):
    
    pa_per_game = (df['AB'].sum() + df["BB"].sum())/df["G"].max()
    
    return pa_per_game


In [8]:
pa_per_game=batting[batting.yearID ==2002].groupby('teamID').apply(extract_pa_per_game)
average_pa_teamwise = pa_per_game.mean()

In [9]:
average_pa_teamwise

38.74656866970645

In [10]:
batting["PA"] = batting["AB"] + batting["BB"]
batting["singles"] = batting["H"] - batting["X2B"] - batting["X3B"] - batting["HR"]

In [11]:
players = batting[(batting.yearID >= 1997) & (batting.yearID <=2001)].groupby('playerID').agg(PA_sum = ("PA",sum),HR_sum=("HR",sum),BB_sum=("BB",sum),singles_sum=("singles",sum),doubles_sum=("X2B",sum),triples_sum=("X3B",sum),AB_sum=("AB",sum),H_sum=("H",sum))
players["Average_PA"] = players["PA_sum"]/average_pa_teamwise
players["HR"] = players["HR_sum"]/players["Average_PA"]
players["BB"] = players["BB_sum"]/players["Average_PA"]
players["singles"] = players["singles_sum"]/players["Average_PA"]
players["doubles"] = players["doubles_sum"]/players['Average_PA']
players["triples"] = players["triples_sum"]/players["Average_PA"]
players["Average"] = players["H_sum"]/players["AB_sum"]
players = players[players.PA_sum >= 1000]

In [12]:
players.head()

Unnamed: 0_level_0,PA_sum,HR_sum,BB_sum,singles_sum,doubles_sum,triples_sum,AB_sum,H_sum,Average_PA,HR,BB,singles,doubles,triples,Average
playerID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
abreubo01,2815,96,420,444,164,33,2395,737,72.651595,1.321375,5.781016,6.111359,2.257349,0.454223,0.307724
agbaybe01,1060,35,123,172,51,6,937,264,27.357261,1.279368,4.496064,6.287179,1.864222,0.21932,0.28175
alfoned01,3063,96,359,535,158,7,2704,796,79.052161,1.214388,4.541305,6.767683,1.99868,0.088549,0.294379
alicelu01,1954,24,216,339,82,22,1738,467,50.430272,0.475905,4.283142,6.722153,1.626007,0.436246,0.2687
alomaro01,3090,91,342,583,173,20,2748,867,79.748997,1.14108,4.288455,7.310437,2.169306,0.250787,0.315502


In [13]:
players_features = players[["HR","BB","singles","doubles","triples"]]
players_features.head()

Unnamed: 0_level_0,HR,BB,singles,doubles,triples
playerID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
abreubo01,1.321375,5.781016,6.111359,2.257349,0.454223
agbaybe01,1.279368,4.496064,6.287179,1.864222,0.21932
alfoned01,1.214388,4.541305,6.767683,1.99868,0.088549
alicelu01,0.475905,4.283142,6.722153,1.626007,0.436246
alomaro01,1.14108,4.288455,7.310437,2.169306,0.250787


In [14]:
#players_features["R_hat"]=(reg.predict(players_features))
players_features["R_hat"]=(model.predict(players_features))
players_features.head()

Unnamed: 0_level_0,HR,BB,singles,doubles,triples,R_hat
playerID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
abreubo01,1.321375,5.781016,6.111359,2.257349,0.454223,8.876176
agbaybe01,1.279368,4.496064,6.287179,1.864222,0.21932,7.505707
alfoned01,1.214388,4.541305,6.767683,1.99868,0.088549,7.852762
alicelu01,0.475905,4.283142,6.722153,1.626007,0.436246,7.451847
alomaro01,1.14108,4.288455,7.310437,2.169306,0.250787,8.557009


## Adding salary information ##

In [15]:
Salaries=pd.read_csv("salaries.csv")

In [16]:
salaries_yr_2002 =Salaries[Salaries.yearID==2002]
salaries_yr_2002= salaries_yr_2002[["playerID","salary"]]

In [17]:
player_insights = pd.merge(salaries_yr_2002, players_features, on='playerID')
player_insights.head()

Unnamed: 0,playerID,salary,HR,BB,singles,doubles,triples,R_hat
0,anderga01,5000000,1.245384,1.676031,7.25116,2.234707,0.197865,7.197128
1,erstada01,6250000,0.982139,3.197939,7.198358,2.024164,0.227569,7.632801
2,fabrejo01,500000,0.583429,2.230759,6.623638,1.132539,0.171597,5.360105
3,fullmbr01,4000000,1.433642,2.504095,5.906606,2.676132,0.133807,7.111337
4,glaustr01,4000000,2.105016,5.440932,4.299228,2.015821,0.053517,6.717143


## Select the players given the maximum budget as 40 million dollars ##


Please see reference link before proceeding the code : 
https://docs.python-mip.com/en/latest/examples.html

In [18]:
from mip import Model, xsum, maximize, BINARY

In [19]:
p = player_insights["R_hat"]
w = player_insights["salary"]

c, I = 40000000, range(len(w))

In [20]:
m = Model("knapsack")

x = [m.add_var(var_type=BINARY) for i in I]

m.objective = maximize(xsum(p[i] * x[i] for i in I))

m += xsum(w[i] * x[i] for i in I) <= c
m += xsum(x[i] for i in I) <= 9
m.optimize()

selected = [i for i in I if x[i].x >= 0.99]
print("selected items: {}".format(selected))


selected items: [15, 54, 69, 87, 90, 111, 155, 194, 222]


## Selected team ##

In [21]:
selected_team=player_insights.iloc[selected]
selected_team

Unnamed: 0,playerID,salary,HR,BB,singles,doubles,triples,R_hat
15,gracema01,3000000,0.884232,5.231708,6.803676,2.296548,0.196496,8.639206
54,loftoke01,1025000,0.690507,4.638123,7.24381,1.602498,0.351768,7.967508
69,caseyse01,4000000,1.188545,3.657062,7.13127,2.486802,0.109712,8.296037
87,heltoto01,5000000,2.24035,4.739202,6.218408,2.742993,0.157973,8.929569
90,walkela01,12666667,2.326584,4.876878,6.845525,2.595036,0.313194,9.558451
111,berkmla01,500000,1.932416,5.240449,5.502471,2.783988,0.196517,8.629489
155,mientdo01,285000,0.643882,4.317799,6.438824,2.310401,0.151502,7.745977
194,abreubo01,6333333,1.321375,5.781016,6.111359,2.257349,0.454223,8.876176
222,martied01,7086668,1.741558,6.311602,6.410414,2.309725,0.049406,9.019139
