In [2]:
import pandas as pd
import gurobipy as gp
from gurobipy import GRB
import numpy as np
from datetime import timedelta 

In [3]:
# Read the raw match-level results from the Excel workbook.
data = pd.read_excel("tournaments_updated.xlsx")

# Keep only the columns used downstream: match details first, then the points
# awarded for each round of the tournament.
match_columns = ["Tournament", "Date", "Series", "Round", "Winner", "Loser", "WRank", "LRank", "WPts", "LPts"]
points_columns = ["Wpoints", "Fpoints", "SFpoints", "QFpoints", "sixteenpoints", "thirtytwopoints", "sixtyfourpoints", "onehundredtwentyeightpoints"]
data_copy = data[match_columns + points_columns]

data_copy

Unnamed: 0,Tournament,Date,Series,Round,Winner,Loser,WRank,LRank,WPts,LPts,Wpoints,Fpoints,SFpoints,QFpoints,sixteenpoints,thirtytwopoints,sixtyfourpoints,onehundredtwentyeightpoints
0,Adelaide International 1,2023-01-01,ATP250,1st Round,Giron M.,Gasquet R.,61.0,68.0,776.0,740.0,250,165,100,50,25,13,0,0
1,Adelaide International 1,2023-01-01,ATP250,1st Round,Mcdonald M.,Galan D.E.,63.0,67.0,775.0,741.0,250,165,100,50,25,13,0,0
2,Adelaide International 1,2023-01-02,ATP250,1st Round,Kecmanovic M.,O Connell C.,29.0,78.0,1420.0,652.0,250,165,100,50,25,13,0,0
3,Adelaide International 1,2023-01-02,ATP250,1st Round,Nishioka Y.,Rune H.,36.0,11.0,1134.0,2888.0,250,165,100,50,25,13,0,0
4,Adelaide International 1,2023-01-02,ATP250,1st Round,Popyrin A.,Auger-Aliassime F.,120.0,6.0,469.0,4195.0,250,165,100,50,25,13,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1991,Canadian Open,2023-08-12,Masters 1000,Quarterfinals,Paul T.,Alcaraz C.,14.0,1.0,2345.0,9225.0,1000,650,400,200,100,50,30,10
1992,Canadian Open,2023-08-12,Masters 1000,Quarterfinals,Sinner J.,Monfils G.,8.0,276.0,3815.0,207.0,1000,650,400,200,100,50,30,10
1993,Canadian Open,2023-08-12,Masters 1000,Semifinals,De Minaur A.,Davidovich Fokina A.,18.0,37.0,2085.0,1140.0,1000,650,400,200,100,50,30,10
1994,Canadian Open,2023-08-13,Masters 1000,Semifinals,Sinner J.,Paul T.,8.0,14.0,3815.0,2345.0,1000,650,400,200,100,50,30,10


In [4]:
# Remove any 'Start Date' / 'End Date' columns already present (errors='ignore'
# makes this a no-op when they are absent), then parse 'Date' as datetimes.
data_copy = (
    data_copy
    .drop(columns=['Start Date', 'End Date'], errors='ignore')
    .assign(Date=lambda frame: pd.to_datetime(frame['Date']))
)

# Earliest and latest match date per tournament, indexed by tournament name.
dates = (
    data_copy.groupby('Tournament')['Date']
    .agg(['min', 'max'])
    .rename(columns={'min': 'Start Date', 'max': 'End Date'})
)

# Right join on the tournament name so every match row carries its
# tournament's 'Start Date' and 'End Date'.
data_copy = data_copy.merge(dates, on='Tournament', how='right')

In [6]:
# Write the match-level frame `data_copy` to 'final_data.csv' without the row index.
# NOTE: despite the file name, this exports `data_copy`, not the per-player
# `final_data` frame that is built in a later cell.
data_copy.to_csv('final_data.csv', index=False)

In [8]:
# Goal: one row per (tournament, player, rank) with a probability column for each
# round and an expected-points column derived from them.

# Every match contributes its winner and its loser as a player row.
winners, losers = (
    data_copy[['Tournament', name_col, rank_col]].rename(columns={name_col: 'Player', rank_col: 'Rank'})
    for name_col, rank_col in (('Winner', 'WRank'), ('Loser', 'LRank'))
)

# Stack both roles and keep each distinct (Tournament, Player, Rank) row once.
players = pd.concat([winners, losers], ignore_index=True).drop_duplicates()

# Tournament-level attributes (dates, series, points per round), de-duplicated.
tournament_columns = [
    'Tournament', 'Start Date', 'End Date', 'Series',
    'Wpoints', 'Fpoints', 'SFpoints', 'QFpoints',
    'sixteenpoints', 'thirtytwopoints', 'sixtyfourpoints', 'onehundredtwentyeightpoints',
]
col_needed_data = data_copy[tournament_columns].drop_duplicates()

# Left join so every player row picks up its tournament's attributes.
final_data = players.merge(col_needed_data, on='Tournament', how='left')

# Rank bands: 1-5, 6-20, 21-50, 51-100 (inclusive) and everything above 100.
rank = final_data["Rank"]
rank_bands = [(1, 5), (6, 20), (21, 50), (51, 100)]
conditions = [rank.between(low, high) for low, high in rank_bands] + [rank > 100]

# Per-round probabilities, one entry per rank band (same order as `conditions`).
probs_128 = [0.99, 0.94, 0.90, 0.80, 0.70]
probs_64 = [0.97, 0.88, 0.80, 0.65, 0.55]
probs_32 = [0.95, 0.80, 0.70, 0.55, 0.45]
probs_16 = [0.92, 0.70, 0.60, 0.45, 0.35]
probs_QF = [0.90, 0.65, 0.50, 0.30, 0.20]
probs_SF = [0.85, 0.50, 0.40, 0.20, 0.10]
probs_F = [0.80, 0.40, 0.30, 0.10, 0.05]
probs_W = [0.75, 0.30, 0.20, 0.05, 0.01]

# (probability column suffix, probabilities per band, matching points column)
rounds = [
    ('128', probs_128, 'onehundredtwentyeightpoints'),
    ('64', probs_64, 'sixtyfourpoints'),
    ('32', probs_32, 'thirtytwopoints'),
    ('16', probs_16, 'sixteenpoints'),
    ('QF', probs_QF, 'QFpoints'),
    ('SF', probs_SF, 'SFpoints'),
    ('F', probs_F, 'Fpoints'),
    ('W', probs_W, 'Wpoints'),
]

# np.select falls back to its default of 0 for rows matching no band
# (for example a missing rank).
for label, band_probs, _ in rounds:
    final_data[f'Probability_{label}'] = np.select(conditions, band_probs)

# Expected points: sum over rounds of (round probability * round points).
final_data['Expected_Points'] = sum(
    final_data[f'Probability_{label}'] * final_data[points_col]
    for label, _, points_col in rounds
)
final_data

Unnamed: 0,Tournament,Player,Rank,Start Date,End Date,Series,Wpoints,Fpoints,SFpoints,QFpoints,...,onehundredtwentyeightpoints,Probability_128,Probability_64,Probability_32,Probability_16,Probability_QF,Probability_SF,Probability_F,Probability_W,Expected_Points
0,ABN AMRO World Tennis Tournament,Gasquet R.,45.0,2023-02-13,2023-02-19,ATP500,500,330,1200,100,...,0,0.9,0.80,0.70,0.60,0.5,0.4,0.30,0.20,776.50
1,ABN AMRO World Tennis Tournament,Dimitrov G.,28.0,2023-02-13,2023-02-19,ATP500,500,330,1200,100,...,0,0.9,0.80,0.70,0.60,0.5,0.4,0.30,0.20,776.50
2,ABN AMRO World Tennis Tournament,Wawrinka S.,130.0,2023-02-13,2023-02-19,ATP500,500,330,1200,100,...,0,0.7,0.55,0.45,0.35,0.2,0.1,0.05,0.01,190.25
3,ABN AMRO World Tennis Tournament,Barrere G.,71.0,2023-02-13,2023-02-19,ATP500,500,330,1200,100,...,0,0.8,0.65,0.55,0.45,0.3,0.2,0.10,0.05,364.25
4,ABN AMRO World Tennis Tournament,Brouwer G.,160.0,2023-02-13,2023-02-19,ATP500,500,330,1200,100,...,0,0.7,0.55,0.45,0.35,0.2,0.1,0.05,0.01,190.25
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2069,Wimbledon,Medjedovic H.,156.0,2023-07-03,2023-07-16,Grand Slam,2000,1300,800,400,...,10,0.7,0.55,0.45,0.35,0.2,0.1,0.05,0.01,394.50
2070,Wimbledon,Coppejans K.,188.0,2023-07-03,2023-07-16,Grand Slam,2000,1300,800,400,...,10,0.7,0.55,0.45,0.35,0.2,0.1,0.05,0.01,394.50
2071,Wimbledon,Humbert U.,39.0,2023-07-03,2023-07-16,Grand Slam,2000,1300,800,400,...,10,0.9,0.80,0.70,0.60,0.5,0.4,0.30,0.20,1549.00
2072,Wimbledon,Cecchinato M.,89.0,2023-07-03,2023-07-16,Grand Slam,2000,1300,800,400,...,10,0.8,0.65,0.55,0.45,0.3,0.2,0.10,0.05,695.50


In [None]:
# Scheduling model: choose at most MAX_TOURNAMENTS tournaments that maximise the
# total objective weight, never entering two tournaments that overlap or that
# are separated by MIN_REST_DAYS days or fewer.
MAX_TOURNAMENTS = 15
MIN_REST_DAYS = 3

m = gp.Model("Maximizing Entered Tournaments")

# Positional indices follow the row order of `dates`, so variable i is named
# "tourney{i}" and refers to dates.index[i]. The count comes from the data
# instead of a hard-coded 48.
n_tournaments = len(dates)
starts = dates['Start Date'].tolist()
ends = dates['End Date'].tolist()

# Objective weight per tournament. The original cell never defined `p` (it
# assigned the bound method `m.addVar` into an undefined name), so the weight
# used here is an assumption: the mean Expected_Points over the players listed
# for that tournament in `final_data`.
# TODO(review): replace with one player's expected points if the schedule is
# meant for a specific player.
mean_points = (
    final_data.groupby('Tournament')['Expected_Points'].mean()
    .reindex(dates.index)
    .fillna(0.0)
)
p = {i: float(mean_points.iloc[i]) for i in range(n_tournaments)}

# y[i] = 1 if tournament i is entered.
y = {}
for i in range(n_tournaments):
    y[i] = m.addVar(vtype=GRB.BINARY, name=f"tourney{i}")

m.setObjective(gp.quicksum(y[i] * p[i] for i in range(n_tournaments)), GRB.MAXIMIZE)

# Cap on the number of tournaments entered.
m.addConstr(gp.quicksum(y[i] for i in range(n_tournaments)) <= MAX_TOURNAMENTS)

# Conflict constraints. Two tournaments are compatible only if one starts more
# than MIN_REST_DAYS after the other ends. The check must be symmetric because
# `dates` is ordered by tournament name, not chronologically: a one-sided
# `start_j <= end_i + rest` test would also forbid pairs where j was played long
# before i. Date ranges that overlap always satisfy both inequalities, so this
# single loop covers the plain overlap case as well.
rest = timedelta(days=MIN_REST_DAYS)
for i in range(n_tournaments):
    for j in range(i + 1, n_tournaments):
        if starts[j] <= ends[i] + rest and starts[i] <= ends[j] + rest:
            m.addConstr(y[i] + y[j] <= 1)

m.optimize()

