In [238]:
import pandas as pd
import numpy as np

data = pd.read_csv('./../data/wta.csv')
elo_data = pd.read_csv('./../data/data_elo.csv')
head_to_head_data = pd.read_csv('./../data/data_head_to_head.csv')
data_last_matches = pd.read_csv('./../data/data_last_matches.csv')
data_player_odds = pd.read_csv('./../data/data_player_odds.csv')

  data = pd.read_csv('./../data/wta.csv')


In [239]:
elo_dict = {}
for index, row in elo_data.iterrows():
    elo_dict[row['PlayerName']] = row

    
def get_elo_row(row, is_opponent):
    player = row['Player_2'] if is_opponent else row['Player_1']
    court = row['Court']
    surface = row['Surface']
    if(player not in elo_dict):
        return 1500
    elo_row = elo_dict[player]
    elo = elo_row[f'Elo{court}{surface}']
    return elo

print(elo_dict)

{'Abanda F.': Unnamed: 0                      0
PlayerName              Abanda F.
EloOutdoorHard        1475.098186
EloOutdoorCarpet             1500
EloOutdoorClay        1506.014819
EloOutdoorGrass       1498.918468
EloOutdoorGreenset           1500
EloIndoorHard         1512.437816
EloIndoorCarpet            1500.0
EloIndoorClay              1500.0
EloIndoorGrass               1500
EloIndoorGreenset          1500.0
EloClayHard                1500.0
EloClayCarpet                1500
EloClayClay                  1500
EloClayGrass                 1500
EloClayGreenset              1500
EloAll                1531.866837
Name: 0, dtype: object, 'Abduraimova N.': Unnamed: 0                         1
PlayerName            Abduraimova N.
EloOutdoorHard           1422.805703
EloOutdoorCarpet                1500
EloOutdoorClay           1481.022004
EloOutdoorGrass               1500.0
EloOutdoorGreenset              1500
EloIndoorHard                 1500.0
EloIndoorCarpet               1500.0

In [240]:
head_head_dict = {}
for index, row in head_to_head_data.iterrows():
    player1 = row['PlayerName']
    for column in head_to_head_data.columns[1:]:
        player2 = column
        key = f'{player1} vs {player2}'
        val = row[column]
        head_head_dict[key] = val

def get_win_amount_row(row, is_opponent):
    player_1 = row['Player_1']
    player_2 = row['Player_2']
    key = f'{player_1} vs {player_2}' if not is_opponent else f'{player_2} vs {player_1}'
    return head_head_dict.get(key, 0)

In [241]:
last_matches30_dict = {}
for index, row in data_last_matches.iterrows():
    player = row['PlayerName']
    last_matches30_dict[player] = row['Last30']
    
def get_last_matches30_row(row, is_opponent):
    player = row['Player_2'] if is_opponent else row['Player_1']
    last30 = last_matches30_dict.get(player, 0)
    return last30


In [242]:
player_odds_dict = {}
for index, row in data_player_odds.iterrows():
    player = row['PlayerName']
    player_odds_dict[player] = row['AvgProb']

def get_player_odds(row, is_opponent):
    p1 = row['Player_1']
    p2 = row['Player_2']
    odds1 = player_odds_dict.get(p1, 0)
    odds2 = player_odds_dict.get(p2, 0)
    
    if odds1 == 0 or odds2 == 0:
        return 0.0
    
    return odds2 if is_opponent else odds1

In [243]:
from sklearn.preprocessing import MinMaxScaler

# Create the nn_data DataFrame
nn_data = pd.DataFrame({
    'Elo1': [get_elo_row(row, False) for index, row in data.iterrows()],
    'Elo2': [get_elo_row(row, True) for index, row in data.iterrows()],
    'WinAmount1': [get_win_amount_row(row, False) for index, row in data.iterrows()],
    'WinAmount2': [get_win_amount_row(row, True) for index, row in data.iterrows()],
    'LastMatches30_1': [get_last_matches30_row(row, False) for index, row in data.iterrows()],
    'LastMatches30_2': [get_last_matches30_row(row, True) for index, row in data.iterrows()],
    'PlayerOdds1': [get_player_odds(row, False) for index, row in data.iterrows()],
    'PlayerOdds2': [get_player_odds(row, True) for index, row in data.iterrows()],
    'Winner1': [1 if p == w else 0 for p, w in zip(data['Player_1'], data['Winner'])],
    'Winner2': [1 if p == w else 0 for p, w in zip(data['Player_2'], data['Winner'])],
})

nn_data = nn_data.sample(frac=1).reset_index(drop=True)
nn_data.to_csv('./../data/data_nn.csv', index=False)

print(nn_data)

              Elo1         Elo2  WinAmount1  WinAmount2  LastMatches30_1  \
0      1475.645602  1628.395671           1           1                0   
1      1469.499929  1490.065033           1           0                0   
2      1532.491064  1452.110205           0           3                0   
3      1407.869883  1406.501178           1           0                0   
4      1457.687457  1504.697859           1           1                0   
...            ...          ...         ...         ...              ...   
41726  1466.584884  1497.866124           1           0                2   
41727  1633.493758  1598.709210           2           1                2   
41728  1673.369526  1658.830992           2           4                2   
41729  1586.032652  1373.032067           3           1                4   
41730  1969.468493  1649.380734           3           1                6   

       LastMatches30_2  PlayerOdds1  PlayerOdds2  Winner1  Winner2  
0                 

In [244]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
import tensorflow as tf

input_columns = nn_data.drop(columns=['Winner1', 'Winner2']).columns
X = nn_data[input_columns]
y = nn_data[['Winner1', 'Winner2']]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model = Sequential()
data_shape = X_train.shape[1]
model.add(Dense(128, activation='relu', input_shape=(data_shape,)))
model.add(Dropout(0.2))
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(2, activation='softmax'))

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

model.fit(X_train, y_train, epochs=50, batch_size=32, validation_data=(X_test, y_test))

model.save('./../models/model.keras')

print(f'Test accuracy: {model.evaluate(X_test, y_test)[1]:.2f}')

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/50
[1m1044/1044[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.5185 - loss: 10.7532 - val_accuracy: 0.5465 - val_loss: 0.6886
Epoch 2/50
[1m1044/1044[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step - accuracy: 0.5794 - loss: 0.6864 - val_accuracy: 0.6415 - val_loss: 0.6356
Epoch 3/50
[1m1044/1044[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step - accuracy: 0.6120 - loss: 0.6641 - val_accuracy: 0.6698 - val_loss: 0.6218
Epoch 4/50
[1m1044/1044[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step - accuracy: 0.6272 - loss: 0.6512 - val_accuracy: 0.6502 - val_loss: 0.6290
Epoch 5/50
[1m1044/1044[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step - accuracy: 0.6454 - loss: 0.6401 - val_accuracy: 0.6922 - val_loss: 0.6032
Epoch 6/50
[1m1044/1044[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.6562 - loss: 0.6257 - val_accuracy: 0.7169 - val_loss: 0.6158
Epoch 7/50
[1m

In [247]:

def get_elo(player, court, surface, elo_data):
    row = elo_data[elo_data['PlayerName'] == player]
    if row.empty:
        return 1500
    return row[f'Elo{surface}{court}'].values[0]

def get_head_to_head(player1, player2, head_to_head_data):
    row = head_to_head_data[head_to_head_data['PlayerName'] == player1]
    if row.empty:
        return 0
    return row[f'{player2}'].values[0] if player2 in row else 0

def get_last_matches30(player, last_matches_data):
    row = last_matches_data[last_matches_data['PlayerName'] == player]
    if row.empty:
        return 0
    return row['Last30'].values[0]
    
def get_player_odds(player, player_odds_data):
    row = player_odds_data[player_odds_data['PlayerName'] == player]
    if row.empty:
        return 0
    return row['AvgProb'].values[0]

def predict_victory_chance(model, player1, player2, court, surface, elo_data, head_to_head_data, data_last_matches, data_player_odds):
    elo1 = get_elo(player=player1, court=court, surface=surface, elo_data=elo_data)
    elo2 = get_elo(player=player2, court=court, surface=surface, elo_data=elo_data)
    win_amount1 = get_head_to_head(player1=player1, player2=player2, head_to_head_data=head_to_head_data)
    win_amount2 = get_head_to_head(player1=player2, player2=player1, head_to_head_data=head_to_head_data)
    last_matches30_1 = get_last_matches30(player=player1, last_matches_data=data_last_matches)
    last_matches30_2 = get_last_matches30(player=player2, last_matches_data=data_last_matches)
    avgOdds1 = get_player_odds(player=player1, player_odds_data=data_player_odds)
    avgOdds2 = get_player_odds(player=player2, player_odds_data=data_player_odds)
    
    if avgOdds1 == 0.0 or avgOdds2 == 0.0:
        avgOdds1 = 0.0
        avgOdds2 = 0.0
    
    X = np.array([[elo1, elo2, win_amount1, win_amount2, last_matches30_1, last_matches30_2, avgOdds1, avgOdds2]])
    
    p1 = model.predict(X)[0][0]
    p2 = model.predict(X)[0][1]
    
    result_pred = pd.DataFrame({
        'Player' : [player1, player2],
        'WinChance' : [p1, p2]
    })
    return result_pred



model = tf.keras.models.load_model('./../models/model.keras')
elo_data = pd.read_csv('./../data/data_elo.csv')
head_to_head_data = pd.read_csv('./../data/data_head_to_head.csv')
data_last_matches = pd.read_csv('./../data/data_last_matches.csv')
data_player_odds = pd.read_csv('./../data/data_player_odds.csv')

player1 = 'Anisimova A.'
player2 = 'Abanda F.'
surface = 'Outdoor'
court = 'Hard'

prediction = predict_victory_chance(model, player1, player2, court, surface, elo_data, head_to_head_data, data_last_matches, data_player_odds)
print(prediction)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 48ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 45ms/step
         Player  WinChance
0  Anisimova A.   0.920175
1     Abanda F.   0.079825
