In [43]:
import numpy as np
import pandas as pd
import scipy
import scipy.optimize as opt
import json
import os
import matplotlib.pyplot as plt
import sklearn.linear_model as lm
from sklearn.feature_selection import r_regression

from espncricinfo.match import Match

from tqdm import tqdm

In [44]:
#!pip3 install python-espncricinfo

In [45]:
import warnings
warnings.filterwarnings('ignore')

In [46]:
# df_afg = scrape_afghanistan_t20is()
# df_afg.head()

In [47]:
# data = pd.read_csv('cricket_matches.csv')
# data

In [48]:
# teams = np.unique(np.append(data['team1'].values, data['team2'].values))
# teams

In [49]:
# data.columns.values

In [50]:
def prob_win_A(R_a: float, R_b: float, alpha: float = 400, home: str = 'neutral', hga: float = 0):
    if home == 'neutral':
        E_a = 1 / (1 + 10**( (R_b - R_a) / alpha ))
    elif home == 'first':
        E_a = 1 / (1 + 10**( (R_b - (R_a + hga) ) / alpha ))
    elif home == 'second':
        E_a = E_a = 1 / (1 + 10**( (R_b + hga - R_a) / alpha ))
    else:
        raise ValueError('Not a valid home country')
    return E_a

def elo_step(R_a: float, R_b: float, S: float, K: float, W_mv: float, alpha: float, home: str = 'neutral', hga: float = 0):
    E_a = prob_win_A(R_a, R_b, alpha, home, hga)
    E_b = 1 - E_a
    R_a_pr = R_a + K*(S - E_a)*W_mv
    R_b_pr = R_b + K*((1-S) - E_b)*W_mv
    
    return R_a_pr, R_b_pr

In [51]:
def prob_win_country_A(team_elos, country_A: str, country_B: str, venue_country: str, alpha: float = 400, hga: float = 0):
    """
    :param team_elos: Dictionary of elo ratings for each team.
    For the purposes of this function, country_A is the country for which win probability is calculated.
    """
    R_a = team_elos[country_A]
    R_b = team_elos[country_B]
    if venue_country == country_A:
        home = 'first'
    elif venue_country == country_B:
        home = 'second'
    else:
        home = 'neutral'
    return prob_win_A(R_a, R_b, alpha, home, hga)

In [52]:
def calculate_W_mv_m1(nrr):
    """
    :param nrr: Net run rate for game.
    """
    return (3 + nrr)**0.63 - 1 # Testing this out

def calculate_W_mv_log(nrr, elodiff):
    """
    :param nrr: Net run rate for game
    :param elodiff: Elo rating difference. ELODIFF = ELOW - ELOL
    """
    return np.log(1 + abs(min(nrr, 3))) * 2.2/(0.001*elodiff + 2.2) # Testing this out

In [53]:
def generate_elo_ratings(data, teams, K, alpha=400, calculate_W_mv=calculate_W_mv_log, hga=0, mean_rev_const=0.3):
    """
    :param data: DataFrame of games played.
    :param teams: Dictionary of all teams with respective elo rating scores. At the start, all teams' Elo ratings 
    will be set to 1500.
    :param K: K-parameter for Elo rating updates.
    :param alpha: alpha parameter for calculating probabilities from Elo.
    :param calculate_W_mv: Function for calculating the victory multiplier.
    
    Runs through Elo from start to finish for set of games, assuming that we are starting from the beginning -
    i.e. the teams do not have pre-existing Elo ratings calculated from other games and thus all initial
    Elo ratings are set to 1500.
    """
    teams_final = teams.copy()
    previous_year = 2025
    
    for i in range(data.shape[0]):
        row = data.iloc[i, :]
        
        first = row['first_team']
        second = row['second_team']
        winner = row['winner']
        nrr = row['net_run_rate']
        home_country = row['venue_country']
        year = row['year']
        
        if year > previous_year:
            previous_year = year
            avg = np.mean(np.array(list(teams_final.values())))
            for ind in np.array(list(teams_final.keys())):
                mean_rev = mean_rev_const*(teams_final[ind]-avg)
                teams_final[ind] -= mean_rev
        
        if home_country == first:
            home = 'first'
        elif home_country == second:
            home = 'second'
        else:
            home = 'neutral'
        
        if first == winner:
            S_a = 1
            loser = second
        elif winner == 'tie':
            S_a = 0.5
            loser = 'tie'
        else:
            S_a = 0
            loser = first
        
        if winner != 'tie':
            W_mv = calculate_W_mv(nrr, teams_final[winner]-teams_final[loser])
        else:
            W_mv = 2.2*np.log(2)
        R_a_pr, R_b_pr = elo_step(teams_final[first], teams_final[second], S_a, K, W_mv, alpha, home, hga)
        
        teams_final[first] = R_a_pr
        teams_final[second] = R_b_pr
    
    return teams_final

In [54]:
def brier(data, teams, K, alpha=400, calculate_W_mv=calculate_W_mv_log, hga=0, mean_rev_const=0.3):
    """
    :param data: DataFrame of all games.
    :param elodict: Dictionary of teams and their final Elo scores.
    :param alpha: alpha parameter.
    Assumes simulation has already been completed and Elo ratings calculated. Cross entropy loss for Elo ratings.
    """
    score = 0
    teams_final = teams.copy()
    previous_year = 2025
    
    for i in range(data.shape[0]):
        row = data.iloc[i, :]
        
        first = row['first_team']
        second = row['second_team']
        winner = row['winner']
        nrr = row['net_run_rate']
        
        home_country = row['venue_country']
        
        year = row['year']
        
        if year > previous_year:
            previous_year = year
            avg = np.mean(np.array(list(teams_final.values())))
            for ind in np.array(list(teams_final.keys())):
                mean_rev = mean_rev_const*(teams_final[ind]-avg)
                teams_final[ind] -= mean_rev
        
        if home_country == first:
            home = 'first'
        elif home_country == second:
            home = 'second'
        else:
            home = 'neutral'
        
        
        if first == winner:
            S_a = 1
            loser = second
        elif winner == 'tie':
            S_a = 0.5
            loser = 'tie'
        else:
            S_a = 0
            loser = first
        
        if winner != 'tie':
            W_mv = calculate_W_mv(nrr, teams_final[winner]-teams_final[loser])
        else:
            W_mv = 2.2*np.log(2)
            
        E_a = prob_win_A(teams_final[first], teams_final[second], alpha, home, hga)
        E_b = 1 - E_a
        
        R_a_pr, R_b_pr = elo_step(teams_final[first], teams_final[second], S_a, K, W_mv, alpha, home, hga)
        
        brier_comp = (S_a - E_a)**2 + ((1-S_a) - E_b)**2
        score += brier_comp
        
        teams_final[first] = R_a_pr
        teams_final[second] = R_b_pr
        
        # brier_comp = (S_a - E_a)**2 + ((1-S_a) - E_b)**2
    
    score /= data.shape[0]
    
    return score

In [55]:
# def cross_entropy(data, elodict, alpha=400):
#     """
#     :param data: DataFrame of all games.
#     :param elodict: Dictionary of teams and their final Elo scores.
#     :param alpha: alpha parameter.
#     Assumes simulation has already been completed and Elo ratings calculated. Cross entropy loss for Elo ratings.
#     """
#     score = 0
    
#     for i in range(data.shape[0]):
#         row = data.iloc[i, :]
        
        
#         first, second, winner = row['first_team'], row['second_team'], row['winner']
        
#         elo1, elo2 = elodict[first], elodict[second]
#         E_a = prob_win_A(elo1, elo2, alpha)
#         E_b = 1 - E_a
        
#         if first == winner:
#             S_a = 1
#         elif winner == 'tie':
#             S_a = 0.5
#         else:
#             S_a = 0
        
#         # brier_comp = (S_a - E_a)**2 + ((1-S_a) - E_b)**2
#         cross_entropy_comp = -(S_a*np.log(E_a) + (1-S_a)*np.log(1-E_a))
#         score += cross_entropy_comp
    
#     score /= data.shape[0]
    
#     return score

In [56]:
def cross_entropy(data, teams, K, alpha=400, calculate_W_mv=calculate_W_mv_log, hga=0, mean_rev_const=0.3):
    """
    :param data: DataFrame of all games.
    :param elodict: Dictionary of teams and their final Elo scores.
    :param alpha: alpha parameter.
    Assumes simulation has already been completed and Elo ratings calculated. Cross entropy loss for Elo ratings.
    """
    score = 0
    teams_final = teams.copy()
    previous_year = 2025
    
    for i in range(data.shape[0]):
        row = data.iloc[i, :]
        
        first = row['first_team']
        second = row['second_team']
        winner = row['winner']
        nrr = row['net_run_rate']
        
        home_country = row['venue_country']
        year = row['year']
        
        if year > previous_year:
            previous_year = year
            avg = np.mean(np.array(list(teams_final.values())))
            for ind in np.array(list(teams_final.keys())):
                mean_rev = mean_rev_const*(teams_final[ind]-avg)
                teams_final[ind] -= mean_rev
        
        if home_country == first:
            home = 'first'
        elif home_country == second:
            home = 'second'
        else:
            home = 'neutral'
        
        
        if first == winner:
            S_a = 1
            loser = second
        elif winner == 'tie':
            S_a = 0.5
            loser = 'tie'
        else:
            S_a = 0
            loser = first
        
        if winner != 'tie':
            W_mv = calculate_W_mv(nrr, teams_final[winner]-teams_final[loser])
        else:
            W_mv = 2.2*np.log(2)
            
        E_a = prob_win_A(teams_final[first], teams_final[second], alpha, home, hga)
        E_b = 1 - E_a
        
        R_a_pr, R_b_pr = elo_step(teams_final[first], teams_final[second], S_a, K, W_mv, alpha, home, hga)
        
        cross_entropy_comp = -(S_a*np.log(E_a) + (1-S_a)*np.log(1-E_a))
        score += cross_entropy_comp
        
        teams_final[first] = R_a_pr
        teams_final[second] = R_b_pr
        
        # brier_comp = (S_a - E_a)**2 + ((1-S_a) - E_b)**2
    
    score /= data.shape[0]
    
    return score

In [57]:
# def correctness(data, elodict, alpha=400): # TODO: Fix this
#     """
#     :param data: DataFrame of all games.
#     :param elodict: Dictionary of teams and their final Elo scores.
#     :param alpha: alpha parameter.
#     Assumes simulation has already been completed and Elo ratings calculated. Correctness (i.e., what %
#     of games are predicted correctly) for Elo ratings.
#     """
#     score = 0
    
#     for i in range(data.shape[0]):
#         row = data.iloc[i, :]
        
        
#         first, second, winner = row['first_team'], row['second_team'], row['winner']
        
#         elo1, elo2 = elodict[first], elodict[second]
#         E_a = prob_win_A(elo1, elo2, alpha)
#         E_b = 1 - E_a
        
#         if E_a >= 0.5:
#             S_exp = 1
#         else:
#             S_exp = 0
        
#         if first == winner:
#             S_a = 1
#         elif winner == 'tie':
#             S_a = 0.5
#         else:
#             S_a = 0
        
#         if S_exp == S_a:
#             score += 1
    
#     score /= data.shape[0]
    
#     return score

In [58]:
def correctness(data, teams, K, alpha=400, calculate_W_mv=calculate_W_mv_log, hga=0, mean_rev_const=0.3):
    """
    :param data: DataFrame of all games.
    :param elodict: Dictionary of teams and their final Elo scores.
    :param alpha: alpha parameter.
    Assumes simulation has already been completed and Elo ratings calculated. Cross entropy loss for Elo ratings.
    """
    score = 0
    teams_final = teams.copy()
    previous_year = 2025
    
    for i in range(data.shape[0]):
        row = data.iloc[i, :]
        
        first = row['first_team']
        second = row['second_team']
        winner = row['winner']
        nrr = row['net_run_rate']
        
        home_country = row['venue_country']
        year = row['year']
        
        if year > previous_year:
            previous_year = year
            avg = np.mean(np.array(list(teams_final.values())))
            for ind in np.array(list(teams_final.keys())):
                mean_rev = mean_rev_const*(teams_final[ind]-avg)
                teams_final[ind] -= mean_rev
        
        if home_country == first:
            home = 'first'
        elif home_country == second:
            home = 'second'
        else:
            home = 'neutral'
        
        
        if first == winner:
            S_a = 1
            loser = second
        elif winner == 'tie':
            S_a = 0.5
            loser = 'tie'
        else:
            S_a = 0
            loser = first
        
        if winner != 'tie':
            W_mv = calculate_W_mv(nrr, teams_final[winner]-teams_final[loser])
        else:
            W_mv = 2.2*np.log(2)
            
        E_a = prob_win_A(teams_final[first], teams_final[second], alpha, home, hga)
        E_b = 1 - E_a
        
        if E_a >= 0.5:
            S_exp = 1
        else:
            S_exp = 0
        
        R_a_pr, R_b_pr = elo_step(teams_final[first], teams_final[second], S_a, K, W_mv, alpha, home, hga)
        
        # cross_entropy_comp = -(S_a*np.log(E_a) + (1-S_a)*np.log(1-E_a))
        if S_exp == S_a:
            score += 1
        
        teams_final[first] = R_a_pr
        teams_final[second] = R_b_pr
        
        # brier_comp = (S_a - E_a)**2 + ((1-S_a) - E_b)**2
    
    score /= data.shape[0]
    
    return score

In [59]:
# data.iloc[0, :]

In [60]:
def extract_cricket_summary(file_path):
    with open(file_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    # Get teams and winner
    teams = data['info']['teams']
    
    winner = data['info']['outcome'].get('winner')
    
    if winner is None:
        match data['info']['outcome'].get('result'):
            case 'no result':
                winner = 'no result'
            case 'tie':
                winner = 'tie'
                
    if data['info']['outcome'].get('method') == 'Awarded':
        print('Haiiiii! We at ', file_path, '<3')
    
    date = data['info']['dates'][0]
    
    teams_np = np.array(teams)
    
    loser = teams_np[teams_np != winner][0]
    
    first = (data['info']['toss']['winner'] if data['info']['toss']['decision']=='bat' 
             else teams_np[teams_np != data['info']['toss']['winner']][0])
    second = teams_np[teams_np != first][0]
    
    # city = data['info']['city']
    venue = data['info']['venue']

    team_stats = {}

    for innings in data['innings']:
        team = innings['team']
        total_runs = 0
        legal_balls = 0

        for over in innings['overs']:
            for delivery in over['deliveries']:
                # Add runs
                total_runs += delivery['runs']['total']
                
                # Count legal balls only (not wides or no-balls)
                extras = delivery.get('extras', {})
                if 'wides' not in extras and 'noballs' not in extras:
                    legal_balls += 1

        overs = legal_balls // 6 + (legal_balls % 6) / 10
        team_stats[team] = {
            'runs': total_runs,
            'balls': legal_balls,
            'overs': round(overs, 1)
        }
    
    if winner == 'no result' or data['info']['outcome'].get('method') == 'Awarded':
        nrr = float('nan')
    
    elif winner == 'tie':
        nrr = abs((team_stats[first]['runs'] - team_stats[second]['runs'])/20)
    
    elif team_stats[second]['balls'] <= 120 and winner == second: # quickfix: Condition on balls instead of overs
        nrr = team_stats[winner]['runs']/team_stats[winner]['overs'] - team_stats[loser]['runs']/team_stats[loser]['overs']
    
    else:
        nrr = (team_stats[winner]['runs'] - team_stats[loser]['runs']) / 20
    
    if winner == 'no result' or data['info']['outcome'].get('method') == 'Awarded': # For our purposes, matches with no result do not matter for Elo rating updates
        return {
        'date': date,
        'winner': winner,
        # 'city': city,
        'venue': venue,
        'first_team': first,
        'second_team': second,
        'first_runs': float('nan'),
        'first_balls': float('nan'),
        'first_overs': float('nan'),
        'second_runs': float('nan'),
        'second_balls': float('nan'),
        'second_overs': float('nan'),
        'net_run_rate': nrr
    }
    
    return {
        'date': date,
        'winner': winner,
        # 'city': city,
        'venue': venue,
        'first_team': first,
        'second_team': second,
        'first_runs': team_stats[first]['runs'],
        'first_balls': team_stats[first]['balls'],
        'first_overs': team_stats[first]['overs'],
        'second_runs': team_stats[second]['runs'],
        'second_balls': team_stats[second]['balls'],
        'second_overs': team_stats[second]['overs'],
        'net_run_rate': nrr
    }

In [61]:
# Example usage
summary = extract_cricket_summary("ipl-data/335984.json")
# summary = extract_cricket_summary("data/1407719.json")
print(summary)

{'date': '2008-04-19', 'winner': 'Delhi Daredevils', 'venue': 'Feroz Shah Kotla', 'first_team': 'Rajasthan Royals', 'second_team': 'Delhi Daredevils', 'first_runs': 129, 'first_balls': 120, 'first_overs': 20.0, 'second_runs': 132, 'second_balls': 91, 'second_overs': 15.1, 'net_run_rate': 2.291721854304636}


In [62]:
def load_directory(directory):
    summaries = []

    for filename in os.listdir(directory):
        if filename.endswith('.json'):
            full_path = os.path.join(directory, filename)
            try:
                summary = extract_cricket_summary(full_path)
                if summary:
                    summaries.append(summary)
            except Exception as e:
                print(f"Error processing {filename}: {e}")

    return pd.DataFrame(summaries)

In [63]:
def calculate_nrr(winner, first, second, first_runs, first_overs, second_runs, second_overs):
    if winner == 'tie':
        nrr = abs((first_runs - second_runs)/20)
    elif second_overs <= 20 and winner == second:
        nrr = second_runs/second_overs - first_runs/first_overs
    elif winner == first:
        nrr = (first_runs - second_runs) / 20
    
    return nrr

In [64]:
# # afg = pd.read_csv('afghanistan-T20I-matches.csv')
# # print(afg['winner'])
# # afg['net_run_rate'] = afg[['winner', 'first_team', 'second_team', 'first_runs', 'first_overs', 'second_runs',
#                           'second_overs']].apply(lambda x: calculate_nrr(x['winner'], x['first_team'], x['second_team'],
#                                                                          x['first_runs'], x['first_overs'], x['second_runs'],
#                                                                          x['second_overs']), axis=1)
# # afg.head()

In [65]:
# app_matches = pd.read_csv('appended-mlc-matches-2025.csv')
# app_matches['net_run_rate'] = app_matches[['winner', 'first_team', 'second_team', 'first_runs', 'first_overs', 'second_runs',
#                           'second_overs']].apply(lambda x: calculate_nrr(x['winner'], x['first_team'], x['second_team'],
#                                                                          x['first_runs'], x['first_overs'], x['second_runs'],
#                                                                          x['second_overs']), axis=1)
# app_matches.head()

In [66]:
# data = load_directory('mlc-data/')
# data = pd.concat([data, app_matches], axis=0)
# # data = pd.concat([data, afg], axis=0)
# data = data.sort_values(by=['date'], ascending=True)
# data.head()

In [67]:
elo_df = pd.read_csv('mlc_init_elos-21-6-25.csv')
elo_df = elo_df.rename({'Unnamed: 0': 'team'}, axis=1)
elo_df

Unnamed: 0,team,elo
0,Los Angeles Knight Riders,1339.525936
1,MI New York,1486.364577
2,San Francisco Unicorns,1709.00021
3,Seattle Orcas,1326.218874
4,Texas Super Kings,1551.413864
5,Washington Freedom,1587.476539


In [68]:
team_elos = {team: elo for team, elo in zip(elo_df['team'], elo_df['elo'])}
team_elos

{'Los Angeles Knight Riders': 1339.5259362580473,
 'MI New York': 1486.3645770041378,
 'San Francisco Unicorns': 1709.000209843167,
 'Seattle Orcas': 1326.218874210684,
 'Texas Super Kings': 1551.413863706444,
 'Washington Freedom': 1587.476538977519}

In [69]:
grounds_to_home = {
    'Church Street Park, Morrisville': 'neutral',
    'Grand Prairie Stadium': 'Texas Super Kings',
    'Grand Prairie Stadium, Dallas': 'Texas Super Kings',
    'Oakland Coliseum,Oakland': 'San Francisco Unicorns'
}

In [70]:
K_opt, alpha_opt, hga_opt, mrc_opt = 106.48059306482318, 406.58672146000106, 0.0, 0.65

In [71]:
upcoming = pd.read_csv('upcoming-mlc-matches-2025-dallas.csv')
upcoming['venue_country'] = upcoming['venue'].map(lambda x: grounds_to_home[x])
upcoming['date'] = pd.DatetimeIndex(upcoming['date'])
upcoming = upcoming.sort_values(by=['date'], ascending=True)
upcoming.head()

Unnamed: 0,date,first_team,second_team,venue,first_runs,first_balls,first_overs,second_runs,second_balls,second_overs,winner,venue_country
0,2025-06-21,MI New York,Washington Freedom,"Grand Prairie Stadium, Dallas",188.0,120.0,20.0,189.0,113.0,19.2,Washington Freedom,Texas Super Kings
1,2025-06-22,Los Angeles Knight Riders,Seattle Orcas,"Grand Prairie Stadium, Dallas",177.0,120.0,20.0,178.0,10.0,18.2,Los Angeles Knight Riders,Texas Super Kings
2,2025-06-22,Texas Super Kings,Washington Freedom,"Grand Prairie Stadium, Dallas",220.0,120.0,20.0,223.0,118.0,19.4,Washington Freedom,Texas Super Kings
3,2025-06-23,MI New York,San Francisco Unicorns,"Grand Prairie Stadium, Dallas",246.0,120.0,20.0,199.0,120.0,20.0,San Francisco Unicorns,Texas Super Kings
4,2025-06-24,Texas Super Kings,Los Angeles Knight Riders,"Grand Prairie Stadium, Dallas",,,,,,,,Texas Super Kings


In [72]:
upcoming['year'] = upcoming['date'].map(lambda x: x.year)
upcoming.head(1)

Unnamed: 0,date,first_team,second_team,venue,first_runs,first_balls,first_overs,second_runs,second_balls,second_overs,winner,venue_country,year
0,2025-06-21,MI New York,Washington Freedom,"Grand Prairie Stadium, Dallas",188.0,120.0,20.0,189.0,113.0,19.2,Washington Freedom,Texas Super Kings,2025


In [73]:
done = upcoming[~upcoming['winner'].isna()]
upcoming = upcoming[upcoming['winner'].isna()]
done

Unnamed: 0,date,first_team,second_team,venue,first_runs,first_balls,first_overs,second_runs,second_balls,second_overs,winner,venue_country,year
0,2025-06-21,MI New York,Washington Freedom,"Grand Prairie Stadium, Dallas",188.0,120.0,20.0,189.0,113.0,19.2,Washington Freedom,Texas Super Kings,2025
1,2025-06-22,Los Angeles Knight Riders,Seattle Orcas,"Grand Prairie Stadium, Dallas",177.0,120.0,20.0,178.0,10.0,18.2,Los Angeles Knight Riders,Texas Super Kings,2025
2,2025-06-22,Texas Super Kings,Washington Freedom,"Grand Prairie Stadium, Dallas",220.0,120.0,20.0,223.0,118.0,19.4,Washington Freedom,Texas Super Kings,2025
3,2025-06-23,MI New York,San Francisco Unicorns,"Grand Prairie Stadium, Dallas",246.0,120.0,20.0,199.0,120.0,20.0,San Francisco Unicorns,Texas Super Kings,2025


In [74]:
upcoming = upcoming.drop(['first_runs', 'first_balls', 'first_overs', 'second_runs', 'second_balls', 'second_overs',
                         'winner'], axis=1)
upcoming.shape

(7, 6)

In [75]:
done['net_run_rate'] = done[['winner', 'first_team', 'second_team', 'first_runs', 'first_overs', 'second_runs',
                          'second_overs']].apply(lambda x: calculate_nrr(x['winner'], x['first_team'], x['second_team'],
                                                                         x['first_runs'], x['first_overs'], x['second_runs'],
                                                                         x['second_overs']), axis=1)
done.head()

Unnamed: 0,date,first_team,second_team,venue,first_runs,first_balls,first_overs,second_runs,second_balls,second_overs,winner,venue_country,year,net_run_rate
0,2025-06-21,MI New York,Washington Freedom,"Grand Prairie Stadium, Dallas",188.0,120.0,20.0,189.0,113.0,19.2,Washington Freedom,Texas Super Kings,2025,0.44375
1,2025-06-22,Los Angeles Knight Riders,Seattle Orcas,"Grand Prairie Stadium, Dallas",177.0,120.0,20.0,178.0,10.0,18.2,Los Angeles Knight Riders,Texas Super Kings,2025,-0.05
2,2025-06-22,Texas Super Kings,Washington Freedom,"Grand Prairie Stadium, Dallas",220.0,120.0,20.0,223.0,118.0,19.4,Washington Freedom,Texas Super Kings,2025,0.494845
3,2025-06-23,MI New York,San Francisco Unicorns,"Grand Prairie Stadium, Dallas",246.0,120.0,20.0,199.0,120.0,20.0,San Francisco Unicorns,Texas Super Kings,2025,-2.35


In [76]:
team_elos = generate_elo_ratings(done, team_elos, K=K_opt, alpha=alpha_opt, hga=hga_opt, mean_rev_const=mrc_opt)
team_elos = dict(sorted(team_elos.items(), key=lambda item: item[1], reverse=True))
team_elos

{'San Francisco Unicorns': 1733.177848654401,
 'Washington Freedom': 1618.974045365158,
 'Texas Super Kings': 1533.3990108087899,
 'MI New York': 1448.7042847029193,
 'Los Angeles Knight Riders': 1342.010677468849,
 'Seattle Orcas': 1323.7341329998821}

In [77]:
upcoming['first_team_win_chance'] = upcoming.apply(lambda x: prob_win_country_A(team_elos, x['first_team'], x['second_team'],
                                                                               x['venue_country'], alpha=alpha_opt, hga=hga_opt), axis=1)
upcoming['second_team_win_chance'] = upcoming['first_team_win_chance'].map(lambda x: 1 - x)
upcoming

Unnamed: 0,date,first_team,second_team,venue,venue_country,year,first_team_win_chance,second_team_win_chance
4,2025-06-24,Texas Super Kings,Los Angeles Knight Riders,"Grand Prairie Stadium, Dallas",Texas Super Kings,2025,0.747226,0.252774
5,2025-06-25,San Francisco Unicorns,Seattle Orcas,"Grand Prairie Stadium, Dallas",Texas Super Kings,2025,0.910419,0.089581
6,2025-06-26,Los Angeles Knight Riders,Washington Freedom,"Grand Prairie Stadium, Dallas",Texas Super Kings,2025,0.17243,0.82757
7,2025-06-27,MI New York,Seattle Orcas,"Grand Prairie Stadium, Dallas",Texas Super Kings,2025,0.6699,0.3301
8,2025-06-28,San Francisco Unicorns,Washington Freedom,"Grand Prairie Stadium, Dallas",Texas Super Kings,2025,0.65628,0.34372
10,2025-06-28,Los Angeles Knight Riders,Seattle Orcas,"Grand Prairie Stadium, Dallas",Texas Super Kings,2025,0.525853,0.474147
9,2025-06-29,Texas Super Kings,MI New York,"Grand Prairie Stadium, Dallas",Texas Super Kings,2025,0.617664,0.382336


In [78]:
abbrs = {
    'San Francisco Unicorns': 'SFU',
    'Los Angeles Knight Riders': 'LAKR',
    'MI New York': 'MINY',
    'Seattle Orcas': 'SEO',
    'Texas Super Kings': 'TSK',
    'Washington Freedom': 'WF'
}

In [79]:
curr_elos = pd.DataFrame([team_elos, abbrs], index=[0, 1]).T
curr_elos = curr_elos.rename({0: 'Current Elo', 1:'Abbr.'}, axis=1)
curr_elos = curr_elos[['Abbr.', 'Current Elo']]
curr_elos.to_csv('curr_elos.csv')
curr_elos['Current Elo'] = curr_elos['Current Elo'].map(lambda x: int(np.round(x, 0)))
curr_elos

Unnamed: 0,Abbr.,Current Elo
San Francisco Unicorns,SFU,1733
Washington Freedom,WF,1619
Texas Super Kings,TSK,1533
MI New York,MINY,1449
Los Angeles Knight Riders,LAKR,1342
Seattle Orcas,SEO,1324


In [80]:
curr_elos.to_csv('curr_elos_display.csv')

In [81]:
# str(int(np.round(max(0.3, 0.7) * 100, 0)))

In [82]:
upcoming_disp = upcoming.copy()
upcoming_disp['Match'] = upcoming_disp.apply(lambda x: abbrs[x['first_team']] + ' v. ' + abbrs[x['second_team']], axis=1)
upcoming_disp['Odds'] = upcoming_disp.apply(lambda x: str(int(np.round(max(x['first_team_win_chance'], x['second_team_win_chance']) * 100, 0))) +
                                                         '% ' + str((abbrs[x['first_team']] if x['first_team_win_chance'] > x['second_team_win_chance']
                                                                else abbrs[x['second_team']])), axis=1)
upcoming_disp['Predicted Winner'] = upcoming_disp.apply(lambda x: (abbrs[x['first_team']] if x['first_team_win_chance'] > x['second_team_win_chance']
                                                                else abbrs[x['second_team']]), axis=1)
upcoming_disp

Unnamed: 0,date,first_team,second_team,venue,venue_country,year,first_team_win_chance,second_team_win_chance,Match,Odds,Predicted Winner
4,2025-06-24,Texas Super Kings,Los Angeles Knight Riders,"Grand Prairie Stadium, Dallas",Texas Super Kings,2025,0.747226,0.252774,TSK v. LAKR,75% TSK,TSK
5,2025-06-25,San Francisco Unicorns,Seattle Orcas,"Grand Prairie Stadium, Dallas",Texas Super Kings,2025,0.910419,0.089581,SFU v. SEO,91% SFU,SFU
6,2025-06-26,Los Angeles Knight Riders,Washington Freedom,"Grand Prairie Stadium, Dallas",Texas Super Kings,2025,0.17243,0.82757,LAKR v. WF,83% WF,WF
7,2025-06-27,MI New York,Seattle Orcas,"Grand Prairie Stadium, Dallas",Texas Super Kings,2025,0.6699,0.3301,MINY v. SEO,67% MINY,MINY
8,2025-06-28,San Francisco Unicorns,Washington Freedom,"Grand Prairie Stadium, Dallas",Texas Super Kings,2025,0.65628,0.34372,SFU v. WF,66% SFU,SFU
10,2025-06-28,Los Angeles Knight Riders,Seattle Orcas,"Grand Prairie Stadium, Dallas",Texas Super Kings,2025,0.525853,0.474147,LAKR v. SEO,53% LAKR,LAKR
9,2025-06-29,Texas Super Kings,MI New York,"Grand Prairie Stadium, Dallas",Texas Super Kings,2025,0.617664,0.382336,TSK v. MINY,62% TSK,TSK


In [83]:
upcoming_disp = upcoming_disp.rename({'date': 'Date'}, axis=1)
upcoming_disp['venue'] = upcoming_disp['venue'].map(lambda x: '^Venue: ' + x + '^')
upcoming_disp['Match'] = upcoming_disp['Match'] + upcoming_disp['venue']
upcoming_disp = upcoming_disp[['Date', 'Match', 'Odds', 'Predicted Winner']]
upcoming_disp

Unnamed: 0,Date,Match,Odds,Predicted Winner
4,2025-06-24,"TSK v. LAKR^Venue: Grand Prairie Stadium, Dallas^",75% TSK,TSK
5,2025-06-25,"SFU v. SEO^Venue: Grand Prairie Stadium, Dallas^",91% SFU,SFU
6,2025-06-26,"LAKR v. WF^Venue: Grand Prairie Stadium, Dallas^",83% WF,WF
7,2025-06-27,"MINY v. SEO^Venue: Grand Prairie Stadium, Dallas^",67% MINY,MINY
8,2025-06-28,"SFU v. WF^Venue: Grand Prairie Stadium, Dallas^",66% SFU,SFU
10,2025-06-28,"LAKR v. SEO^Venue: Grand Prairie Stadium, Dallas^",53% LAKR,LAKR
9,2025-06-29,"TSK v. MINY^Venue: Grand Prairie Stadium, Dallas^",62% TSK,TSK


In [84]:
upcoming_disp.to_csv('upcoming_display.csv')