In [1]:
import numpy as np
import pandas as pd
import random
from deap import base, creator, tools, algorithms
import json
from joblib import Parallel, delayed

In [2]:
# Define the number of innings as a global variable
num_innings = 4

In [3]:
# Input the players who will play in today's game
playing_today = [1, 2, 4, 7, 8, 3, 5]

In [4]:
#Load player stats

with open('player_stats.json', 'r') as f:
    player_stats = json.load(f)

In [5]:
#Show player stats before updating
list(player_stats.values())

[{'bases_advanced': 0, 'out': 0, 'home_run': 0, 'at_bats': 0},
 {'bases_advanced': 0, 'out': 0, 'home_run': 0, 'at_bats': 0},
 {'bases_advanced': 0, 'out': 0, 'home_run': 0, 'at_bats': 0},
 {'bases_advanced': 0, 'out': 0, 'home_run': 0, 'at_bats': 0},
 {'bases_advanced': 0, 'out': 0, 'home_run': 0, 'at_bats': 0},
 {'bases_advanced': 0, 'out': 0, 'home_run': 0, 'at_bats': 0},
 {'bases_advanced': 0, 'out': 0, 'home_run': 0, 'at_bats': 0},
 {'bases_advanced': 0, 'out': 0, 'home_run': 0, 'at_bats': 0},
 {'bases_advanced': 0, 'out': 0, 'home_run': 0, 'at_bats': 0},
 {'bases_advanced': 0, 'out': 0, 'home_run': 0, 'at_bats': 0},
 {'bases_advanced': 0, 'out': 0, 'home_run': 0, 'at_bats': 0},
 {'bases_advanced': 0, 'out': 0, 'home_run': 0, 'at_bats': 0},
 {'bases_advanced': 0, 'out': 0, 'home_run': 0, 'at_bats': 0},
 {'bases_advanced': 0, 'out': 0, 'home_run': 0, 'at_bats': 0},
 {'bases_advanced': 0, 'out': 0, 'home_run': 0, 'at_bats': 0}]

In [6]:
#Function to update player stats after each game
def update_player_stats(df, player_stats):
    for _, row in df.iterrows():
        player_id = str(row['player_id'])
        player_stats[player_id]['bases_advanced'] += row['bases_advanced']
        player_stats[player_id]['out'] += row['outs']
        player_stats[player_id]['home_run'] += row['home_runs']
        player_stats[player_id]['at_bats'] += row['at_bats']

In [7]:
# Example DataFrame containing the previous game data
# DataFrame structure:
# - game_id: Identifier for the game
# - player_id: Identifier for the player
# - batting_order: Position of the player in the batting order
# - bases_advanced: Number of bases the player advanced in this game
# - outs: Number of times the player got out in this game, meaning they didn't get on base. 
#         Do NOT count an out if they get out while running the bases.
# - home_runs: Number of home runs the player hit in this game
# - at_bats: Number of times the player batted in this game

data = {
    'game_id': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
    'player_id': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
    'batting_order': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
    'bases_advanced': [4, 2, 3, 5, 1, 0, 1, 3, 2, 1, 3, 4, 2, 0, 1],
    'outs': [1, 1, 1, 0, 2, 0, 0, 1, 2, 0, 1, 1, 0, 1, 1],
    'home_runs': [0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
    'at_bats': [4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3]
}

In [8]:
# Assuming df is the DataFrame containing the latest game data
df = pd.DataFrame(data)
update_player_stats(df, player_stats)

In [9]:
#Show player_stats after updating
list(player_stats.values())

[{'bases_advanced': 4, 'out': 1, 'home_run': 0, 'at_bats': 4},
 {'bases_advanced': 2, 'out': 1, 'home_run': 1, 'at_bats': 4},
 {'bases_advanced': 3, 'out': 1, 'home_run': 1, 'at_bats': 4},
 {'bases_advanced': 5, 'out': 0, 'home_run': 1, 'at_bats': 4},
 {'bases_advanced': 1, 'out': 2, 'home_run': 0, 'at_bats': 4},
 {'bases_advanced': 0, 'out': 0, 'home_run': 0, 'at_bats': 4},
 {'bases_advanced': 1, 'out': 0, 'home_run': 0, 'at_bats': 4},
 {'bases_advanced': 3, 'out': 1, 'home_run': 0, 'at_bats': 4},
 {'bases_advanced': 2, 'out': 2, 'home_run': 0, 'at_bats': 4},
 {'bases_advanced': 1, 'out': 0, 'home_run': 0, 'at_bats': 4},
 {'bases_advanced': 3, 'out': 1, 'home_run': 0, 'at_bats': 4},
 {'bases_advanced': 4, 'out': 1, 'home_run': 0, 'at_bats': 4},
 {'bases_advanced': 2, 'out': 0, 'home_run': 0, 'at_bats': 4},
 {'bases_advanced': 0, 'out': 1, 'home_run': 0, 'at_bats': 3},
 {'bases_advanced': 1, 'out': 1, 'home_run': 0, 'at_bats': 3}]

In [10]:
#Define the simulations of innings and games

def simulate_inning(batting_order, player_stats, start_batter_index):
    bases = [0, 0, 0]
    outs = 0
    runs = 0
    batter_index = start_batter_index
    num_players = len(batting_order)
    
    while outs < 3:
        batter = batting_order[batter_index % num_players]
        stats = player_stats[str(batter)]
        
        # Calculate probabilities based on stats
        at_bats = stats['at_bats']
        if at_bats == 0:
            outs += 1
            batter_index += 1
            continue
            
        prob_out = stats['out'] / at_bats
        prob_home_run = stats['home_run'] / at_bats
        prob_bases_advanced = stats['bases_advanced'] / at_bats
        
        result = np.random.choice(
            ['out', 'home_run', 'bases_advanced'],
            p=[prob_out, prob_home_run, 1 - prob_out - prob_home_run]
        )
        
        if result == 'out':
            outs += 1
        elif result == 'home_run':
            runs += sum(bases) + 1
            bases = [0, 0, 0]
        else:
            bases_advanced = min(int(prob_bases_advanced), 3)
            for i in range(2, -1, -1):
                if bases[i] == 1:
                    if i + bases_advanced >= 3:
                        runs += 1
                        bases[i] = 0
                    else:
                        bases[i + bases_advanced] = 1
                        bases[i] = 0
            bases[bases_advanced - 1] = 1
            
        batter_index += 1
        
    return runs, batter_index

def simulate_game(batting_order, player_stats, num_innings=num_innings, num_simulations=2):
    total_runs = 0
    
    for _ in range(num_simulations):
        runs = 0
        batter_index = 0
        for _ in range(num_innings):
            inning_runs, batter_index = simulate_inning(batting_order, player_stats, batter_index)
            runs += inning_runs
        total_runs += runs
    
    return total_runs / num_simulations


In [11]:
# Define the Genetic Algorithm setup that will find the batting order that maximizes runs per game
creator.create("FitnessMax", base.Fitness, weights=(1.0,))
creator.create("Individual", list, fitness=creator.FitnessMax)

toolbox = base.Toolbox()
toolbox.register("indices", random.sample, range(len(playing_today)), len(playing_today))
toolbox.register("individual", tools.initIterate, creator.Individual, toolbox.indices)
toolbox.register("population", tools.initRepeat, list, toolbox.individual)

def evalBattingOrder(individual):
    order = [playing_today[i] for i in individual]
    avg_runs = np.mean([simulate_game(order, player_stats, num_innings=num_innings) for _ in range(len(playing_today))])
    return avg_runs,

# Parallelize the evaluation of individuals
def parallel_eval_population(population):
    fitnesses = Parallel(n_jobs=-1)(delayed(evalBattingOrder)(ind) for ind in population)
    for ind, fit in zip(population, fitnesses):
        ind.fitness.values = fit

toolbox.register("mate", tools.cxOrdered)
toolbox.register("mutate", tools.mutShuffleIndexes, indpb=0.05)
toolbox.register("select", tools.selTournament, tournsize=3)
toolbox.register("evaluate", evalBattingOrder)

def find_optimal_order(player_stats, playing_today, pop_size, num_gens):
    population = toolbox.population(n=pop_size)
    for gen in range(num_gens):
        offspring = algorithms.varAnd(population, toolbox, cxpb=0.7, mutpb=0.2)
        parallel_eval_population(offspring)
        population = toolbox.select(offspring, k=len(population))
    best_individual = tools.selBest(population, 1)[0]
    best_order = [playing_today[i] for i in best_individual]
    best_avg_runs = evalBattingOrder(best_individual)[0]
    return best_order, best_avg_runs

In [12]:
# Re-run the optimization before today's game
#Lower the pop_size and num_gens to speed up computation at the expense of solution quality
optimal_order, best_avg_runs = find_optimal_order(player_stats, playing_today, pop_size=50, num_gens=10)
print(f"Optimal batting order: {optimal_order}, Average runs: {best_avg_runs}")

Optimal batting order: [1, 3, 7, 4, 5, 8, 2], Average runs: 22.071428571428573


In [13]:
#Save player stats

#with open('player_stats.json', 'w') as f:
#    json.dump(player_stats, f)