In [27]:
import pandas as pd
import numpy as np
from math import ceil
import sqlite3
from pprint import pprint
# Open the SQLite database file; `conn` is handed to Track and Race in later cells.
conn = sqlite3.connect('horses.db')
# NOTE(review): `curs` is not referenced by any other cell shown in this notebook.
curs = conn.cursor()

def convert_to_odds(wins, trials):
    """Convert a win tally into unrounded fractional odds-against.

    The result is a ``(numer, denom)`` pair normalised so that one side is
    exactly 1:

    * wins at most half of trials: ``(losses / wins, 1)`` (evens is ``(1.0, 1)``)
    * wins above half of trials (odds-on): ``(1, wins / losses)``, with the
      denominator rounded to 5 decimal places

    The ratio is computed directly from ``trials - wins`` and ``wins`` rather
    than through ``1 - wins/trials`` and ``1 / (wins/trials)``, which avoids
    stacking several floating-point roundings on top of each other.

    Args:
        wins: Number of wins. May be fractional, but must satisfy
            ``0 < wins < trials``.
        trials: Total number of trials.

    Returns:
        tuple: ``(numer, denom)`` as described above.

    Raises:
        ZeroDivisionError: If ``wins`` is 0, ``trials`` is 0 or
            ``wins == trials``. The odds are undefined in these cases; the
            exception type is the one the bare arithmetic used to raise.
        ValueError: If ``wins`` otherwise lies outside ``(0, trials)``.
    """
    if trials == 0 or wins == 0 or wins == trials:
        raise ZeroDivisionError(
            f"odds are undefined for wins={wins}, trials={trials}; "
            "need 0 < wins < trials"
        )
    if not 0 < wins < trials:
        raise ValueError(
            f"wins must lie strictly between 0 and trials, got wins={wins}, "
            f"trials={trials}"
        )

    losses = trials - wins
    if losses >= wins:
        # Evens or longer: quote as "x to 1".
        return (losses / wins, 1)
    # Odds-on: quote as "1 to x".
    return (1, round(wins / losses, 5))

def round_plus(frac_tuple):
    """Round odds longer than evens down to a quotable fraction.

    The pair is returned with both sides floored to integers as soon as one
    of these holds:

    * it is a whole "x to 1" price of 4 or more,
    * the denominator has reached 4 (quarter-point prices), or
    * the fractional part of the numerator is below one half.

    Otherwise both sides are doubled and the check is repeated, so prices
    come out over 1, 2 or 4 when the denominator starts at 1.

    Args:
        frac_tuple: ``(numer, denom)`` pair.

    Returns:
        tuple: ``(int, int)`` rounded odds.
    """
    top, bottom = frac_tuple
    settled = (top >= 4 and bottom == 1) or bottom == 4 or top % 1 < 0.5
    if not settled:
        # Try the next finer denominator (halves, then quarters).
        return round_plus((top * 2, bottom * 2))
    return (int(top // 1), int(bottom // 1))

def round_minus(frac_tuple):
    """Round odds-on (or evens) prices up to a quotable fraction.

    The denominator is always rounded *up*. Prices are quoted as ``1 to n``
    when ``n`` is 4 or more, otherwise over a numerator of 1, 2 or 4.

    Two defects of the earlier version are fixed here:

    * ``round()`` rounds halves to the nearest even number, so ``(1, 2.5)``
      came out as ``(1, 2)`` while ``(1, 3.5)`` came out as ``(1, 4)``. The
      half-or-more branch now uses ``ceil`` like the other branches.
    * Prices that are already whole numbers (``(1.0, 1)``, ``(1, 2.0)``)
      were doubled up to unreduced fractions such as ``(4, 4)`` and
      ``(4, 8)``; they are now returned unchanged.

    Args:
        frac_tuple: ``(numer, denom)`` pair with ``numer <= denom``.

    Returns:
        tuple: ``(int, int)`` rounded odds.
    """
    numer, denom = frac_tuple
    if numer % 1 == 0 and denom % 1 == 0:
        # Already an exact integer fraction; nothing to round.
        return (int(numer), int(denom))
    if denom >= 4 and numer == 1:
        return (int(numer), int(ceil(denom)))
    if numer == 4:
        # Quarter-point resolution reached; stop refining.
        return (int(numer), int(ceil(denom)))
    if denom % 1 >= 0.5:
        # ceil, not round(): Python rounds x.5 to the nearest even integer.
        return (int(round(numer)), int(ceil(denom)))
    return round_minus((numer * 2, denom * 2))

def round_odds(frac_tuple):
    """Round a ``(numer, denom)`` odds pair with the rule for its side of evens.

    Pairs whose numerator exceeds the denominator go to ``round_plus``;
    everything else (evens and odds-on) goes to ``round_minus``.
    """
    top, bottom = frac_tuple
    rounder = round_plus if top > bottom else round_minus
    return rounder(frac_tuple)

class Horse:
    """A simulated race horse described by three ratings.

    Each rating selects one octile of reference horses (see
    ``_octile_rating``); the horse's per-race parameters are then drawn from
    a normal distribution fitted to that octile. Rating 8 corresponds to the
    lowest values of the underlying measure and rating 1 to the highest.

    Attributes set by ``__init__``:
        name: Display name.
        top_speed: Speed rating.
        consistency: Consistency rating.
        endurance: Endurance rating.
        position: Distance covered so far in the current race.
        finished: Set to True by the race once the horse crosses the line.

    Attributes set per race by the ``get_*`` methods:
        velocity: Mean distance covered per ``move`` call.
        stdev: Standard deviation of the per-step distance.
        fatigue: Amount subtracted from each step over the final 400 units.
    """

    def __init__(self, name, speed_rating, cons_rating, end_rating):
        self.name = name
        self.top_speed = speed_rating
        self.consistency = cons_rating
        self.endurance = end_rating
        self.position = 0
        self.finished = False

    @staticmethod
    def _octile_rating(values):
        """Bucket ``values`` into octiles; the lowest octile is rated 8."""
        return 8 - pd.qcut(values, 8, labels=False)

    @staticmethod
    def _require_samples(samples, rating):
        """Fail loudly instead of drawing from an empty reference set."""
        if len(samples) == 0:
            raise ValueError(f'no reference horses found for rating {rating!r}')
        return samples

    def get_velocity(self, time_df, distance):
        """Draw this race's mean velocity from horses sharing the speed rating.

        Args:
            time_df: Frame with a ``top_speed`` column. The column is used as
                a time (``distance / top_speed`` gives the velocity), so it
                presumably holds the finish times built by ``Track.SPEED_SQL``
                -- confirm against the caller. The frame is not modified.
            distance: Race distance.

        Raises:
            ValueError: If no row falls in this horse's rating bucket.
        """
        finish_times = time_df['top_speed']
        ratings = self._octile_rating(finish_times)
        samples = self._require_samples(
            finish_times[ratings == self.top_speed], self.top_speed)
        mps = distance / samples
        self.velocity = np.random.normal(mps.mean(), mps.std())

    def get_stdev(self, times_df, distance):
        """Draw this race's step-to-step spread from similarly consistent horses.

        Per-horse standard deviations of average speed
        (``distance / finish_time``) are bucketed into octiles; the draw is
        floored at the smallest deviation in this horse's bucket.

        Args:
            times_df: Frame with ``horse_id`` and ``finish_time`` columns,
                one row per run. The frame is not modified.
            distance: Race distance.

        Raises:
            ValueError: If no horse falls in this horse's rating bucket.
        """
        times = times_df[['horse_id', 'finish_time']].copy()
        times['finish_time'] = distance / times['finish_time']
        st_devs = times.groupby(['horse_id'], as_index=False).std()
        ratings = self._octile_rating(st_devs['finish_time'])
        samples = self._require_samples(
            st_devs.loc[ratings == self.consistency, 'finish_time'],
            self.consistency)
        min_stdev = samples.min()
        draw = np.random.normal(samples.mean(), samples.std())
        self.stdev = max((min_stdev, draw))

    def get_fatigue(self, times_df, distance):
        """Draw this race's fatigue from horses sharing the endurance rating.

        The measure is ``400 / time<last> - 400 / time2`` averaged per horse,
        where ``time<last>`` is the column for the final section: one section
        per full 400 units plus one more when over 50 units remain.
        NOTE(review): this reads as a speed difference between the last and
        second sections if the ``timeN`` columns are 400 m sectional times --
        confirm against the database schema.

        Args:
            times_df: Frame with ``horse_id``, ``time2`` and the final
                section's ``timeN`` column. The frame is not modified.
            distance: Race distance.

        Raises:
            ValueError: If no horse falls in this horse's rating bucket.
        """
        # int() keeps the column name well-formed even for a float distance.
        num_sects = int(distance // 400 + (distance % 400 > 50))
        last_sect = 'time' + str(num_sects)
        diffs = pd.DataFrame({
            'horse_id': times_df['horse_id'],
            'time_diff': 400 / times_df[last_sect] - 400 / times_df['time2'],
        })
        per_horse = diffs.groupby(['horse_id'], as_index=False).mean()
        ratings = self._octile_rating(per_horse['time_diff'])
        samples = self._require_samples(
            per_horse.loc[ratings == self.endurance, 'time_diff'],
            self.endurance)
        self.fatigue = np.random.normal(samples.mean(), samples.std())

    def move(self, distance):
        """Advance the horse by one time step unless it has finished.

        The step is drawn from ``N(velocity, stdev)``; over the final 400
        units ``fatigue`` is subtracted from it. Whenever the step carries
        the horse across a quarter-distance mark, ``stdev`` is doubled and
        ``fatigue`` grows by 10%.
        """
        if self.finished:
            return
        step = np.random.normal(self.velocity, self.stdev)
        if self.position >= distance - 400:
            step -= self.fatigue
        self.position += step
        # A remainder smaller than the step means a quarter mark was just passed.
        if self.position % (distance / 4) < step:
            self.stdev *= 2
            self.fatigue *= 1.1

    def copy(self):
        """Return a fresh Horse with the same name and ratings (no race state)."""
        return Horse(self.name, self.top_speed, self.consistency, self.endurance)
    
class Track:
    """Reference race data for one distance, loaded from the database.

    Only horses with at least 10 runs at the distance are included.

    Attributes:
        distance: Race distance used to filter ``tRaces``.
        conn: Database connection the queries run on.
        grouped_data: One row per horse with ``horse_id``, ``num_races`` and
            ``top_speed`` (the average of the horse's three lowest finish
            times).
        ungrouped_data: One row per run with ``horse_id``, ``time1`` to
            ``time6`` and ``finish_time``.
    """

    # Per-horse average of the three lowest finish times at the distance.
    SPEED_SQL = """
            WITH ranked_times AS 
            (
              SELECT horse_id, finish_time, ROW_NUMBER() OVER (PARTITION BY horse_id ORDER BY finish_time) AS row_num
              FROM tRuns 
              JOIN tRaces USING(race_id)
              WHERE distance = :distance
            )
            SELECT horse_id, num_races, avg(finish_time) as top_speed
            FROM (SELECT horse_id, count(horse_id) as num_races FROM ranked_times GROUP BY horse_id)
            JOIN ranked_times USING(horse_id)
            WHERE row_num <= 3
                AND num_races >= 10
            GROUP BY horse_id;
         """
    # Every run at the distance, with its timing columns.
    CONS_SQL = """
            WITH horses_dist AS 
            (
              SELECT horse_id, time1, time2, time3, time4, time5, time6, finish_time
              FROM tRuns 
              JOIN tRaces USING(race_id)
              WHERE distance = :distance
            )
            SELECT horse_id, time1, time2, time3, time4, time5, time6, finish_time
            FROM (SELECT horse_id, count(horse_id) as num_races FROM horses_dist GROUP BY horse_id)
            JOIN horses_dist USING(horse_id)
            WHERE num_races >= 10
            ORDER BY horse_id;
            """

    def __init__(self, distance, conn):
        """Store the distance and connection, then load both data frames."""
        self.distance = distance
        self.conn = conn
        frames = self.get_dist_data()
        self.grouped_data, self.ungrouped_data = frames

    def get_dist_data(self):
        """Run both queries for ``self.distance``.

        Returns:
            tuple: ``(top_speeds_frame, runs_frame)`` as pandas DataFrames.
        """
        bind = dict(distance=self.distance)
        speed_frame = pd.read_sql(self.SPEED_SQL, self.conn, params=bind)
        runs_frame = pd.read_sql(self.CONS_SQL, self.conn, params=bind)
        return speed_frame, runs_frame

    
class Race:
    """A field of horses on a track, simulated one time step per tick.

    Constructing a Race has side effects: any ``'random'`` argument is filled
    in (this reads ``horse_names.txt`` and/or queries the database), then the
    odds are estimated by simulation and pretty-printed.

    Args:
        conn: Database connection used to build a random Track.
        horses: List of horse objects, or ``'random'`` to generate a field.
        track: A track object, or ``'random'`` to pick a distance.
        num_horses: Field size for a random field, or ``'random'`` for 4-20.

    Attributes:
        winner: First horse home in the most recent simulation; reset to
            ``False`` by ``preprocess``.
    """

    VALID_DISTANCES = [1200, 1400, 1650, 1000, 1600, 1800]

    def __init__(self, conn, horses='random', track='random', num_horses='random'):
        self.conn = conn
        self.horses = horses
        self.track = track
        self.num_horses = num_horses
        # isinstance guards keep `==` from being applied to lists/arrays/objects.
        if isinstance(self.horses, str) and self.horses == 'random':
            self.generate_random_horses()
        if isinstance(self.track, str) and self.track == 'random':
            self.generate_random_track()
        pprint(self.get_race_odds())

    def generate_random_horses(self):
        """Build a field of uniquely named horses with random 1-8 ratings.

        Names are sampled without replacement from ``horse_names.txt`` (one
        name per line; blank lines and duplicates are ignored).
        NOTE(review): every line is eligible, including the first -- the
        earlier index range started at 1 and so never used it. If the file
        has a header line, drop it here.

        Raises:
            ValueError: If more horses are requested than names available.
        """
        if isinstance(self.num_horses, str) and self.num_horses == 'random':
            self.num_horses = np.random.randint(4, 21)
        with open('horse_names.txt', 'r') as names_file:
            stripped = (line.strip() for line in names_file)
            # dict.fromkeys de-duplicates while preserving file order.
            pool = list(dict.fromkeys(name for name in stripped if name))
        if self.num_horses > len(pool):
            raise ValueError(
                f'requested {self.num_horses} horses but horse_names.txt only '
                f'provides {len(pool)} distinct names'
            )
        # Sampling without replacement replaces the retry-until-unique loop,
        # which could spin forever when names ran out.
        picks = np.random.choice(len(pool), size=self.num_horses, replace=False)
        horses = []
        for pick in picks:
            speed = np.random.randint(1, 9)
            cons = np.random.randint(1, 9)
            endur = np.random.randint(1, 9)
            horses.append(Horse(pool[pick], speed, cons, endur))
        self.horses = horses

    def generate_random_track(self):
        """Pick one of ``VALID_DISTANCES`` uniformly and load its Track."""
        # randint's upper bound is exclusive and the lower bound defaults to 0,
        # so every distance -- including the first -- can be chosen.
        track_idx = np.random.randint(len(self.VALID_DISTANCES))
        distance = self.VALID_DISTANCES[track_idx]
        self.track = Track(distance, self.conn)

    def preprocess(self):
        """Reset race state and redraw every horse's per-race parameters."""
        self.winner = False
        distance = self.track.distance
        for horse in self.horses:
            horse.get_velocity(self.track.grouped_data.copy(), distance)
            horse.get_stdev(self.track.ungrouped_data.copy(), distance)
            horse.get_fatigue(self.track.ungrouped_data.copy(), distance)
            horse.position = 0
            horse.finished = False

    def get_race_odds(self, n=100):
        """Estimate each horse's odds from ``n`` silent simulated races.

        A horse with no wins is credited 0.75 of a win and a horse that won
        every race is docked 0.25, so the odds conversion never sees a zero
        or certain probability.

        Returns:
            dict: horse name -> rounded ``(numer, denom)`` odds.
        """
        win_counts = {horse.name: 0 for horse in self.horses}
        for _ in range(n):
            self.simulate_race(show_finishers=False)
            win_counts[self.winner.name] += 1
        odds = {}
        for name, wins in win_counts.items():
            if wins == 0:
                wins += 0.75
            if wins == n:
                wins -= 0.25
            odds[name] = round_odds(convert_to_odds(wins, n))
        return odds

    def simulate_race(self, show_finishers=True):
        """Run one race to completion, recording the winner in ``self.winner``.

        Horses crossing the line in the same tick are placed by distance
        covered, furthest first. (They were previously sorted ascending,
        which handed the win to the horse that had run the least.)

        Args:
            show_finishers: Print each finisher's place and time when True.
        """
        self.preprocess()
        distance = self.track.distance
        place = 1
        time = 0
        while True:
            time += 1
            for horse in self.horses:
                horse.move(distance)
            finishers = [horse for horse in self.horses
                         if horse.position >= distance and not horse.finished]
            for horse in sorted(finishers, key=lambda h: h.position, reverse=True):
                if show_finishers:
                    print(f'{horse.name} finishes number {place} in {time} seconds!')
                horse.finished = True
                if not self.winner:
                    self.winner = horse
                place += 1
            if all(horse.finished for horse in self.horses):
                break

    def determine_winner(self):
        """Print the winner of the most recent race.

        Uses the winner recorded by ``simulate_race``; a later finisher can
        stop further past the line than the winner, so maximum position is
        only a fallback for when no race has been recorded.
        """
        winner = getattr(self, 'winner', None) or max(
            self.horses, key=lambda horse: horse.position)
        print(f"The winner is {winner.name}!")
        

In [29]:
# Hand-built field; Horse arguments are (name, speed, consistency, endurance) ratings.
a = Horse('Horse A', 8,8,8)
b = Horse('Horse B', 1,8,8)
c = Horse('Horse C', 8,4,2)
d = Horse('Horse D', 6,5,7)
e = Horse('Horse E', 7,7,5)
f = Horse('Horse F', 8,2,4)
g = Horse('Horse G', 7,2,3)

# Reference data for 1400-distance races, loaded through the open connection.
t = Track(1400, conn)

# Constructing the Race also simulates and pretty-prints the odds (see Race.__init__).
r = Race(conn, [a,b,c,d,e,f,g], t)
# NOTE(review): the recorded output below names horses 'Donald' and 'Ty', which this
# cell does not create -- the output presumably predates the current cell contents.
r.simulate_race()

{'Donald': (32, 1), 'Ty': (1, 33)}
Ty finishes number 1 in 80 seconds!
Donald finishes number 2 in 82 seconds!


In [30]:
# Random field (horses left at their 'random' default) on a fixed 1400 track.
r = Race(conn,track=Track(1400,conn))
# Run one race, printing each finisher.
r.simulate_race()

{'Alan-a-Dale': (132, 1),
 'Clyde Van Dusen': (24, 1),
 'Medina Spirit': (132, 1),
 'Monarchos': (132, 1),
 'Needles': (11, 4),
 'Northern Dancer': (2, 1),
 'Plaudit': (5, 1),
 'Strike the Gold': (11, 1),
 'Winning Colors': (4, 1)}
Winning Colors finishes number 1 in 81 seconds!
Northern Dancer finishes number 2 in 81 seconds!
Needles finishes number 3 in 81 seconds!
Strike the Gold finishes number 4 in 82 seconds!
Plaudit finishes number 5 in 82 seconds!
Clyde Van Dusen finishes number 6 in 82 seconds!
Monarchos finishes number 7 in 83 seconds!
Alan-a-Dale finishes number 8 in 83 seconds!
Medina Spirit finishes number 9 in 83 seconds!


In [10]:
# Same experiment as the previous cell: a new random field on a 1400 track.
r = Race(conn,track=Track(1400,conn))
# Run one race, printing each finisher.
r.simulate_race()

{'Alan-a-Dale': (8, 1),
 'Azra': (99, 1),
 'Barbaro': (132, 1),
 'California Chrome': (32, 1),
 'Carry Back': (1, 3),
 'Chateaugay': (132, 1),
 'Hindoo': (99, 1),
 'Jet Pilot': (49, 1),
 'Swale': (32, 1),
 'War Admiral': (15, 1)}
Swale finishes number 1 in 83 seconds!
Carry Back finishes number 2 in 83 seconds!
Jet Pilot finishes number 3 in 84 seconds!
Hindoo finishes number 4 in 84 seconds!
California Chrome finishes number 5 in 84 seconds!
Barbaro finishes number 6 in 84 seconds!
War Admiral finishes number 7 in 84 seconds!
Alan-a-Dale finishes number 8 in 84 seconds!
Chateaugay finishes number 9 in 85 seconds!
Azra finishes number 10 in 85 seconds!


In [3]:
# Re-run the race on the current `r`; simulate_race() redraws every horse's parameters.
r.simulate_race()

Gallant Fox finishes number 1 in 94 seconds!
Bold Venture finishes number 2 in 95 seconds!
Determine finishes number 3 in 96 seconds!
Lawrin finishes number 4 in 96 seconds!
Sunday Silence finishes number 5 in 97 seconds!
Stone Street finishes number 6 in 97 seconds!


In [301]:
# Tally how many of 35 simulated races each horse in `r` wins.
counts = {horse.name:0 for horse in r.horses}
for _ in range(35):
    # simulate_race() stores the first horse home in r.winner.
    r.simulate_race()
    counts[r.winner.name] += 1
counts

{'Jeremy': 5,
 'Donald': 6,
 'Horse B': 8,
 'Ron': 5,
 'Tijuan': 6,
 'Xavier': 2,
 'John': 3}

In [291]:
def Get_Race_Odds(race, n=35):
    """Estimate rounded fractional odds for every horse in ``race``.

    Runs ``race.simulate_race()`` ``n`` times and converts each horse's win
    tally to odds. A horse with no wins is credited 0.75 of a win, and a
    horse that won every race is docked 0.25 -- the same adjustment
    ``Race.get_race_odds`` applies -- so ``convert_to_odds`` never has to
    divide by zero.

    Args:
        race: Object exposing ``horses``, ``simulate_race()`` and ``winner``.
        n: Number of simulated races.

    Returns:
        dict: horse name -> rounded ``(numer, denom)`` odds.
    """
    win_counts = {horse.name: 0 for horse in race.horses}
    for _ in range(n):
        race.simulate_race()
        win_counts[race.winner.name] += 1
    odds = {}
    for name, wins in win_counts.items():
        if wins == 0:
            wins += 0.75
        if wins == n:
            # A clean sweep would otherwise give zero odds-against.
            wins -= 0.25
        odds[name] = round_odds(convert_to_odds(wins, n))
    return odds

In [292]:
# Estimate odds for the current race from 50 simulated runs.
Get_Race_Odds(r, 50)

{'Jeremy': (11, 1),
 'Donald': (7, 2),
 'Horse B': (5, 1),
 'Ron': (4, 1),
 'Tijuan': (6, 1),
 'Xavier': (4, 1),
 'John': (24, 1)}

In [258]:
# Run one more race on the current `r` (no output was recorded for this cell).
r.simulate_race()

In [199]:
# Convert the raw win tallies in `counts` to rounded fractional odds, in place.
total = sum(counts.values())
for key in counts.keys():
    # NOTE(review): a horse with 0 wins makes convert_to_odds divide by zero (the
    # ZeroDivisionError recorded below). Entries converted before the failure stay
    # converted, so `counts` is left half tallies, half odds.
    counts[key] = round_odds(convert_to_odds(counts[key], total))
counts

4.833333333333333 1
1.3333333333333333 1
3.375 1
4.833333333333333 1


ZeroDivisionError: float division by zero

In [200]:
counts

{'Jeremy': (4, 1),
 'Donald': (1, 1),
 'Horse B': (3, 1),
 'Ron': (4, 1),
 'Tijuan': 0,
 'Xavier': 0,
 'John': 0}

In [241]:
def convert_to_odds(wins, trials):
    """Convert a win tally into unrounded fractional odds-against.

    The result is a ``(numer, denom)`` pair normalised so that one side is
    exactly 1:

    * wins at most half of trials: ``(losses / wins, 1)`` (evens is ``(1.0, 1)``)
    * wins above half of trials (odds-on): ``(1, wins / losses)``, with the
      denominator rounded to 5 decimal places

    The ratio is computed directly from ``trials - wins`` and ``wins`` rather
    than through ``1 - wins/trials`` and ``1 / (wins/trials)``, which avoids
    stacking several floating-point roundings on top of each other.

    Args:
        wins: Number of wins. May be fractional, but must satisfy
            ``0 < wins < trials``.
        trials: Total number of trials.

    Returns:
        tuple: ``(numer, denom)`` as described above.

    Raises:
        ZeroDivisionError: If ``wins`` is 0, ``trials`` is 0 or
            ``wins == trials``. The odds are undefined in these cases; the
            exception type is the one the bare arithmetic used to raise.
        ValueError: If ``wins`` otherwise lies outside ``(0, trials)``.
    """
    if trials == 0 or wins == 0 or wins == trials:
        raise ZeroDivisionError(
            f"odds are undefined for wins={wins}, trials={trials}; "
            "need 0 < wins < trials"
        )
    if not 0 < wins < trials:
        raise ValueError(
            f"wins must lie strictly between 0 and trials, got wins={wins}, "
            f"trials={trials}"
        )

    losses = trials - wins
    if losses >= wins:
        # Evens or longer: quote as "x to 1".
        return (losses / wins, 1)
    # Odds-on: quote as "1 to x".
    return (1, round(wins / losses, 5))

In [338]:
def round_plus(frac_tuple):
    """Round odds longer than evens down to a quotable fraction.

    The pair is returned with both sides floored to integers as soon as one
    of these holds:

    * it is a whole "x to 1" price of 4 or more,
    * the denominator has reached 4 (quarter-point prices), or
    * the fractional part of the numerator is below one half.

    Otherwise both sides are doubled and the check is repeated, so prices
    come out over 1, 2 or 4 when the denominator starts at 1.

    Args:
        frac_tuple: ``(numer, denom)`` pair.

    Returns:
        tuple: ``(int, int)`` rounded odds.
    """
    top, bottom = frac_tuple
    settled = (top >= 4 and bottom == 1) or bottom == 4 or top % 1 < 0.5
    if not settled:
        # Try the next finer denominator (halves, then quarters).
        return round_plus((top * 2, bottom * 2))
    return (int(top // 1), int(bottom // 1))

def round_minus(frac_tuple):
    """Round odds-on (or evens) prices up to a quotable fraction.

    The denominator is always rounded *up*. Prices are quoted as ``1 to n``
    when ``n`` is 4 or more, otherwise over a numerator of 1, 2 or 4.

    Two defects of the earlier version are fixed here:

    * ``round()`` rounds halves to the nearest even number, so ``(1, 2.5)``
      came out as ``(1, 2)`` while ``(1, 3.5)`` came out as ``(1, 4)``. The
      half-or-more branch now uses ``ceil`` like the other branches.
    * Prices that are already whole numbers (``(1.0, 1)``, ``(1, 2.0)``)
      were doubled up to unreduced fractions such as ``(4, 4)`` and
      ``(4, 8)``; they are now returned unchanged.

    Args:
        frac_tuple: ``(numer, denom)`` pair with ``numer <= denom``.

    Returns:
        tuple: ``(int, int)`` rounded odds.
    """
    numer, denom = frac_tuple
    if numer % 1 == 0 and denom % 1 == 0:
        # Already an exact integer fraction; nothing to round.
        return (int(numer), int(denom))
    if denom >= 4 and numer == 1:
        return (int(numer), int(ceil(denom)))
    if numer == 4:
        # Quarter-point resolution reached; stop refining.
        return (int(numer), int(ceil(denom)))
    if denom % 1 >= 0.5:
        # ceil, not round(): Python rounds x.5 to the nearest even integer.
        return (int(round(numer)), int(ceil(denom)))
    return round_minus((numer * 2, denom * 2))

def round_odds(frac_tuple):
    """Round a ``(numer, denom)`` odds pair with the rule for its side of evens.

    Pairs whose numerator exceeds the denominator go to ``round_plus``;
    everything else (evens and odds-on) goes to ``round_minus``.
    """
    top, bottom = frac_tuple
    rounder = round_plus if top > bottom else round_minus
    return rounder(frac_tuple)

In [334]:
# Spot check: 21 wins in 37 trials is better than evens, so the result is (1, x).
frac = convert_to_odds(21, 37)
frac

(1, 1.3125)

In [339]:
# Spot check of the odds-on rounding path (numer <= denom goes to round_minus).
round_odds((1,1.5))

(1, 2)

In [144]:
# NOTE(review): as written, indexing an empty tuple raises IndexError; the recorded
# output (2, 1) presumably comes from an earlier version of this cell.
x = ()
x[1]

(2, 1)

In [45]:
# Close the database connection; cells that query through `conn` need a new one afterwards.
conn.close()

In [11]:
# NOTE(review): the recorded SyntaxError refers to a line 3 that this cell no longer has.
x = 3
y = 2


SyntaxError: invalid syntax (2805911722.py, line 3)

In [194]:
# Scratch check: int() does not accept a tuple (raises the TypeError recorded below).
int((2.1,2.3))

TypeError: int() argument must be a string, a bytes-like object or a number, not 'tuple'

In [174]:
# Scratch check: int() truncates toward zero rather than rounding, so this gives 2.
int(2.9)

2

2

In [348]:
# Scratch check: print one randomly chosen line from the horse-name file.
with open('horse_names.txt', 'r') as h:
    names = h.readlines()
# randint(n) draws from 0..n-1, so every line (including the first) can be picked.
idx = np.random.randint(len(names))
# readlines() keeps each line's trailing newline, hence the blank line after the name.
print(names[idx])
# NOTE(review): redundant -- the with-block above has already closed `h`.
h.close()

Omar Khayyam



In [356]:
# Scratch check: with a single argument, randint draws from 0 up to (but excluding) 6.
np.random.randint(6)

0