In [1]:
import pandas as pd
import numpy as np
from RefrenceDict import teams
import json

In [2]:
# Load the odds CSV into a DataFrame; index_col=0 uses the file's first column as the row index.
df = pd.read_csv('mlb_odds_data_2022.csv', index_col=0)
# Preview the first rows to check that the columns were parsed.
df.head()

Unnamed: 0,timeGame,team1Name,team1Score,team1Odds,team2Name,team2Score,team2Odds
0,16:05,Chicago Cubs,5,1.66,Oakland Athletics,4,2.15
1,16:05,Los Angeles Dodgers,6,1.55,Cleveland Guardians,2,2.36
2,16:05,San Francisco Giants,3,1.53,Arizona Diamondbacks,5,2.41
3,16:05,Texas Rangers,14,2.2,Chicago White Sox,5,1.64
4,13:05,Atlanta Braves,7,1.78,Tampa Bay Rays,7,1.98


In [3]:
# Inspect the column dtypes; nothing is converted in this cell.
df.dtypes

timeGame       object
team1Name      object
team1Score      int64
team1Odds     float64
team2Name      object
team2Score      int64
team2Odds     float64
dtype: object

In [4]:
# Label every game with its winner and with the odds offered on that winner.
# The selection is done column-wise (positionally) instead of looping over rows.
#
# NOTE(review): only a strictly higher team1 score counts as a team1 win, so a
# drawn score (see the 7-7 row in the preview) is credited to team2. Confirm
# whether ties should be excluded instead.
team1_won = df['team1Score'] > df['team2Score']
result = np.where(team1_won, df['team1Name'], df['team2Name'])
winningOdds = np.where(team1_won, df['team1Odds'], df['team2Odds'])
df['result'] = result
df['winningOdds'] = winningOdds
df.head()

Unnamed: 0,timeGame,team1Name,team1Score,team1Odds,team2Name,team2Score,team2Odds,result,winningOdds
0,16:05,Chicago Cubs,5,1.66,Oakland Athletics,4,2.15,Chicago Cubs,1.66
1,16:05,Los Angeles Dodgers,6,1.55,Cleveland Guardians,2,2.36,Los Angeles Dodgers,1.55
2,16:05,San Francisco Giants,3,1.53,Arizona Diamondbacks,5,2.41,Arizona Diamondbacks,2.41
3,16:05,Texas Rangers,14,2.2,Chicago White Sox,5,1.64,Texas Rangers,2.2
4,13:05,Atlanta Braves,7,1.78,Tampa Bay Rays,7,1.98,Tampa Bay Rays,1.98


In [5]:
# if you bet $1 on every game and won every bet, how much money can you make?
def ReturnStandard(df):
    """Print the best-case, no-bet and worst-case totals for $1 per game.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``result``, ``team1Name``, ``team1Odds``
        and ``team2Odds``. ``result`` holds the name of the winning team.

    Prints
    ------
    * highest: the sum, over all games, of the odds of the team named in
      ``result`` (team1's odds when ``result`` equals ``team1Name``,
      otherwise team2's odds).
    * piggy bank: one dollar per game, i.e. the number of rows.
    * lowest: always 0, every bet lost.
    """
    # Boolean mask of the games won by team1; every other row is counted
    # with team2's odds, mirroring a plain if/else on the winner's name.
    team1_won = df['result'] == df['team1Name']
    high = (
        df.loc[team1_won, 'team1Odds'].sum()
        + df.loc[~team1_won, 'team2Odds'].sum()
    )
    mid = len(df)
    low = 0

    # The total is a sum of floats, so format it to cents rather than
    # printing binary floating-point noise such as 4154.099999999998.
    print("highest possible earnings if one dollar is bet on every game: ${:.2f}".format(high))
    print("total saved if one dollar is put in piggy bank every game: ${}".format(mid))
    print("lowest possible earnings if one dollar is bet on every game: ${}".format(low))


# Print the three totals for the loaded odds frame when this cell runs as the main module.
if __name__ == "__main__":
    ReturnStandard(df)

highest possible earnings if one dollar is bet on every game: $4154.099999999998
total saved if one dollar is put in piggy bank every game: $2156
lowest possible earnings if one dollar is bet on every game: $0


In [6]:
# count the number of wins in the year
def CountWins(data):
    """Print how many times each distinct value occurs in ``data``.

    ``data`` is any iterable of hashable values (here: one winner name per
    game). One ``"<value> : <count>"`` line is printed per distinct value,
    in order of first appearance.
    """
    tally = {}
    for team in data:
        tally[team] = tally.get(team, 0) + 1
    for entry in tally.items():
        print("{} : {}".format(*entry))

# Tally the 'result' column, i.e. how often each team is recorded as the winner.
if __name__ == "__main__":
    data = df['result']
    CountWins(data)

Chicago Cubs : 66
Los Angeles Dodgers : 90
Arizona Diamondbacks : 74
Texas Rangers : 68
Tampa Bay Rays : 74
Minnesota Twins : 68
Pittsburgh Pirates : 52
Houston Astros : 106
Miami Marlins : 54
Philadelphia Phillies : 84
New York Mets : 96
Los Angeles Angels : 68
Chicago White Sox : 78
Cleveland Guardians : 76
Kansas City Royals : 60
Cincinnati Reds : 58
Toronto Blue Jays : 86
Atlanta Braves : 82
Baltimore Orioles : 80
New York Yankees : 88
Colorado Rockies : 52
Seattle Mariners : 74
Boston Red Sox : 62
Detroit Tigers : 64
San Diego Padres : 76
San Francisco Giants : 70
Oakland Athletics : 50
St.Louis Cardinals : 84
Washington Nationals : 46
Milwaukee Brewers : 70


In [7]:
# theory: if the team has previously won more games than their opponent, then they are expected to win
# test: 
# 1) iterate through the rows 
"""
if tm1Wins > tm2Wins:
    predict = tm1
else:
    predict = tm2 

if predict == result:
    account += winOdds
# else we lose and make no money
# final comparison will be with the no-risk outcome (the $2156 kept by not betting)

"""
#   -   make prediction
#   -   track prediction outcome
#   -   compare with no risk outcome found to be: $2156
def PredictionModel(result, team1, team2, winningOdd):
    """Back-test the rule "the team with more wins so far is expected to win".

    The games are processed in the order given. For each game a $1 bet is
    placed on ``team1`` when its tally is strictly greater than ``team2``'s,
    otherwise on ``team2``. A correct pick adds that game's ``winningOdd``
    to the account; a wrong pick adds nothing. The winner's tally is then
    incremented.

    The starting tallies are read from ``winDict.json`` and the final
    tallies are written back to it, so the outcome depends on what that
    file contains when the function is called.

    Parameters
    ----------
    result : iterable of str
        Winning team of each game.
    team1, team2 : iterable of str
        The two teams of each game, aligned with ``result``.
    winningOdd : iterable of float
        Odds of the winning team of each game, aligned with ``result``.

    Raises
    ------
    FileNotFoundError
        If ``winDict.json`` does not exist.
    KeyError
        If a team name is missing from ``winDict.json``.
    """
    # Load the tally once and keep it in memory; re-reading and re-writing
    # the JSON file for every single game is needless disk I/O.
    with open("winDict.json", "r") as jsonFile:
        winDict = json.load(jsonFile)

    account = 0
    games = 0
    for rslt, tm1, tm2, wnod in zip(result, team1, team2, winningOdd):
        # Equal tallies fall through to team2.
        predict = tm1 if winDict[tm1] > winDict[tm2] else tm2
        if predict == rslt:
            account += wnod
        # Only now does the tally learn the real outcome of this game.
        winDict[rslt] += 1
        games += 1

    # Persist the final tallies in the same compact format as before.
    with open("winDict.json", "w") as jsonFile:
        json.dump(winDict, jsonFile)

    print("final account: ${:.2f}".format(account))
    # One dollar is staked per game, so break-even equals the number of
    # games processed rather than a figure hard-coded for one dataset.
    print("break even amount: {}".format(games))

def UpdateWinTally(rslt):
    """Add one win for ``rslt`` to the tally stored in ``winDict.json``.

    The file is read in full, the entry for ``rslt`` is incremented and the
    whole tally is written back. Raises ``KeyError`` when ``rslt`` is not a
    key of the stored tally.
    """
    with open("winDict.json", "r") as source:
        tally = json.loads(source.read())
    tally[rslt] = tally[rslt] + 1
    with open("winDict.json", "w") as sink:
        sink.write(json.dumps(tally))

# read first then write
def GetWinTally(tm1, tm2):
    """Return the stored win counts of ``tm1`` and ``tm2`` as a pair.

    The tally is read from ``winDict.json`` on every call. Raises
    ``KeyError`` when either name is not a key of the stored tally.
    """
    with open("winDict.json", "r") as source:
        tally = json.loads(source.read())
    return tally[tm1], tally[tm2]


# Run the back-test on the labelled frame.
# NOTE(review): the starting tallies come from winDict.json, which is written by
# the cell that defines `teams` further down in this notebook. That cell has to
# be run first (and again before every re-run), otherwise the tallies left over
# from a previous run are used.
if __name__ == "__main__":
    data = df
    result = df['result']
    team1 = df['team1Name']
    team2 = df['team2Name']
    winningOdd = df['winningOdds']
    PredictionModel(result, team1, team2, winningOdd)

final account: $2235.8799999999947
break even amount: 2156


In [7]:
# Display the frame to confirm that the 'result' and 'winningOdds' columns were added.
df

Unnamed: 0,timeGame,team1Name,team1Score,team1Odds,team2Name,team2Score,team2Odds,result,winningOdds
0,16:05,Chicago Cubs,5,1.66,Oakland Athletics,4,2.15,Chicago Cubs,1.66
1,16:05,Los Angeles Dodgers,6,1.55,Cleveland Guardians,2,2.36,Los Angeles Dodgers,1.55
2,16:05,San Francisco Giants,3,1.53,Arizona Diamondbacks,5,2.41,Arizona Diamondbacks,2.41
3,16:05,Texas Rangers,14,2.20,Chicago White Sox,5,1.64,Texas Rangers,2.20
4,13:05,Atlanta Braves,7,1.78,Tampa Bay Rays,7,1.98,Tampa Bay Rays,1.98
...,...,...,...,...,...,...,...,...,...
2151,20:03,Houston Astros,5,1.61,Philadelphia Phillies,6,2.42,Philadelphia Phillies,2.42
2152,19:07,New York Yankees,5,1.83,Houston Astros,6,2.05,Houston Astros,2.05
2153,14:37,Philadelphia Phillies,4,1.72,San Diego Padres,3,2.20,Philadelphia Phillies,1.72
2154,19:45,Philadelphia Phillies,10,1.89,San Diego Padres,6,1.96,Philadelphia Phillies,1.89


In [102]:
# Zero-initialised win tally: one entry per team name, in the order listed.
teams = dict.fromkeys(
    [
        "Chicago White Sox",
        "Cleveland Guardians",
        "Detroit Tigers",
        "Kansas City Royals",
        "Minnesota Twins",
        "Baltimore Orioles",
        "Boston Red Sox",
        "New York Yankees",
        "Tampa Bay Rays",
        "Toronto Blue Jays",
        "Houston Astros",
        "Los Angeles Angels",
        "Oakland Athletics",
        "Seattle Mariners",
        "Texas Rangers",
        "Chicago Cubs",
        "Cincinnati Reds",
        "Milwaukee Brewers",
        "Pittsburgh Pirates",
        "St.Louis Cardinals",
        "Atlanta Braves",
        "Miami Marlins",
        "New York Mets",
        "Philadelphia Phillies",
        "Washington Nationals",
        "Arizona Diamondbacks",
        "Colorado Rockies",
        "Los Angeles Dodgers",
        "San Diego Padres",
        "San Francisco Giants",
    ],
    0,
)

# Serialise the tally with a four-space indent and overwrite winDict.json with it.
json_object = json.dumps(teams, indent=4)
with open("winDict.json", "w") as outfile:
    outfile.write(json_object)



In [84]:
def CountTeams(data):
    """Print every distinct value of ``data`` with a starting tally of 0.

    ``data`` is any iterable of hashable values (here: team names). One
    ``"<value> : 0"`` line is printed per distinct value, in order of first
    appearance.
    """
    # Each name must be registered the first time it is seen. Testing
    # `in freq` before assigning can never be true for an empty dict, which
    # would leave the tally empty and print nothing.
    freq = dict.fromkeys(data, 0)
    for key, value in freq.items():
        print("{} : {}".format(key, value))

# Driver: count how often each team appears in the 'team1Name' column.
# NOTE(review): this calls CountWins (defined in an earlier cell), not the
# CountTeams function defined just above, which is never called. Confirm which
# one was intended.
if __name__ == "__main__":
    data = df['team1Name']
    CountWins(data)

Chicago Cubs : 62
Los Angeles Dodgers : 84
San Francisco Giants : 68
Texas Rangers : 64
Atlanta Braves : 78
Boston Red Sox : 74
Detroit Tigers : 82
Houston Astros : 66
Miami Marlins : 70
Philadelphia Phillies : 86
New York Mets : 72
Arizona Diamondbacks : 78
Los Angeles Angels : 78
Milwaukee Brewers : 74
Seattle Mariners : 70
Cleveland Guardians : 72
Kansas City Royals : 72
Toronto Blue Jays : 72
Minnesota Twins : 72
Baltimore Orioles : 76
New York Yankees : 80
Colorado Rockies : 66
San Diego Padres : 68
Chicago White Sox : 74
St.Louis Cardinals : 72
Pittsburgh Pirates : 54
Oakland Athletics : 76
Washington Nationals : 70
Cincinnati Reds : 62
Tampa Bay Rays : 64
