In [241]:
import pandas as pd
from bs4 import BeautifulSoup
from datetime import datetime

In [244]:
def htmlToGames(html_file):
    '''
    input: path to .html file
    returns: bs4 result set of games on page

    Reads an html file saved from the Montana State Lottery website's daily
    scratchers pages and returns a bs4 result set with one tag per game.

    Raises AttributeError if the page has no "col-md-12 bgw" container div,
    because soup.find() returns None in that case.
    '''
    # Binary mode lets BeautifulSoup detect the document's own encoding instead
    # of decoding with whatever the platform's default text encoding happens to be.
    with open(html_file, 'rb') as file:
        soup = BeautifulSoup(file, 'lxml')
    games_soup = soup.find("div", {"class": "col-md-12 bgw"})  # isolates the center column of page where all the games are
    games = games_soup.find_all("div", {"class": "row scratch-game pad50 mg0"})  # divides the games into a bs4 result set
    return games

def gamesToList(games_in):
    '''
    input: iterable of bs4 game tags (as returned by htmlToGames)
    returns: list of dicts, one per game, with keys "name", "date" and "data"

    "date" is the day this function is called (YYYY-MM-DD), taken once and
    shared by every entry; it is not read from the page.
    '''
    stamp = datetime.today().strftime('%Y-%m-%d')
    return [
        {"name": getGameName(tag), "date": stamp, "data": getGameData(tag)}
        for tag in games_in
    ]

def getGameName(gameTag):
    '''
    input: bs4 tag for a single game
    returns: text of the game's title span

    Raises AttributeError if the game has no title span, because find()
    returns None in that case.
    '''
    # attrs must be a dict; the class string is matched exactly as written.
    title_span = gameTag.find(
        "span",
        {"class": "scratch-game-title cr-main text-uppercase pull-left"},
    )
    return title_span.text

def cleanCells(cells):
    '''
    input: iterable of bs4 <td> tags from one prize row
    returns: dict mapping each cell's binding name to the cell's text

    The key comes from the cell's own data-bind attribute, e.g.
    data-bind="text: win" yields the key "win". Cells whose data-bind is
    missing or does not start with "text: " are skipped.
    '''
    prefix = "text: "
    out = {}
    for cell in cells:
        # Read the attribute through the tag API rather than re-serializing
        # the tag to HTML and splitting the string.
        binding = cell.get("data-bind") or ""
        if not binding.startswith(prefix):
            continue
        out[binding[len(prefix):]] = cell.text
    return out

def getGameData(gameTag):
    '''
    input: bs4 tag for a single game
    returns: list of dicts, one per prize row (see cleanCells), followed by a
             final {'overall odds': ...} dict built from the last row

    Returns an empty list when the game has no <tbody> or the <tbody> has no
    rows.
    '''
    tbody = gameTag.tbody
    rows = tbody.find_all("tr") if tbody is not None else []
    if not rows:
        return []

    # Every row but the last is a prize row; the last one holds the overall odds.
    *prize_rows, odds_row = rows
    out = [cleanCells(row.find_all("td")) for row in prize_rows]
    # The odds figure is taken as the third whitespace-separated token of the row text.
    out.append({'overall odds': odds_row.text.split()[2]})
    return out
    
        
    
    
# NOTE(review): `games` is not assigned in this cell or in any cell above it;
# this line only works if another cell that assigns `games` was run first.
final = gamesToList(games)    

In [305]:
# Parse the saved one-dollar page into game tags, convert them to a list of
# game dicts, and display the result.
one_dollar_games = htmlToGames('data/20210406_one_dollar.html')
one_list = gamesToList(one_dollar_games)
one_list

[{'name': '3-2-Won!',
  'date': '2021-04-07',
  'data': [{'win': '$1', 'prize': '$1', 'odds': '1:7.14'},
   {'win': '$1 w/ DBLR', 'prize': '$2', 'odds': '1:50.00'},
   {'win': '$2', 'prize': '$2', 'odds': '1:50.00'},
   {'win': '$5', 'prize': '$5', 'odds': '1:66.67'},
   {'win': '$5 w/ DBLR', 'prize': '$10', 'odds': '1:200.00'},
   {'win': '$10', 'prize': '$10', 'odds': '1:200.00'},
   {'win': '$5 x 3', 'prize': '$15', 'odds': '1:400.00'},
   {'win': '$15', 'prize': '$15', 'odds': '1:400.00'},
   {'win': '$5 x 4', 'prize': '$20', 'odds': '1:1,428.57'},
   {'win': '$20', 'prize': '$20', 'odds': '1:2,000.00'},
   {'win': '$20 w/ DBLR + $10', 'prize': '$50', 'odds': '1:1,176.47'},
   {'win': '$50', 'prize': '$50', 'odds': '1:5,000.00'},
   {'win': '$50 w/ DBLR', 'prize': '$100', 'odds': '1:10,000.00'},
   {'win': '$100', 'prize': '$100', 'odds': '1:20,000.00'},
   {'win': '$1,500', 'prize': '$1,500', 'odds': '1:110,000.00'},
   {'overall odds': '4.71'}]},
 {'name': 'Gnome Me The Money',
 

In [307]:
# Same pipeline for the saved twenty-dollar page.
# NOTE(review): the recorded output of this cell starts with the same game and
# odds as the one-dollar run — confirm the two saved files really differ.
twenty_dollar_games = htmlToGames('data/20210406_twenty_dollar.html')
twenty_dollar = gamesToList(twenty_dollar_games)
twenty_dollar

[{'name': '3-2-Won!',
  'date': '2021-04-07',
  'data': [{'win': '$1', 'prize': '$1', 'odds': '1:7.14'},
   {'win': '$1 w/ DBLR', 'prize': '$2', 'odds': '1:50.00'},
   {'win': '$2', 'prize': '$2', 'odds': '1:50.00'},
   {'win': '$5', 'prize': '$5', 'odds': '1:66.67'},
   {'win': '$5 w/ DBLR', 'prize': '$10', 'odds': '1:200.00'},
   {'win': '$10', 'prize': '$10', 'odds': '1:200.00'},
   {'win': '$5 x 3', 'prize': '$15', 'odds': '1:400.00'},
   {'win': '$15', 'prize': '$15', 'odds': '1:400.00'},
   {'win': '$5 x 4', 'prize': '$20', 'odds': '1:1,428.57'},
   {'win': '$20', 'prize': '$20', 'odds': '1:2,000.00'},
   {'win': '$20 w/ DBLR + $10', 'prize': '$50', 'odds': '1:1,176.47'},
   {'win': '$50', 'prize': '$50', 'odds': '1:5,000.00'},
   {'win': '$50 w/ DBLR', 'prize': '$100', 'odds': '1:10,000.00'},
   {'win': '$100', 'prize': '$100', 'odds': '1:20,000.00'},
   {'win': '$1,500', 'prize': '$1,500', 'odds': '1:110,000.00'},
   {'overall odds': '4.71'}]},
 {'name': 'Gnome Me The Money',
 

In [195]:
# Scratch check: every <tr> inside the first game's <tbody>.
# Relies on `games` having been assigned by another cell.
rows =games[0].tbody.findAll("tr")
rows

[<tr>
 <td class="text-center" data-bind="text: win">$1</td>
 <td class="text-center cr-main" data-bind="text: prize">$1</td>
 <td class="text-center" data-bind="text: odds">1:7.14</td>
 </tr>,
 <tr>
 <td class="text-center" data-bind="text: win">$1 w/ DBLR</td>
 <td class="text-center cr-main" data-bind="text: prize">$2</td>
 <td class="text-center" data-bind="text: odds">1:50.00</td>
 </tr>,
 <tr>
 <td class="text-center" data-bind="text: win">$2</td>
 <td class="text-center cr-main" data-bind="text: prize">$2</td>
 <td class="text-center" data-bind="text: odds">1:50.00</td>
 </tr>,
 <tr>
 <td class="text-center" data-bind="text: win">$5</td>
 <td class="text-center cr-main" data-bind="text: prize">$5</td>
 <td class="text-center" data-bind="text: odds">1:66.67</td>
 </tr>,
 <tr>
 <td class="text-center" data-bind="text: win">$5 w/ DBLR</td>
 <td class="text-center cr-main" data-bind="text: prize">$10</td>
 <td class="text-center" data-bind="text: odds">1:200.00</td>
 </tr>,
 <tr>
 <

In [238]:
# Scratch check: third whitespace-separated token of the last row's text,
# which is the value getGameData stores under 'overall odds'.
rows[-1].text.split()[2]

'4.71'

In [204]:
# Scratch check: print the {binding name: text} pair that cleanCells would
# produce for each cell.
# NOTE(review): `cells` is not assigned in any visible cell.
for cell in cells:
    print({str(cell).split('data-bind="text: ')[1].split('"')[0]:cell.text})

{'win': '$1'}
{'prize': '$1'}
{'odds': '1:7.14'}


In [302]:
# Scratch check that came back empty. `string=` filters on text content, not
# on attribute names, and 'data-blind' is presumably a typo for 'data-bind'.
# To find tags carrying that attribute, search with attrs={"data-bind": True}.
rows[0].findAll(string='data-blind')

[]

In [292]:
# Scratch check: select('tr') on the whole game block also returns the header
# row of <th> cells, unlike searching inside <tbody> only.
games[0].select('tr')

[<tr>
 <th class="text-center text-uppercase">Win</th>
 <th class="text-center text-uppercase cr-main">Prize</th>
 <th class="text-center text-uppercase">Odds</th>
 </tr>,
 <tr>
 <td class="text-center" data-bind="text: win">$1</td>
 <td class="text-center cr-main" data-bind="text: prize">$1</td>
 <td class="text-center" data-bind="text: odds">1:7.14</td>
 </tr>,
 <tr>
 <td class="text-center" data-bind="text: win">$1 w/ DBLR</td>
 <td class="text-center cr-main" data-bind="text: prize">$2</td>
 <td class="text-center" data-bind="text: odds">1:50.00</td>
 </tr>,
 <tr>
 <td class="text-center" data-bind="text: win">$2</td>
 <td class="text-center cr-main" data-bind="text: prize">$2</td>
 <td class="text-center" data-bind="text: odds">1:50.00</td>
 </tr>,
 <tr>
 <td class="text-center" data-bind="text: win">$5</td>
 <td class="text-center cr-main" data-bind="text: prize">$5</td>
 <td class="text-center" data-bind="text: odds">1:66.67</td>
 </tr>,
 <tr>
 <td class="text-center" data-bind=

In [185]:
# Scratch check: binding name (parsed from the serialized tag) and text of the
# first cell. NOTE(review): `cells` is not assigned in any visible cell.
str(cells[0]).split('data-bind="text: ')[1].split('"')[0],cells[0].text

('win', '$1')

In [141]:
# Dump every game block from the one-dollar page, each followed by a row of
# asterisks, then show how many game blocks were found.
games = htmlToGames("data/20210406_one_dollar.html")

for g in games:
    print(g, "*"*20, sep="\n")

x = len(games)
x

<div class="row scratch-game pad50 mg0">
<div class="col-md-12 mgb20 pd0">
<span class="text-uppercase bold new pull-left" data-bind="visible: isNewGame == 1" style="display: none;"> New!</span>
<span class="scratch-game-title cr-main text-uppercase pull-left" data-bind="html: gameName">3-2-Won!</span>
</div>
<div class="col-md-6 left">
<div class="scratch-game-img-container mgb20" data-bind="if:visibleImage()!=''">
<img alt="3-2-Won" data-bind="click: imageFaceToggle, attr: { src: visibleImage, alt: name }" src="/static/assets/1176_front.png"/>
</div>
<label class="cr-main font15 text-uppercase mgb10 pointer" data-bind="click: imageFaceToggle">CLICK FOR TICKET BACK</label>
<div class="scratch-game-description" data-bind="html: howToPlay"><p>Reveal 2 like symbols in the same GAME, win prize shown for that GAME! Reveal 3 like symbols in the same GAME, win DOUBLE the prize shown for that GAME!</p>
</div>
</div>
<!-- GAME ODDS -->
<div class="col-md-6 pd0">
<table>
<thead class="bgg-secon

16

In [None]:
# Keep only top-level tables (no enclosing <table>) that have a <thead>.
# NOTE(review): `output` is not assigned in any visible cell, and this cell
# rebinds `games`, which other cells expect to hold htmlToGames results.
tables = output.findAll("table")
games = []
for table in tables:
    is_top_level = table.findParent("table") is None
    if is_top_level and table.thead is not None:
        games.append(table)