/
get_scrabble_freqs.py
94 lines (70 loc) · 2.63 KB
/
get_scrabble_freqs.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
import string
import requests
from bs4 import BeautifulSoup
from app import db
from app.models import Set, LetterScore
def tidy_name(raw_name):
    """Reduce a raw table caption to a short set name.

    The text is cut at the first ``<``, ``(`` or ``#`` (whichever comes
    first). Every occurrence of the case-sensitive substrings ``letter``,
    ``distribution`` and ``Scrabble`` is then removed, and surrounding
    whitespace is stripped from the result.
    """
    name = raw_name
    # Keep only the text that precedes each cut marker.
    for marker in ('<', '(', '#'):
        name = name.partition(marker)[0]
    # Drop the boilerplate words that pad out the captions.
    for filler in ('letter', 'distribution', 'Scrabble'):
        name = name.replace(filler, '')
    return name.strip()
class PointsTable():
    """Letter scores scraped from a single HTML table.

    Attributes:
        name: Set name derived from the table caption via ``tidy_name``, or
            an empty string when the table has no ``<caption>``.
        soup_table: The table element handed to the constructor.
        points: Dict mapping an upper-case ASCII letter to its integer
            score. Empty until ``parse_table`` has been called.
    """

    def __init__(self, soup_table):
        """Record the table and derive its name from the caption.

        Args:
            soup_table: A parsed table element offering ``find`` and
                ``find_all`` (a BeautifulSoup tag in this script).
        """
        caption = soup_table.find('caption')
        # find() yields None when there is no <caption>; fall back to an
        # empty name rather than failing on ``None.text``.
        self.name = tidy_name(caption.text) if caption is not None else ''
        self.soup_table = soup_table
        # Always defined so callers can read it before parse_table() runs.
        self.points = {}

    def parse_table(self):
        """Fill ``self.points`` from the rows of the table.

        The first ``<tr>`` is treated as the header and skipped. In every
        other row the ``<th>`` text is read as the integer score shared by
        all letters found in that row's ``<td>`` cells. Rows without a
        ``<th>``, or whose ``<th>`` is not an integer, are skipped.

        Cell text is upper-cased, the word ``BLANK`` and spaces are removed,
        and each remaining character that is an ASCII upper-case letter is
        recorded. Characters are handled one at a time, so a multi-letter
        tile ends up recorded as its individual letters.
        """
        result = {}
        for table_row in self.soup_table.find_all('tr')[1:]:
            score_cell = table_row.find('th')
            if score_cell is None:
                continue
            try:
                row_value = int(score_cell.text)
            except ValueError:
                # Not a score row (e.g. a textual sub-header).
                continue
            for table_cell in table_row.find_all('td'):
                letters = table_cell.text.upper()
                letters = letters.replace('BLANK', '').replace(' ', '')
                for letter in letters:
                    if letter in string.ascii_uppercase:
                        result[letter] = row_value
        self.points = result
def extract_points(soup):
    """Collect one parsed ``PointsTable`` per set name found in *soup*.

    The first ``<table>`` in the document is ignored. Each later table that
    has a ``<caption>`` is wrapped in a ``PointsTable`` and parsed; it is
    kept only if parsing produced at least one letter score and no earlier
    kept table has the same name. A table that yields no scores does not
    reserve its name, so a later table with that name is still tried.

    Args:
        soup: Parsed document offering ``find_all`` (a BeautifulSoup object
            in this script).

    Returns:
        List of parsed ``PointsTable`` objects in document order.
    """
    seen_names = set()
    result = []
    for soup_table in soup.find_all('table')[1:]:
        # The set name comes from the caption; a table without one cannot
        # be named, so skip it instead of failing inside PointsTable.
        if soup_table.find('caption') is None:
            continue
        points_table = PointsTable(soup_table)
        # Only use the first usable table for each name.
        if points_table.name in seen_names:
            continue
        points_table.parse_table()
        if points_table.points:
            seen_names.add(points_table.name)
            result.append(points_table)
    return result
def store_points(points):
    """Persist scraped letter scores as ``Set`` and ``LetterScore`` rows.

    For each table a ``Set`` row is created from its name, followed by one
    ``LetterScore`` row per letter. Tables named ``'Super'`` are skipped, as
    are tables whose name cannot be printed.

    Args:
        points: Iterable of objects exposing ``name`` and ``points`` (a
            mapping of letter to score), such as the list returned by
            ``extract_points``.
    """
    for points_table in points:
        # Skip the 'Super' set, as it is not a separate language
        if points_table.name == 'Super':
            continue
        # Skip tables with an unprintable name. Only encoding failures are
        # treated as "unprintable"; anything else should surface.
        try:
            print(points_table.name)
        except UnicodeError:
            continue
        set_record = Set(name=points_table.name)
        db.session.add(set_record)
        # Committed before the scores are built because set_record.id is
        # read below -- presumably the id is only assigned once the row has
        # been written.
        db.session.commit()
        # .items() rather than .iteritems(): the latter exists only on
        # Python 2 dictionaries.
        for letter, score in points_table.points.items():
            db.session.add(
                LetterScore(set_id=set_record.id, letter=letter, score=score)
            )
        # One commit for all of this set's letter scores.
        db.session.commit()
# Script body: rebuild the Set / LetterScore tables from the Wikipedia page
# on Scrabble letter distributions.
db.create_all()

# Download and parse before touching existing rows, so a failed request or
# an HTTP error does not leave the database emptied.
r = requests.get(
    'https://en.wikipedia.org/wiki/Scrabble_letter_distributions',
    timeout=30,
)
# Fail loudly on 4xx/5xx instead of parsing an error page.
r.raise_for_status()
# Name the parser explicitly so the result does not depend on which
# optional parser libraries happen to be installed.
soup = BeautifulSoup(r.text, 'html.parser')
points = extract_points(soup)

print("Deleting any previous data")
# LetterScore rows are removed before the Set rows they refer to.
db.session.query(LetterScore).delete()
db.session.query(Set).delete()
db.session.commit()
print("Done")

store_points(points)