In [4]:
# Raw puzzle clues, one per line in "<number>. <clue>" form. Many clues are
# fragments (e.g. "Related to the", "Number 18"); parse() turns this text into
# (number, clue) tuples.
given = """
1. Embassy
2. Jetson's
3. Protection or safety
4. Shaquille O'Neal
5. Calf
6. Gone off on their own
7. Related to the
8. Might be used for (hyph.)
9. Covered in
10. Fill
11. Dog
12. Unwilling (in ALL OTHERS)
13. Oaxaca
14. Eighth
15. Like an Olympic
16. Crimson ____
17. Number 18
18. One who has
19. Superhero film
20. It's what karate
21. Prayer ending
22. Motherless
23. One who provides
24. Unlucky day
25. Kind of machine
26. Greek letter
27. Equipment for
28. Animal involved
29. Steady
30. They ask
31. In court
32. Or trespassing
33. Your foot
34. Variety of
35. Employee
36. Embarrassed
37. Made more
38. Streaker
39. Amusement
40. Attest to
41. Mess
42. Breaking into
43. Penguin
44. That's not chewable
45. Goodbye in
46. Efficient
47. Wander in
48. With wonder
49. Wheel that stays
50. It supports
51. A monarch's
52. Snow
53. Helper
54. Oxidization
55. Small amount of drink
56. Alaskan city
57. In the Belmont Stakes
58. Richard
59. In a gallery
60. Material that
61. Hall
62. Place of
63. Monster
64. Galactic
65. For change
66. Recuperate
67. Harbinger
68. Time of rule
69. Roundtree role
70. Cause to be
71. A specific purpose
72. Moon
73. Avatar of Vishnu
74. Athlete, typically
75. Traveler over
76. Rube
77. It's often hanging
78. Enriches the body
79. Search of adventure
80. Like steak
81. Element
82. Executive's
83. From Hell
84. Unintelligent
"""

def parse(s):
  """Parses numbered clue text into a list of (number, clue) tuples.

  Each non-blank line must look like "<number>. <clue>". Only the first
  ". " separates the number from the clue, so clues may themselves contain
  ". ". Blank and whitespace-only lines are skipped, and surrounding
  whitespace (including a trailing carriage return) is stripped from each line.

  Args:
    s: Multi-line string of numbered clues.

  Returns:
    List of (int, str) tuples in the order the lines appear.

  Raises:
    ValueError: If a non-blank line has no ". " separator or its prefix is
      not an integer.
  """
  result = []
  # splitlines() handles "\n", "\r\n" and "\r" uniformly.
  for raw_line in s.splitlines():
    line = raw_line.strip()
    if not line:
      continue
    n, sep, clue = line.partition('. ')
    if not sep:
      raise ValueError('Expected "<number>. <clue>", got %r' % raw_line)
    result.append((int(n), clue))
  return result

# List of (number, clue) tuples parsed from the raw clue text.
clues = parse(given)

In [3]:
# Target sums: process() keeps only pairs of clues whose two clue numbers add
# up to one of these values. Duplicates are present in the list.
# NOTE(review): the row grouping presumably mirrors the puzzle's layout -- confirm.
targets = [
  127, 127, 74, 85, 40, 40, 27, 28, 58, 58, 89, 62,
  36, 126,
  124, 87, 71,
  83, 83, 95, 102, 102,
  24, 123, 98, 98, 78,
  135, 65, 77,
  79, 119, 128, 13, 106, 63, 160,
  138, 136,
  146, 37, 23,
]

In [1]:
import forge
from data import warehouse
from puzzle.puzzlepedia import prod_config

# NOTE(review): presumably registers the production data sources that
# warehouse.get() reads from; must run before any warehouse lookup -- confirm.
prod_config.init()

In [35]:
import itertools
import re

# Captures the final run of word characters of a clue when it is preceded by a
# non-word character (so "Jetson's" yields "s" and "Place of" yields "of").
# Single-word clues and clues ending in punctuation do not match.
_END = r'[^\w](\w+)$'
# A clue whose captured ending is one of these is never placed second in a
# pairing (presumably because it reads as an unfinished phrase).
_BAD_ENDINGS = {'s', 'be', 'of', 'in', 'the', 'has', 'for', 'to'}


def _join_weight(a, b, bigrams):
  """Scores the ordering "a then b"; returns -1 when the ordering is rejected."""
  # A clue containing a parenthetical is never allowed to come first.
  if '(' in a:
    return -1
  b_ending = re.findall(_END, b)
  if b_ending and b_ending[0] in _BAD_ENDINGS:
    return -1
  end = a.lower().split()[-1]  # Join end of first...
  start = b.lower().split()[0]  # ...with start of last.
  return bigrams.get('%s %s' % (end, start), 0)


def process(clues, targets):
  """Finds clue pairs whose numbers sum to a target and ranks them.

  For every unordered pair of clues whose numbers add up to a value in
  `targets`, both orderings are scored by looking up the bigram formed by the
  last word of the first clue and the first word of the second clue. The
  better-scoring ordering is kept; on a tie the original (c1, c2) order wins.
  Pairs for which both orderings are rejected are dropped.

  Args:
    clues: Sequence of (number, clue) tuples.
    targets: Iterable of acceptable sums. It is materialized into a set once,
      so lists, sets and one-shot iterables all work.

  Returns:
    List of (weight, row) tuples sorted by weight, highest first, where row is
    the tab-separated string "sum<TAB>i1<TAB>i2<TAB>first|second". Pairs with
    equal weight keep the order in which they were generated.
  """
  # Assumes this is a mapping from "word word" to a numeric weight -- it is
  # only accessed through .get() here.
  bigrams = warehouse.get('/words/bigram')
  candidates = set(targets)
  results = []
  for (i1, c1), (i2, c2) in itertools.combinations(clues, 2):
    s = i1 + i2
    # Test against the set, not the raw argument: constant-time lookups, and
    # `targets` may already be exhausted if it was a one-shot iterable.
    if s not in candidates:
      continue
    best_weight = -1
    first, second = c1, c2
    for a, b in ((c1, c2), (c2, c1)):
      weight = _join_weight(a, b, bigrams)
      if weight > best_weight:
        best_weight = weight
        first, second = a, b
    if best_weight >= 0:
      results.append((best_weight, '%s\t%s\t%s\t%s|%s' % (s, i1, i2, first, second)))
  return sorted(results, key=lambda x: x[0], reverse=True)


In [36]:
# Print every candidate pairing, highest bigram weight first. Each row is
# "sum<TAB>first clue number<TAB>second clue number<TAB>first clue|second clue".
for weight, row in process(clues, set(targets)):
  print(row)

98	27	71	Equipment for|A specific purpose
95	33	62	Place of|Your foot
78	33	45	Goodbye in|Your foot
89	18	71	One who has|A specific purpose
24	7	17	Related to the|Number 18
127	57	70	Cause to be|In the Belmont Stakes
85	28	57	Animal involved|In the Belmont Stakes
87	28	59	Animal involved|In a gallery
102	34	68	Variety of|Time of rule
146	71	75	Traveler over|A specific purpose
85	23	62	Place of|One who provides
119	59	60	Material that|In a gallery
74	9	65	Covered in|For change
77	27	50	Equipment for|It supports
63	23	40	Attest to|One who provides
126	57	69	Roundtree role|In the Belmont Stakes
128	59	69	Roundtree role|In a gallery
124	59	65	For change|In a gallery
98	33	65	For change|Your foot
119	40	79	Attest to|Search of adventure
95	45	50	Goodbye in|It supports
83	23	60	Material that|One who provides
128	60	68	Material that|Time of rule
58	27	31	Equipment for|In court
77	9	68	Covered in|Time of rule
62	7	55	Related to the|Small amount of drink
77	32	45	Goodbye in|Or trespassing
79	32	

In [40]:
# Dump the parsed clues as "number<TAB>clue" lines.
for clue in clues:
  print('%s\t%s' % clue)

1	Embassy
2	Jetson's
3	Protection or safety
4	Shaquille O'Neal
5	Calf
6	Gone off on their own
7	Related to the
8	Might be used for (hyph.)
9	Covered in
10	Fill
11	Dog
12	Unwilling (in ALL OTHERS)
13	Oaxaca
14	Eighth
15	Like an Olympic
16	Crimson ____
17	Number 18
18	One who has
19	Superhero film
20	It's what karate
21	Prayer ending
22	Motherless
23	One who provides
24	Unlucky day
25	Kind of machine
26	Greek letter
27	Equipment for
28	Animal involved
29	Steady
30	They ask
31	In court
32	Or trespassing
33	Your foot
34	Variety of
35	Employee
36	Embarrassed
37	Made more
38	Streaker
39	Amusement
40	Attest to
41	Mess
42	Breaking into
43	Penguin
44	That's not chewable
45	Goodbye in
46	Efficient
47	Wander in
48	With wonder
49	Wheel that stays
50	It supports
51	A monarch's
52	Snow
53	Helper
54	Oxidization
55	Small amount of drink
56	Alaskan city
57	In the Belmont Stakes
58	Richard
59	In a gallery
60	Material that
61	Hall
62	Place of
63	Monster
64	Galactic
65	For change
66	Recuperate
67	Harbinge

In [45]:
# Print the target sums in ascending order, one per line (duplicates kept).
print('\n'.join(map(str, sorted(targets))))

13
23
24
27
28
36
37
40
40
58
58
62
63
65
71
74
77
78
79
83
83
85
87
89
95
98
98
102
102
106
119
123
124
126
127
127
128
135
136
138
146
160
