In [1]:
import re
from pprint import pprint
from collections import Counter

In [2]:
items = [] 

with open("20221004_item_list.txt") as infile : 
    next(infile)
    
    for idx, row in enumerate(infile) : 
        items.append(row.strip())


In [5]:
prefix_pattern = re.compile(r"^.{0,4} ")
spaced_hyphen_pattern = re.compile(r" - ")

After exploration, one annoying pattern has emerged. When we have one hyphen, such as `26 Infidel Belgian IPA - Selkirk Abbey`, the pattern seems to be `beer - brewery`. When we have three hyphens, it seems that often brewer comes first. For instance, `Z Mountains Walking - Sweets - Fruited Berlinerweisse 16oz can` and `Ommegang - Rare Vos Belgian Style Amber Ale  - 6.5% ABV 20 IBU`. 

I'm going to try to do a two-pass analysis, going through the "one hyphen" products and pulling out the breweries, then testing to see where the brewery is in the item. For the record, the larger cell that creates `item_translation` is where I started my work and then I added this other stuff.

In [6]:
[(prefix_pattern.sub("",item).lower(),item) for item in items[:10]]

[('abbey dubbel - new belgium', '26 Abbey Dubbel - New Belgium'),
 ('hop head red - green flash', '7 Hop Head Red - Green Flash'),
 ('blackfoot ipa', '15 Blackfoot IPA'),
 ('mystical stout - wildwood', '20 Mystical Stout - Wildwood'),
 ('fresh squeezed ipa - deschutes', '17 Fresh Squeezed IPA - Deschutes'),
 ('paddle amber - muddy creek', '6 No paddle Amber - Muddy Creek'),
 ('grace scotch ale - selkirk abbey', '9 Grace Scotch Ale - Selkirk Abbey'),
 ('lockhorn - bone dry cider 16 oz', 'Lockhorn - Bone Dry Cider 16 oz'),
 ('cadillac black ipa - great northern',
  '14 Cadillac Black IPA - Great Northern'),
 ("willie's bourbon stout - :lone peak",
  "23 Willie's Bourbon Stout - :Lone Peak")]

In [7]:
likely_breweries = list()

for item in items :
    clean_item = prefix_pattern.sub("",item).lower()
    pieces = spaced_hyphen_pattern.split(clean_item)
    
    if len(pieces) == 2 :
        likely_breweries.append(pieces[1].strip())


In [8]:
Counter(likely_breweries).most_common(15)

[('draught works', 176),
 ('sierra nevada', 162),
 ('new belgium', 157),
 ('firestone walker', 156),
 ('odell', 145),
 ('stone', 142),
 ('deschutes', 136),
 ('big sky', 127),
 ('great burn', 110),
 ('oskar blues', 108),
 ('blacksmith', 105),
 ('kettlehouse', 104),
 ('hopworks', 95),
 ('grand teton', 94),
 ('elysian', 92)]

In [9]:
brewery_set = (set(likely_breweries).union({"widmer","breakside brewing","homestead ales",
                                            "bitburger","stone"}) - 
    {"ipa","amber","lager","pilsner","stout"})

In [10]:
brewery_set

{'la araucaria rosado 2020',
 'ayinger',
 'angle of repose rose',
 'albarino 2019',
 'lone peak',
 'big sky brewing',
 'cote du rhone red blend',
 "mac and jack's",
 'black diamond',
 'avi deixler of absentee winery',
 'etna rossa (nerello mascalese)',
 'cascade white blend',
 'new hokkaido',
 'last chance cidel mill',
 'piccola',
 'lush ipa',
 'boulevard',
 'nostrale catarratto 2018',
 'grazing clouds new england ipa',
 'specific void ipa',
 'wine by joe',
 'iron hosre',
 'cuvee domaine cabernet franc 2018',
 'kettlehouse',
 '34 oz stainless 50/50',
 'bonsai',
 '11.2 oz blue label',
 'space rex hazy ipa 12oz',
 'black hook porter',
 'mcintosh',
 'bitterroot brewing',
 'by all means',
 'harvest moon',
 'upslope',
 'les rouquins pinot noir 2018',
 'blackfoote',
 '64 oz ss 50/50',
 'ace cider',
 'schneider weisse',
 'fiano',
 'weihenstephaner',
 'trinquames sauvignon blanc 2019',
 'red lodge (1)',
 'pinkus',
 'head full of dynamite 16oz cans',
 'h.a. brewing',
 'running a business- 2020'

In [33]:
item_translation = dict()
# key = original item
# value = [clean_item_name, brewery (if present), remainder (if present)]

clean_items = set()

for item in items :
    
    clean_item = prefix_pattern.sub("",item).lower()
    
    clean_item = clean_item.replace("windmere","widmer")
    
    clean_items.add(clean_item)
    pieces = spaced_hyphen_pattern.split(clean_item)
    
    beer = ""
    brewery = ""
    other_info = ""
    
    # This next section tries to get the right values in the right 
    # places for beer/brewery/other stuff
    if len(pieces) > 1 : 
        pieces = [p.strip() for p in pieces]
        
        if len(pieces) == 2 :        
            if pieces[0] in brewery_set : 
                brewery = pieces[0]
                beer = pieces[1]
            
            
            else :
                brewery = pieces[1]
                beer = pieces[0]

                
        elif len(pieces) == 3 :
            if pieces[0] in brewery_set : 
                brewery, beer, other_info = pieces
            else :
                beer, brewery, other_info = pieces
        else : 
            if pieces[0] in brewery_set : 
                brewery, beer = pieces[:2]
            else :
                beer = pieces[0]
                brewery = pieces[1]
                
            other_info = " - ".join(pieces[1:]).strip()
        
    else :
        
        clean_item_tokens = clean_item.split()
        
        
        
        for bry in brewery_set :
            if bry in clean_item : 
                brewery = bry
                beer = clean_item.replace(brewery,"")
                print(f"Brewery = {brewery}; Beer = {beer}")
            
    
#        for token in clean_item_tokens :
#            if token in brewery_set :
#                brewery = token
        
#        beer = " ".join([token for token in clean_item_tokens if token != brewery])
                
            
    item_translation[item] = [beer, brewery, other_info]

    if "Cioke" in item :
        print(item)
        print(item_translation[item])



Brewery = black; Beer = foot ipa
Brewery = blackfoot; Beer =  ipa
Brewery = laughing dog; Beer =  alpha imperial ipa 22 oz
Brewery = anchor; Beer = winter wheat 
Brewery = aged pale sour; Beer = petrus 
Brewery = petrus; Beer =  aged pale sour
Brewery = schmaltz; Beer =  he'brew jewbelation 15
Brewery = wine; Beer = top  growler 750 ml
Brewery = deschutes; Beer =  dissident 2014
Brewery = 12; Beer = glass snifter  oz
Brewery = barrel aged; Beer = scotch silly cognac  belgian
Brewery = julian's; Beer =  black and blue 22 oz
Brewery = black; Beer = julian's  and blue 22 oz
Brewery = anthem; Beer = cherry 
Brewery = ginger; Beer = glacier  brew
Brewery = anthem; Beer = cherry 
Brewery = dubuisson; Beer =  scaldis noel
Brewery = bayern; Beer = bad santa eisbock- 
Brewery = great burn; Beer =  ipa
Brewery = anchor; Beer =  christmas
Brewery = katabatic; Beer =  mosaic pale ale
Brewery = stem; Beer = glass munique  16.5 oz
Brewery = green flash; Beer =  imperial ipa 22 oz
Brewery = black; Be

Brewery = pinot; Beer = westrey wine co  gris
Brewery = wine; Beer = westrey  co pinot gris
Brewery = wine; Beer = volcanic s!
Brewery = cabernet sauvignon; Beer = steele wines red hills ava 
Brewery = wine; Beer = steele s red hills ava cabernet sauvignon
Brewery = thirsty street; Beer = bucket list nitro coco stout 
Brewery = sierra; Beer =  sidecar orange ipa
Brewery = kettlehouse; Beer = mixed berry milkshake ipa  brewing
Brewery = kettlehouse brewing; Beer = mixed berry milkshake ipa 
Brewery = firestone walker; Beer =  bretta weisse
Brewery = stone; Beer = fire walker bretta weisse
Brewery = firestone; Beer =  walker bretta weisse
Brewery = bitterroot; Beer =  huckleberry honey ale
Brewery = philipsburg; Beer = gonk amber  brewing
Brewery = philipsburg brewing; Beer = gonk amber 
Brewery = 2016; Beer = vigneti del sole pinot grigio 
Brewery = pinot; Beer = vigneti del sole  grigio 2016
Brewery = mayador; Beer =  sidra espumante
Brewery = poggio anima; Beer =  grillo
Brewery = bla

Brewery = 2019; Beer = martha stoumen post flirtation 
Brewery = martha stoumen; Beer =  post flirtation 2019
Brewery = 2018; Beer = heidi shrock biscaya rose' 
Brewery = sierra; Beer =  nevada wild little thing
Brewery = sierra nevada; Beer =  wild little thing
Brewery = 12; Beer = dolle extra special export stout oz bottle
Brewery = 12oz bottle; Beer = dolle extra special export stout 
Brewery = teton; Beer = farmstead cider  pet nat!
Brewery = farmstead; Beer =  cider teton pet nat!
Brewery = cider; Beer = farmstead  teton pet nat!
Brewery = gruner vetliner; Beer = pratsch 
Brewery = pratsch; Beer =  gruner vetliner
Brewery = cider; Beer = jaanihanso rose 
Brewery = propolis; Beer =  sigrid quad ale
Brewery = ace; Beer = fremont sp rex
Brewery = fremont; Beer =  space rex
Brewery = wasatch; Beer =  pumpkin ale
Brewery = coopers hall; Beer =  cascade red
Brewery = cascade red; Beer = coopers hall 
Brewery = pinot gris; Beer = eyrie 
Brewery = pinot; Beer = eyrie  gris
Brewery = pinot

Brewery = mountains walking; Beer =  sweets blueberry lemon
Brewery = fremont; Beer =  20th anniversary stout
Brewery = fremont; Beer =  20th anniversary stout
Brewery = pinot noir; Beer = patton valley 
Brewery = pinot; Beer = patton valley  noir
Brewery = morning birds; Beer = + cookie pairing package: a collaboration with  bakery preorder for 2/11/22 pick up!
Brewery = morning birds bakery; Beer = + cookie pairing package: a collaboration with  preorder for 2/11/22 pick up!
Brewery = goose island; Beer =  bourbon county stout 2021
Brewery = 2021; Beer = goose island bourbon county stout 
Brewery = shades; Beer = peach cobbler-  brewing
Brewery = shades brewing; Beer = peach cobbler- 
Brewery = 2018; Beer = cooper's hall sauvignon blanc 
Brewery = cooper's hall; Beer =  sauvignon blanc 2018
Brewery = imagine nation; Beer =  flight of 4!
Brewery = chardonnay; Beer = laurent dufouleur 
Brewery = wine; Beer = contact  tasting
Brewery = wine; Beer = contact  4 pack!
Brewery = pinot; Beer

In [32]:
"here's a string".replace("string","XXX")

"here's a XXX"

In [22]:
item

'15 Blackfoot IPA'

In [23]:
pieces

['blackfoot ipa']

In [25]:
token in brewery_set

True

In [28]:
                brewery = token


In [29]:
beer = " ".join([token for token in clean_item_tokens if token != brewery])

In [30]:
beer

'ipa'

In [12]:
def quote_wrap(text) : 
    return('"' + text + '"')
    

# Quotes in fields cause problems for GBQ upload, so I'll wrap those pieces in quotes for
# writing out. For instance, here's an item: 
# Quarticello ""Cioke"" Lambrusco - 00 - 2020
# with these fields: 
# ['quarticello ""cioke"" lambrusco', '00', '2020']

with open("item_lookup.txt",'w') as outfile :
    outfile.write("item\tbeer\tbrewery\tother_info\n")
    
    for item, pieces in item_translation.items() :
        pieces = [quote_wrap(p) for p in pieces]
        
        beer, brewery, other_info = pieces
        
        outfile.write(f"{item}\t{beer}\t{brewery}\t{other_info}\n")



['"quarticello ""cioke"" lambrusco"', '"00"', '"2020"']


In [13]:
# For convenience with some other places I need this file. 

with open("/Users/chandler/dropbox/teaching/repos/ada-python-gbq/item_lookup.txt",'w') as outfile :
    outfile.write("item\tbeer\tbrewery\tother_info\n")
    
    for item, pieces in item_translation.items() :
        pieces = [quote_wrap(p) for p in pieces]
        
        beer, brewery, other_info = pieces
        
        outfile.write(f"{item}\t{beer}\t{brewery}\t{other_info}\n")


In [14]:
# For convenience with some other places I need this file. 

with open("/Users/chandler/dropbox/teaching/2022/ada/week-07/item_lookup.txt",'w') as outfile :
    outfile.write("item\tbeer\tbrewery\tother_info\n")
    
    for item, pieces in item_translation.items() :
        pieces = [quote_wrap(p) for p in pieces]
        
        beer, brewery, other_info = pieces
        
        outfile.write(f"{item}\t{beer}\t{brewery}\t{other_info}\n")


In [23]:
count = 0

for idx, item in enumerate(item_translation) :
    beer, brewery, other_info = item_translation[item] 
    if brewery and brewery not in brewery_set :
        count +=1 
        print(f"{idx} and {item_translation[item]}")
        break
    
    
print(f"Count is {count}")

62 and ['widmer omission', 'lager', '']
Count is 1


In [32]:
for item, pieces in item_translation.items() :
    beer, brewery, other_info = pieces
    
    if brewery == "big sky" : 
        print(f"The item is {item}.")
        print(f"Beer: {beer}\tBrewery: {brewery}")
        print("--------------------------------------")
    

The item is 10 Scapegoat - Big Sky.
Beer: scapegoat	Brewery: big sky
--------------------------------------
The item is 13 Biere De Noel - Big Sky.
Beer: biere de noel	Brewery: big sky
--------------------------------------
The item is 21 Ivan the Terrible - Big Sky.
Beer: ivan the terrible	Brewery: big sky
--------------------------------------
The item is 28 BA Power Wagon Wheat Wine - Big Sky.
Beer: ba power wagon wheat wine	Brewery: big sky
--------------------------------------
The item is 12 Big Sky - Glacier Hop Ranch Wet Hop.
Beer: glacier hop ranch wet hop	Brewery: big sky
--------------------------------------
The item is 12 Rye Pale Ale - Big Sky.
Beer: rye pale ale	Brewery: big sky
--------------------------------------
The item is 12 NITRO Moose Drool - Big Sky.
Beer: nitro moose drool	Brewery: big sky
--------------------------------------
The item is 11 Cream Ale - Big Sky.
Beer: cream ale	Brewery: big sky
--------------------------------------
The item is 1 Trout Slayer