In [25]:
# Preamble
import numpy as np
import pandas as pd
import plotly.express as px
import re
import os
from collections import Counter
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.decomposition import PCA

import spacy
from thinc.api import set_gpu_allocator, require_gpu

In [26]:
# Load the GABS festival beer spreadsheets and combine them into one frame.
main_file_path = "../"

# Read each year's sheet and tag it with its festival year. The year is stored
# as a string, so any later lookup has to compare against '2023' / '2024'.
df_23 = pd.read_csv(os.path.join(main_file_path, "Data", "GABS_2023_Festival_Beers.csv"))
df_23['year'] = '2023'

df_24 = pd.read_csv(os.path.join(main_file_path, "Data", "GABS_2024_Festival_Beers.csv"))
df_24['year'] = '2024'

# The 2023 sheet's capitalised headers are mapped onto the short lower-case
# names used below (presumably the names the 2024 sheet already has - it is
# concatenated without renaming).
column_renames = {'Number': 'num', 'Section': 'section', 'Brewery': 'brewery',
                  'Beer': 'name', 'Style': 'style', 'Description': 'desc'}

# Columns that are dropped before analysis.
dropped_cols = ['abv', 'state', 'Anticipation', 'Anticipation notes',
                'Reality', 'Reality notes']

# Combine into one df keyed by (beer number, year).
df = (
    pd.concat([df_23.rename(columns=column_renames), df_24])
    .drop(columns=dropped_cols)
    .set_index(['num', 'year'])
)

# Create a column combining all text descriptor fields (upper-cased, stripped).
# Missing values are blanked first: str(NaN) would otherwise inject a literal
# "NAN" token into the text that gets embedded. Empty fields are skipped so
# they do not leave stray separators behind.
text_fields = df[['name', 'style', 'desc']].fillna('').astype(str)
df['words'] = [
    ' '.join(part.strip().upper() for part in row if part.strip())
    for row in text_fields.itertuples(index=False, name=None)
]

In [100]:
# Settings for the embedding model.
GPU_ID = 0
SPACY_MODEL = "en_core_web_trf"

# Direct GPU memory allocations via PyTorch before claiming the device.
# This prevents out-of-memory errors that would otherwise occur from competing
# memory pools.
set_gpu_allocator("pytorch")
require_gpu(GPU_ID)

# Load the pipeline only after the GPU has been selected.
nlp = spacy.load(SPACY_MODEL)

In [101]:
# One embedding per beer: run the combined text through the pipeline, take the
# first entry of the transformer outputs and average it over axis 0.
beer_embeddings = np.stack([
    np.mean(doc._.trf_data.all_outputs[0].data, axis=0)
    for doc in nlp.pipe(df['words'])
])

# Pairwise cosine similarity between every pair of beers.
# NOTE(review): `.get()` presumably copies the stacked array from the GPU to
# host memory before scikit-learn sees it - confirm.
beer_semantic_cos_sim = cosine_similarity(beer_embeddings.get())

# Zero the diagonal so a beer's similarity to itself does not dominate the
# ranking when the rows are sorted later.
np.fill_diagonal(beer_semantic_cos_sim, 0)

In [103]:
# Reduce the embeddings to two principal components for a 2-D scatter plot.
pca_arr_all = PCA(n_components=2).fit_transform(beer_embeddings.get())
print(pca_arr_all.shape)

# The component columns keep their default integer labels (0 and 1); the
# descriptive columns are attached on the same (num, year) index as `df`.
pca_df = pd.DataFrame(pca_arr_all, index=df.index).assign(
    Section=df['section'],
    Number=list(df.index.get_level_values(0)),
    Beer=df['name'],
    Brewery=df['brewery'],
    Style=df['style'],
    Year=list(df.index.get_level_values(1)),
)

# Colour by festival year; the hover box identifies the individual beer.
fig = px.scatter(
    pca_df,
    x=0,
    y=1,
    color="Year",
    hover_data=["Beer", "Section", "Number", "Style"],
    title="GABS 2023-24 (Semantic embeddings, PCA reduction to 2-dim)",
)
fig.show()

(237, 2)


In [105]:
# Sanity check: the similarity matrix was computed from the embeddings against
# themselves, so it should be square.
beer_semantic_cos_sim.shape

(237, 237)

In [107]:
# Spot-check: one beer (by row position) and its nearest neighbours, with no
# restriction on festival year.
test_num = 108
num_recs = 5

# argsort is ascending, so the last `num_recs` positions are the most similar
# beers, listed from least to most similar within that group.
nearest_inds = np.argsort(beer_semantic_cos_sim[test_num])[-num_recs:]

display(df.iloc[test_num])
display(df.iloc[nearest_inds])

section                                                    6
brewery                                     Hiker Brewing Co
name                                           Hold the Pork
style                                              Specialty
desc       Infused with smoked pineapple and szechuan pep...
words      HOLD THE PORK SPECIALTY INFUSED WITH SMOKED PI...
Name: (109.0, 2023), dtype: object

Unnamed: 0_level_0,Unnamed: 1_level_0,section,brewery,name,style,desc,words
num,year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
85.0,2024,5,Wolf of the Willows,The Terpinator,IPA - American,"Dry hopped with at a massive zero g/L, all the...",THE TERPINATOR IPA - AMERICAN DRY HOPPED WITH ...
112.0,2024,6,Stoic Brewing,Bacon Me Crazy,American IPA,A bold brew where Smoky bacon aroma dances fro...,BACON ME CRAZY AMERICAN IPA A BOLD BREW WHERE ...
58.0,2024,3,Slipstream Brewing Co.,Pick of the Bunch,Specialty Beer,"Capturing the essence of Banana Cream Pie, thi...",PICK OF THE BUNCH SPECIALTY BEER CAPTURING THE...
90.0,2024,5,Blackman's,Rocky road campfire chocolate smoked stout,Pastry Stout,A dessert stout made to take to you to the ima...,ROCKY ROAD CAMPFIRE CHOCOLATE SMOKED STOUT PAS...
17.0,2024,1,Cavalier Brewing,Smoke Rings,Kettle Sour,Inspired by your childhood christmases spent a...,SMOKE RINGS KETTLE SOUR INSPIRED BY YOUR CHILD...


In [110]:
def get_same_year_recs(ind, num_recs, *, frame=None, sim=None):
    """Return row positions of the beers most similar to a beer from the same year.

    Parameters
    ----------
    ind : int
        Row position (as used with ``.iloc``) of the beer to find
        recommendations for.
    num_recs : int
        Maximum number of recommendations to return.
    frame : pandas.DataFrame, optional
        Beer table whose index has the year as its second level. Defaults to
        the notebook-level ``df``.
    sim : array-like, optional
        Square similarity matrix whose rows/columns follow the row order of
        ``frame``. Defaults to the notebook-level ``beer_semantic_cos_sim``.

    Returns
    -------
    list
        Row positions ordered from most to least similar. The list is shorter
        than ``num_recs`` when there are not enough other beers from that year
        (previously this case raised ``IndexError``).
    """
    frame = df if frame is None else frame
    sim = beer_semantic_cos_sim if sim is None else sim

    # Look the years up once instead of materialising a row per candidate.
    years = frame.index.get_level_values(1)
    base_year = years[ind]

    recs = []
    # argsort is ascending, so walk it backwards to visit the most similar
    # beers first; the walk ends naturally when the candidates run out.
    for rec_ind in np.argsort(sim[ind])[::-1]:
        if len(recs) >= num_recs:
            break
        # Never recommend the beer to itself, whatever its diagonal entry is.
        if rec_ind != ind and years[rec_ind] == base_year:
            recs.append(rec_ind)

    return recs

def get_beer_ind(num, year, *, frame=None):
    """Return the row position of the beer with the given number and year.

    Parameters
    ----------
    num : int, float or str
        Beer number; coerced with ``int`` before comparison.
    year : int or str
        Festival year; coerced with ``int`` and compared as a string, because
        the ``year`` index level holds strings.
    frame : pandas.DataFrame, optional
        Beer table indexed by the ``num`` and ``year`` levels. Defaults to the
        notebook-level ``df``.

    Returns
    -------
    int
        Position of the first matching row, usable with ``.iloc``.

    Raises
    ------
    IndexError
        If no beer matches the given number and year.
    """
    frame = df if frame is None else frame

    beer_num = int(num)
    beer_year = str(int(year))

    # Compare directly against the index levels rather than querying the frame
    # and then mapping the resulting label back to a position.
    is_match = ((frame.index.get_level_values('num') == beer_num)
                & (frame.index.get_level_values('year') == beer_year))
    positions = np.flatnonzero(is_match)

    if positions.size == 0:
        raise IndexError(f"No beer number {beer_num} found for year {beer_year}")
    return int(positions[0])

In [111]:
# Spot-check: the same beer as before, but with recommendations restricted to
# beers from its own festival year.
test_num = 108
num_recs = 5

same_year_inds = get_same_year_recs(test_num, num_recs)

display(df.iloc[test_num])
display(df.iloc[same_year_inds])

section                                                    6
brewery                                     Hiker Brewing Co
name                                           Hold the Pork
style                                              Specialty
desc       Infused with smoked pineapple and szechuan pep...
words      HOLD THE PORK SPECIALTY INFUSED WITH SMOKED PI...
Name: (109.0, 2023), dtype: object

Unnamed: 0_level_0,Unnamed: 1_level_0,section,brewery,name,style,desc,words
num,year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
67.0,2023,4,Shark Island Brewing Co,Life Can Be Sweet & Sour,Saison,A zesty Saison infused with Cinnamon and Pinea...,LIFE CAN BE SWEET & SOUR SAISON A ZESTY SAISON...
45.0,2023,3,Craft and Co,The Craft and Co. WIPA,IPA,"Bursting with bright citrus, stone fruit, and ...",THE CRAFT AND CO. WIPA IPA BURSTING WITH BRIGH...
69.0,2023,4,Jetty Road Brewery,Barrel-Aged Raspberry Imperial Stout,Stout,Just like a Black Forrest cake your nan made f...,BARREL-AGED RASPBERRY IMPERIAL STOUT STOUT JUS...
98.0,2023,5,Boatrocker Brewers & Distillers,Gulls Just Wanna Have Fun (Fish & Chip Gose),Sour,"Sea harvest ingredients, fresh lemon peel & ma...",GULLS JUST WANNA HAVE FUN (FISH & CHIP GOSE) S...
46.0,2023,3,Tallboy and Moose,Waggle Buzz,Braggot,Honey undertones mingle with caramel and biscu...,WAGGLE BUZZ BRAGGOT HONEY UNDERTONES MINGLE WI...
