In [1]:
# standard imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# web scraping imports
import requests
from bs4 import BeautifulSoup

In [2]:
# reading the data
df = pd.read_csv('full_data_cleaned.csv')

In [4]:
# creating the urls
beer_info_page = df['beer_info_page']
beer_info_page

page_urls = ['https://www.beeradvocate.com' + page for page in beer_info_page]
page_urls

0             /beer/profile/39/131/
1           /beer/profile/911/2899/
2        /beer/profile/1432/556187/
3       /beer/profile/32906/547137/
4            /beer/profile/252/779/
                   ...             
8760    /beer/profile/35633/201871/
8761    /beer/profile/31540/187158/
8762    /beer/profile/33018/102888/
8763    /beer/profile/40927/131389/
8764    /beer/profile/32319/145691/
Name: beer_info_page, Length: 8765, dtype: object

In [7]:
# Using requests to get the main page
url = 'https://www.beeradvocate.com/beer/profile/39/131/'
r = requests.get(url)
soup = BeautifulSoup(r.content, 'html.parser')

In [21]:
# isolating the Notes sections
soup.find_all(style='clear:both; margin:0; padding:0px 20px; font-size:1.05em;')[0].text.strip()

'Notes:\nA beer that has a dominant malty taste. This beer’s origins in a monk’s recipe are reflected in its heartiness. The Pope of Beers, Conrad Seidl, describes it as: “Almost black with a very slight red tone, a sensational, festive foam and truly extraordinary fragrance that at first summons up visions of greaves lard. The first taste is of mild fullness with an accompanying coffee tone, which becomes more dominant with the aftertaste. There is very little of the sweetness that is frequently to be tasted with doppelbock beer.” The Ayinger Celebrator has been ranked among the best beers of the world by the Chicago Testing Institute several times and has won numerous medals.'

In [23]:
url = 'https://www.beeradvocate.com/beer/profile/741/399497/'
r = requests.get(url)
soup = BeautifulSoup(r.content, 'html.parser')
soup.find_all(style='clear:both; margin:0; padding:0px 20px; font-size:1.05em;')[0].text.strip()

'Notes:\nNone'

In [24]:
# This function gets the notes, otherwise returns NaN
def get_notes(url):
    r = requests.get(url)
    soup = BeautifulSoup(r.content, 'html.parser')
    if soup.find_all(style='clear:both; margin:0; padding:0px 20px; font-size:1.05em;'):
        notes = soup.find_all(style='clear:both; margin:0; padding:0px 20px; font-size:1.05em;')[0].text.strip()
    else:
        notes = np.nan
    return notes

In [27]:
beer_notes = pd.Series(page_urls).apply(get_notes)

In [56]:
# beer_notes[3].replace('Notes:\n', '')
df['notes'] = beer_notes.apply(lambda x: x.replace('Notes:\n', '', 1))

In [70]:
# replace None with np.nan
df

Unnamed: 0,name,rank,brewery,style,n_ratings,avg_rating,beer_info_page,abv,BA_official_score,ranking_vs_all,...,status,last_rated,date_added,n_wants,n_gots,country,state,style_family,substyle,notes
0,Ayinger Celebrator,1,Ayinger Privatbrauerei,Bock - Doppelbock,6976,4.34,/beer/profile/39/131/,6.7,96.0,883,...,Active,"May 21, 2022",2001-01-10,623,1142,Germany,,Bock,Doppelbock,A beer that has a dominant malty taste. This b...
1,Doppelbock Dunkel,2,Klosterbrauerei Andechs,Bock - Doppelbock,1316,4.28,/beer/profile/911/2899/,7.1,95.0,1546,...,Active,"May 25, 2022",2002-06-16,450,192,Germany,,Bock,Doppelbock,This world famous bock from Bavaria’s Holy Mou...
2,Convex Refraction,3,Jackie O's Pub & Brewery,Bock - Doppelbock,24,4.42,/beer/profile/1432/556187/,13.7,95.0,1772,...,Active,"May 09, 2022",2021-06-26,1,0,USA,Ohio,Bock,Doppelbock,Collaboration with Burial\n\nBourbon Barrel Ag...
3,Indikator,4,Schilling Beer Co.,Bock - Doppelbock,12,4.49,/beer/profile/32906/547137/,8.0,94.0,2641,...,Active,"Apr 16, 2022",2021-05-01,1,0,USA,New Hampshire,Bock,Doppelbock,
4,Weihenstephaner Korbinian,5,Bayerische Staatsbrauerei Weihenstephan,Bock - Doppelbock,2968,4.20,/beer/profile/252/779/,7.4,93.0,2948,...,Active,"May 30, 2022",2001-09-12,251,399,Germany,,Bock,Doppelbock,"Our Korbinian, the full-bodied, dark Doppelboc..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8760,The Cut: Peach,96,Casey Brewing & Blending,Wild Ale,82,4.36,/beer/profile/35633/201871/,5.0,96.0,1157,...,Active,Yesterday at 11:56 PM,2015-11-30,22,12,USA,Colorado,Wild Ale,,
8761,Roseberry,97,de Garde Brewing,Wild Ale,81,4.36,/beer/profile/31540/187158/,6.2,96.0,1171,...,Active,"Sep 05, 2021",2015-08-21,16,11,USA,Oregon,Wild Ale,,"A golden wild ale, aged in oak wine barrels, t..."
8762,Ensorcelled,98,The Rare Barrel,Wild Ale,346,4.34,/beer/profile/33018/102888/,5.8,96.0,991,...,Active,"Jul 10, 2021",2013-10-21,88,82,USA,California,Wild Ale,,
8763,Befuddlement,99,Bruery Terreux,Wild Ale,245,4.34,/beer/profile/40927/131389/,9.9,96.0,1028,...,Active,"Oct 10, 2021",2014-07-30,59,38,USA,California,Wild Ale,,Hoarders Society exclusive blend of our Sour I...


In [72]:
# removing newline characters
df['notes'] = df['notes'].apply(lambda x: x.replace('\n', ' '))

In [78]:
# Saving the data
df.to_csv('beeradvocate_full_data.csv', index=False)