# London Cocktail Week 2024

London Cocktail Week is the world's original Cocktail Week and takes place in the cocktail capital of the world.

Every October, London's best bars create special Signature Cocktails, allowing cocktail enthusiasts to navigate their way through London, drink by drink.

More information can be found here: https://londoncocktailweek.com/


## What is this repository about?

As a cocktail enthusiast myself, I've attended London Cocktail Week since moving to London, and each year I scrape the website to make it easier to find the bars I want to visit! With each year that passes, this notebook gets more advanced!

## The Code

### Import Statements

In [73]:
from bs4 import BeautifulSoup
from dotenv import load_dotenv
from geopy.geocoders import OpenCage
import numpy as np
import os
import pandas as pd
import re
import requests
import time

### Setup API Keys

In [2]:
# Load settings from a local .env file (python-dotenv) before anything
# calls os.getenv -- presumably this is where OPEN_CAGE_API_KEY, read by
# get_coordinates, is expected to live.
load_dotenv()

True

### Parse the Website

In [3]:
def parse_london_cocktail_week_website(url, timeout=30):
    """Download a web page and parse it with Beautiful Soup.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The BeautifulSoup object built from the response body using the
        'html.parser' backend.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If no response arrives within `timeout` seconds.
    """
    # Without an explicit timeout, requests may wait on a stalled
    # connection forever
    response = requests.get(url, timeout=timeout)

    # Fail loudly on 4xx/5xx instead of parsing an error page as if it
    # were the bar listing
    response.raise_for_status()

    # Parse the website using Beautiful Soup
    return BeautifulSoup(response.text, 'html.parser')

### Collate bars into a Data Frame

In [4]:
def create_bars_csv(year, url):
    """Scrape the bar listing at `url` into a DataFrame and save it as a CSV.

    Args:
        year: Passed to export_to_csv to name the output file.
        url: Page handed to parse_london_cocktail_week_website.

    Returns:
        A DataFrame with one row per bar: name, address, phone number,
        description and one opening-times column per weekday.
    """
    soup = parse_london_cocktail_week_website(url)

    # Column layout: four descriptive fields followed by the weekdays
    detail_columns = ['Bar Name', 'Address', 'Phone Number', 'Description']
    weekday_columns = ['MON', 'TUE', 'WED', 'THU', 'FRI', 'SAT', 'SUN']
    df = pd.DataFrame(columns=detail_columns + weekday_columns)

    # Every direct <li> child of the first <ul> on the page is one bar
    bar_items = soup.find('ul').findChildren("li", recursive=False)

    for row_number, bar_item in enumerate(bar_items):

        # Name, address, phone number & description, in column order
        details = [
            bar_item.find('h2', {'class': 'bar_name'}).getText(),
            bar_item.find('div', {'class': 'text'}).getText(),
            bar_item.find('div', {'class': 'text--padded'}).getText(),
            bar_item.find('p', {'class': 'text text--padded'}).getText(),
        ]

        # The opening hours container holds one entry per listed day
        hours_list = bar_item.find('ul', {'class': 'opening_hours__container'})
        day_entries = hours_list.find_all('li', {'class': 'opening_hours__times'})

        # Pull the times text out of each entry; these are expected to line
        # up with the MON..SUN columns
        opening_times = []
        for day_entry in day_entries:
            opening_times.append(day_entry.find('li', {'class': 'text'}).getText())

        # Save all bar data to the dataframe
        df.loc[row_number] = details + opening_times

    export_to_csv(df, year)

    return df

### Export to CSV

In [5]:
def export_to_csv(df, year):
    """Write `df` to 'bar_csvs/bar_list_<year>.csv'.

    Args:
        df: DataFrame to save. Its index is written as the first column
            (pandas' default for to_csv).
        year: Used to build the file name.
    """
    csv_string = 'bar_csvs/bar_list_' + str(year) + '.csv'

    # to_csv does not create missing folders, so make sure the output
    # directory exists before writing (a no-op when it already does)
    os.makedirs(os.path.dirname(csv_string), exist_ok=True)

    df.to_csv(csv_string)

### Check if the bar list has already been parsed

In [6]:
def get_bar_dataframe(year, url):
    """Return the bar DataFrame for `year`, using the cached CSV when present.

    If 'bar_csvs/bar_list_<year>.csv' exists it is loaded; otherwise the
    listing at `url` is scraped via create_bars_csv. When either coordinate
    column is missing, the bars are geocoded and the CSV is rewritten.

    Args:
        year: Selects the cached CSV file name.
        url: Page to scrape when no cached CSV exists.

    Returns:
        The bar DataFrame including 'Latitude' and 'Longitude' columns.
    """
    file_path = 'bar_csvs/bar_list_' + str(year) + '.csv'

    if os.path.exists(file_path):
        # export_to_csv writes the index as the first column; read it back
        # as the index so it does not reappear as an 'Unnamed: 0' column
        df = pd.read_csv(file_path, index_col=0)
    else:
        df = create_bars_csv(year, url)

    # Geocode when EITHER column is absent; with `and`, a file holding only
    # one of the two columns would never be completed
    if 'Latitude' not in df.columns or 'Longitude' not in df.columns:
        df = add_latitude_and_longitude(df)
        export_to_csv(df, year)

    return df

### Update the dataframe to store latitude and longitude

In [7]:
def add_latitude_and_longitude(df):
    """Add 'Latitude' and 'Longitude' columns derived from 'Address'.

    The frame is modified in place and also returned.
    """
    # One get_coordinates call per address
    coordinate_pairs = df['Address'].apply(get_coordinates)

    # Expand each returned pair into its own columns
    coordinate_frame = coordinate_pairs.apply(pd.Series)

    df[['Latitude', 'Longitude']] = coordinate_frame
    return df

### Get the coordinates for the bars

In [8]:
def get_coordinates(address):
    """Geocode `address` with OpenCage and return [latitude, longitude].

    The API key is read from the OPEN_CAGE_API_KEY environment variable.

    Args:
        address: Address text to look up.

    Returns:
        A two-item list [latitude, longitude]. When the geocoder finds no
        match, both items are NaN so that one unresolvable address does not
        abort geocoding of the whole bar list.
    """
    geolocator = OpenCage(api_key=os.getenv('OPEN_CAGE_API_KEY'))

    location = geolocator.geocode(address)

    # geocode() yields None when there is no result; reading .latitude on
    # it would raise AttributeError
    if location is None:
        return [float('nan'), float('nan')]

    return [location.latitude, location.longitude]

### Get Drinks Dataframe

In [95]:
def get_drinks_dataframe(bars_url=None):
    """Scrape every bar's signature serves into a DataFrame.

    Args:
        bars_url: Page to scrape. When omitted, the notebook-level `url`
            variable is used, so existing no-argument calls keep working.

    Returns:
        A DataFrame with columns 'ID' (position of the bar on the page),
        'Bar', 'Drink', 'Price', 'Is Non Alcoholic' and 'Description'.
        'Drink', 'Price' and 'Is Non Alcoholic' hold whatever
        parse_drink_info returns for the drink heading.
    """
    if bars_url is None:
        # Fall back to the global `url` defined in the run cell
        bars_url = url

    # Get the parsed website
    parsed_website = parse_london_cocktail_week_website(bars_url)
    bars = parsed_website.find('ul').findChildren("li", recursive=False)

    columns = [
        'ID',
        'Bar',
        'Drink',
        'Price',
        'Is Non Alcoholic',
        'Description',
    ]

    # Collect rows first and build the frame once; growing a DataFrame one
    # .loc row at a time copies the data on every insert
    rows = []

    for i, bar in enumerate(bars):

        bar_name = bar.find('h2', {'class': 'bar_name'}).getText()

        signature_serve_section = bar.find('h4', string="LONDON COCKTAIL WEEK SIGNATURE SERVE")
        if not signature_serve_section:
            continue

        # Get the next <ul> after the heading; skip the bar if there is none
        drinks_list = signature_serve_section.find_next('ul')
        if drinks_list is None:
            continue

        for li in drinks_list.find_all('li'):
            # First text block is the drink heading, second its description
            text_blocks = li.find_all('div', class_='text text--padded')
            if len(text_blocks) < 2:
                # Not a complete drink entry; indexing [1] would raise
                continue

            drink_name = text_blocks[0].text.strip()
            drink_description = text_blocks[1].text.strip()

            if drink_name == '':
                continue

            name, price, is_non_alc = parse_drink_info(drink_name)

            rows.append([
                i,
                bar_name,
                name,
                price,
                is_non_alc,
                drink_description,
            ])

    return pd.DataFrame(rows, columns=columns)

### Parse the Drink Name

In [85]:
# "<name> - £<price>" with an optional "(non-alc)" style tag at the end.
# The price may carry pence (£9.50); the tag is matched case-insensitively
# and accepts "NON ALC", "Non-Alc", "NonAlc" and "Non-Alcoholic".
_DRINK_INFO_PATTERN = re.compile(
    r'^(.*) - (£\d+(?:\.\d{1,2})?)\s*(?:\((non[ -]?alc(?:oholic)?)\))?$',
    re.IGNORECASE,
)


def parse_drink_info(drink_string):
    """Split a drink heading into name, price and non-alcoholic flag.

    Args:
        drink_string: Text such as 'Negroni - £12' or
            'Spritz - £9 (NON ALC)'. Surrounding whitespace is ignored.

    Returns:
        A tuple (drink_name, price, non_alcoholic) where price keeps its
        '£' prefix and non_alcoholic is a bool. If the text does not follow
        the expected layout, (None, None, None) is returned.
    """
    match = _DRINK_INFO_PATTERN.match(drink_string.strip())

    if not match:
        return None, None, None  # In case of no match

    drink_name = match.group(1).strip()
    price = match.group(2)
    # Group 3 is only set when a non-alcoholic tag was present
    non_alcoholic = match.group(3) is not None
    return drink_name, price, non_alcoholic


## London Cocktail Week 2024

In [96]:
# Inputs for this year's run: the year names the cached CSV, the URL is the
# printable bar listing that gets scraped
year = 2024
url = (
    'https://londoncocktailweek.com/bars/print/'
    '?collectionId=0&whatId=0&areaId=0&spiritId=0&openNow=0&search='
)

# Bar details first, then the signature serves; get_drinks_dataframe reads
# the `url` variable defined above
df = get_bar_dataframe(year, url)
drinks_df = get_drinks_dataframe()