In [251]:
# import dependencies for web scraping 
import requests
from bs4 import BeautifulSoup as bs
from bs4 import NavigableString
import pandas as pd
import numpy as np

In [252]:
# URL for scraping: the spoonacular food-API docs page.
# NOTE: the "#Wine-Guide" fragment is resolved client-side only; it is not part of
# the HTTP request, so the GET returns the whole docs page.
wineURL = "https://spoonacular.com/food-api/docs#Wine-Guide"

In [253]:
# Retrieve page with the requests module.
# The timeout keeps the cell from hanging indefinitely on a stalled connection,
# and raise_for_status() stops the notebook here on an HTTP error instead of
# letting a later cell parse an error page.
response = requests.get(wineURL, timeout=30)
response.raise_for_status()

In [254]:
# Create BeautifulSoup object and parse with 'html.parser'
# (the parser from Python's standard library, so no extra parser package is needed)
soup = bs(response.text, 'html.parser')

In [255]:
# The wine list was located with the Chrome dev tools: it is the text of the
# last <pre> element on the page. Strip the text, remove newlines and hyphens,
# then split on whitespace to get an iterable list of wine tokens.
last_pre = soup.find_all('pre')[-1]
results = last_pre.text.strip().replace('\n', '').replace('-', '')
wines = results.split()

# separate list object holding the same tokens, used by the following cells
wine_selection = list(wines)


In [256]:
# Keep entries 1..109 of the scraped list (the slice skips element 0 and stops
# before index 110), then obtain the length of the resulting wine list.
# NOTE(review): element 0 is presumably not a wine name — confirm against the scraped text.
wine_selections = wine_selection[1:110]
len(wine_selections)

109

In [257]:
# import dependencies for API call to spoonacular API
import json
from pprint import pprint
import pandas as pd
from config import api_key

In [258]:
# create URL params for GET requests
# pairing endpoint; not used by the query built in this cell
wine_url = "https://api.spoonacular.com/food/wine/pairing"
# dishes-for-wine endpoint; note the value already ends with "?"
wine_url_base ="https://api.spoonacular.com/food/wine/dishes?"
# single wine used for the query below
wine = "sparkling_rose"
# because the base ends with "?", the query string starts with "?&apiKey=..."
wine_query = f"{wine_url_base}&apiKey={api_key}&wine={wine}"

In [259]:
# Perform a GET request and test the API call with one wine pairing.
# The session is used as a context manager so its connections are released as
# soon as the request has completed; the response body is already downloaded
# by then, so it can still be decoded afterwards.
with requests.Session() as s:
    response = s.get(wine_query)
pprint(response.json())

# response = requests.get(wine_query).json()
# response

{'pairings': ['chole', 'tacos', 'curry', 'sushi', 'nachos'],
 'text': ' It goes especially well with chole, tacos, curry, sushi, and '
         'nachos.'}


In [260]:
# print(json.dumps(response, indent=4, sort_keys=True))

In [261]:
# build the dataframe that collects dish pairings: one row per scraped wine
columns = ['wine']
dish_pairing_df = pd.DataFrame({'wine': wine_selections}, columns=columns)
dish_pairing_df

Unnamed: 0,wine
0,white_wine
1,dry_white_wine
2,assyrtiko
3,pinot_blanc
4,cortese
...,...
104,dry_sherry
105,vermouth
106,dry_vermouth
107,fruit_wine


In [262]:
# add the columns that will receive the API response values, initialised to
# empty strings
for new_column in ('pairing', 'description'):
    dish_pairing_df[new_column] = ''
dish_pairing_df

Unnamed: 0,wine,pairing,description
0,white_wine,,
1,dry_white_wine,,
2,assyrtiko,,
3,pinot_blanc,,
4,cortese,,
...,...,...,...
104,dry_sherry,,
105,vermouth,,
106,dry_vermouth,,
107,fruit_wine,,


In [263]:
# reference columns with .loc to append wine pairing data; for use later
# dish_pairing_df.loc[index,"pairing"]

In [264]:
# Build the URL prefix shared by every dish-pairing request. It ends with
# "wine=" so a wine name can be appended directly to form the full request URL.
get_dish_pairing = f"{wine_url_base}&apiKey={api_key}&wine="

# for wine in wine_selections:
#     wine_response = requests.get(get_dish_pairing + wine).json()
#     print(wine_response)
        

In [265]:
# Loop through the wine options, make the API request, and store the results
# in the pandas dataframe.

# 'pairing' receives a Python list for every successful lookup, so make sure
# the column can hold arbitrary objects.
dish_pairing_df['pairing'] = dish_pairing_df['pairing'].astype(object)

# Iterate over (row label, wine name) pairs only: the frame is written to
# inside the loop, so the per-row copies produced by iterrows() are not needed.
for index, wine_option in dish_pairing_df['wine'].items():
    # the timeout keeps one stalled request from hanging the whole loop
    response = requests.get(get_dish_pairing + wine_option, timeout=30).json()

    if 'pairings' in response and 'text' in response:
        # .at addresses exactly one cell, so the list of dishes is stored as a
        # single value rather than being treated as a sequence to align.
        dish_pairing_df.at[index, "pairing"] = response['pairings']
        dish_pairing_df.at[index, "description"] = response['text']
    else:
        # Error payload: keep its status and message so failed lookups can be
        # filtered afterwards. .get() avoids a second KeyError if the payload
        # has an unexpected shape.
        dish_pairing_df.at[index, "pairing"] = response.get('status')
        dish_pairing_df.at[index, "description"] = response.get('message')

In [266]:
# display results
dish_pairing_df

Unnamed: 0,wine,pairing,description
0,white_wine,"[pho, stew, crab, carp, fish]","It goes especially well with pho, stew, crab,..."
1,dry_white_wine,"[stew, crab, carp, fish, clams]","It goes especially well with stew, crab, carp..."
2,assyrtiko,"[gyros, greek, souvlaki, moussaka, spanakopita]",Assyrtiko is a dry white wine which is crisp a...
3,pinot_blanc,failure,No dish pairings found for wine pinot_blanc
4,cortese,failure,No dish pairings found for wine cortese
...,...,...,...
104,dry_sherry,[gazpacho],It goes especially well with gazpacho.
105,vermouth,failure,No dish pairings found for wine vermouth
106,dry_vermouth,failure,No dish pairings found for wine dry_vermouth
107,fruit_wine,failure,No dish pairings found for wine fruit_wine


In [267]:
# identify wines with no pairings: rows whose 'pairing' value is 'failure'
failed_lookup = dish_pairing_df['pairing'] == 'failure'
no_pairings = dish_pairing_df.loc[failed_lookup]
no_pairings

Unnamed: 0,wine,pairing,description
3,pinot_blanc,failure,No dish pairings found for wine pinot_blanc
4,cortese,failure,No dish pairings found for wine cortese
5,roussanne,failure,No dish pairings found for wine roussanne
10,greco,failure,No dish pairings found for wine greco
11,marsanne,failure,No dish pairings found for wine marsanne
15,white_rioja,failure,No dish pairings found for wine white_rioja
18,l_acadie_blanc,failure,No dish pairings found for wine l_acadie_blanc
21,catarratto,failure,No dish pairings found for wine catarratto
23,arneis,failure,No dish pairings found for wine arneis
26,soave,failure,No dish pairings found for wine soave


In [268]:
# there are 51 out of 109 wines with no listed pairings (47 percent of the list)
no_pairings.count()

wine           51
pairing        51
description    51
dtype: int64

In [324]:
# Group the wines with no pairings into their parent wine category:
# dry white, dry red, red, dessert and sparkling wines.
# Each group is a label-based slice of no_pairings (both bounds inclusive);
# the bounds are hard-coded row labels.
(dry_white_wines,
 dry_red_wines,
 red_wines,
 dessert_wines,
 sparkling_wines) = (
    no_pairings.loc[first_label:last_label]
    for first_label, last_label in ((3, 39), (42, 70), (74, 78), (83, 91), (97, 100))
)

# row labels of each group
dw_wines_idx = dry_white_wines.index
dr_wines_idx = dry_red_wines.index
r_wines_idx = red_wines.index
des_wines_idx = dessert_wines.index
sp_wines_idx = sparkling_wines.index


In [325]:
# Pull each parent wine category's row from dish_pairing_df by wine name.
# Row labels noted for these parents: dry_white_wine -> 1, dry_red_wine -> 41,
# red_wine -> 40, dessert_wine -> 82, sparkling_wine -> 95.
wine_names = dish_pairing_df['wine']

d_w_wine = dish_pairing_df.loc[wine_names == 'dry_white_wine']
d_r_wine = dish_pairing_df.loc[wine_names == 'dry_red_wine']
r_wine = dish_pairing_df.loc[wine_names == 'red_wine']
des_wine = dish_pairing_df.loc[wine_names == 'dessert_wine']
sp_wine = dish_pairing_df.loc[wine_names == 'sparkling_wine']

# show the red wine parent row
r_wine

Unnamed: 0,wine,pairing,description
40,red_wine,"[stew, crab, tuna, carp, fish]","It goes especially well with stew, crab, tuna..."


In [326]:
dish_pairing_df.loc[1][1:3]

pairing                          [stew, crab, carp, fish, clams]
description     It goes especially well with stew, crab, carp...
Name: 1, dtype: object

In [357]:
# def update_pair_desc(wines_idx):   
#     for x in wines_idx:
#         try:
#             if wines_idx == dw_wines_idx:
#                 dish_pairing_df.at[x][1:3] = dish_pairing_df.loc[1][1:3]
#             elif wines_idx == r_wines_idx:
#                 dish_pairing_df.at[x][1:3] = dish_pairing_df.loc[40][1:3]
#             elif wines_idx == dr_wines_idx:
#                 dish_pairing_df.at[x][1:3] = dish_pairing_df.loc[41][1:3]
#             elif wines_idx == des_wines_idx:
#                 dish_pairing_df.at[x][1:3] = dish_pairing_df.loc[82][1:3]
#             else: #wines_idx == sp_wines_idx:
#                 dish_pairing_df.at[x][1:3] = dish_pairing_df.loc[95][1:3]
#         except (KeyError, ValueError):
#             print("This function did not work")
            

In [360]:
# wines_to_pair = [dw_wines_idx, dr_wines_idx, r_wines_idx, des_wines_idx, sp_wines_idx] 
# for wine in wines_to_pair:
#     update_pair_desc(wine)

In [361]:
# create function to update all the wines with no pairings
# insert suggested pairings from the parent categories
def update_pair_desc(wines_idx, wine_loc, df=None):
    """Copy a parent wine's pairing and description onto other rows.

    The values are written with ``.at`` directly on the dataframe. Assigning
    through ``df.loc[x][1:3] = ...`` writes to a temporary Series and can
    leave the dataframe unchanged.

    Args:
        wines_idx: iterable of row labels whose 'pairing' and 'description'
            cells are overwritten.
        wine_loc: row label of the parent wine that supplies the values.
        df: dataframe to update in place. Defaults to the notebook-level
            ``dish_pairing_df``.

    Returns:
        None. The dataframe is modified in place.
    """
    if df is None:
        df = dish_pairing_df

    # read the parent's values once; every child row receives the same objects
    parent_values = {
        column: df.at[wine_loc, column]
        for column in ('pairing', 'description')
    }
    for x in wines_idx:
        for column, value in parent_values.items():
            df.at[x, column] = value

In [362]:
# insert parent wine data into the child elements with no pairings
# each entry: (row labels of the children, row label of the parent wine)
parent_assignments = [
    (dw_wines_idx, 1),
    (r_wines_idx, 40),
    (dr_wines_idx, 41),
    (des_wines_idx, 82),
    (sp_wines_idx, 95),
]
for child_idx, parent_loc in parent_assignments:
    update_pair_desc(child_idx, parent_loc)

In [364]:
# extract data for remaining wines with no pairings
dish_pairing_df.loc[(dish_pairing_df['pairing'] == 'failure')]

Unnamed: 0,wine,pairing,description
105,vermouth,failure,No dish pairings found for wine vermouth
106,dry_vermouth,failure,No dish pairings found for wine dry_vermouth
107,fruit_wine,failure,No dish pairings found for wine fruit_wine
108,mead,failure,No dish pairings found for wine mead


In [408]:
# research pairings and create new data to insert into main dish pairing df
# Each 'pairing' is a list with one dish per element, the same shape as the
# 'pairings' lists returned by the API.
custom_pairings = [
    {
        "wine": "vermouth",
        "pairing": ["oysters", "crab", "prawns"],
        "description": "Served on the rocks they’re a natural match for seafood especially shellfish like oysters, crab and prawns",
    },
    {
        "wine": "mead",
        "pairing": ["steak", "stew", "chili"],
        # Adjacent string literals are concatenated, so the text can be wrapped
        # in the source without embedding the indentation in the description.
        "description": (
            "Dry meads go with dinner, sweet meads are for after dinner and sparkling meads are for drinking on their own. "
            "A mead like Antinomy is an excellent choice as a pairing wine with steaks, stews, chili and other dark, heavy meals. "
            "You could also try Skol, but it may not pair quite as perfectly as Antinomy."
        ),
    },
]
# a list of records goes straight to the DataFrame constructor
custom = pd.DataFrame(custom_pairings)
custom

Unnamed: 0,wine,pairing,description
0,vermouth,"[oysters, crab, prawns]",Served on the rocks they’re a natural match fo...
1,mead,"[steak, stew, chili]","Dry meads go with dinner, sweet meads are for ..."


In [448]:
# Insert the researched pairings for vermouth and mead.
# Rows are located by wine name and written with .at, so the values land in
# dish_pairing_df itself (assigning through df.loc[label][1:3] writes to a
# temporary Series) and the cell gives the same result when it is re-run.
for custom_row in custom.itertuples(index=False):
    matching_labels = dish_pairing_df.index[dish_pairing_df['wine'] == custom_row.wine]
    for row_label in matching_labels:
        dish_pairing_df.at[row_label, 'pairing'] = custom_row.pairing
        dish_pairing_df.at[row_label, 'description'] = custom_row.description

# Drop rows for fruit wine and dry vermouth because they did not turn up good
# results, then renumber the rows. Filtering by name instead of by the labels
# 106/107 keeps this correct after the index has been reset.
wines_without_pairings = ['dry_vermouth', 'fruit_wine']
dish_pairing_df = (
    dish_pairing_df
    .loc[~dish_pairing_df['wine'].isin(wines_without_pairings)]
    .reset_index(drop=True)
)

In [422]:
pwd

'C:\\Users\\a_mcr\\Desktop\\Bootcamp\\Group_Projects\\Project-3\\asmvm_files'

In [449]:
dish_pairing_df

Unnamed: 0,wine,pairing,description
0,white_wine,"[pho, stew, crab, carp, fish]","It goes especially well with pho, stew, crab,..."
1,dry_white_wine,"[stew, crab, carp, fish, clams]","It goes especially well with stew, crab, carp..."
2,assyrtiko,"[gyros, greek, souvlaki, moussaka, spanakopita]",Assyrtiko is a dry white wine which is crisp a...
3,pinot_blanc,"[stew, crab, carp, fish, clams]","It goes especially well with stew, crab, carp..."
4,cortese,"[stew, crab, carp, fish, clams]","It goes especially well with stew, crab, carp..."
...,...,...,...
102,sherry,"[turnover, gazpacho, onion tart, chocolate sou...","It goes especially well with turnover, gazpac..."
103,cream_sherry,"[turnover, hand pies, onion tart, pastry pillo...","It goes especially well with turnover, hand p..."
104,dry_sherry,[gazpacho],It goes especially well with gazpacho.
105,vermouth,"[oysters, crab, prawns]",Served on the rocks they’re a natural match fo...


In [450]:
# create csv file of wine pairing data for import in MongoDB
# NOTE: to_csv writes the row index as an unnamed first column by default
dish_pairing_df.to_csv('dish_pairing_for_wine.csv')