<a href="https://colab.research.google.com/github/Joshua-K-Bowles/Vivino-Webscrape-and-Data-Analysis/blob/main/Vivino_Web_Scraper_with_Ratings.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Vivino: Web Scraping for Spanish Wine

Thanks to the wonderful community at Stack Overflow: this notebook builds on the code shared in [this Stack Overflow answer](https://stackoverflow.com/questions/69358974/scraping-the-english-vivino-com-reviews-from-the-website/69369414#69369414).

Original Authors: 
[B Moeskops](https://stackoverflow.com/users/16976919/b-moeskops), [Andrej Kesely](https://stackoverflow.com/users/10035985/andrej-kesely) and [DisappointedByUnaccountableMod](https://stackoverflow.com/users/2318649/disappointedbyunaccountablemod)



If you are running this notebook in Google Colab, mount your Google Drive first (see the cell below):

In [None]:
# Colab-only step: mount Google Drive at the relative path 'drive'.
# The export cell further down copies its CSV into "drive/My Drive/".
from google.colab import drive
drive.mount('drive')

Mounted at drive


Data Scrape:

In [None]:
# Import packages
import requests
import json
import pandas as pd
import time

# Fetch one page of reviews for a single wine vintage from the Vivino API
def get_wine_data(wine_id, year, page, timeout=30):
    """Request one page of reviews for a wine vintage and return the decoded JSON.

    Args:
        wine_id: Vivino wine identifier, interpolated into the URL path.
        year: Vintage year, sent as the ``year`` query parameter.
        page: Page number to request; the URL asks for 50 reviews per page.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` can block indefinitely on a stalled connection.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: if the server answers with a 4xx/5xx status.
        requests.Timeout: if the server does not answer within ``timeout``.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:89.0) Gecko/20100101 Firefox/89.0",
    }

    api_url = f"https://www.vivino.com/api/wines/{wine_id}/reviews?per_page=50&year={year}&page={page}"
    print(api_url)

    response = requests.get(api_url, headers=headers, timeout=timeout)
    # Surface HTTP errors (rate limiting, blocking, ...) as such instead of as a
    # confusing JSON decode error or a missing-key error further downstream.
    response.raise_for_status()

    return response.json()


# Query the Vivino "explore" endpoint for the first page of wines matching the
# filters below. The response is kept in `r`; the extraction step reads `r.json()`.
r = requests.get(
    "https://www.vivino.com/api/explore/explore",
    params={
        "country_codes[]": ["es"],  # "FR", "IT", "DE", "CL", "PT", "AU", "AT", "AR", "US" <-- can add more country codes here
        "currency_code": "EUR",
        "grape_filter": "varietal",
        "min_rating": "1",
        "order_by": "price",
        "order": "asc",
        "page": 1,  # only this single page of matches is requested
        "price_range_max": "500",
        "price_range_min": "0",
        "wine_type_ids[]": "1",  # NOTE(review): presumably the id for red wine; confirm
    },
    headers={
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:66.0) Gecko/20100101 Firefox/66.0"
    },
    # Never wait forever on a stalled connection.
    timeout=30,
)
# Stop here on a 4xx/5xx answer rather than failing later while parsing the body.
r.raise_for_status()

# Variables to scrape from the Vivino website
def _dig(node, *keys):
    """Follow `keys` through nested dicts/lists; return None at the first missing or null step."""
    for key in keys:
        try:
            node = node[key]
        except (KeyError, IndexError, TypeError):
            # KeyError: key absent; IndexError: empty list; TypeError: parent is None.
            return None
    return node


def _wine_row(match):
    """Flatten one explore match into the 14-field tuple used to build the wine table.

    The vintage year, wine id and wine name are read strictly because the year and
    id are later used to fetch reviews and to join the tables; a match without them
    still raises. Every other field is optional: if its parent object (`winery`,
    `statistics`, `prices`, `style`, `region`) is missing, null or empty, the field
    becomes None instead of aborting the whole extraction.
    """
    vintage = match["vintage"]
    wine = vintage["wine"]
    return (
        _dig(wine, "winery", "name"),
        vintage["year"],
        wine["id"],
        f'{wine["name"]} {vintage["year"]}',
        _dig(vintage, "statistics", "ratings_average"),
        _dig(vintage, "statistics", "ratings_count"),
        _dig(match, "prices", 0, "amount"),  # first listed price only
        _dig(wine, "style", "acidity"),
        _dig(wine, "style", "blurb"),
        _dig(wine, "style", "body"),
        _dig(wine, "style", "body_description"),
        _dig(wine, "region", "country", "name"),
        _dig(wine, "region", "name"),
        _dig(wine, "style", "description"),
    )


# One tuple per wine in the explore response, in the order the API returned them.
results = [_wine_row(t) for t in r.json()["explore_vintage"]["matches"]]

# Wine-level table: one row per extracted wine tuple, one column per field,
# named in the same order as the fields appear in each tuple.
dataframe = pd.DataFrame(
    data=results,
    columns=[
        "Winery",
        "Year",
        "Wine ID",
        "Wine",
        "Rating",
        "num_review",
        "price",
        "acidity",
        "Blurb",
        "Body",
        "Body_des",
        "Country",
        "Region",
        "wine_des",
    ],
)

# Scraping the reviews from the Vivino website
# Collects one [Year, Wine ID, rating, note, created_at] row per review.
ratings = []

# Only the wine id and the vintage year are needed per wine, so iterate over
# those two columns directly instead of materialising every row with iterrows().
for wine_id, year in zip(dataframe["Wine ID"], dataframe["Year"]):
    page = 1
    # Keep requesting pages until the API returns an empty "reviews" list.
    while True:
        print(f'Getting info about wine {wine_id}-{year} Page {page}')

        review_page = get_wine_data(wine_id, year, page)

        if not review_page["reviews"]:
            break

        # The loop variable is deliberately NOT called `r`: that name holds the
        # explore response, and overwriting it here would break any later re-run
        # of the extraction step that reads `r.json()`.
        for review in review_page["reviews"]:
            # To keep only English reviews, skip entries whose
            # review["language"] is not "en" here.
            ratings.append(
                [
                    year,
                    wine_id,
                    review["rating"],
                    review["note"],
                    review["created_at"],
                ]
            )

        page += 1


# Turn the collected review rows into a table (rebinds `ratings` from list to DataFrame).
ratings = pd.DataFrame(
    data=ratings,
    columns=["Year", "Wine ID", "User Rating", "Note", "CreatedAt"],
)

# Merging the two datasets; results and ratings.
# "Year" and "Wine ID" are the only columns the two tables share, so they are
# the join keys; spelling them out keeps the join stable if columns are added.
df_out = ratings.merge(dataframe, how="inner", on=["Year", "Wine ID"])



In [None]:
# Write the merged table to a local CSV file, without the DataFrame index column.
df_out.to_csv("data3.csv", index=False)
# Shell copy of that CSV into "My Drive" under the 'drive' mount point (needs the Drive mount cell to have run).
!cp data3.csv "drive/My Drive/"

In [None]:
# Show the column labels of the merged table.
df_out.columns

Index(['User Rating', 'Note', 'CreatedAt', 'Winery', 'Year', 'Wine ID', 'Wine',
       'Rating', 'num_review', 'price', 'acidity', 'Blurb', 'Body', 'Body_des',
       'Country', 'Region', 'wine_des'],
      dtype='object')

In [None]:
# Plain-text dump of the merged table; pandas truncates the rows and columns it prints.
print(df_out)

       Year  ...                                           wine_des
0      2014  ...  The savory red wines of Spain are brooding and...
1      2014  ...  The savory red wines of Spain are brooding and...
2      2014  ...  The savory red wines of Spain are brooding and...
3      2014  ...  The savory red wines of Spain are brooding and...
4      2014  ...  The savory red wines of Spain are brooding and...
...     ...  ...                                                ...
47789  2015  ...                                               None
47790  2015  ...                                               None
47791  2015  ...                                               None
47792  2015  ...                                               None
47793  2015  ...                                               None

[47794 rows x 17 columns]


In [None]:
# Display the merged table as the cell's output.
df_out

Unnamed: 0,Year,Wine ID,User Rating,Note,CreatedAt,Winery,Wine,Rating,num_review,price,acidity,Blurb,Body,Body_des,Country,Region,wine_des
0,2014,2356292,3.5,I agree with most of the previous ratings; tob...,2016-10-30T12:12:53.000Z,Luzon,Finca Luzon 2014,3.1,489,7.933702,3,Often Tempranillo-based,4,Full-bodied,Spain,Jumilla,The savory red wines of Spain are brooding and...
1,2014,2356292,3.5,I wanted to try a Jumilla as I am lately readi...,2016-09-25T15:17:56.000Z,Luzon,Finca Luzon 2014,3.1,489,7.933702,3,Often Tempranillo-based,4,Full-bodied,Spain,Jumilla,The savory red wines of Spain are brooding and...
2,2014,2356292,3.5,"Powerhouse with taste of cherries, laurel, clo...",2016-04-24T15:12:46.000Z,Luzon,Finca Luzon 2014,3.1,489,7.933702,3,Often Tempranillo-based,4,Full-bodied,Spain,Jumilla,The savory red wines of Spain are brooding and...
3,2014,2356292,3.5,Potente pero bastante agradable.,2016-05-28T13:14:15.000Z,Luzon,Finca Luzon 2014,3.1,489,7.933702,3,Often Tempranillo-based,4,Full-bodied,Spain,Jumilla,The savory red wines of Spain are brooding and...
4,2014,2356292,3.5,"Easy drinking and approachable, typical budget...",2017-08-09T20:29:43.000Z,Luzon,Finca Luzon 2014,3.1,489,7.933702,3,Often Tempranillo-based,4,Full-bodied,Spain,Jumilla,The savory red wines of Spain are brooding and...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
47789,2015,1236683,1.0,Malo. Malo de verdad. Ácido. Muy malo.,2019-03-24T15:39:58.000Z,Alvarez de Toledo,Mencia Roble 2015,3.6,459,8.816205,3,,3,Medium-bodied,Spain,Bierzo,
47790,2015,1236683,2.0,Świeże. I tyle.,2015-04-01T20:49:45.000Z,Alvarez de Toledo,Mencia Roble 2015,3.6,459,8.816205,3,,3,Medium-bodied,Spain,Bierzo,
47791,2015,1236683,4.0,Muito bom Paiva,2014-11-01T04:45:43.000Z,Alvarez de Toledo,Mencia Roble 2015,3.6,459,8.816205,3,,3,Medium-bodied,Spain,Bierzo,
47792,2015,1236683,4.0,Maravilhoso. Combina com filé ou queijo Brie,2014-11-12T23:21:27.000Z,Alvarez de Toledo,Mencia Roble 2015,3.6,459,8.816205,3,,3,Medium-bodied,Spain,Bierzo,


In [None]:
# Display the "Region" column of the merged table.
df_out.Region

0        Castilla y León
1        Castilla y León
2        Castilla y León
3        Castilla y León
4        Castilla y León
              ...       
56361            Jumilla
56362            Jumilla
56363            Jumilla
56364            Jumilla
56365            Jumilla
Name: Region, Length: 56366, dtype: object