In [None]:
# import libraries
import pandas as pd
import requests
from bs4 import BeautifulSoup as bs

In [None]:
# Load the country / continent table; the file is decoded as latin-1
country_list = pd.read_csv(
    "./country.csv",
    encoding="latin-1",
)

In [None]:
# Preview the first five rows of the loaded table
country_list.head(n=5)

In [None]:
# Normalise both name and continent to lowercase with the vectorised string
# accessor. Unlike applying str.lower element-wise, .str.lower() propagates
# missing values (NaN) instead of raising TypeError on them.
country_list["name"] = country_list["name"].str.lower()
country_list["continent"] = country_list["continent"].str.lower()

In [None]:
"""
    Input: dataframe
    Output: Dictionary
    
    Working: 
    1. Replace spaces in the country name with "-"
    2. Build the url by joining base_url, continent and the processed name
    3. Using BeautifulSoup, scrape the page, keeping only the <p> tags at positions 4:10
    4. Join the text of those tags and save it to the dict under the country name
"""

def data_scraping(df, base_url="https://www.worldtravelguide.net/guides/",
                  paragraph_slice=slice(4, 10), timeout=10):
    """Scrape a short text review for every country listed in ``df``.

    For each row the page ``base_url + continent + "/" + name + "/"`` is
    requested (spaces in the name are replaced by "-"), the ``<p>`` tags
    selected by ``paragraph_slice`` are extracted, and their text is joined
    with single spaces.

    Args:
        df: DataFrame with a "name" and a "continent" column.
        base_url: Prefix of every guide URL; must end with "/".
        paragraph_slice: Slice applied to the list of all ``<p>`` tags on
            the page to pick the ones that form the review.
        timeout: Seconds to wait for the server before giving up on a page.

    Returns:
        dict mapping each country name (as found in ``df``) to its review
        text, or to ``None`` when the page could not be fetched (network
        error, timeout, or an HTTP error status such as 404).

    Raises:
        KeyError: if ``df`` has rows but lacks a "name" or "continent" column.
    """
    review_dict = {}

    # Nothing to fetch: avoid opening a session at all.
    if len(df) == 0:
        return review_dict

    # One session for the whole run so connections to the host are reused.
    with requests.Session() as session:
        # Iterate the two columns directly instead of building a row Series
        # per lookup with df.iloc[i][...].
        for name, continent in zip(df["name"], df["continent"]):
            processed_name = name.replace(" ", "-")
            url = base_url + continent + "/" + processed_name + "/"

            try:
                # A timeout keeps one stalled connection from hanging the run.
                response = session.get(url, timeout=timeout)
                # Do not scrape the body of an error page as if it were a review.
                response.raise_for_status()
            except requests.RequestException:
                # Record the failure and keep going so one bad page does not
                # discard every review collected so far.
                review_dict[name] = None
                continue

            soup = bs(response.content, "html.parser")
            paragraphs = soup.find_all("p")[paragraph_slice]
            review_dict[name] = " ".join(p.get_text() for p in paragraphs)

    return review_dict

In [None]:
# Scrape a review for every listed country, then show how many entries came back
review_dict = data_scraping(df=country_list)
len(review_dict)

In [None]:
# Attach each country's scraped review by name, then count the missing ones.
# A count is displayed rather than the boolean Series itself, because pandas
# truncates long Series in the cell output and can hide missing entries.
country_list["review"] = country_list["name"].map(review_dict)
country_list["review"].isna().sum()

In [None]:
# Write the table (name, continent, review) to disk without the index column
country_list.to_csv(
    "processed_country.csv",
    index=False,
)