**Extracting reviews from the Metacritic website:**

In [None]:
# Importing necessary libraries
import requests  # Making HTTP requests
from bs4 import BeautifulSoup  # Parsing HTML and XML documents
from fake_useragent import UserAgent  # Generating fake user agents to mimic browser behavior
import pandas as pd  
import time 

# Scrapes every page of user reviews reachable from `url` and stores the
# review text, rating and date block of each review in an Excel file.

# Setting the URL of the webpage from which we're scraping data
# (placeholder: must be filled in before running, requests rejects an empty URL)
url = ''
# Generating a random user-agent header for our HTTP request
ua = UserAgent()
headers = {'User-Agent': ua.chrome}
# Bounding every request so a stalled connection cannot hang the script forever
request_timeout = 30
# Keeping the output path in one place so the confirmation message cannot drift
output_path = 'dataset.xlsx'

# Sending a GET request to the specified URL with the headers
response = requests.get(url, headers=headers, timeout=request_timeout)
# Failing fast on an HTTP error instead of parsing an error page as if it
# were a review listing
response.raise_for_status()
# Parsing the response content with BeautifulSoup using 'html.parser'
soup = BeautifulSoup(response.content, 'html.parser')
# Locating the last page entry of the page navigation
last_page_tag = soup.find('li', class_='page last_page')

if last_page_tag is None:
    # No pagination element was found: treat the listing as a single page
    # rather than crashing on `None.get_text()`
    last_page = 1
else:
    last_page_text = last_page_tag.get_text().strip()
    # When the navigation is abbreviated with an ellipsis ('…') the number
    # follows it; without an ellipsis split() returns the whole text
    last_page = int(last_page_text.split('…')[-1])

# Initializing a list to store data collected from each page
data = []

# Iterating over each page by updating the page query parameter.
# `last_page` is a page count, so the parameter is stepped by 1 starting at 0
# (stepping by 10 skipped pages 1-9 and requested pages that do not exist).
# NOTE(review): assumes the site uses a zero-based page index (?page=0 is the
# first page) - confirm against the target URL.
for page in range(last_page):
    # Constructing the URL for the current page by appending the page number
    page_url = f'{url}?page={page}'
    try:
        # Making a GET request to fetch the page content
        response = requests.get(page_url, headers=headers, timeout=request_timeout)
        response.raise_for_status()
    except requests.RequestException as error:
        # Skipping a failed page so the reviews collected so far are not lost
        print(f'Skipping {page_url}: {error}')
        time.sleep(1)
        continue

    # Parsing the fetched page content
    soup = BeautifulSoup(response.content, 'html.parser')
    # Finding all div elements that contain user reviews
    user_reviews = soup.find_all('div', class_='review_body')
    # Finding all div elements that contain user ratings
    user_rates = soup.find_all('div', class_='review_grade')
    # Finding all div elements used for the posting date column; the full
    # text of each 'review_critic' div is stored
    user_dates = soup.find_all('div', class_='review_critic')

    # The three lists are matched purely by position, so a length mismatch
    # means some rows may be misaligned; report it instead of raising IndexError
    if not (len(user_reviews) == len(user_rates) == len(user_dates)):
        print(
            f'Warning: {page_url} returned {len(user_reviews)} reviews, '
            f'{len(user_rates)} ratings and {len(user_dates)} dates'
        )

    # Extracting text data from each user review and appending it to our data list
    for review_tag, rate_tag, date_tag in zip(user_reviews, user_rates, user_dates):
        data.append({
            'User Review': review_tag.get_text().strip(),
            'User Rate': rate_tag.get_text().strip(),
            'Post Date': date_tag.get_text().strip(),
        })

    # Pausing the script for 1 second to reduce load on the server
    time.sleep(1)

# Converting the list of data into a DataFrame
df = pd.DataFrame(data)
# Saving the DataFrame to an Excel file
df.to_excel(output_path, index=False)
# Printing the total number of comments saved as a confirmation
print(f'{len(data)} comments saved to {output_path}')


**Translating Non-English reviews to English:**

In [None]:
# Importing libraries 
import pandas as pd  
from langdetect import detect  # Detecting the language of texts
from googletrans import Translator  # Translating text using Google Translate API

# Translates every non-English entry of the 'User Review' column to English
# in place and writes the result to a new Excel file.

# Loading dataset
# (placeholder: must be filled in with the path of the Excel file to translate)
df = pd.read_excel('')

# Creating a Translator object specifying Google's translate service URL
translator = Translator(service_urls=['translate.google.com'])
# Setting a minimum length threshold for texts to translate
length_threshold = 3

# Setting a default language in case detection fails
default_language = 'en'

# Iterating over each review together with its row label
for i, review in df['User Review'].items():
    # Skipping cells that are not text (e.g. empty cells)
    if not isinstance(review, str):
        continue

    # Measuring the length without surrounding whitespace so padded
    # one- or two-character reviews are skipped as well
    stripped_review = review.strip()
    if not stripped_review or len(stripped_review) < length_threshold:
        continue

    # Attempting to detect the language of the review
    try:
        detected_language = detect(review)
    except Exception:
        # Using the default language if detection fails; `Exception` rather
        # than a bare `except` so Ctrl+C still interrupts the loop
        detected_language = default_language

    # Leaving reviews that are already in English untouched
    if detected_language == 'en':
        continue

    try:
        # Translating the review and updating the DataFrame with the translated text
        translated_review = translator.translate(review, dest='en').text
        print(f"Translated review: {translated_review}")
        df.at[i, 'User Review'] = translated_review
    except Exception as e:
        # Logging the failure and keeping the original text for this row
        print(f"Translation error occurred for review: {review}")
        print(f"Error message: {e}")

# Saving the updated DataFrame with translated reviews to an Excel file
df.to_excel('translated_dataset.xlsx', index=False)
# Confirming that the dataset has been saved
print('Translated dataset saved to translated_dataset.xlsx')
