In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time

# Function to scrape reviews from a single page
def scrape_page(url):
    """Download one review-listing page and extract the fields of each review card.

    Args:
        url: Address of the page to fetch.

    Returns:
        A list with one dict per review card found on the page, keyed by
        'Author Name', 'Review Message', 'Review Date' and 'Star Rating'.
        Any field whose element is missing from a card is reported as "N/A".
        The list is empty when the page contains no review cards.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """

    def text_or_na(card, tag, css_class):
        # find() returns None when the element is absent from this card.
        node = card.find(tag, class_=css_class)
        return node.text.strip() if node is not None else "N/A"

    # The timeout keeps a stalled connection from blocking the run forever;
    # raise_for_status() surfaces blocked or missing pages instead of letting
    # an error page be parsed into zero reviews.
    response = requests.get(url, headers={"User-Agent": "Mozilla/5.0"}, timeout=30)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')
    reviews = soup.find_all('div', class_='styles_reviewCardInner__UZk1x')

    # Collect one record per review card on this page
    page_data = []

    for review in reviews:
        author_name = text_or_na(
            review, 'span',
            'typography_heading-xxs__UmE9o typography_appearance-default__t8iAq',
        )
        review_text = text_or_na(
            review, 'p',
            'typography_body-l__v5JLj typography_appearance-default__t8iAq typography_color-black__wpn7m',
        )
        date_published = text_or_na(
            review, 'p',
            'typography_body-m__k2UI7 typography_appearance-default__t8iAq',
        )

        # The rating is read from the alt text of the image in the review
        # header. Each step is checked so that a missing header, a missing
        # <img>, or an <img> without an alt attribute all yield "N/A" rather
        # than an exception that would discard the whole page.
        header = review.find('div', class_='styles_reviewHeader__xV2js')
        rating_img = header.find('img') if header is not None else None
        star_rating = rating_img.get('alt', "N/A") if rating_img is not None else "N/A"

        page_data.append({
            'Author Name': author_name,
            'Review Message': review_text,
            'Review Date': date_published,
            'Star Rating': star_rating,
        })

    return page_data

# Main function to scrape multiple pages
def scrape_multiple_pages(base_url, num_pages, delay=2):
    """Scrape pages 1..num_pages of a review listing and pool the results.

    Each page address is built as ``f"{base_url}?page={page_num}"`` and passed
    to ``scrape_page``. A page that raises is reported on stdout and skipped,
    so one bad page does not abort the whole run.

    Args:
        base_url: Listing address without the ``?page=`` query string.
        num_pages: Number of pages to request, starting at page 1. Values
            of 0 or less request nothing.
        delay: Seconds to wait between consecutive requests.

    Returns:
        A single list holding the records returned for every page that was
        scraped successfully, in page order.
    """
    all_data = []  # Records gathered across all pages
    for page_num in range(1, num_pages + 1):
        url = f"{base_url}?page={page_num}"
        print(f"\nScraping reviews from page {page_num}: {url}\n")
        try:
            all_data.extend(scrape_page(url))
        except Exception as e:
            # Best effort: report the failed page and carry on with the rest.
            print(f"An error occurred while scraping {url}: {e}")
        # Pause after failed requests too, so an error (e.g. a rate-limit
        # response) is not followed immediately by the next request. No wait
        # is needed once the last page has been handled.
        if page_num < num_pages:
            time.sleep(delay)
    return all_data

# Example usage
# Review listing to scrape; scrape_multiple_pages appends "?page=<n>" to this
# address for each page it requests.
base_url = 'https://www.trustpilot.com/review/www.tescobank.com'
num_pages = 1500  # Number of pages to scrape

# Request pages 1..num_pages and pool every review record into one list
data = scrape_multiple_pages(base_url, num_pages)

# Create and save DataFrame from the collected data
# (both files are written to the current working directory, without the index)
df = pd.DataFrame(data)
df.to_csv('Tescobank.csv', index=False)
# NOTE(review): to_excel needs an Excel writer engine (e.g. openpyxl) to be
# installed in the runtime — confirm for the target environment.
df.to_excel('Tescobank.xlsx', index=False)
print('Saved to file.')


In [None]:
from google.colab import files

# Download both exported files: the CSV first, then the Excel workbook
for filename in ('Tescobank.csv', 'Tescobank.xlsx'):
    files.download(filename)