In [None]:
import requests
import os
import csv
from bs4 import BeautifulSoup

def fetchAndSaveToFile(url, path):
    """Download `url` and write the response body to `path` as UTF-8 text.

    Raises requests.HTTPError on a 4xx/5xx response so an error page is never
    saved as if it were the requested document.
    """
    # A timeout keeps a stalled connection from hanging the notebook cell.
    r = requests.get(url, timeout=30)
    r.raise_for_status()
    # os.makedirs('') raises FileNotFoundError, so skip it for bare filenames.
    parent_dir = os.path.dirname(path)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)
    with open(path, 'w', encoding='utf-8') as f:
        f.write(r.text)


def scrapeReviewsFromHtml(file_path, csv_path, max_reviews=100):
    """Extract reviews from a saved HTML page and write them to a CSV file.

    Each <div data-hook="review"> contributes one row of
    [username, review date, review text]. Blocks missing any of the three
    fields are skipped, and at most `max_reviews` rows are written
    (pass None for no limit).
    """
    with open(file_path, 'r', encoding='utf-8') as f:
        soup = BeautifulSoup(f, 'html.parser')

    field_selectors = (
        {'class': 'a-profile-name'},
        {'data-hook': 'review-date'},
        {'data-hook': 'review-body'},
    )
    reviews = []
    for review in soup.find_all('div', {'data-hook': 'review'}):
        if max_reviews is not None and len(reviews) >= max_reviews:
            break
        spans = [review.find('span', selector) for selector in field_selectors]
        # find() returns None for an absent element; skip the block instead
        # of crashing on `None.text`.
        if any(span is None for span in spans):
            continue
        reviews.append([span.text.strip() for span in spans])

    with open(csv_path, 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        writer.writerow(['Username', 'Review Date', 'Review'])
        writer.writerows(reviews)

# Amazon.in review listing for product B0BCF57FL5 (Intel i7-13700K).
url = "https://www.amazon.in/Intel-i7-13700K-Desktop-Processor-P-cores/product-reviews/B0BCF57FL5/ref=cm_cr_dp_d_show_all_btm?ie=UTF8&reviewerType=all_reviews"
# Where the raw page is cached and where the parsed rows are written.
html_path = "data/times.html"
csv_path = "data/reviews.csv"

# Download the review page and store the raw HTML at html_path
fetchAndSaveToFile(url, html_path)

# Parse the saved HTML and write the extracted reviews to csv_path
scrapeReviewsFromHtml(html_path, csv_path)


In [None]:
import requests
import csv
from bs4 import BeautifulSoup
import os
import time

def fetchReviewsFromUrl(url, timeout=30):
    """Fetch `url` with a desktop-browser User-Agent and return the body text.

    `timeout` (seconds) is passed to requests.get so a stalled connection
    cannot block the cell forever. Raises requests.HTTPError on a 4xx/5xx
    status.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.93 Safari/537.36"
    }
    response = requests.get(url, headers=headers, timeout=timeout)
    response.raise_for_status()  # Ensure we notice bad responses
    return response.text

def parseReviews(html):
    """Return [username, review date, review text] rows parsed from `html`.

    Every <div data-hook="review"> is inspected; a block missing any of the
    three expected <span> elements is skipped rather than aborting the parse.
    """
    soup = BeautifulSoup(html, 'html.parser')
    field_selectors = (
        {'class': 'a-profile-name'},
        {'data-hook': 'review-date'},
        {'data-hook': 'review-body'},
    )
    reviews = []
    for review in soup.find_all('div', {'data-hook': 'review'}):
        spans = [review.find('span', selector) for selector in field_selectors]
        # find() returns None when the element is absent; `None.text` would
        # raise AttributeError and discard every row collected so far.
        if any(span is None for span in spans):
            continue
        reviews.append([span.text.strip() for span in spans])
    return reviews

def scrapeReviewsToCsv(url, csv_path, max_reviews=None):
    """Scrape paginated reviews starting from `url` and write them to `csv_path`.

    The product path is everything in `url` before '/ref='. Pages 1, 2, ...
    are requested in order until one yields no reviews or, when `max_reviews`
    is given, until that many rows are collected. The default of None keeps
    the earlier behaviour of scraping until an empty page.
    """
    base_url = url.split('/ref=')[0]  # Base URL for navigating pages
    current_page = 1
    all_reviews = []

    while max_reviews is None or len(all_reviews) < max_reviews:
        print(f"Scraping page {current_page}...")
        page_url = f"{base_url}/ref=cm_cr_arp_d_paging_btm_next_{current_page}?ie=UTF8&reviewerType=all_reviews&pageNumber={current_page}"
        html = fetchReviewsFromUrl(page_url)
        reviews = parseReviews(html)
        if not reviews:
            break  # No more reviews to scrape
        all_reviews.extend(reviews)
        current_page += 1
        time.sleep(2)  # Be polite and avoid hammering the server

    # The last page may overshoot the cap, so trim when a cap was requested.
    if max_reviews is not None:
        all_reviews = all_reviews[:max_reviews]

    with open(csv_path, 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        writer.writerow(['Username', 'Review Date', 'Review'])
        writer.writerows(all_reviews)
    print(f"Finished scraping. Total reviews fetched: {len(all_reviews)}")

# Review listing URL; scrapeReviewsToCsv only keeps the part before '/ref='.
url = "https://www.amazon.in/Intel-i7-13700K-Desktop-Processor-P-cores/product-reviews/B0BCF57FL5/ref=cm_cr_dp_d_show_all_btm?ie=UTF8&reviewerType=all_reviews"
csv_path = "data/reviews.csv"

# Make sure the output directory exists before the CSV is opened for writing.
os.makedirs(os.path.dirname(csv_path), exist_ok=True)
scrapeReviewsToCsv(url, csv_path)


Scraping page 1...
Finished scraping. Total reviews fetched: 0


In [None]:
import requests
import os
import csv
from bs4 import BeautifulSoup
import time

def fetchReviewsFromUrl(url, timeout=30):
    """Fetch `url` with a desktop-browser User-Agent and return the body text.

    `timeout` (seconds) is passed to requests.get so a stalled connection
    cannot block the cell forever. Raises requests.HTTPError on a 4xx/5xx
    status.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.93 Safari/537.36"
    }
    response = requests.get(url, headers=headers, timeout=timeout)
    response.raise_for_status()  # Ensure we notice bad responses
    return response.text

def parseReviews(html):
    """Return [username, review date, review text] rows parsed from `html`.

    Every <div data-hook="review"> is inspected; a block missing any of the
    three expected <span> elements is skipped rather than aborting the parse.
    """
    soup = BeautifulSoup(html, 'html.parser')
    field_selectors = (
        {'class': 'a-profile-name'},
        {'data-hook': 'review-date'},
        {'data-hook': 'review-body'},
    )
    reviews = []
    for review in soup.find_all('div', {'data-hook': 'review'}):
        spans = [review.find('span', selector) for selector in field_selectors]
        # find() returns None when the element is absent; `None.text` would
        # raise AttributeError and discard every row collected so far.
        if any(span is None for span in spans):
            continue
        reviews.append([span.text.strip() for span in spans])
    return reviews

def scrapeReviewsToCsv(url, csv_path, max_reviews=1000):
    """Collect up to `max_reviews` reviews across result pages and save them as CSV.

    The product path is taken from `url` (everything before '/ref='); pages
    1, 2, ... are fetched with fetchReviewsFromUrl and parsed with
    parseReviews. Paging ends once `max_reviews` rows are held or a page
    parses to nothing.
    """
    base_url = url.split('/ref=')[0]
    collected = []
    current_page = 1

    while True:
        if len(collected) >= max_reviews:
            break
        print(f"Scraping page {current_page}...")
        page_url = f"{base_url}/ref=cm_cr_getr_d_paging_btm_next_{current_page}?ie=UTF8&reviewerType=all_reviews&pageNumber={current_page}"
        page_rows = parseReviews(fetchReviewsFromUrl(page_url))
        if not page_rows:
            break  # an empty page marks the end of the listing
        collected += page_rows
        current_page += 1
        time.sleep(2)  # pause between requests to go easy on the server

    # The final page can overshoot the cap; drop the surplus rows.
    del collected[max_reviews:]

    with open(csv_path, 'w', newline='', encoding='utf-8') as out:
        csv_out = csv.writer(out)
        csv_out.writerow(['Username', 'Review Date', 'Review'])
        csv_out.writerows(collected)
    print(f"Finished scraping. Total reviews fetched: {len(collected)}")

# Review listing URL; scrapeReviewsToCsv only keeps the part before '/ref='.
url = "https://www.amazon.in/Intel-i7-13700K-Desktop-Processor-P-cores/product-reviews/B0BCF57FL5/ref=cm_cr_dp_d_show_all_btm?ie=UTF8&reviewerType=all_reviews"
csv_path = "data/reviews.csv"

# Make sure the output directory exists before the CSV is opened for writing.
os.makedirs(os.path.dirname(csv_path), exist_ok=True)
scrapeReviewsToCsv(url, csv_path, max_reviews=1000)


Scraping page 1...
Finished scraping. Total reviews fetched: 0


In [None]:
import requests
import os
import csv
from bs4 import BeautifulSoup
import time

def fetchReviewsFromUrl(url, timeout=30):
    """Fetch `url` with a desktop-browser User-Agent and return the body text.

    `timeout` (seconds) is passed to requests.get so a stalled connection
    cannot block the cell forever. Raises requests.HTTPError on a 4xx/5xx
    status.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.93 Safari/537.36"
    }
    response = requests.get(url, headers=headers, timeout=timeout)
    response.raise_for_status()  # Ensure we notice bad responses
    return response.text

def parseReviews(html):
    """Return [username, review date, review text] rows parsed from `html`.

    Every <div data-hook="review"> is inspected; a block missing any of the
    three expected <span> elements is skipped rather than aborting the parse.
    """
    soup = BeautifulSoup(html, 'html.parser')
    field_selectors = (
        {'class': 'a-profile-name'},
        {'data-hook': 'review-date'},
        {'data-hook': 'review-body'},
    )
    reviews = []
    for review in soup.find_all('div', {'data-hook': 'review'}):
        spans = [review.find('span', selector) for selector in field_selectors]
        # find() returns None when the element is absent; `None.text` would
        # raise AttributeError and discard every row collected so far.
        if any(span is None for span in spans):
            continue
        reviews.append([span.text.strip() for span in spans])
    return reviews

def scrapeReviewsToCsv(url, csv_path, max_reviews=1000):
    """Collect up to `max_reviews` reviews across result pages and save them as CSV.

    The product path is taken from `url` (everything before '/ref='); pages
    1, 2, ... are fetched with fetchReviewsFromUrl and parsed with
    parseReviews. Paging ends once `max_reviews` rows are held or a page
    parses to nothing.
    """
    base_url = url.split('/ref=')[0]
    collected = []
    current_page = 1

    while True:
        if len(collected) >= max_reviews:
            break
        print(f"Scraping page {current_page}...")
        page_url = f"{base_url}/ref=cm_cr_getr_d_paging_btm_next_{current_page}?ie=UTF8&reviewerType=all_reviews&pageNumber={current_page}"
        page_rows = parseReviews(fetchReviewsFromUrl(page_url))
        if not page_rows:
            break  # an empty page marks the end of the listing
        collected += page_rows
        current_page += 1
        time.sleep(2)  # pause between requests to go easy on the server

    # The final page can overshoot the cap; drop the surplus rows.
    del collected[max_reviews:]

    with open(csv_path, 'w', newline='', encoding='utf-8') as out:
        csv_out = csv.writer(out)
        csv_out.writerow(['Username', 'Review Date', 'Review'])
        csv_out.writerows(collected)
    print(f"Finished scraping. Total reviews fetched: {len(collected)}")

# Review listing URL; scrapeReviewsToCsv only keeps the part before '/ref='.
url = "https://www.amazon.in/Intel-i7-13700K-Desktop-Processor-P-cores/product-reviews/B0BCF57FL5/ref=cm_cr_dp_d_show_all_btm?ie=UTF8&reviewerType=all_reviews"
csv_path = "data/reviews.csv"

# Make sure the output directory exists before the CSV is opened for writing.
os.makedirs(os.path.dirname(csv_path), exist_ok=True)
scrapeReviewsToCsv(url, csv_path, max_reviews=1000)


Scraping page 1...
Scraping page 2...
Finished scraping. Total reviews fetched: 10


In [None]:
import requests
import os
import csv
from bs4 import BeautifulSoup
import time

def fetchReviewsFromUrl(url, timeout=30):
    """Fetch `url` with a desktop-browser User-Agent and return the body text.

    `timeout` (seconds) is passed to requests.get so a stalled connection
    cannot block the cell forever. Raises requests.HTTPError on a 4xx/5xx
    status.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.93 Safari/537.36"
    }
    response = requests.get(url, headers=headers, timeout=timeout)
    response.raise_for_status()  # Ensure we notice bad responses
    return response.text

def parseReviews(html):
    """Return [username, review date, review text] rows parsed from `html`.

    Every <div data-hook="review"> is inspected; a block missing any of the
    three expected <span> elements is skipped rather than aborting the parse.
    """
    soup = BeautifulSoup(html, 'html.parser')
    field_selectors = (
        {'class': 'a-profile-name'},
        {'data-hook': 'review-date'},
        {'data-hook': 'review-body'},
    )
    reviews = []
    for review in soup.find_all('div', {'data-hook': 'review'}):
        spans = [review.find('span', selector) for selector in field_selectors]
        # find() returns None when the element is absent; `None.text` would
        # raise AttributeError and discard every row collected so far.
        if any(span is None for span in spans):
            continue
        reviews.append([span.text.strip() for span in spans])
    return reviews

def scrapeReviewsToCsv(base_url, csv_path, max_reviews=1000):
    """Scrape up to `max_reviews` reviews page by page and write them to `csv_path`.

    `base_url` is a review-listing URL. Its `pageNumber` query parameter is
    set to 1, 2, ... for successive requests, replacing any value already
    present. Scraping stops when enough reviews are collected or a page
    yields none.
    """
    # Local import: urllib.parse is not among this cell's imports.
    from urllib.parse import parse_qsl, urlencode, urlsplit, urlunsplit

    url_parts = urlsplit(base_url)
    # Drop any pageNumber already in the URL: appending a second one would
    # duplicate the parameter, and a URL without '?' would come out malformed.
    fixed_params = [
        (key, value)
        for key, value in parse_qsl(url_parts.query, keep_blank_values=True)
        if key != 'pageNumber'
    ]
    current_page = 1
    all_reviews = []

    while len(all_reviews) < max_reviews:
        print(f"Scraping page {current_page}...")
        query = urlencode(fixed_params + [('pageNumber', current_page)])
        page_url = urlunsplit(url_parts._replace(query=query))
        html = fetchReviewsFromUrl(page_url)
        reviews = parseReviews(html)
        if not reviews:
            break  # No more reviews to scrape
        all_reviews.extend(reviews)
        current_page += 1
        time.sleep(2)  # Be polite and avoid hammering the server

    # Trim to the max_reviews count
    all_reviews = all_reviews[:max_reviews]

    with open(csv_path, 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        writer.writerow(['Username', 'Review Date', 'Review'])
        writer.writerows(all_reviews)
    print(f"Finished scraping. Total reviews fetched: {len(all_reviews)}")

# NOTE(review): this URL already carries `pageNumber=2`, while
# scrapeReviewsToCsv supplies its own pageNumber for every request.
base_url = "https://www.amazon.in/Intel-i7-13700K-Desktop-Processor-P-cores/product-reviews/B0BCF57FL5/ref=cm_cr_arp_d_paging_btm_prev_2?ie=UTF8&reviewerType=all_reviews&pageNumber=2"
csv_path = "data/reviews.csv"

# Make sure the output directory exists before the CSV is opened for writing.
os.makedirs(os.path.dirname(csv_path), exist_ok=True)
scrapeReviewsToCsv(base_url, csv_path, max_reviews=1000)


Scraping page 1...
Scraping page 2...
Finished scraping. Total reviews fetched: 10


In [None]:
import requests
import os
import csv
from bs4 import BeautifulSoup
import time

def fetchReviewsFromUrl(url, timeout=30):
    """Fetch `url` with a desktop-browser User-Agent and return the body text.

    `timeout` (seconds) is passed to requests.get so a stalled connection
    cannot block the cell forever. Raises requests.HTTPError on a 4xx/5xx
    status.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.93 Safari/537.36"
    }
    response = requests.get(url, headers=headers, timeout=timeout)
    response.raise_for_status()  # Ensure we notice bad responses
    return response.text

def parseReviews(html):
    """Return [username, review date, review text] rows parsed from `html`.

    Every <div data-hook="review"> is inspected; a block missing any of the
    three expected <span> elements is skipped rather than aborting the parse.
    """
    soup = BeautifulSoup(html, 'html.parser')
    field_selectors = (
        {'class': 'a-profile-name'},
        {'data-hook': 'review-date'},
        {'data-hook': 'review-body'},
    )
    reviews = []
    for review in soup.find_all('div', {'data-hook': 'review'}):
        spans = [review.find('span', selector) for selector in field_selectors]
        # find() returns None when the element is absent; `None.text` would
        # raise AttributeError and discard every row collected so far.
        if any(span is None for span in spans):
            continue
        reviews.append([span.text.strip() for span in spans])
    return reviews

def scrapeReviewsToCsv(base_url, csv_path, max_reviews=1000):
    """Scrape up to `max_reviews` reviews page by page and write them to `csv_path`.

    `base_url` is a review-listing URL. Its `pageNumber` query parameter is
    set to 1, 2, ... for successive requests, replacing any value already
    present. Scraping stops when enough reviews are collected or a page
    yields none.
    """
    # Local import: urllib.parse is not among this cell's imports.
    from urllib.parse import parse_qsl, urlencode, urlsplit, urlunsplit

    url_parts = urlsplit(base_url)
    # Drop any pageNumber already in the URL: appending a second one would
    # duplicate the parameter, and a URL without '?' would come out malformed.
    fixed_params = [
        (key, value)
        for key, value in parse_qsl(url_parts.query, keep_blank_values=True)
        if key != 'pageNumber'
    ]
    current_page = 1
    all_reviews = []

    while len(all_reviews) < max_reviews:
        print(f"Scraping page {current_page}...")
        query = urlencode(fixed_params + [('pageNumber', current_page)])
        page_url = urlunsplit(url_parts._replace(query=query))
        html = fetchReviewsFromUrl(page_url)
        reviews = parseReviews(html)
        if not reviews:
            break  # No more reviews to scrape
        all_reviews.extend(reviews)
        current_page += 1
        time.sleep(2)  # Be polite and avoid hammering the server

    # Trim to the max_reviews count
    all_reviews = all_reviews[:max_reviews]

    with open(csv_path, 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        writer.writerow(['Username', 'Review Date', 'Review'])
        writer.writerows(all_reviews)
    print(f"Finished scraping. Total reviews fetched: {len(all_reviews)}")

# Base URL without a pageNumber parameter; scrapeReviewsToCsv supplies the
# page number for each request.
base_url = "https://www.amazon.in/Intel-i7-13700K-Desktop-Processor-P-cores/product-reviews/B0BCF57FL5/ref=cm_cr_arp_d_paging_btm_prev_2?ie=UTF8&reviewerType=all_reviews"
csv_path = "data/reviews.csv"

# Make sure the output directory exists before the CSV is opened for writing.
os.makedirs(os.path.dirname(csv_path), exist_ok=True)
scrapeReviewsToCsv(base_url, csv_path, max_reviews=1000)


Scraping page 1...
Scraping page 2...
Finished scraping. Total reviews fetched: 10


In [None]:
import requests
import os
import csv
from bs4 import BeautifulSoup

def fetchAndSaveToFile(url, path):
    """Download `url` with a browser User-Agent and save the body to `path`.

    The text is written as UTF-8. Raises requests.HTTPError on a 4xx/5xx
    response.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.93 Safari/537.36"
    }
    # A timeout keeps a stalled connection from hanging the notebook cell.
    r = requests.get(url, headers=headers, timeout=30)
    r.raise_for_status()  # Ensure we notice bad responses
    # os.makedirs('') raises FileNotFoundError, so skip it for bare filenames.
    parent_dir = os.path.dirname(path)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)
    with open(path, 'w', encoding='utf-8') as f:
        f.write(r.text)


def scrapeReviewsFromHtml(file_path, csv_path, max_reviews=100):
    """Extract reviews from a saved HTML page and write them to a CSV file.

    Each <div data-hook="review"> contributes one row of
    [username, review date, review text]; blocks missing a field are skipped
    and at most `max_reviews` rows are kept (None means no limit). The CSV is
    only written when at least one review was found.
    """
    with open(file_path, 'r', encoding='utf-8') as f:
        soup = BeautifulSoup(f, 'html.parser')

    field_selectors = (
        {'class': 'a-profile-name'},
        {'data-hook': 'review-date'},
        {'data-hook': 'review-body'},
    )
    reviews = []
    for review in soup.find_all('div', {'data-hook': 'review'}):
        if max_reviews is not None and len(reviews) >= max_reviews:
            break
        spans = [review.find('span', selector) for selector in field_selectors]
        # Skip if any of the expected fields are not found
        if any(span is None for span in spans):
            continue
        reviews.append([span.text.strip() for span in spans])

    if reviews:
        with open(csv_path, 'w', newline='', encoding='utf-8') as f:
            writer = csv.writer(f)
            writer.writerow(['Username', 'Review Date', 'Review'])
            writer.writerows(reviews)
        print(f"Successfully scraped {len(reviews)} reviews.")
    else:
        print("No reviews found.")


# Page 2 of the review listing for product B0BCF57FL5 (pageNumber=2).
url = "https://www.amazon.in/Intel-i7-13700K-Desktop-Processor-P-cores/product-reviews/B0BCF57FL5/ref=cm_cr_getr_d_paging_btm_next_2?ie=UTF8&reviewerType=all_reviews&pageNumber=2"
# Where the raw page is cached and where the parsed rows are written.
html_path = "data/times.html"
csv_path = "data/reviews2.csv"

# Download the review page and store the raw HTML at html_path
fetchAndSaveToFile(url, html_path)

# Parse the saved HTML and write the extracted reviews to csv_path
scrapeReviewsFromHtml(html_path, csv_path)


No reviews found.


In [None]:
import requests
import os
import csv
from bs4 import BeautifulSoup
import time

def fetchReviewsFromUrl(url, timeout=30):
    """Fetch `url` with a desktop-browser User-Agent and return the body text.

    `timeout` (seconds) is passed to requests.get so a stalled connection
    cannot block the cell forever. Raises requests.HTTPError on a 4xx/5xx
    status.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.93 Safari/537.36"
    }
    response = requests.get(url, headers=headers, timeout=timeout)
    response.raise_for_status()  # Ensure we notice bad responses
    return response.text

def parseReviews(html):
    """Parse review blocks out of `html` into [username, date, text] rows.

    A block is dropped when looking up any of its three fields raises
    AttributeError (i.e. the expected <span> was not found).
    """
    field_selectors = (
        {'class': 'a-profile-name'},
        {'data-hook': 'review-date'},
        {'data-hook': 'review-body'},
    )
    parsed = []
    for block in BeautifulSoup(html, 'html.parser').find_all('div', {'data-hook': 'review'}):
        try:
            row = [block.find('span', attrs).text.strip() for attrs in field_selectors]
        except AttributeError:
            # One of the expected fields is missing from this block.
            continue
        parsed.append(row)
    return parsed

def scrapeReviewsToCsv(url, csv_path, max_reviews=1000):
    """Collect up to `max_reviews` reviews across result pages and save them as CSV.

    The product path is taken from `url` (everything before '/ref='); pages
    1, 2, ... are fetched with fetchReviewsFromUrl and parsed with
    parseReviews. Paging ends once `max_reviews` rows are held or a page
    parses to nothing.
    """
    base_url = url.split('/ref=')[0]
    collected = []
    current_page = 1

    while True:
        if len(collected) >= max_reviews:
            break
        print(f"Scraping page {current_page}...")
        page_url = f"{base_url}/ref=cm_cr_getr_d_paging_btm_next_{current_page}?ie=UTF8&reviewerType=all_reviews&pageNumber={current_page}"
        page_rows = parseReviews(fetchReviewsFromUrl(page_url))
        if not page_rows:
            break  # an empty page marks the end of the listing
        collected += page_rows
        current_page += 1
        time.sleep(2)  # pause between requests to go easy on the server

    # The final page can overshoot the cap; drop the surplus rows.
    del collected[max_reviews:]

    with open(csv_path, 'w', newline='', encoding='utf-8') as out:
        csv_out = csv.writer(out)
        csv_out.writerow(['Username', 'Review Date', 'Review'])
        csv_out.writerows(collected)
    print(f"Finished scraping. Total reviews fetched: {len(collected)}")

# The ref tag says "next_1" while the query says pageNumber=2; neither matters
# here because scrapeReviewsToCsv discards everything from '/ref=' onward.
url = "https://www.amazon.in/Intel-i7-13700K-Desktop-Processor-P-cores/product-reviews/B0BCF57FL5/ref=cm_cr_getr_d_paging_btm_next_1?ie=UTF8&reviewerType=all_reviews&pageNumber=2"
csv_path = "data/reviews.csv"

# Make sure the output directory exists before the CSV is opened for writing.
os.makedirs(os.path.dirname(csv_path), exist_ok=True)
scrapeReviewsToCsv(url, csv_path, max_reviews=1000)


Scraping page 1...
Scraping page 2...
Finished scraping. Total reviews fetched: 10


In [None]:
import requests
import csv
from bs4 import BeautifulSoup

def fetchPageContent(url, timeout=30):
    """Fetch `url` with a desktop-browser User-Agent and return the body text.

    `timeout` (seconds) is passed to requests.get so a stalled connection
    cannot block the cell forever. Raises requests.HTTPError on a 4xx/5xx
    status.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.93 Safari/537.36"
    }
    response = requests.get(url, headers=headers, timeout=timeout)
    response.raise_for_status()  # Ensure we notice bad responses
    return response.text

def parseReviews(html):
    """Parse review blocks out of `html` into [username, date, text] rows.

    A block is dropped when looking up any of its three fields raises
    AttributeError (i.e. the expected <span> was not found).
    """
    field_selectors = (
        {'class': 'a-profile-name'},
        {'data-hook': 'review-date'},
        {'data-hook': 'review-body'},
    )
    parsed = []
    for block in BeautifulSoup(html, 'html.parser').find_all('div', {'data-hook': 'review'}):
        try:
            row = [block.find('span', attrs).text.strip() for attrs in field_selectors]
        except AttributeError:
            # One of the expected fields is missing from this block.
            continue
        parsed.append(row)
    return parsed

def saveReviewsToCsv(reviews, csv_path):
    """Write `reviews` to `csv_path` under a Username/Review Date/Review header.

    `reviews` is an iterable of rows. The parent directory of `csv_path` is
    created when missing so the write does not fail on a fresh checkout.
    """
    import os  # local import: this cell's import block does not include os

    parent_dir = os.path.dirname(csv_path)
    if parent_dir:  # os.makedirs('') would raise for a bare filename
        os.makedirs(parent_dir, exist_ok=True)
    with open(csv_path, 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        writer.writerow(['Username', 'Review Date', 'Review'])
        writer.writerows(reviews)
    print(f"Successfully saved {len(reviews)} reviews to {csv_path}")

# Page 2 of the review listing for product B0BCF57FL5 (pageNumber=2)
url = "https://www.amazon.in/Intel-i7-13700K-Desktop-Processor-P-cores/product-reviews/B0BCF57FL5/ref=cm_cr_getr_d_paging_btm_next_2?ie=UTF8&reviewerType=all_reviews&pageNumber=2"
# NOTE(review): nothing in this cell creates the "data" directory.
csv_path = "data/reviews_page_2.csv"

# Download the raw HTML of that page
html_content = fetchPageContent(url)

# Extract [username, date, text] rows from the HTML
reviews = parseReviews(html_content)

# Write the rows (plus header) to csv_path
saveReviewsToCsv(reviews, csv_path)


Successfully saved 0 reviews to data/reviews_page_2.csv


In [None]:
import requests
import csv
from bs4 import BeautifulSoup

def fetchReviewsFromUrl(url, timeout=30):
    """Fetch `url` with a desktop-browser User-Agent and return the body text.

    `timeout` (seconds) is passed to requests.get so a stalled connection
    cannot block the cell forever. Raises requests.HTTPError on a 4xx/5xx
    status.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.93 Safari/537.36"
    }
    response = requests.get(url, headers=headers, timeout=timeout)
    response.raise_for_status()  # Ensure we notice bad responses
    return response.text

def parseReviews(html):
    """Parse review blocks out of `html` into [username, date, text] rows.

    A block is dropped when looking up any of its three fields raises
    AttributeError (i.e. the expected <span> was not found).
    """
    field_selectors = (
        {'class': 'a-profile-name'},
        {'data-hook': 'review-date'},
        {'data-hook': 'review-body'},
    )
    parsed = []
    for block in BeautifulSoup(html, 'html.parser').find_all('div', {'data-hook': 'review'}):
        try:
            row = [block.find('span', attrs).text.strip() for attrs in field_selectors]
        except AttributeError:
            # One of the expected fields is missing from this block.
            continue
        parsed.append(row)
    return parsed

def scrapeReviewsFromPageToCsv(url, csv_path):
    """Fetch a single review page, parse it and dump the rows to `csv_path`.

    The page is downloaded with fetchReviewsFromUrl and parsed with
    parseReviews; the CSV always starts with a header row.
    """
    page_rows = parseReviews(fetchReviewsFromUrl(url))

    header = ['Username', 'Review Date', 'Review']
    with open(csv_path, 'w', newline='', encoding='utf-8') as out:
        csv_out = csv.writer(out)
        csv_out.writerow(header)
        for row in page_rows:
            csv_out.writerow(row)
    print(f"Finished scraping. Total reviews fetched: {len(page_rows)}")

# This cell calls os.makedirs but its import block omits os; import it here
# so the cell does not depend on an earlier cell having been run.
import os

# URL for page 3 of the reviews
url = "https://www.amazon.in/Intel-i7-13700K-Desktop-Processor-P-cores/product-reviews/B0BCF57FL5/ref=cm_cr_getr_d_paging_btm_next_3?ie=UTF8&reviewerType=all_reviews&pageNumber=3"
csv_path = "data/reviews_page_3.csv"

# Create the directory if it doesn't exist
os.makedirs(os.path.dirname(csv_path), exist_ok=True)

# Scrape reviews from page 3 and save to CSV
scrapeReviewsFromPageToCsv(url, csv_path)


Finished scraping. Total reviews fetched: 0


In [None]:
import requests
import os
import csv
from bs4 import BeautifulSoup

def fetchAndSaveToFile(url, path):
    """Download `url` and write the response body to `path` as UTF-8 text.

    Raises requests.HTTPError on a 4xx/5xx response so an error page is never
    saved as if it were the requested document.
    """
    # A timeout keeps a stalled connection from hanging the notebook cell.
    r = requests.get(url, timeout=30)
    r.raise_for_status()
    # os.makedirs('') raises FileNotFoundError, so skip it for bare filenames.
    parent_dir = os.path.dirname(path)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)
    with open(path, 'w', encoding='utf-8') as f:
        f.write(r.text)


def scrapeReviewsFromHtml(file_path, csv_path, max_reviews=100):
    """Extract reviews from a saved HTML page and write them to a CSV file.

    Each <div data-hook="review"> contributes one row of
    [username, review date, review text]. Blocks missing any of the three
    fields are skipped, and at most `max_reviews` rows are written
    (pass None for no limit).
    """
    with open(file_path, 'r', encoding='utf-8') as f:
        soup = BeautifulSoup(f, 'html.parser')

    field_selectors = (
        {'class': 'a-profile-name'},
        {'data-hook': 'review-date'},
        {'data-hook': 'review-body'},
    )
    reviews = []
    for review in soup.find_all('div', {'data-hook': 'review'}):
        if max_reviews is not None and len(reviews) >= max_reviews:
            break
        spans = [review.find('span', selector) for selector in field_selectors]
        # find() returns None for an absent element; skip the block instead
        # of crashing on `None.text`.
        if any(span is None for span in spans):
            continue
        reviews.append([span.text.strip() for span in spans])

    with open(csv_path, 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        writer.writerow(['Username', 'Review Date', 'Review'])
        writer.writerows(reviews)

# Amazon.in review listing for product B09MDFH5HY.
url = "https://www.amazon.in/Intel-Generation-Desktop-Processor-Warranty/product-reviews/B09MDFH5HY/ref=cm_cr_dp_d_show_all_btm?ie=UTF8&reviewerType=all_reviews"
# Where the raw page is cached and where the parsed rows are written.
html_path = "data/times.html"
csv_path = "data/reviewsi5.csv"

# Download the review page and store the raw HTML at html_path
fetchAndSaveToFile(url, html_path)

# Parse the saved HTML and write the extracted reviews to csv_path
scrapeReviewsFromHtml(html_path, csv_path)


In [None]:
import requests
import os
import csv
from bs4 import BeautifulSoup

def fetchAndSaveToFile(url, path):
    """Download `url` and write the response body to `path` as UTF-8 text.

    Raises requests.HTTPError on a 4xx/5xx response so an error page is never
    saved as if it were the requested document.
    """
    # A timeout keeps a stalled connection from hanging the notebook cell.
    r = requests.get(url, timeout=30)
    r.raise_for_status()
    # os.makedirs('') raises FileNotFoundError, so skip it for bare filenames.
    parent_dir = os.path.dirname(path)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)
    with open(path, 'w', encoding='utf-8') as f:
        f.write(r.text)


def scrapeReviewsFromHtml(file_path, csv_path):
    """Extract reviews from a saved HTML page and write them to a CSV file.

    Each <div data-hook="review"> contributes one row of
    [username, review date, review text]. Blocks missing any of the three
    fields are skipped instead of aborting the whole run.
    """
    with open(file_path, 'r', encoding='utf-8') as f:
        soup = BeautifulSoup(f, 'html.parser')

    field_selectors = (
        {'class': 'a-profile-name'},
        {'data-hook': 'review-date'},
        {'data-hook': 'review-body'},
    )
    reviews = []
    for review in soup.find_all('div', {'data-hook': 'review'}):
        spans = [review.find('span', selector) for selector in field_selectors]
        # find() returns None for an absent element; skip the block instead
        # of crashing on `None.text`.
        if any(span is None for span in spans):
            continue
        reviews.append([span.text.strip() for span in spans])

    with open(csv_path, 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        writer.writerow(['Username', 'Review Date', 'Review'])
        writer.writerows(reviews)

# Amazon.in review listing for product B09RWL74GY (Intel Core i9-12900KS).
url = "https://www.amazon.in/Intel-Core-i9-12900KS-Hexadeca-core-Processor/product-reviews/B09RWL74GY/ref=cm_cr_dp_d_show_all_btm?ie=UTF8&reviewerType=all_reviews"
# Where the raw page is cached and where the parsed rows are written.
html_path = "data/times.html"
csv_path = "data/reviewsi94.csv"

# Download the review page and store the raw HTML at html_path
fetchAndSaveToFile(url, html_path)

# Parse the saved HTML and write the extracted reviews to csv_path
scrapeReviewsFromHtml(html_path, csv_path)

In [None]:
import csv

# CSV files whose data rows will be appended to the combined file.
# NOTE(review): these are bare filenames resolved against the current working
# directory, whereas the scraping cells write under "data/" — confirm the
# files are where this cell expects them.
new_files = [
    "reviewsi9.csv",
    "reviewsi91.csv",
    "reviewsi92.csv",
    "reviewsi93.csv",
    "reviewsi94.csv"
]

# Path to the existing combined file (opened in append mode by appendCsvFiles)
combined_file = "reviewsi_combined.csv"

def appendCsvFiles(combined_file, new_files):
    """Append the data rows of each CSV in `new_files` to `combined_file`.

    The first row of every input file is treated as a header and dropped.
    `combined_file` is opened in append mode, so its existing content is kept
    and calling this twice appends the same rows twice.
    """
    with open(combined_file, 'a', newline='', encoding='utf-8') as outfile:
        writer = csv.writer(outfile)

        for new_file in new_files:
            # newline='' lets the csv module see line endings untouched, so
            # CR/LF inside quoted review text survives the round trip.
            with open(new_file, 'r', newline='', encoding='utf-8') as infile:
                reader = csv.reader(infile)

                # Skip the header; the default avoids StopIteration when the
                # input file is completely empty.
                next(reader, None)

                # Copy the remaining rows into the combined file
                writer.writerows(reader)

# Append the data rows of every file in new_files to combined_file
appendCsvFiles(combined_file, new_files)

# Hand the updated combined file to the browser as a download.
# NOTE(review): google.colab is presumably only importable inside a Colab
# runtime — confirm before running this cell elsewhere.
from google.colab import files
files.download(combined_file)


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>