In [None]:
import requests
from bs4 import BeautifulSoup
import json
import csv

def scrape_web_page(url, timeout=10):
    """Fetch a web page and return the text of all its ``<p>`` elements.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests.get`` may block indefinitely on a stalled
            connection.

    Returns:
        The stripped text of every paragraph joined by newlines (an empty
        string when the page contains no paragraphs), or ``None`` when the
        request fails or the response status is not 200.
    """
    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as exc:
        # Network-level failures (DNS, refused connection, timeout) are
        # reported like a bad status code: a message plus a None result.
        print("Failed to fetch the web page. Error:", exc)
        return None

    if response.status_code != 200:
        print("Failed to fetch the web page. Status code:", response.status_code)
        return None

    # Parse the raw response bytes and collect the text of each paragraph.
    soup = BeautifulSoup(response.content, 'html.parser')
    return '\n'.join(p.text.strip() for p in soup.find_all('p'))

def save_to_csv(data, output_file):
    """Write *data* as a single CSV cell beneath an 'Extracted Text' header.

    Args:
        data: The value to store in the one data row.
        output_file: Path of the CSV file to create or overwrite.
    """
    rows = [
        ['Extracted Text'],  # header row
        [data],              # the whole payload goes into one cell
    ]
    with open(output_file, 'w', newline='', encoding='utf-8') as handle:
        csv.writer(handle).writerows(rows)

def save_to_json(data, output_file):
    """Write *data* to a JSON file under the key ``'Extracted_Text'``.

    Args:
        data: JSON-serialisable value to store.
        output_file: Path of the JSON file to create or overwrite.
    """
    with open(output_file, 'w', encoding='utf-8') as jsonfile:
        # The file is UTF-8, so keep non-ASCII characters as-is instead of
        # \uXXXX escapes; the parsed content is identical either way.
        json.dump({'Extracted_Text': data}, jsonfile, indent=4, ensure_ascii=False)

def main():
    """Scrape the configured page and export its paragraph text.

    The text returned by ``scrape_web_page`` is written to ``output.csv``
    and ``output.json``. Nothing is written when the scrape yields no text.
    """
    # Where the results end up
    json_path = 'output.json'
    csv_path = 'output.csv'

    # Page to scrape
    target = 'https://pubchem.ncbi.nlm.nih.gov/compound/N-Vinyl-2-pyrrolidone'  # Replace with the URL of the attached web page

    page_text = scrape_web_page(target)
    if not page_text:
        # Failed request or empty page: nothing to export.
        return

    save_to_csv(page_text, csv_path)
    save_to_json(page_text, json_path)


if __name__ == "__main__":
    main()


In [None]:
import requests
from bs4 import BeautifulSoup
import csv
import json

# Function to scrape data
def scrape_data(url, timeout=10):
    """Download a page and collect its paragraphs, image URLs and links.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests.get`` may block indefinitely on a stalled
            connection.

    Returns:
        A list of dicts in document-section order: ``{'text': ...}`` for
        each ``<p class="content">``, then ``{'image_url': ...}`` for each
        ``<img>`` that has a ``src`` attribute, then
        ``{'link_text': ..., 'link_url': ...}`` for each ``<a>`` that has
        an ``href`` attribute. The list may be empty. ``None`` is returned
        when the request fails or the response status is not 200.
    """
    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as exc:
        # Network-level failures are reported like a bad status code.
        print("Error: Failed to retrieve webpage.", exc)
        return None

    if response.status_code != 200:
        print("Error: Failed to retrieve webpage.")
        return None

    soup = BeautifulSoup(response.text, 'html.parser')

    # Example: text from paragraphs with class 'content'
    extracted_data = [
        {'text': paragraph.get_text(strip=True)}
        for paragraph in soup.find_all('p', class_='content')
    ]

    # Example: image URLs; src=True skips <img> tags without a src attribute
    extracted_data.extend(
        {'image_url': image['src']}
        for image in soup.find_all('img', src=True)
    )

    # Example: links; href=True skips anchors without an href attribute
    extracted_data.extend(
        {'link_text': link.text, 'link_url': link['href']}
        for link in soup.find_all('a', href=True)
    )

    return extracted_data

# Function to save data to CSV
def save_to_csv(data, filename):
    """Write a list of dicts to a CSV file, one row per dict.

    The header is the union of all keys across *data*, in first-seen
    order, so rows with different keys (text, image and link records) can
    share one file. Cells for keys a row lacks are left empty.

    Args:
        data: Iterable-safe sequence of dicts to write; it is traversed
            twice (once for the header, once for the rows).
        filename: Path of the CSV file to create or overwrite.
    """
    # Taking only data[0]'s keys would make DictWriter raise ValueError on
    # the first row carrying a different key. dict.fromkeys de-duplicates
    # while preserving insertion order.
    fieldnames = list(dict.fromkeys(key for item in data for key in item))

    with open(filename, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(data)

# Function to save data to JSON
def save_to_json(data, filename):
    """Serialise *data* to a JSON file with 4-space indentation.

    Args:
        data: JSON-serialisable value to store.
        filename: Path of the JSON file to create or overwrite.
    """
    with open(filename, 'w', encoding='utf-8') as jsonfile:
        # The file is UTF-8, so keep non-ASCII characters as-is instead of
        # \uXXXX escapes; the parsed content is identical either way.
        json.dump(data, jsonfile, indent=4, ensure_ascii=False)

if __name__ == "__main__":
    # Example usage: scrape the product page, then export whatever was found.
    url = 'https://products.basf.com/global/en/ci/n-vinyl-2-pyrrolidone.html'
    data = scrape_data(url)

    if not data:
        # None (request failed) and an empty list both count as failure.
        print("Scraping failed.")
    else:
        # Write the same records in both formats
        save_to_csv(data, 'scraped_data.csv')
        save_to_json(data, 'scraped_data.json')

        print("Scraping and saving successful.")


Scraping and saving successful.
