<a href="https://colab.research.google.com/github/ASWATHI2811/Webscraping/blob/main/Web_scraping.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
from reportlab.lib.pagesizes import letter
from reportlab.platypus import SimpleDocTemplate, Paragraph
from reportlab.lib.styles import getSampleStyleSheet
import requests
from bs4 import BeautifulSoup
import csv

def _resolve_result_link(href):
    """Return a directly fetchable URL for a DuckDuckGo result href.

    DuckDuckGo's HTML endpoint may return redirect links of the form
    ``//duckduckgo.com/l/?uddg=<percent-encoded target>``. Those are unwrapped
    to the real target; scheme-less links get ``https:``; anything else is
    returned unchanged.
    """
    from urllib.parse import parse_qs, urlparse

    parsed = urlparse(href)
    if parsed.netloc.endswith("duckduckgo.com") and parsed.path.rstrip("/") == "/l":
        target = parse_qs(parsed.query).get("uddg")
        if target:
            return target[0]
    if href.startswith("//"):
        # requests refuses URLs without a scheme.
        return "https:" + href
    return href


def duckduckgo_search(query):
    """Search DuckDuckGo's HTML endpoint and scrape every result page.

    Args:
        query: Free-text search string. It is sent as a URL parameter, so
            characters such as ``&``, ``#`` or spaces are encoded safely.

    Returns:
        A list of dicts with the keys ``'title'``, ``'link'`` and
        ``'paragraph'``. Results whose page yielded no paragraph text are
        omitted.
    """
    headers = {
        "User-Agent": "Chrome/58.0.3029.110"
    }
    # Let requests build the query string instead of formatting it by hand,
    # and bound the wait so an unresponsive server cannot hang the script.
    response = requests.get(
        "https://duckduckgo.com/html/",
        params={"q": query},
        headers=headers,
        timeout=10,
    )
    soup = BeautifulSoup(response.text, 'html.parser')

    scraped_data = []
    for result in soup.find_all('a', class_='result__a'):
        href = result.get('href')
        if not href:
            # An anchor without a target cannot be scraped.
            continue
        link = _resolve_result_link(href)
        paragraph = get_paragraph_content(link)
        if paragraph:
            scraped_data.append({'title': result.text, 'link': link, 'paragraph': paragraph})

    return scraped_data

def get_paragraph_content(url):
    """Download ``url`` and return the text of all its ``<p>`` elements.

    Args:
        url: Address of the page to fetch.

    Returns:
        The paragraph texts joined with newlines, or ``""`` when the page
        could not be fetched or parsed (the error is printed, not raised).
    """
    try:
        # Bound the wait: one stalled host must not block the whole run.
        response = requests.get(url, timeout=10)
        # Treat 4xx/5xx answers as failures instead of scraping error pages.
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')
        return "\n".join(p.get_text() for p in soup.find_all('p'))
    except Exception as e:
        # Deliberately broad: scraping is best-effort per URL, and a single
        # bad page should only cost that one result.
        print(f"Error fetching content from {url}: {e}")
        return ""

def save_to_csv(scraped_data, filename):
    """Write the scraped records to ``filename`` as a UTF-8 CSV file.

    Args:
        scraped_data: Iterable of dicts with the keys ``'title'``, ``'link'``
            and ``'paragraph'``.
        filename: Path of the CSV file to create or overwrite.
    """
    columns = ['title', 'link', 'paragraph']
    # newline='' lets the csv module control line endings itself.
    with open(filename, 'w', newline='', encoding='utf-8') as out_file:
        csv_writer = csv.DictWriter(out_file, fieldnames=columns)
        csv_writer.writeheader()
        csv_writer.writerows(scraped_data)

def save_to_pdf(scraped_data, filename):
    """Render the scraped records into a letter-sized PDF document.

    Args:
        scraped_data: Iterable of dicts with the keys ``'title'``, ``'link'``
            and ``'paragraph'``.
        filename: Path of the PDF file to create or overwrite.
    """
    from xml.sax.saxutils import escape

    doc = SimpleDocTemplate(filename, pagesize=letter)
    styles = getSampleStyleSheet()
    content = []

    for item in scraped_data:
        # Paragraph parses its text as XML-like markup, so scraped text must
        # be escaped or a stray '<' or '&' breaks or corrupts the build.
        title = escape(item['title'])
        link = escape(item['link'])
        # Keep the paragraph boundaries that the scraper joined with '\n'.
        paragraph = escape(item['paragraph']).replace("\n", "<br/>")

        content.append(Paragraph(f"Title: {title}", styles['Title']))
        content.append(Paragraph(f"Link: {link}", styles['Normal']))
        content.append(Paragraph("Paragraph Content:", styles['Normal']))
        content.append(Paragraph(paragraph, styles['Normal']))
        content.append(Paragraph("\n", styles['Normal']))

    doc.build(content)

def main():
    """Prompt for comma-separated queries, then scrape and export each one.

    For every non-empty query the results are printed and written to
    ``<query>_search_results.csv`` and ``<query>_search_results.pdf`` in the
    current directory.
    """
    import re

    raw_queries = input("Enter your search queries separated by commas: ")
    for raw_query in raw_queries.split(','):
        # "a, b" must search for "b", not " b"; blank entries are skipped.
        query = raw_query.strip()
        if not query:
            continue

        results = duckduckgo_search(query)
        print(f"Search Results for '{query}':")
        for idx, result in enumerate(results, 1):
            print(f"{idx}. {result['link']}")
            print("   Paragraph Content:")
            print(result['paragraph'])
            print()

        # Replace characters that are invalid in file names on common
        # platforms so queries such as "a/b" or "what?" can still be saved.
        safe_name = re.sub(r'[\\/:*?"<>|]', '_', query)
        csv_filename = f"{safe_name}_search_results.csv"
        pdf_filename = f"{safe_name}_search_results.pdf"
        save_to_csv(results, csv_filename)
        save_to_pdf(results, pdf_filename)
        print(f"Data saved to {csv_filename} and {pdf_filename}")


# Run the interactive scraper only when executed as a script or notebook cell.
if __name__ == "__main__":
    main()

Enter your search queries separated by commas: Canoo overview
Search Results for 'Canoo overview':
1. https://www.globaldata.com/company-profile/canoo-inc/
   Paragraph Content:
Canoo Inc (Canoo) is a high tech advanced mobility technology company. The company’s product offerings include true steer-by-wire, leaf spring suspension systems, advanced drivetrain systems, battery and battery management systems, electrical systems architecture, lifestyle vehicles and delivery, multi-purpose delivery vehicles and pickup. It provides electric vehicle concept, design and engineering services for auto original equipment manufacturers (OEMs). Canoo develops the Canoo Digital Ecosystem software to provide key tools for passenger vehicles and fleets with products spanning lifestyle, fleet management, security and safety, household and asset management use cases. The company serves businesses and consumers. It operates offices in California, Arkansas, Oklahama, Texas and Michigan. Canoo is headquart