In [None]:
# Mount the user's Google Drive at /content/drive. The `google.colab`
# package is only importable inside a Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import pandas as pd
import requests
from bs4 import BeautifulSoup
from duckduckgo_search import DDGS
import csv

# Research prompts submitted verbatim to the search engine, one search per
# entry. Adjacent string literals are joined by the parser, so each entry is
# still a single string.
queries = [
    (
        "Identify the industry in which Canoo operates, along with its size, "
        "growth rate, trends, and key players"
    ),
    (
        "Analyze Canoo's main competitors, including their market share, "
        "products or services offered, pricing strategies, and marketing efforts"
    ),
    (
        "Identify key trends in the market, including changes in consumer "
        "behavior, technological advancements, and shifts in the competitive "
        "landscape"
    ),
    (
        "Gather information on Canoo's financial performance, including its "
        "revenue, profit margins, return on investment, and expense structure."
    ),
]



# Function to extract structured information from the linked page
def extract_structured_response(url, timeout=10):
    """Fetch a web page and return the text of its ``<p>`` elements.

    Args:
        url: Address of the page to download. An empty or ``None`` value
            short-circuits to an empty result without any network access.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a single unresponsive host would stall the whole run.

    Returns:
        The text of every ``<p>`` element joined with newlines, or ``""``
        when the URL is empty, the download fails, the server answers with
        an HTTP error status, or the page cannot be parsed.
    """
    if not url:
        return ""

    try:
        page_response = requests.get(url, timeout=timeout)
        # Treat 4xx/5xx answers as failures so error pages are not stored
        # as if they were the article content.
        page_response.raise_for_status()
        soup = BeautifulSoup(page_response.content, 'html.parser', from_encoding='utf-8')
    except Exception as e:
        # Deliberately broad: this is a best-effort scrape, and one bad page
        # must not abort the caller's batch. The failure is reported and an
        # empty result is returned instead.
        print(f"Error fetching content from {url}: {e}")
        return ""

    return "\n".join(p.get_text() for p in soup.find_all('p'))

# Destination CSV under the Drive mount point. Every field is quoted
# (QUOTE_ALL) because scraped paragraph text may contain commas, quotes,
# and newlines.
output_csv_path = '/content/drive/MyDrive/DATA/output_data.csv'
with open(output_csv_path, mode='w', newline='', encoding='utf-8') as csv_file:
    csv_writer = csv.writer(csv_file, quoting=csv.QUOTE_ALL)
    csv_writer.writerow(["URL", "StructuredResponse"])  # header row

    # One DuckDuckGo session per query; each hit becomes one CSV row.
    for query in queries:
        with DDGS() as ddgs:
            # Collect the hit URLs up front (at most 10 requested per query).
            hit_urls = [hit['href'] for hit in ddgs.text(query, max_results=10)]

            for url in hit_urls:
                page_text = extract_structured_response(url)
                csv_writer.writerow([url, page_text])

# Report completion once the file has been closed.
print("CSV file exported successfully.")


CSV file exported successfully.
