In [91]:
import csv 
import requests
import os
from openai import OpenAI
from bs4 import BeautifulSoup
from typing import Dict, List
from urllib.parse import urlparse

In [92]:
client = OpenAI(
    api_key = 'OpenAI API Key',
)

In [101]:
def scrape_websites_from_csv(csv_file_path: str) -> List[Dict[str, str]]:
# Function to scrape CSV and domains in it -> to return parsed HTML content.
    scraped_data = []

    with open(csv_file_path, 'r') as csvfile:
        csv_reader = csv.reader(csvfile)
        next(csv_reader, None)  # Skip the header row if it exists

        for row in csv_reader:
            if not row:  # Skip empty rows
                continue
            
            url = row[0].strip()  # Assuming the URL is in the first column
            if not url.startswith('http'):
                url = 'http://' + url

            try:
                response = requests.get(url, timeout=10)
                soup = BeautifulSoup(response.content, 'html.parser')
                
                data = {
                    'url': url,
                    'domain': urlparse(url).netloc,
                    'title': soup.title.string if soup.title else 'No title found',
                    'meta_description': soup.find('meta', attrs={'name': 'description'})['content'] if soup.find('meta', attrs={'name': 'description'}) else 'No meta description found',
                    'headings': [h.text for h in soup.find_all(['h1', 'h2', 'h3'])[:5]],
                    'paragraphs': [p.text for p in soup.find_all('p')[:20]]
                }
                
                scraped_data.append(data)
                print(scraped_data)
                print(f"Successfully scraped: {url}")
            
            except Exception as e:
                print(f"Error scraping {url}: {str(e)}")
                scraped_data.append({
                    'url': url,
                    'domain': urlparse(url).netloc,
                    'error': str(e)
                })

    return scraped_data

In [59]:
# def analyze_content(data: Dict[str, str]) -> List[str]:
#     #Analyze scraped content to identify key themes and topics.
#     relevant_content = []
    
#     keywords = ['achievement','professional','community','remote work', 'solution', 'connection', 'recognition', 'retention'] #identify content that matches Litespaces's values
    
#     all_text = ' '.join(data['headings'] + data['paragraphs']).lower()
    
#     for keyword in keywords:
#         if keyword in all_text:
#             relevant_content.append(f"Website mentions {keyword}")
    
#     return relevant_content

In [148]:
def generate_personalized_email(website_data: List[str]) -> str:
    #Generate a personalized email for prospects

    content_string = '. '.join(website_data) # <- this line doesn't work

    prompt = f""" Write an introductory line to an email that sounds friendly and personal. Avoid formal or exaggerated expressions, 
    and aim for a tone that is casual, friendly, and sounds like a real person speaking. 
    Look at the following company’s website description and use it to share a positive detail about their company’s business: 
    {content_string}. Start the line with ‘I was on your site and saw you’ and continue by sharing something they value, 
    prioritize, or are aiming to accomplish that relates to Litespace. Do not include any quotation marks and write just one example, do not make a list of examples.
    End the sentence in requesting to connect to see how we can collaborate together.
    Keep the sentence below 100 words and do not quote their description directly, meaning change up wording
    """

    # print(content_string)
    print(f"what is my prompt: {prompt}")
    try:
        response = client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[
                {"role": "system", "content": "You are a helpful assistant that writes personalized outbound sales emails for Litespace who is a startup digitizing human resources and driving performance through improving employee experience."},
                {"role": "user", "content": prompt}
            ],
            max_tokens=200,
            n=1,
            stop=None,
            temperature=0.7 
            )
        print(f"response: {response}")

        generated_email = response.choices[0].message.content
        return generated_email

    except Exception as e:
        print(f"Error generating email: {str(e)}")
        return f"Error generating email for {company_name}: {str(e)}"

In [150]:
def main():
    csv_file_path = input("Enter the path to your CSV file containing website URLs: ")
    scraped_websites = scrape_websites_from_csv(csv_file_path)
    #Scrape domains in CSV file

    # Process each scraped website
    for website_data in scraped_websites:
        print(website_data)
        if 'error' in website_data:
            print(f"Skipping {website_data['url']} due to error: {website_data['error']}")
            continue

        # personalized email
        personalized_email = generate_personalized_email(website_data)

        print(f"\nGenerated Personalized Email for {website_data['domain']}:")
        print("-----------------------------")
        print(personalized_email)
        print("\n")

if __name__ == "__main__":
    main()

[{'url': 'https://www.humi.ca/', 'domain': 'www.humi.ca', 'title': 'Best Payroll, HR, and Benefits Software for Canadian Companies | Humi', 'meta_description': 'Your best-in-one payroll, HR, and benefits solution. Give all your employee data a home, pay employees with ease, and streamline your group benefits experience.', 'headings': ['We’re on a mission to help businesses employ 1 million Canadians.', 'Join us in reshaping the digital employment experience.', 'The key ingredients for employing in Canada.', 'PAYROLL', 'A payroll system built with Canadian businesses in mind.'], 'paragraphs': ['We’re the best-in-one system that’ll help you store important information, run payroll, and manage benefits, without the hours of manual work. Think of us as an extension of your team, here to help with all your people operations tasks – except we can’t get you coffee. Yet.', 'Employee profiles, documents, asset tracker, analytics.', 'Pay your employees on time, every time.', 'Quality benefits wi