In [1]:
import pandas as pd
import numpy as np
import re
import ast
import os 
from dotenv import load_dotenv
import anthropic

# Load environment variables from the .env file
load_dotenv()
ANTHROPIC_API_KEY = os.getenv('ANTHROPIC_API_KEY')

# Set Pandas options to display full text for all columns
pd.set_option('display.max_colwidth', None)
pd.set_option('display.max_columns', None)  # Show all columns


In [2]:
# Load the dataset from the './data' folder
people_path = './data/YC F24 Founder People.csv'

# Read data into pandas DataFrame
people_data = pd.read_csv(people_path)

# Relevant columns to keep
relevant_columns = [
    'full_name', 'title', 'location', 'company', 'company_linkedin', 
    'company_description', 'sub_title', 'summary', 'current_job_description', 
    'person_industry', 'skills', 'education', 'work_history'
]

# Filter the relevant columns
people_filtered = people_data[relevant_columns]

# Display basic information about the dataset
#print("Dataset Loaded:")
#display(people_filtered.head())


In [3]:
# Helper function to extract the "name" field using a simple regex
def extract_names_simple(data_str):
    try:
        # Find all occurrences of "name": "value"
        names = re.findall(r'"name":\s?"(.*?)"', data_str)
        return ", ".join(names) if names else None
    except Exception as e:
        return None  # Return None if parsing fails

# Helper function to convert the skills column into a readable string
def process_skills(skills_str):
    try:
        # Convert the string representation of a list into an actual list
        skills_list = ast.literal_eval(skills_str)
        if isinstance(skills_list, list):
            return ", ".join(skills_list)  # Join the list into a string
        return skills_str  # If not a list, return the original string
    except (ValueError, SyntaxError):
        return None  # Return None if parsing fails

# Apply the updated function to the education, work history, and skills columns
def preprocess_columns_simple(df):
    df['education'] = df['education'].apply(extract_names_simple)
    df['work_history'] = df['work_history'].apply(extract_names_simple)
    df['skills'] = df['skills'].apply(process_skills)
    return df

# Preprocess the dataset
people_preprocessed = preprocess_columns_simple(people_filtered.copy())


In [4]:
# Helper function to generate a descriptive paragraph for each row
def generate_paragraph(row):
    # Start the paragraph with basic information
    full_name = row.get('full_name', None)
    title = row.get('title', None)
    company = row.get('company', None)
    location = row.get('location', None)
    company_description = row.get('company_description', None)
    sub_title = row.get('sub_title', None)
    summary = row.get('summary', None)
    current_job_description = row.get('current_job_description', None)
    skills = row.get('skills', None)
    education = row.get('education', None)
    work_history = row.get('work_history', None)

    # Build the descriptive paragraph dynamically
    sentences = []

    if full_name and title and company:
        sentences.append(f"{full_name}, currently working as {title} at {company}.")
    if location:
        sentences[-1] += f" They are based in {location}."
    if company_description:
        sentences.append(f"{company} specializes in {company_description}.")
    if sub_title:
        sentences.append(f"Sub-title: {sub_title}.")
    if summary:
        sentences.append(f"Summary: {summary}.")
    if current_job_description:
        sentences.append(f"Current Job Description: {current_job_description}.")
    if skills:
        sentences.append(f"Their skills include: {skills}.")
    if education:
        sentences.append(f"Their educational background includes: {education}.")
    if work_history:
        sentences.append(f"They have worked in roles such as: {work_history}.")

    # Combine sentences into a single paragraph
    return " ".join(sentences)

# Apply the function to generate paragraphs
people_preprocessed['descriptive_paragraph'] = people_preprocessed.apply(generate_paragraph, axis=1)


In [5]:
# Load the new dataset
personal_data_path = './data/Sudarshan Sridharan People.csv'
personal_data = pd.read_csv(personal_data_path)

# Preprocessing functions reused from the previous notebook
# (No need to redefine extract_names_simple and process_skills)

# Helper function to process the new file into a descriptive paragraph
def generate_advertising_paragraph(row):
    # Basic information
    full_name = row.get('full_name', None)
    title = row.get('title', None)
    company = row.get('company', None)
    location = row.get('location', None)
    company_description = row.get('company_description', None)
    work_history = row.get('work_history', None)
    education = row.get('education', None)
    skills = row.get('skills', None)

    # Build the paragraph dynamically
    sentences = []
    
    if full_name and title and company:
        sentences.append(f"{full_name} is the {title} at {company}.")
    if location:
        sentences[-1] += f" They are based in {location}."
    if company_description:
        sentences.append(f"{company} focuses on: {company_description}.")
    if skills:
        sentences.append(f"Key skills include: {skills}.")
    if education:
        sentences.append(f"Educational background includes: {education}.")
    if work_history:
        sentences.append(f"Past work includes roles such as: {work_history}.")

    # Combine and clean up
    return " ".join(sentences)

# Preprocess the new dataset
personal_data['education'] = personal_data['education'].apply(extract_names_simple)
personal_data['work_history'] = personal_data['work_history'].apply(extract_names_simple)
personal_data['skills'] = personal_data['skills'].apply(process_skills)

# Generate the descriptive paragraph
personal_data['advertising_paragraph'] = personal_data.apply(generate_advertising_paragraph, axis=1)

# Display the output
#print("Sample Advertising Paragraphs:")
#display(personal_data[['full_name', 'advertising_paragraph']].head())


In [24]:
# Ensure 'people_preprocessed' and 'new_people_data' DataFrames exist from previous steps

# Extract the single paragraph from the new people's data
advertising_paragraph = personal_data['advertising_paragraph'].iloc[0]  # Assuming only one row

# Create a DataFrame with pairs of paragraphs
paired_paragraphs = people_preprocessed[['full_name', 'descriptive_paragraph']].copy()
paired_paragraphs['advertising_paragraph'] = advertising_paragraph  # Add the single advertising paragraph to all rows

# Display the paired paragraphs
print("Paired Paragraphs (Personal Profile and Advertising Profile):")
display(paired_paragraphs.head())

# for testing purposes only look at the first 20
paired_paragraphs = paired_paragraphs.head(5)


Paired Paragraphs (Personal Profile and Advertising Profile):


Unnamed: 0,full_name,descriptive_paragraph,advertising_paragraph
0,Edward Aryee,"Edward Aryee, currently working as Co-Founder & CTO at SRE.ai (YC F24). They are based in San Francisco Bay, San Francisco Bay Area, United States of America. SRE.ai (YC F24) specializes in nan. Sub-title: Co-Founder & CTO at SRE.ai (YC F24) | ex-Google. Summary: nan. Current Job Description: An AI-powered automation platform for Salesforce development teams.. Their skills include: Programming, Python, Java, JavaScript, Matlab, Computer Science, Public Speaking. Their educational background includes: Drexel University. They have worked in roles such as: SRE.ai (YC F24), Black Angel Group (BAG), Google, Microsoft, StratIS IoT, Web Presence in China, BuLogics, Virtual Pantry, Columbia University.","Sudarshan Sridharan is the Founder at Pipeline. They are based in San Francisco Bay, San Francisco Bay Area, United States of America. Pipeline focuses on: Get more leads, close more deals. Pipeline sets-up and manages the outbound sales stack for early stage B2B startups. Automate your sales.... Key skills include: Investing. Educational background includes: Clemson University. Past work includes roles such as: Pipeline, Fion Technologies, SaveMAPS, Second Reality Interactive, INC., Peak 15 Capital."
1,Victor Cheng,"Victor Cheng, currently working as Co-Founder and CEO at vly.ai (YC F24). They are based in San Francisco, California, United States of America. vly.ai (YC F24) specializes in nan. Sub-title: co-founder @ vly.ai (YC F24). Summary: cooking. Current Job Description: The easiest way to build custom full-stack software without code. Backed by Y Combinator. Their skills include: Startups, TypeScript, Git, Express.js, MongoDB, Full-Stack Development, Node.js, Competitive Programming, Next.js, Software Development, React.js, C++, JavaScript, C#, Java, Unity3D, Business, Mathematics, Robotics, Mobile Application Development, Android Studio. Their educational background includes: University of Washington, Skyline High School, Y Combinator. They have worked in roles such as: vly.ai (YC F24), Hatch Coding, Minute Land, Quantum Labs, Autodesk, FRC Team 2976, Spartabots, Self-employed.","Sudarshan Sridharan is the Founder at Pipeline. They are based in San Francisco Bay, San Francisco Bay Area, United States of America. Pipeline focuses on: Get more leads, close more deals. Pipeline sets-up and manages the outbound sales stack for early stage B2B startups. Automate your sales.... Key skills include: Investing. Educational background includes: Clemson University. Past work includes roles such as: Pipeline, Fion Technologies, SaveMAPS, Second Reality Interactive, INC., Peak 15 Capital."
2,Daryl Budiman,"Daryl Budiman, currently working as Co-Founder & CEO at Andoria (YC F24). They are based in San Francisco Bay, San Francisco Bay Area, United States of America. Andoria (YC F24) specializes in nan. Sub-title: Co-Founder at Andoria (YC F24) | Prev. MultiOn & Stanford. Summary: Imagine if you didn't have to learn any piece of software anymore.. Current Job Description: Backed by YC, among other great investors.. Their skills include: Customer Onboarding, Customer Success, SwiftUI, C++, React.js, Microsoft PowerPoint, HTML, CSS, JavaScript, Python, Presentation Skills, Public Speaking, Adobe Photoshop, Research, ArcGIS, Figma, Leadership, User Interface Design, Computer-Aided Design (CAD), Autodesk Fusion 360. Their educational background includes: Stanford University, Y Combinator. They have worked in roles such as: Andoria (YC F24), MultiOn, Amazon, The Marriage Pact, Evernote, Matcha, Stanford University School of Medicine.","Sudarshan Sridharan is the Founder at Pipeline. They are based in San Francisco Bay, San Francisco Bay Area, United States of America. Pipeline focuses on: Get more leads, close more deals. Pipeline sets-up and manages the outbound sales stack for early stage B2B startups. Automate your sales.... Key skills include: Investing. Educational background includes: Clemson University. Past work includes roles such as: Pipeline, Fion Technologies, SaveMAPS, Second Reality Interactive, INC., Peak 15 Capital."
3,Finn Mallery,"Finn Mallery, currently working as Co-Founder at Origami Agents (YC F24). They are based in Palo Alto, California, United States of America. Origami Agents (YC F24) specializes in nan. Sub-title: Co-Founder @ Origami Agents (YC F24). Summary: nan. Current Job Description: nan. Their educational background includes: Stanford University, Stanford University School of Engineering, Z Fellows, Canyon Crest Academy. They have worked in roles such as: Origami Agents (YC F24), Fizz, StartX., Stanford Marketing.","Sudarshan Sridharan is the Founder at Pipeline. They are based in San Francisco Bay, San Francisco Bay Area, United States of America. Pipeline focuses on: Get more leads, close more deals. Pipeline sets-up and manages the outbound sales stack for early stage B2B startups. Automate your sales.... Key skills include: Investing. Educational background includes: Clemson University. Past work includes roles such as: Pipeline, Fion Technologies, SaveMAPS, Second Reality Interactive, INC., Peak 15 Capital."
4,Arvind V.,"Arvind V., currently working as Co-Founder and CEO at Fresco (YC F24). They are based in San Francisco, California, United States of America. Fresco (YC F24) specializes in nan. Sub-title: Building Fresco (YC F24) | Acquired Founder. Summary: I’m currently building Fresco (YC F24) an AI copilot for construction superintendents. \n\nPreviously, I was Chief of Staff and Director of Partnerships at January AI, a precision medicine company based in the Bay Area. I have my BA and MS from Brown, and I've also worked at NASA (SynBio), Human Capital (VC), and Vault Health (PM).\n\nI enjoyed an exit from my political tech startup, DownBallot Solutions, in 2022. I also make investments in early-stage technology companies, with a focus on consumer software and digital health. I'm always happy to chat about science or entrepreneurship!. Current Job Description: nan. Their skills include: B2B sales, Financial Modeling, Strategic Partnerships, Crispr, Report Writing, Grant Writing, Cell Culture, Stem Cell Research, Synthetic Biology, Laboratory Management, Due Diligence, Investments, Early-stage Startups, Communication, Funnel Optimization, Agile Methodologies, Scrum, PRD, Product Management, Strategy, Project Management, Team Management, Public Speaking, Writing, Data Analysis, Web Design, Leadership, Research, SPSS, CRISPR, Python, Microsoft Office. Their educational background includes: Brown University, Brown University, Edina Senior High School. They have worked in roles such as: Fresco (YC F24), January AI, Vault Health, Downballot Solutions, Human Capital, NASA - National Aeronautics and Space Administration, University of Minnesota-Twin Cities, University of Minnesota Medical Center.","Sudarshan Sridharan is the Founder at Pipeline. They are based in San Francisco Bay, San Francisco Bay Area, United States of America. Pipeline focuses on: Get more leads, close more deals. Pipeline sets-up and manages the outbound sales stack for early stage B2B startups. Automate your sales.... Key skills include: Investing. Educational background includes: Clemson University. Past work includes roles such as: Pipeline, Fion Technologies, SaveMAPS, Second Reality Interactive, INC., Peak 15 Capital."


In [25]:
# Initialize Anthropic client
# Initialize Anthropic client
client = anthropic.Anthropic(api_key=ANTHROPIC_API_KEY)



# Define the instruction prompt (system prompt)
system_prompt = """
You are an advanced AI tasked with generating LinkedIn connection notes. Your goal is to create a natural-sounding, personalized invitation note based on the following information:
1. The person initiating the connection request (advertising_paragraph).
2. The person being contacted (descriptive_paragraph).

The connection note must:
- Be concise, with a maximum of 300 characters (hard stop).
- Sound natural, mimicking human language with sentence variations and occasional non-optimal structuring.
- Prioritize shared experiences, such as attending the same university or working at the same place, to create a sense of familiarity.
- Avoid overly formal language; use a friendly, conversational tone with varied sentence lengths.
- Clearly convey the intent to connect without sounding like a sales pitch.
- Include a personalized touch by referencing relevant information from both profiles.

The note should NOT:
- Mention unrelated details or include generic, overused phrases.
- Be robotic or overly polished; embrace a conversational style.

Write the connection note in one paragraph, and ensure it adheres to the character limit and tone requirements.
"""

# Function to generate a LinkedIn connection note using Claude
def generate_linkedin_request(advertising_paragraph, descriptive_paragraph):
    # Format the user content
    user_message = f"""
Person initiating the request:
{advertising_paragraph}

Person being contacted:
{descriptive_paragraph}

Generate LinkedIn request.
"""
    try:
        # Send the message to Claude
        response = client.messages.create(
            model="claude-3-5-sonnet-20241022",
            max_tokens=300,
            temperature=1,
            system=system_prompt,
            messages=[
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "text",
                            "text": user_message
                        }
                    ]
                }
            ]
        )
        # Return the generated content
        return response.content[0].text.strip()
    except Exception as e:
        return f"Error: {str(e)}"

# Apply the function to generate LinkedIn requests for each pairing
paired_paragraphs['linkedin_request'] = paired_paragraphs.apply(
    lambda row: generate_linkedin_request(row['advertising_paragraph'], row['descriptive_paragraph']),
    axis=1
)

#

In [26]:
paired_paragraphs

Unnamed: 0,full_name,descriptive_paragraph,advertising_paragraph,linkedin_request
0,Edward Aryee,"Edward Aryee, currently working as Co-Founder & CTO at SRE.ai (YC F24). They are based in San Francisco Bay, San Francisco Bay Area, United States of America. SRE.ai (YC F24) specializes in nan. Sub-title: Co-Founder & CTO at SRE.ai (YC F24) | ex-Google. Summary: nan. Current Job Description: An AI-powered automation platform for Salesforce development teams.. Their skills include: Programming, Python, Java, JavaScript, Matlab, Computer Science, Public Speaking. Their educational background includes: Drexel University. They have worked in roles such as: SRE.ai (YC F24), Black Angel Group (BAG), Google, Microsoft, StratIS IoT, Web Presence in China, BuLogics, Virtual Pantry, Columbia University.","Sudarshan Sridharan is the Founder at Pipeline. They are based in San Francisco Bay, San Francisco Bay Area, United States of America. Pipeline focuses on: Get more leads, close more deals. Pipeline sets-up and manages the outbound sales stack for early stage B2B startups. Automate your sales.... Key skills include: Investing. Educational background includes: Clemson University. Past work includes roles such as: Pipeline, Fion Technologies, SaveMAPS, Second Reality Interactive, INC., Peak 15 Capital.",Hi Edward! Fellow founder here - noticed you're building SRE.ai through YC. Your background at Google and experience with AI-powered automation caught my eye. Would love to connect and share founder experiences in the Bay Area tech ecosystem.
1,Victor Cheng,"Victor Cheng, currently working as Co-Founder and CEO at vly.ai (YC F24). They are based in San Francisco, California, United States of America. vly.ai (YC F24) specializes in nan. Sub-title: co-founder @ vly.ai (YC F24). Summary: cooking. Current Job Description: The easiest way to build custom full-stack software without code. Backed by Y Combinator. Their skills include: Startups, TypeScript, Git, Express.js, MongoDB, Full-Stack Development, Node.js, Competitive Programming, Next.js, Software Development, React.js, C++, JavaScript, C#, Java, Unity3D, Business, Mathematics, Robotics, Mobile Application Development, Android Studio. Their educational background includes: University of Washington, Skyline High School, Y Combinator. They have worked in roles such as: vly.ai (YC F24), Hatch Coding, Minute Land, Quantum Labs, Autodesk, FRC Team 2976, Spartabots, Self-employed.","Sudarshan Sridharan is the Founder at Pipeline. They are based in San Francisco Bay, San Francisco Bay Area, United States of America. Pipeline focuses on: Get more leads, close more deals. Pipeline sets-up and manages the outbound sales stack for early stage B2B startups. Automate your sales.... Key skills include: Investing. Educational background includes: Clemson University. Past work includes roles such as: Pipeline, Fion Technologies, SaveMAPS, Second Reality Interactive, INC., Peak 15 Capital.","Hi Victor! Fellow founder here - noticed you're building vly.ai through YC. As someone working with early-stage startups at Pipeline, I'd love to connect and share experiences about the founder journey. Your no-code full-stack platform sounds fascinating!"
2,Daryl Budiman,"Daryl Budiman, currently working as Co-Founder & CEO at Andoria (YC F24). They are based in San Francisco Bay, San Francisco Bay Area, United States of America. Andoria (YC F24) specializes in nan. Sub-title: Co-Founder at Andoria (YC F24) | Prev. MultiOn & Stanford. Summary: Imagine if you didn't have to learn any piece of software anymore.. Current Job Description: Backed by YC, among other great investors.. Their skills include: Customer Onboarding, Customer Success, SwiftUI, C++, React.js, Microsoft PowerPoint, HTML, CSS, JavaScript, Python, Presentation Skills, Public Speaking, Adobe Photoshop, Research, ArcGIS, Figma, Leadership, User Interface Design, Computer-Aided Design (CAD), Autodesk Fusion 360. Their educational background includes: Stanford University, Y Combinator. They have worked in roles such as: Andoria (YC F24), MultiOn, Amazon, The Marriage Pact, Evernote, Matcha, Stanford University School of Medicine.","Sudarshan Sridharan is the Founder at Pipeline. They are based in San Francisco Bay, San Francisco Bay Area, United States of America. Pipeline focuses on: Get more leads, close more deals. Pipeline sets-up and manages the outbound sales stack for early stage B2B startups. Automate your sales.... Key skills include: Investing. Educational background includes: Clemson University. Past work includes roles such as: Pipeline, Fion Technologies, SaveMAPS, Second Reality Interactive, INC., Peak 15 Capital.","Hi Daryl! Fellow Bay Area founder here - love what you're building at Andoria. As another YC founder working on workflow automation tools, I'd love to connect and exchange learnings from our founder journeys. \n\nNote: This connection note (191 characters) is personal, mentioning shared traits (Bay Area, YC, founders) and shows genuine interest in their work while maintaining a casual, natural tone."
3,Finn Mallery,"Finn Mallery, currently working as Co-Founder at Origami Agents (YC F24). They are based in Palo Alto, California, United States of America. Origami Agents (YC F24) specializes in nan. Sub-title: Co-Founder @ Origami Agents (YC F24). Summary: nan. Current Job Description: nan. Their educational background includes: Stanford University, Stanford University School of Engineering, Z Fellows, Canyon Crest Academy. They have worked in roles such as: Origami Agents (YC F24), Fizz, StartX., Stanford Marketing.","Sudarshan Sridharan is the Founder at Pipeline. They are based in San Francisco Bay, San Francisco Bay Area, United States of America. Pipeline focuses on: Get more leads, close more deals. Pipeline sets-up and manages the outbound sales stack for early stage B2B startups. Automate your sales.... Key skills include: Investing. Educational background includes: Clemson University. Past work includes roles such as: Pipeline, Fion Technologies, SaveMAPS, Second Reality Interactive, INC., Peak 15 Capital.","Hi Finn! Fellow startup founder here. Noticed you're working on Origami Agents - love seeing other YC founders in the B2B space. As someone who's built sales tools, I'd love to connect and exchange insights about the startup journey. Would you be open to connecting?"
4,Arvind V.,"Arvind V., currently working as Co-Founder and CEO at Fresco (YC F24). They are based in San Francisco, California, United States of America. Fresco (YC F24) specializes in nan. Sub-title: Building Fresco (YC F24) | Acquired Founder. Summary: I’m currently building Fresco (YC F24) an AI copilot for construction superintendents. \n\nPreviously, I was Chief of Staff and Director of Partnerships at January AI, a precision medicine company based in the Bay Area. I have my BA and MS from Brown, and I've also worked at NASA (SynBio), Human Capital (VC), and Vault Health (PM).\n\nI enjoyed an exit from my political tech startup, DownBallot Solutions, in 2022. I also make investments in early-stage technology companies, with a focus on consumer software and digital health. I'm always happy to chat about science or entrepreneurship!. Current Job Description: nan. Their skills include: B2B sales, Financial Modeling, Strategic Partnerships, Crispr, Report Writing, Grant Writing, Cell Culture, Stem Cell Research, Synthetic Biology, Laboratory Management, Due Diligence, Investments, Early-stage Startups, Communication, Funnel Optimization, Agile Methodologies, Scrum, PRD, Product Management, Strategy, Project Management, Team Management, Public Speaking, Writing, Data Analysis, Web Design, Leadership, Research, SPSS, CRISPR, Python, Microsoft Office. Their educational background includes: Brown University, Brown University, Edina Senior High School. They have worked in roles such as: Fresco (YC F24), January AI, Vault Health, Downballot Solutions, Human Capital, NASA - National Aeronautics and Space Administration, University of Minnesota-Twin Cities, University of Minnesota Medical Center.","Sudarshan Sridharan is the Founder at Pipeline. They are based in San Francisco Bay, San Francisco Bay Area, United States of America. Pipeline focuses on: Get more leads, close more deals. Pipeline sets-up and manages the outbound sales stack for early stage B2B startups. Automate your sales.... Key skills include: Investing. Educational background includes: Clemson University. Past work includes roles such as: Pipeline, Fion Technologies, SaveMAPS, Second Reality Interactive, INC., Peak 15 Capital.","Hi Arvind! Fellow founder here - noticed you're building Fresco (YC F24). Really impressed by your journey from DownBallot's exit to YC. As someone also working with early-stage B2B startups, would love to connect and share experiences in the startup ecosystem."
