## Create a Founder object

Wrapping each record in a `Founder` object makes it easier to handle the top few founders and feed their information to the LLM.

In [1]:
import math

class Founder:
    """Plain data holder for one founder and the startup they founded.

    Every constructor argument is stored unchanged on the instance; the
    ``get_*`` methods return those values, with ``get_is_success`` and
    ``get_age`` converting them to display text.
    """

    def __init__(self, id, is_success, name, gender = 'None', age = 0.0, linkedin_url = '',
                 self_description = '', education_backgrounds = '', employment_backgrounds = '',
                 org_name = '', long_description = '',
                 category_list = '', category_groups_list = '', country_code = '', city = ''):
        """Store the founder's attributes.

        Args:
            id: Identifier of the founder record.
            is_success: Truthy when the founder's startup is labelled successful.
            name: Founder's name.
            gender: Founder's gender (defaults to the string ``'None'``).
            age: Founder's age; ``None`` or NaN means the age is missing.
            linkedin_url: LinkedIn profile URL.
            self_description: Free-text description of the founder.
            education_backgrounds: Education history as stored in the data source.
            employment_backgrounds: Employment history as stored in the data source.
            org_name: Name of the founder's startup.
            long_description: Description of the startup.
            category_list: Startup categories.
            category_groups_list: Startup category groups.
            country_code: Country code of the startup.
            city: City of the startup.
        """
        self.id = id
        self.is_success = is_success
        self.name = name
        self.gender = gender
        self.age = age
        self.linkedin_url = linkedin_url
        self.self_description = self_description
        self.education_backgrounds = education_backgrounds
        self.employment_backgrounds = employment_backgrounds
        self.org_name = org_name
        self.long_description = long_description
        self.category_list = category_list
        self.category_groups_list = category_groups_list
        self.country_code = country_code
        self.city = city

        # Not supplied by the constructor; stays a placeholder unless set later.
        self.startup_start_date = "[unknown]"

    def get_id(self):
        """Return the founder's identifier."""
        return self.id

    def get_is_success(self):
        """Return "Successful" when ``is_success`` is truthy, otherwise "Failed"."""
        if self.is_success:
            return "Successful"
        return "Failed"

    def get_name(self):
        """Return the founder's name."""
        return self.name

    def get_gender(self):
        """Return the founder's gender."""
        return self.gender

    def get_age(self):
        """Return the age as text, or "Not Available" when it is missing.

        ``None`` and NaN both count as missing. A non-numeric age (for
        example a string) is returned via ``str`` instead of raising.
        """
        age = self.age
        if age is None:
            return "Not Available"
        try:
            if math.isnan(age):
                return "Not Available"
        except TypeError:
            # math.isnan rejects non-numeric values; such a value cannot be NaN.
            pass
        return str(age)

    def get_linkedin_url(self):
        """Return the LinkedIn profile URL."""
        return self.linkedin_url

    def get_self_description(self):
        """Return the founder's self description."""
        return self.self_description

    def get_education_backgrounds(self):
        """Return the stored education history."""
        return self.education_backgrounds

    def get_employment_backgrounds(self):
        """Return the stored employment history."""
        return self.employment_backgrounds

    def get_org_name(self):
        """Return the startup's name."""
        return self.org_name

    def get_long_description(self):
        """Return the startup's description."""
        return self.long_description

    def get_startup_start_date(self):
        """Return the startup start date ("[unknown]" unless set after construction)."""
        return self.startup_start_date

    def get_category_list(self):
        """Return the startup's categories."""
        return self.category_list

    def get_category_groups_list(self):
        """Return the startup's category groups."""
        return self.category_groups_list

    def get_country_code(self):
        """Return the startup's country code."""
        return self.country_code

    def get_city(self):
        """Return the startup's city."""
        return self.city



## Create a list of founder profiles that are top n similar Founder objects

In [2]:
# Create a list of founder profiles that are top n similar Founder objects
import pandas as pd

def extract_top_n_founders(filepath, n, random_state = None):
    """Read a founder CSV and return up to ``n`` randomly sampled Founder objects.

    Despite the name, the rows are drawn at random with ``DataFrame.sample``;
    when the file holds exactly ``n`` rows this simply returns all of them in
    shuffled order.

    Args:
        filepath: Path of the CSV file. It must provide the columns ``ID``,
            ``isSuccess``, ``Name``, ``Gender``, ``Age``, ``linkedin_url``,
            ``Self-Description``, ``Education Backgrounds``,
            ``Employment Backgrounds``, ``org_name``, ``long_description``,
            ``category_list``, ``category_groups_list``, ``country_code``
            and ``city``.
        n: Number of founders wanted. If the file has fewer rows, every row
            is returned instead of raising.
        random_state: Optional seed forwarded to ``DataFrame.sample`` so the
            selection can be reproduced. ``None`` keeps the draw unseeded.

    Returns:
        A list of Founder objects, one per sampled row.
    """
    df = pd.read_csv(filepath)

    # DataFrame.sample raises ValueError when asked for more rows than exist
    # (sampling without replacement), so cap the request at the row count.
    sample_size = min(n, len(df))
    random_sample = df.sample(n=sample_size, random_state=random_state)

    founder_profiles = []
    for _, row in random_sample.iterrows():
        founder_profiles.append(
            Founder(
                id = row['ID'],
                is_success = bool(row['isSuccess']),
                name = row['Name'],
                gender = row['Gender'],
                age = row['Age'],
                linkedin_url = row['linkedin_url'],
                self_description = row['Self-Description'],
                education_backgrounds = row['Education Backgrounds'],
                employment_backgrounds = row['Employment Backgrounds'],
                org_name = row['org_name'],
                long_description = row['long_description'],
                category_list = row['category_list'],
                category_groups_list = row['category_groups_list'],
                country_code = row['country_code'],
                city = row['city'],
            )
        )

    return founder_profiles



In [65]:
# Example usage: sample five founders and list each name with its startup outcome
founder_profiles = extract_top_n_founders('Top Similar Founders.csv', 5)
print(list(map(lambda founder: (founder.get_name(), founder.get_is_success()), founder_profiles)))

[('Peter Abrams', 'Failed'), ('Alejandro Chouza', 'Successful'), ('Robert Reffkin', 'Successful'), ('Felix Jahn', 'Successful'), ('Wendi Burkhardt', 'Failed')]


## Asking LLM: GPT-4 for founder profile Analysis

This section assumes we already have `founder_profiles`, the list of `Founder` objects that we want to pass to GPT-4.

In [75]:
# Prompt Generation
import ast

def education_details(education_list):
    """Format a founder's education history as indented bullet lines.

    Args:
        education_list: String holding a Python literal list whose entries
            are indexable as ``(school, degree, major)``. The value "N/A" in
            any position is rendered as "[unknown]".

    Returns:
        One line per entry, for at most the first three entries, each ending
        in a newline. If the input cannot be parsed, is not a list or tuple,
        or is empty, returns "\t - No Education Records" (no trailing newline).
    """
    no_records = "\t - No Education Records"

    try:
        education_list = ast.literal_eval(education_list)
    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
        # The exceptions literal_eval documents for malformed input; this also
        # covers non-string input such as a NaN from an empty CSV cell.
        return no_records

    # A literal that parses but is not a sequence of entries (e.g. "5") has no records.
    if not isinstance(education_list, (list, tuple)) or not education_list:
        return no_records

    def known(value):
        # "N/A" marks a missing field in the source data.
        return value if value != "N/A" else "[unknown]"

    description = ""
    for education in education_list[:3]:  # keep only the first three entries
        school, degree, major = education[0], education[1], education[2]
        description += f"\t - {known(degree)} in {known(major)} from {known(school)}" + "\n"

    return description


def employment_details(employment_list, org_name):
    """Format a founder's past jobs as indented bullet lines.

    Args:
        employment_list: String holding a Python literal list whose entries
            are indexable as ``(company, roles, duration, start_time,
            is_current)``, where ``roles`` is a list of strings and
            ``duration`` is a number.
        org_name: Name of the founder's current startup. Jobs whose company
            name contains it are skipped. An empty or non-string value
            disables this filter.

    Returns:
        One line per job, for at most three jobs that are neither current nor
        at ``org_name``; this may be the empty string when every job is
        filtered out. If the input cannot be parsed, is not a list or tuple,
        or is empty, returns "\t - No Employment Records".
    """
    no_records = "\t - No Employment Records"

    try:
        employment_list = ast.literal_eval(employment_list)
    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
        # The exceptions literal_eval documents for malformed input; this also
        # covers non-string input such as None or NaN.
        return no_records

    if not isinstance(employment_list, (list, tuple)) or not employment_list:
        return no_records

    # An empty string is a substring of every company name, which would drop
    # every job; a NaN org_name would make the `in` test raise TypeError.
    filter_by_org = isinstance(org_name, str) and org_name != ""

    lines = []
    for employment in employment_list:
        company = employment[0]
        roles = ", ".join(employment[1])
        duration = str(round(employment[2], 2))
        start_time = employment[3]
        is_current = employment[4]

        if is_current or (filter_by_org and org_name in company):
            continue

        lines.append(f"\t - Worked in {company} for {duration} years, starting from {start_time}. His roles in the company include {roles}.\n")
        if len(lines) >= 3:  # keep at most three past jobs
            break

    return "".join(lines)

def extract_founder_details(profile, index = 0, hide_success = False):
    """Render one founder profile as the text block used inside the prompts.

    Args:
        profile: Object exposing the ``get_name``, ``get_is_success``,
            ``get_age``, ``get_self_description``,
            ``get_education_backgrounds``, ``get_employment_backgrounds``,
            ``get_org_name`` and ``get_long_description`` accessors.
        index: Zero-based position of the founder; shown one-based in the
            "Founder N:" heading.
        hide_success: When true, the "Startup Status" line is left out.

    Returns:
        A multi-line string ending with a blank line.
    """
    def as_text(value):
        # A missing value (None, or the float NaN left by an empty CSV cell)
        # cannot be concatenated to a str; show a placeholder instead.
        if value is None or (isinstance(value, float) and value != value):
            return "[unknown]"
        return str(value)

    org_name = profile.get_org_name()

    parts = [
        f"Founder {index+1}: \n",
        " - Name: " + as_text(profile.get_name()) + "\n",
    ]
    if not hide_success:
        parts.append(" - Startup Status: " + as_text(profile.get_is_success()) + "\n")
    parts += [
        " - Age: " + as_text(profile.get_age()) + "\n",
        " - Self Description: " + as_text(profile.get_self_description()) + "\n",
        " - Education Backgrounds:\n" + education_details(profile.get_education_backgrounds()) + "\n",
        # No space after the newline, so the first job line is indented like the others.
        " - Employment Backgrounds:\n" + employment_details(profile.get_employment_backgrounds(), org_name) + "\n",
        " - Startup Name: " + as_text(org_name) + "\n",
        " - Startup Idea: " + as_text(profile.get_long_description()) + "\n",
        "\n",
    ]
    return "".join(parts)

def generate_prompt_for_rationale(founder_profiles):
    """Assemble the rationale prompt: a fixed introduction, one detail block
    per founder, then the request text read from
    ``prompts/rationale_request_body.txt``.
    """
    profile_count = len(founder_profiles)

    header = (
        "I am analyzing the profiles of several startup founders to understand "
        "the key factors contributing to their success or failure. "
        f"Below are the profiles of {profile_count} founders:\n\n"
    )

    # One detail block per founder, numbered in list order.
    body = "".join(
        extract_founder_details(profile, position)
        for position, profile in enumerate(founder_profiles)
    )

    with open("prompts/rationale_request_body.txt", "r") as template_file:
        closing_request = template_file.read()

    return header + body + closing_request
    

In [76]:
# Example usage: build the rationale prompt from the `founder_profiles` list and display it
prompt = generate_prompt_for_rationale(founder_profiles)
print(prompt)

I am analyzing the profiles of several startup founders to understand the key factors contributing to their success or failure. Below are the profiles of 5 founders:

Founder 1: 
 - Name: Peter Abrams
 - Startup Status: Failed
 - Age: 31.0
 - Self Description: Peter Abrams is based in Miami, Florida, USA. He has worked as Sales & Business Development Manager at PayJoy, Road Manager & Social Media Director at Sixth Floor Trio, and Co-Founder/Chief Talent Officer at Home61. Peter works or has worked as Founder, VP of Sales at Home61; Sales at Carbon Emery Rv; and Content Manager at Sportle. He studied at Stanford University between 2008 and 2014.
 - Education Backgrounds:
	 - Bachelor's (4 year program) in Urban Studies from Stanford University

 - Employment Backgrounds:
 	 - Worked in PayJoy for 2.0 years, starting from 01/01/2015. His roles in the company include Manager, Management, Business.
	 - Worked in Güero for 1.67 years, starting from 01/01/2014. His roles in the company inclu

In [46]:
# Now passing the prompt to GPT-4
from openai import OpenAI

import os

# Never commit an API key to a notebook: read it from the environment instead.
# If the variable is unset this is None; the OpenAI client then performs its
# own OPENAI_API_KEY lookup and raises when no key is available.
# NOTE(review): the key that used to be hard-coded here must be treated as leaked and revoked.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")

def get_rationale(prompt, model = "gpt-4", max_tokens = 500):
    """Send ``prompt`` to the chat model and return its success/failure rationale.

    Args:
        prompt: User message to send. It is used as given; it is no longer
            rebuilt from the global ``founder_profiles``, which silently
            ignored the caller's argument.
        model: Chat model name.
        max_tokens: Upper bound on reply length; raise it if replies are cut off.

    Returns:
        The text content of the first choice in the model's response.
    """
    client = OpenAI(api_key = OPENAI_API_KEY)

    system_content = ("You are an experienced venture capital analyst who is skilled at "
                      "synthesizing a rationale for startup success or failure based on founder data.")

    response = client.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": system_content},
            {"role": "user", "content": prompt}
        ],
        max_tokens=max_tokens
    )

    return response.choices[0].message.content

# Ask the model for the rationale behind the founders' outcomes and display the reply
rationale = get_rationale(prompt)
print(rationale)

Success Rationale:
- Successful founders often have extensive work experience in relevant fields before starting their startups. For instance, Alejandro Chouza has experience in management at Uber and OYO USA before co-founding Arrived Homes. Likewise, Felix Jahn was an active entrepreneur and business angel with vast experience and substantial success before founding McMakler. Similarly, Robert Reffkin had an impressive professional background with stints at McKinsey and Goldman Sachs before founding Compass. 
- Success seems correlated with possessing advanced degrees from reputable institutions. Alejandro Chouza and Robert Reffkin both had Master's degrees, while Felix Jahn had a specialized degree in capital market theory, entrepreneurship, operations & logistics.
- The successful entrepreneurs also tended to focus their efforts on innovative business ideas that disrupt traditional markets, such as real estate in the case of Arrived Homes and Compass, or property services in the ca

## Generating Score and Pros and Cons List

### Firstly, given the rationale, I will need to extract the key factors in it

These factors, for success and for failure, will act as a guideline for the scoring of the founder.

In [47]:
from openai import OpenAI

import os

# Never commit an API key to a notebook: read it from the environment instead.
# If the variable is unset this is None; the OpenAI client then performs its
# own OPENAI_API_KEY lookup and raises when no key is available.
# NOTE(review): the key that used to be hard-coded here must be treated as leaked and revoked.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")

def request_key_factors(rationale, model = "gpt-4", max_tokens = 500):
    """Ask the chat model to distil a rationale into key success/failure factors.

    The prompt is the first line of ``prompts/scoring_factor_prompt.txt``,
    then the rationale, then the rest of that file.

    Args:
        rationale: Rationale text to extract factors from.
        model: Chat model name.
        max_tokens: Upper bound on reply length.

    Returns:
        The model's reply text with surrounding whitespace stripped.
    """
    client = OpenAI(api_key = OPENAI_API_KEY)

    # The template's first line introduces the rationale; the remainder is the request.
    with open("prompts/scoring_factor_prompt.txt", "r") as f:
        introduction = f.readline()
        request = f.read()

    prompt = introduction + "\n" + rationale + "\n" + request

    # "key factors" (was the typo "key founders").
    system_content = "You are an experienced venture capital analyst who is skilled at extracting " +\
                     "key factors for the success and failure of startup founders."

    response = client.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": system_content},
            {"role": "user", "content": prompt}
        ],
        max_tokens=max_tokens
    )

    return response.choices[0].message.content.strip()

def extract_key_factors(text_response):
    """Split a model reply into success-factor and failure-factor lines.

    Each line is stripped of surrounding whitespace first; lines that then
    start with "Success" go to the first list, lines starting with "Failure"
    go to the second, and all other lines are ignored.

    Args:
        text_response: Reply text containing one factor per line.

    Returns:
        A ``(success_factors, failure_factors)`` tuple of lists of stripped lines.
    """
    success_factors = []
    failure_factors = []

    for raw_line in text_response.split('\n'):
        # Strip before testing the prefix so indented lines (or ones ending in
        # a carriage return) are still recognised.
        line = raw_line.strip()
        if line.startswith('Success'):
            success_factors.append(line)
        elif line.startswith('Failure'):
            failure_factors.append(line)

    return success_factors, failure_factors


In [48]:
# Example usage: request the key factors, then split them into the two lists
text_response = request_key_factors(rationale)
success_factors, failure_factors = extract_key_factors(text_response)

# Show both lists
print(f"Success Factors: {success_factors}")
print(f"Failure Factors: {failure_factors}")

Success Factors: ["Success 1) Significant Relevant Experience: Founders have demonstrated relevant work experience in their startup's industry before launching their venture. This experience is often in leadership or management roles.", "Success 2) Advanced Educational Degrees: Founders usually possess advanced degrees from prestigious institutions, related or beneficial to their startup's field.", 'Success 3) Focused Entrepreneurial Track Record: Founders often have a history of persisting in entrepreneurship, either as successful entrepreneurs themselves or playing key roles in successful startups.', 'Success 4) Innovative Business Ideas: Founders typically embark on innovative projects that disrupt traditional markets, showing a clear understanding and unique vision for their market.', 'Success 5) Demonstrated Mastery Skills: Founders show high level skills or achievements, such as fluency in multiple languages, sports leadership, or competition success, implying a high level of ded

## Score Evaluation

Ask for individual scores or relevance across all factors

In [77]:
# Load the single founder to be scored from the input file
(founder,) = extract_top_n_founders('Input Founder Features.csv', 1)
print(f"{founder.get_name()} {founder.get_is_success()}")

Olivier Brion Failed


In [89]:
from openai import OpenAI
from tenacity import (
    retry,
    stop_after_attempt,
    wait_random_exponential,
)  

import os

# Never commit an API key to a notebook: read it from the environment instead.
# If the variable is unset this is None; the OpenAI client then performs its
# own OPENAI_API_KEY lookup and raises when no key is available.
# NOTE(review): the key that used to be hard-coded here must be treated as leaked and revoked.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")

@retry(stop=stop_after_attempt(6), wait=wait_random_exponential(min=1, max=60))
def completion_with_backoff(messages = [], model = 'gpt-4', max_tokens = 100):
    """Call the chat completions endpoint and return the raw response.

    The ``retry`` decorator re-invokes the call on failure, stopping after
    six attempts and waiting a randomised exponential interval (1 to 60
    seconds) between them.
    """
    request_arguments = {
        "model": model,
        "messages": messages,
        "max_tokens": max_tokens,
    }
    return OpenAI(api_key = OPENAI_API_KEY).chat.completions.create(**request_arguments)

def _parse_score(text_response):
    """Pull the numeric score out of a model reply such as "Score: 7"."""
    import re

    tokens = text_response.split()
    # Expected reply shape: the score is the second whitespace-separated token.
    if len(tokens) > 1:
        try:
            return float(tokens[1])
        except ValueError:
            pass

    # Fallback for replies such as "7" or "Score: 7/10": take the first number.
    match = re.search(r"-?\d+(?:\.\d+)?", text_response)
    if match is None:
        raise ValueError(f"No numeric score found in model reply: {text_response!r}")
    return float(match.group())


def score_founder_profile(founder, success_factors, failure_factors):
    """Score a founder against each success and failure factor using the chat model.

    The founder's details (with the outcome hidden) are sent once; each factor
    is then scored in a separate request that replays that first exchange.
    Prompts and replies are printed as they are produced.

    Args:
        founder: Profile object accepted by ``extract_founder_details``.
        success_factors: Factor descriptions signalling success.
        failure_factors: Factor descriptions signalling failure.

    Returns:
        ``(success_scores, failure_scores, overall_score)``. The first two are
        lists of floats, one per factor. ``overall_score`` is
        ``sum(success) / (sum(success) + sum(failure)) * 10``, or 0 when that
        denominator is not positive.

    Raises:
        ValueError: If a reply contains no number to use as a score.
    """
    #=============Initialization=============
    founder_detail = extract_founder_details(founder, hide_success = True)
    # Trailing space keeps "evaluating" and "whether" from being glued together.
    system_content = "You are an experienced venture capital analyst who is skilled at evaluating " +\
                     "whether a startup founder would be successful or not."

    # Template layout: line 1 = introduction, line 2 = request, remainder = output format.
    with open("prompts/scoring_generation_prompt.txt", "r") as f:
        introduction = f.readline()
        request = f.readline()
        output_condition = f.read()

    # Initial call that hands the founder's details to the model
    prompt = introduction + "\n" + founder_detail + request
    print(prompt, "\n")
    initial_messages = [{"role": "system", "content": system_content}, {"role": "user", "content": prompt}]
    response = completion_with_backoff(initial_messages)
    text_response = response.choices[0].message.content.strip()
    initial_messages.append({"role": "assistant", "content": text_response})
    print(text_response)
    print()

    #============= Prompt for scoring a single factor ============
    def create_scoring_prompt(factor, factor_type):
        # Leading space before "Score" keeps the two sentences separated.
        introduction = f"Below is a factor of {factor_type}. Give a score of alignment between the factor and the founder details previously given." +\
                        " Score between 0 to 10, where 10 means a perfect match and 0 means no match. You should be very stringent, and would not usually give a score higher than 6 for success factor, or a score lower than 4 for failure factor, unless clear evidence suggest otherwise.\n Factor:\n"

        prompt = introduction + factor + "\n" + output_condition
        print(factor)
        print()
        return prompt

    #============= Score every factor of one type ============
    def score_factors(factors, factor_type):
        scores = []
        for factor in factors:
            factor_prompt = create_scoring_prompt(factor, factor_type)
            # Each factor is scored independently on top of the initial exchange.
            messages = initial_messages + [{"role": "user", "content": factor_prompt}]
            factor_response = completion_with_backoff(messages)
            reply = factor_response.choices[0].message.content.strip()
            print(reply)
            print()
            scores.append(_parse_score(reply))
        return scores

    success_scores = score_factors(success_factors, 'success')
    failure_scores = score_factors(failure_factors, 'failure')

    # Overall score: share of the success total in the combined total, scaled to 0-10
    total_succ_score = sum(success_scores)
    total_fail_score = sum(failure_scores)
    if total_succ_score + total_fail_score > 0:
        overall_score = total_succ_score / (total_succ_score + total_fail_score) * 10
    else:
        overall_score = 0

    return success_scores, failure_scores, overall_score


In [90]:
# Example usage
# `founder` is a Founder object; `success_factors` and `failure_factors` are lists of factor strings
success_scores, failure_scores, overall_score = score_founder_profile(founder, success_factors, failure_factors)
print(success_scores, failure_scores, overall_score, sep="\n")

You are given the profile details of a new startup founder, as well as a list of factors that signals successful or unsuccessful founders. Your task is to give this founder a relevance score on each of the factors given. The founder's profile details is given as such:

Founder 1: 
 - Name: Olivier Brion
 - Age: 48.0
 - Self Description: 15 years of experience in the Information Technology sector. Business and Internet entrepreneur with strong managerial and leadership skills. Online marketplaces, International sales & marketing, business operations, strategy and development specialist. Real Estate, Travel and Leisure.HEC: Executive MBA (CPA) Ranked #1 in the Financial Times European Business School rankings since 2006- Co-founder Home61 Inc : Real Estate Online Brokerage- Lofty / Roomorama (CEO / COO): Online Global Marketplace for Vacation rentals and short term accommodations. Over 100,000 properties in 4,500 destinations.- Previsite (CEO): SaaS Marketing Solutions for real estate in

## Generate pros and cons list

1. Analyze Scores and Factors:
Review Scores: Look at the individual scores assigned to each success and failure factor for the founder's profile.
Identify Strengths and Weaknesses: High scores in success factors and low scores in failure factors indicate strengths (Pros), while low scores in success factors and high scores in failure factors indicate weaknesses (Cons).
2. Construct the Pros and Cons List:
Pros (Strengths):
Include factors where the founder scored high in success factors.
Highlight attributes or aspects of the founder's profile that align well with the identified keys to success.
Cons (Weaknesses):
Include factors where the founder scored high in failure factors.
Point out areas where the founder's profile lacks or diverges from the success criteria.

In [91]:
from openai import OpenAI

import os

# Never commit an API key to a notebook: read it from the environment instead.
# If the variable is unset this is None; the OpenAI client then performs its
# own OPENAI_API_KEY lookup and raises when no key is available.
# NOTE(review): the key that used to be hard-coded here must be treated as leaked and revoked.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")

def generate_pros_and_cons(founder, success_factors, failure_factors, success_scores, 
                           failure_scores, threshold = 5):
    """Ask the chat model for a pros-and-cons list based on the scored factors.

    Args:
        founder: Profile object accepted by ``extract_founder_details``.
        success_factors: Success factor descriptions.
        failure_factors: Failure factor descriptions.
        success_scores: Scores paired by position with ``success_factors``.
        failure_scores: Scores paired by position with ``failure_factors``.
        threshold: Only factors scoring strictly above this are listed,
            unless fewer than three qualify, in which case all are listed.

    Returns:
        The model's reply text with surrounding whitespace stripped.
    """
    founder_detail = extract_founder_details(founder, hide_success = True)
    # Trailing space keeps "evaluating" and "whether" from being glued together.
    system_content = "You are an experienced venture capital analyst who is skilled at evaluating " +\
                     "whether a startup founder would be successful or not."

    # Template layout: line 1 = introduction, remainder = request.
    with open("prompts/pros_and_cons_prompt.txt", "r") as f:
        introduction = f.readline()
        request = f.read()

    def summarize_factors_prompt(factor_list, score_list):
        # Pair each factor with its score once, so filtering can never misalign them.
        scored = list(zip(factor_list, score_list))
        relevant = [(factor, score) for factor, score in scored if score > threshold]
        if len(relevant) < 3:
            # Too few factors clear the threshold: fall back to listing all of them.
            relevant = scored
        return "\n".join(f"- {factor} [Score: {score} / 10]" for factor, score in relevant)

    prompt = introduction + "\nFounder Details: " + founder_detail
    pros = summarize_factors_prompt(success_factors, success_scores)
    cons = summarize_factors_prompt(failure_factors, failure_scores)
    prompt += "Success Factors:\n" + pros
    prompt += "\n\nFailure Factors:\n" + cons
    prompt += "\n" + request

    messages = [{"role": "system", "content": system_content}, {"role": "user", "content": prompt}]
    response = completion_with_backoff(messages = messages, max_tokens = 400)
    return response.choices[0].message.content.strip()


In [92]:
# Example usage: request the pros-and-cons summary and display it.
# `founder` is a Founder object; the factor lists and their score lists must be paired by position.
response = generate_pros_and_cons(founder, success_factors, failure_factors, success_scores, failure_scores)
print(response)

Pros:
1. Significant Relevant Experience: Olivier has worked in multiple leadership roles in technology and real estate sectors, crucial to his business model for Home61. His score of 8.0 signifies a valuable depth of industry knowledge.
2. Advanced Educational Degrees: His EMBA from HEC Paris and Bachelor's in Finance demonstrate high level education that pertains directly to his venture, resulting in a score of 8.0, further bolstering his leadership skills.
3. Focused Entrepreneurial Track Record: As Olivier has already co-founded multiple businesses and held CEO positions, he's shown clear abilities and determination in entrepreneurship, earning him a score of 7.0.

Cons:
1. Operating in Crowded Markets: Home61 is facing stiff competition in the online real-estate space which increases the risk of failure, reflected by a high score of 7.0 in this failure factor.
2. Mismatched Skills: While Olivier has relevant experience, there may be a gap in technical skills specifically related t