In [1]:
import os 

import pandas as pd
import numpy as np

from dotenv import load_dotenv
import openai

COMPANIES_LIST_FOLDER = "data/companies_lists/"

load_dotenv()

client = openai.OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))

In [2]:
def call_openai(system_prompt, prompt, temperature=0.5):
    try:
        result = client.chat.completions.create(model="gpt-4",
        temperature=temperature,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": prompt},
        ])
        return result.choices[0].message.content
    except openai.BadRequestError as e:
        error_msg = f"An error occurred with OpenAI: {e}"
        print(error_msg)
        return error_msg

In [21]:
def get_company_description(company_name, company_url):
    system_prompt = "You are a business analyst and your task is to help me categorise generative AI companies."

    prompt = f"""
    Here are examples of companies and their descriptions:
    Company Name: AI2SQL
    Company Description: Write SQL in seconds. With AI2sql, engineers and non-engineers can easily write efficient, error-free SQL queries without knowing SQL. It's time to take back your time!

    Company Name: ReSpeecher
    Company Description: Voice Cloning for Content Creators. Create speech that's indistinguishable from the original speaker. Perfect for filmmakers, game developers, and other content creators

    Now, I will give you a company name and the URL to their website, I want you to search the company and give me a short description (30 words max) of what they do.

    Company Name: {company_name}
    Company URL: {company_url}
    """

    return call_openai(system_prompt, prompt)

In [None]:
def get_value_chain_layer(company_name, company_description):
    system_prompt = "You are a business analyst and your task is to help me categorise generative AI companies."

    prompt = f"""
    I will give you a company name and a description, and for each I want you to tell me which Value Chain layer it corresponds to.

    The 4 different types of Value Chain Layer are:

    Applications (Vertical) - Applications that serve the specific needs of a particular function, line of business, or industry

    Applications (Horizontal) - Applications that serve a broad range of business functions and users across an organisation or industry.

    AI Development & Tools - Companies that build AI models and supporting tools.

    Infrastructure - The basic systems and services that provide support for generative AI functions.

    Here are a few examples

    # Applications (Vertical)
    Company Name: AI Dungeon
    Company Description:The future of AI-generated games. We're making AI a tool of creativity and freedom for everyone.

    Company Name: Pepper Content
    Company Description: Scale your content needs. Fast. Pepper is the one-stop platform that handles everything from content ideation to delivery. Just enter your requirements and we'll make it happen with our team of 1,00,000+ content creators.

    # AI Development & Tools
    Company Name: AI21 Labs
    Company Description: Your thoughts in words. Say exactly what you mean through clear, compelling and authentic writing.

    Company Name: Gretel
    Company Description: The Developer Stack for Synthetic Data. Power AI, Data Science, Testing, Analytics and More. Synthetic data that's as good, or even better than the data you have. Or don't have. Create and share data with the best-in-class accuracy and privacy guarantees – on demand.

    # Applications (Horizontal)
    Company Name: AI2SQL
    Company Description: Write SQL in seconds. With AI2sql, engineers and non-engineers can easily write efficient, error-free SQL queries without knowing SQL. It's time to take back your time!

    Company Name: ReSpeecher
    Company Description: Voice Cloning for Content Creators. Create speech that's indistinguishable from the original speaker. Perfect for filmmakers, game developers, and other content creators

    Now how would you categorise the following company? Only respond with the category name.

    Company Name: {company_name}
    Company Description: {company_description}
    """

    return call_openai(system_prompt, prompt)

In [20]:
companies_df = pd.read_csv(COMPANIES_LIST_FOLDER + "combined.csv")

companies_df.head()

Unnamed: 0,Company,Funding (est $),Notable Investors,Headcount,URL,Description,Category,Focus,Value Chain Layer,Modality,...,Active?,Founded,HQ,Logo,Founders,Last Round,Valuation,Business Model,Open Source?,Unnamed: 21
0,10Web,4000000.0,"Sierra Ventures, AI Fund",,https://10web.io/,,Code,Website Generation,,,...,,2017.0,Other US,,,,,,,
1,Abridge,27500000.0,"Bessemer Venture Partners, Union Square Ventur...",,https://www.abridge.com/,,Summarization,Healthcare conversation documentation,,,...,,2018.0,Other US,,,,,,,
2,ABtesting.ai,,,,https://abtesting.ai/,,Text,Marketing & A/B Testing,,,...,,2019.0,Europe,,,,,,,
3,Accomplice,520000.0,TinySeed,,https://accomplice.ai,,Image,AI-generated stock photos,,,...,,2021.0,Other US,,,,,,,
4,Ada,190620620.0,"Creative Destruction Lab (CDL), Tiger Global M...",,https://www.ada.cx/,,Chatbot/Conversational AI,Automated Virtual Agents,,,...,,2016.0,North America (excl. US),,,,,,,


In [22]:
# Loop through all rows in the dataframe that have an empty "Description" column
for index, row in companies_df.iterrows():
    if pd.isnull(row["Description"]):
        # Get the company name and URL from the row
        company_name = row["Company"]
        company_url = row["URL"]
        company_description = get_company_description(company_name, company_url)
        companies_df.at[index, "Description"] = company_description

companies_df.head()

Unnamed: 0,Company,Funding (est $),Notable Investors,Headcount,URL,Description,Category,Focus,Value Chain Layer,Modality,...,Active?,Founded,HQ,Logo,Founders,Last Round,Valuation,Business Model,Open Source?,Unnamed: 21
0,10Web,4000000.0,"Sierra Ventures, AI Fund",,https://10web.io/,Company Description: 10Web provides automated ...,Code,Website Generation,,,...,,2017.0,Other US,,,,,,,
1,Abridge,27500000.0,"Bessemer Venture Partners, Union Square Ventur...",,https://www.abridge.com/,Company Name: Abridge\nCompany Description: Ab...,Summarization,Healthcare conversation documentation,,,...,,2018.0,Other US,,,,,,,
2,ABtesting.ai,,,,https://abtesting.ai/,Company Description: ABtesting.ai provides AI-...,Text,Marketing & A/B Testing,,,...,,2019.0,Europe,,,,,,,
3,Accomplice,520000.0,TinySeed,,https://accomplice.ai,Company Description: Accomplice is an AI tool ...,Image,AI-generated stock photos,,,...,,2021.0,Other US,,,,,,,
4,Ada,190620620.0,"Creative Destruction Lab (CDL), Tiger Global M...",,https://www.ada.cx/,Company Description: Ada is a company that pro...,Chatbot/Conversational AI,Automated Virtual Agents,,,...,,2016.0,North America (excl. US),,,,,,,


In [23]:
companies_df[companies_df["Description"].isna()]["Company"].count()

0

In [25]:
# For each row in the dataframe, Remove the string "Company Description:" from the "Description" column
companies_df["Description"] = companies_df["Description"].str.replace("Company Description:", "")
companies_df["Description"] = companies_df["Description"].str.replace("Company Name:", "")
companies_df.head()

Unnamed: 0,Company,Funding (est $),Notable Investors,Headcount,URL,Description,Category,Focus,Value Chain Layer,Modality,...,Active?,Founded,HQ,Logo,Founders,Last Round,Valuation,Business Model,Open Source?,Unnamed: 21
0,10Web,4000000.0,"Sierra Ventures, AI Fund",,https://10web.io/,"10Web provides automated WordPress hosting, w...",Code,Website Generation,,,...,,2017.0,Other US,,,,,,,
1,Abridge,27500000.0,"Bessemer Venture Partners, Union Square Ventur...",,https://www.abridge.com/,Abridge\n Abridge assists individuals in unde...,Summarization,Healthcare conversation documentation,,,...,,2018.0,Other US,,,,,,,
2,ABtesting.ai,,,,https://abtesting.ai/,ABtesting.ai provides AI-powered A/B testing ...,Text,Marketing & A/B Testing,,,...,,2019.0,Europe,,,,,,,
3,Accomplice,520000.0,TinySeed,,https://accomplice.ai,Accomplice is an AI tool that helps in automa...,Image,AI-generated stock photos,,,...,,2021.0,Other US,,,,,,,
4,Ada,190620620.0,"Creative Destruction Lab (CDL), Tiger Global M...",,https://www.ada.cx/,Ada is a company that provides an AI-powered ...,Chatbot/Conversational AI,Automated Virtual Agents,,,...,,2016.0,North America (excl. US),,,,,,,


In [26]:
companies_df[companies_df["Value Chain Layer"].isna()]["Company"].count()

519

In [27]:
# Loop through all rows in the dataframe that have an empty "Value Chain Layer" column
for index, row in companies_df.iterrows():
    if pd.isnull(row["Value Chain Layer"]):
        # Get the company name and description from the row
        company_name = row["Company"]
        company_description = row["Description"]
        company_description = get_value_chain_layer(company_name, company_description)
        companies_df.at[index, "Value Chain Layer"] = company_description

companies_df.head()

Unnamed: 0,Company,Funding (est $),Notable Investors,Headcount,URL,Description,Category,Focus,Value Chain Layer,Modality,...,Active?,Founded,HQ,Logo,Founders,Last Round,Valuation,Business Model,Open Source?,Unnamed: 21
0,10Web,4000000.0,"Sierra Ventures, AI Fund",,https://10web.io/,"10Web provides automated WordPress hosting, w...",Code,Website Generation,Applications (Horizontal),,...,,2017.0,Other US,,,,,,,
1,Abridge,27500000.0,"Bessemer Venture Partners, Union Square Ventur...",,https://www.abridge.com/,Abridge\n Abridge assists individuals in unde...,Summarization,Healthcare conversation documentation,Applications (Vertical),,...,,2018.0,Other US,,,,,,,
2,ABtesting.ai,,,,https://abtesting.ai/,ABtesting.ai provides AI-powered A/B testing ...,Text,Marketing & A/B Testing,Applications (Horizontal),,...,,2019.0,Europe,,,,,,,
3,Accomplice,520000.0,TinySeed,,https://accomplice.ai,Accomplice is an AI tool that helps in automa...,Image,AI-generated stock photos,Applications (Horizontal),,...,,2021.0,Other US,,,,,,,
4,Ada,190620620.0,"Creative Destruction Lab (CDL), Tiger Global M...",,https://www.ada.cx/,Ada is a company that provides an AI-powered ...,Chatbot/Conversational AI,Automated Virtual Agents,Applications (Horizontal),,...,,2016.0,North America (excl. US),,,,,,,


In [28]:
companies_df[companies_df["Value Chain Layer"].isna()]["Company"].count()

0

In [29]:
# Save companies_df in a csv file
companies_df.to_csv(COMPANIES_LIST_FOLDER + "companies.csv", index=False)