In [1]:
import pandas as pd
import numpy as np
from llms import gemini
from llms import chatGPT
import re
import time
from lemmatize import lemmatize
x_chat = chatGPT()
x_gemini = gemini()

In [2]:
df = pd.read_csv("../output/data/data_post_chat_gpt.csv")
df = df[df["headquarters location"].str.contains("United States")]
df = df[pd.to_datetime(df['founded date'])>='11/30/2022']

In [3]:
len(df)

2196

In [4]:
generated_description_prompt = """
Your role is to describe $company ($website)'s product. 
Then, provide a confidence interval on scale on 1-10 on how sure you are about the response. Please be reasonable.

PLEASE FOLLOW THE FORMAT EXACTLY FROM THE EXAMPLES!!!!

EXAMPLES BELOW
************************
Company: Genmo
Website: https://www.genmo.ai/
Description: Genmo is a free tool that creates videos and images using artificial intelligence. Genmo to generate creative text formats of text content, like poems, code, scripts, musical pieces, email, letters, etc.

Company: OnePane
Website: https://www.onepane.ai/
Description: Onepane is a company that offers an AI companion for enhanced DevOps & SRE efficiency. Onepane offers a GenAI solution  providing unparalleled unified insights and control over your Cloud resources. Onepane helps with root cause analysis, cloud governance, and optimization strategies.
************************

YOUR TURN:
Company: $company
Wesbite: $website
Current Description: $description
________________________
Description (two sentences):
Confidence Interval:
Reasoning:
________________________

"""



In [5]:
parsed_description_prompt = """
Your role is to describe what jobs/tasks, industries, and customer that $company is targeting. Then, provide a confidence interval (1-10) on scale on 1-10 on how sure you are about the response. Please be reasonable.

A response should include:
- tasks/jobs being automated
- the industry that the startup applies to
- specific customers using the tool. DO NOT INCLUDE AN EXPLANATION

PLEASE FOLLOW THE FORMAT EXACTLY FROM THE EXAMPLES!!!!

EXAMPLES BELOW
************************
Company: Petville
Website: petville.co/pricing/biz
Description: Petville Global is a B2B CRM SaaS platform that utilizes advanced technologies like AI/ML and neural net to streamline and expand operations for pet businesses and veterinary clinics both locally and globally. The platform offers deep data analytics and marketing tools, helping businesses save an average of 22% on CRM and vet tech costs.
Tasks/Jobs: Data analysis, Marketing automation, Appointment scheduling, Inventory management
Industry: Customer management
Customers: Vetinarians, Pet Businesses

Company: Thunder
Wesbite: thundercompute.com
Description: Thunder is a decentralized, peer-to-peer cloud computing platform designed to democratize access to GPUs and address the persistent GPU shortage. It provides a solution for individuals and businesses seeking high-performance computing power, enabling them to leverage unused GPU resources from around the world.
Tasks/Jobs: GPU resource allocation, Access to unused GPUs, Distributed computing tasks 
Industry: Cloud Computing
Customers: Developers, GPU Owners

Company: NonprofitsHQ
Websit: www.nonprofitshq.com
Description: NonprofitsHQ is a software suite designed for nonprofits that utilizes AI to automate tasks, manage operations, and improve efficiency, ultimately saving organizations time and resources.
Tasks/Jobs: Fundraising management, Donor relationship management, Grant writing, Event planning
Industry: Non-profit management
Customers: Non-profit organizations
************************

YOUR TURN:
________________________
Company: $company
Wesbite: $website
Description: $description
________________________
Tasks/Jobs (comma separated list of 4, short):
Industry (1 item):
Customers (comma separated list): 
________________________
Confidence Interval:
Reasoning:
________________________
"""


In [6]:
examples_prompt = """
Your role is to provide 3 two-sentence examples of how the product from $company might be used. Do not mention the name of the company in the examples, and keep the descriptions broad.

Each example should include:
- A detailed description of the job that the tool automates and who performs that job and when.
- The ONET job being automated (preferably one from the database https://www.onetonline.org/) and the ONET task that the tool replaces (preferably one that from https://www.onetcenter.org/dictionary/20.1/excel/task_statements.html, include task id)
- A confidence interval (1-10) indicating how sure you are about the accuracy of your response.

PLEASE FOLLOW THE FORMAT EXACTLY FROM THE EXAMPLES!!!!

The goal is to map each example back to ONET jobs. If the job being automated is recognized by ONET, please use the ONET job title. 
If it is not typically found in ONET, use best judgement!

EXAMPLES BELOW: 
************************
Company: Blanc
Website: tryblanc.ai
Blanc is a compliance automation platform designed for fintech companies. It helps streamline regulatory compliance processes by providing a centralized hub for managing policies, monitoring activities, and generating reports.
_________________________
Example 1: A compliance officer at a fintech company uses Blanc to automate the process of creating and updating compliance policies, ensuring all documents are current and accessible to relevant team members.
ONET JOB automated 1: Compliance Officers that verify that all firm and regulatory policies and procedures have been documented, implemented, and communicated.
ONET JOB 1: Compliance Officers
_________________________
Example 2: A fintech company uses Blanc to generate automated compliance reports for regulatory audits, ensuring all necessary documentation is readily available and organized.
ONET JOB automated 2: Compliance Officers Prepare reports of activities, evaluations, recommendations, or decisions.
ONET JOB 2: Compliance Officers
_________________________
Example 3: A fintech company uses Blanc to monitor real-time transactions for potential compliance violations, triggering alerts and generating reports for further investigation.
ONET JOB automated 3: Compliance Officers that identify compliance issues that require follow-up or investigation.
ONET JOB 3: Compliance Officers



Company: Aether
Website: aetherenergie.com/
Aether Energy is an AI-driven platform designed to simplify the process of rooftop solar installation for businesses, providing comprehensive support from project planning and financing to installation and ongoing maintenance. This platform aims to streamline and optimize the entire solar energy journey for installers. 
________________
Example 1: A solar installer uses Aether to quickly create detailed project plans for rooftop solar installations, including system size, panel placement, and wiring diagrams. 
ONET JOB automated 1: Solar Photovoltaic Installers that diagram layouts and locations for photovoltaic (PV) arrays and equipment, including existing building or site features.
ONET JOB 1: Solar Photovoltaic Installers
________________
Example 2: A business owner leverages Aether to secure financing for their rooftop solar project, providing them with customized loan options and streamlined application processes. 
ONET JOB automated 2: Solar Photovoltaic Installers that prepare solar installation project proposals, quotes, budgets, or schedules.
ONET JOB 2: Solar Photovoltaic Installers
________________
Example 3: A solar installer uses Aether to manage the installation process, tracking materials, scheduling technicians, and coordinating with subcontractors, ensuring smooth project execution. 
ONET JOB automated 3:  Solar Engery Installation Managers that monitor work of contractors and subcontractors to ensure projects conform to plans, specifications, schedules, or budgets.
ONET JOB 3: Solar Energy Installation Managers
________________
************************

YOUR TURN:
Company:$company
Website: $website
Current Description: $description
$parsed_description
________________
Example 1: 
ONET JOB automated 1:
ONET JOB 1: 
Confidence Interval 1:
Reasoning 1:
________________
Example 2: 
ONET JOB automated 2:
ONET JOB 2: 
Confidence Interval 2:
Reasoning 2:
________________
Example 3:
ONET JOB automated 3:
ONET JOB 3: 
Confidence Interval 3:
Reasoning 3:
________________
"""


In [18]:
class prompting():
    def __init__(self):
        pass
    def set_current_results_df(self,results_df):
        self.results_df = results_df
        
    def iterate(self, df, prompt_template, args, value, start=0, end=False):
        if end == False:
            end = len(df)
            
        if start == 0:
            self.results_df = pd.DataFrame(columns=["organization name",value])
        
        if value in list(df.columns): 
            if start != 0:
                self.results_df = pd.concat([df[["organization name",value]].iloc[:start],self.results_df],axis=0)
            df = df.drop(columns=[value])
        
        
        for i, row in list(df.iterrows())[start:end]:
            failure_count = 0
            prompt = prompt_template
            while True:
                try:
                    name = row['organization name']
                    website = row['website']
                    print(f"******************************\nProcessing {i}: {name}, {website}")
                    if "generated_description" in df.columns:
                        print(row["generated_description"])

                    for arg in args: 
                        prompt = prompt.replace(f"${arg[0]}", row[arg[1]])
                    result = x_gemini.ask(prompt)
                    if result == "N/A": break #explicit material
 
                    text = re.sub(r"#|#\s+|_|\*", "", result).strip()
                    print(f"Result:\n{text}\n")
   
                    self.results_df.loc[i] = [name, result]

                    if i % 5 == 0:
                        self.results_df.to_csv("../output/current_results_df_prompting.csv")
                    break 
                
                except Exception as e:
                    print(failure_count)
                    failure_count += 1
                    if failure_count > 10: 
                        break
                    print(f"Error processing {i}, {row['organization name']}: {e}")
                    time.sleep(20)  
               

            
        df = df.merge(self.results_df, on='organization name', how='left')
        return df

prompting_class = prompting()


In [None]:
cols = ["generated_description", "generated_description_conf_interval", "generated_description_conf_interval_reasoning"]

args = [["company","organization name"],["website","website"], ["description","description_all"]]

df = prompting_class.iterate(df, generated_description_prompt, args, "generated_description_llm")

df.to_csv('../output/df_with_generated_description.csv', index=False)

In [None]:
def extract_data(text):
    if pd.isnull(text): return {}
    text = text.replace("\n"," ").replace("  "," ").replace("*","").replace(" (two sentences)","").replace("/10","").replace("_","").replace("#","")
    pattern = r"^.*?\s*Description:?\s*(.*)Confidence Interval:\s*(\d+)\s*Reasoning:\s*(.*)$"
    match = re.search(pattern, text, re.DOTALL)
    
    if match:
        description = match.group(1).strip()
        confidence_interval = match.group(2).strip()
        reasoning = match.group(3).strip()
        
        result = {
            "generated_description": description,
            "generated_description_conf_interval": confidence_interval,
            "generated_description_conf_interval_reasoning": reasoning
        }
        return result
        
    else:
        print("FAILURE TO MATCH")
        return {}
        
for col in cols:
    if col in list(df.columns): df=df.drop(columns=[col])
        
results_df = pd.DataFrame(list(df.apply(lambda x: extract_data(x["generated_description_llm"]),axis=1)))
df = pd.concat([df, results_df],axis=1)

In [None]:
len(df)

In [None]:
# Restart if cut in the middle of run!
# df = df.merge(prompting_class.results_df, on='organization name', how='left')

In [45]:
df = df.dropna(subset=["generated_description"],axis=0)
df = df.drop_duplicates(subset=['organization name'], keep='first')
df = df.reset_index(drop=True)

In [72]:
cols = ["parsed_description", "parsed_description_conf_interval", "parsed_description_conf_interval_reasoning", "Tasks/Jobs","Industry","Customers"]
    
args = [["company","organization name"],["website","website"], ["description","generated_description"]]

df = prompting_class.iterate(df, parsed_description_prompt, args, "parsed_description_llm",1778)

df.to_csv('../output/df_with_parsed_description.csv', index=False)

******************************
Processing 1778: Abide AI, abideai.com/
Abide AI is a platform that simplifies the development, deployment, and evaluation of machine learning (ML) systems. They focus on ensuring that ML models can be smoothly integrated into technology ecosystems while prioritizing robustness, reliability, and scalability.
Result:
Tasks/Jobs: Model training, Model deployment, Model monitoring, Model evaluation
Industry: Machine Learning
Customers: Data Scientists, Machine Learning Engineers

Confidence Interval: 8
Reasoning: The website focuses on ML development and deployment, which suggests the target audience is directly involved in these tasks. The language used in their description implies targeting individuals within the ML field.

******************************
Processing 1779: Plexicus, www.plexicus.com/
Plexicus is an AI-powered vulnerability management platform that helps organizations classify their assets, prioritize vulnerabilities, and automate remediation

In [78]:
def extract_data(text):
    if pd.isnull(text): return {}
    text = re.sub(r'\s+', ' ', text)  
    text = text.replace("_", "").replace("*", "").replace("#", "")
    text = re.sub(r'\s?\([^)]*\)', '', text)
    pattern = r".*?Tasks/Jobs:\s*(.*?)\s*Industry:\s*(.*?)\s*Customers:\s*(.*?)\s*Confidence Interval:\s*(.*?)\s*Reasoning:\s*(.*)"
    match = re.search(pattern, text, re.DOTALL)
    
    if match:
        tasks_jobs = match.group(1).strip()
        industry = match.group(2).strip()
        customers = match.group(3).strip()
        confidence_interval = match.group(4).strip()
        reasoning = match.group(5).strip()
        
        result = {
            "parsed_description": 
            "Tasks/Jobs: " + tasks_jobs + "\n" + "Industry: " + industry + "\n" + "Customers: "+ customers,
            "Tasks/Jobs": tasks_jobs,
            "Industry": industry,
            "Customers": customers,
            "parsed_description_conf_interval": confidence_interval,
            "parsed_description_conf_interval_reasoning": reasoning
        }
        
        return result
    else:
        print(text)
        print("FAILURE TO MATCH")
        return {}
        
for col in cols:
    if col in list(df.columns): df=df.drop(columns=[col])
        
results_df = pd.DataFrame(list(df.apply(lambda x: extract_data(x["parsed_description_llm"]),axis=1)))
df = pd.concat([df, results_df],axis=1)


In [87]:
df = df.dropna(subset=["parsed_description"],axis=0)
df = df.drop_duplicates(subset=['organization name'], keep='first')
df = df.reset_index(drop=True)

In [89]:
df.to_csv('../output/df_with_parsed_description.csv', index=False)

In [24]:
cols = ["situation1", "situation1_conf_interval", "situation1_conf_interval_reasoning", "situation2", "situation2_conf_interval", "situation2_conf_interval_reasoning","situation3", "situation3_conf_interval", "situation3_conf_interval_reasoning",'Example1','Job1','Job1_title','Example2','Job2','Job2_title','Example3','Job3','Job3_title']

args = [["company","organization name"],["website","website"], ["description","generated_description"], ["parsed_description","parsed_description"]]
df = prompting_class.iterate(df, examples_prompt, args, "examples_llm",1060)

df.to_csv('../output/df_with_examples.csv', index=False)

******************************
Processing 1060: OpsBeacon, opsbeacon.com
OpsBeacon is a no-code SaaS platform designed to streamline and automate IT and cloud operations. It utilizes AI to orchestrate systems, automate tasks, and manage incident responses, enabling organizations to enhance productivity and security.
Result:
OpsBeacon Example Scenarios:

Example 1: A DevOps engineer uses OpsBeacon to automatically deploy new code updates to a cloud-based application, ensuring consistent deployment processes and minimizing downtime. 

ONET JOB automated 1:  Software Developers who deploy software or program code to test or production environments (ONET 15-1199.00, Task ID 22-0107.01)
ONET JOB 1:  Software Developers
Confidence Interval 1: 9
Reasoning 1:  The example directly relates to a core function of software developers, which OpsBeacon automates through its deployment orchestration capabilities.

Example 2: An IT technician leverages OpsBeacon to automate routine system maintenance 

In [25]:
df.head()

Unnamed: 0,organization name,full_description,industries,headquarters location,founded date,description,CB rank,headquarters region,postal code,estimated revenue,...,generated_description_conf_interval,generated_description_conf_interval_reasoning,parsed_description_llm,parsed_description,Tasks/Jobs,Industry,Customers,parsed_description_conf_interval,parsed_description_conf_interval_reasoning,examples_llm
0,Pika,Pika is a startup that is developing an AI-pow...,"Artificial Intelligence (AI), Generative AI, G...","Palo Alto, California, United States",2023-01-01,Pika is a startup that develops an AI-powered ...,71,"San Francisco Bay Area, Silicon Valley, West C...",94301,—,...,8,While Pika's website doesn't go into deep deta...,________________________\nTasks/Jobs: Video cr...,"Tasks/Jobs: Video creation, Video editing, Ima...","Video creation, Video editing, Image animation...",Video Production,"Marketers, Content creators, Social media mana...",9,Pika's website and description clearly state i...,## Pika Examples:\n\n**Example 1:** A social m...
1,Pika,Pika is a startup that is developing an AI-pow...,"Artificial Intelligence (AI), Generative AI, G...","Palo Alto, California, United States",2023-01-01,Pika is a startup that develops an AI-powered ...,71,"San Francisco Bay Area, Silicon Valley, West C...",94301,—,...,8,While Pika's website doesn't go into deep deta...,________________________\nTasks/Jobs: Video cr...,"Tasks/Jobs: Video creation, Video editing, Ima...","Video creation, Video editing, Image animation...",Video Production,"Marketers, Content creators, Social media mana...",9,Pika's website and description clearly state i...,## Pika Examples:\n\n**Example 1:** A social m...
2,Contextual AI,Contextual AI offers a pioneering approach to ...,"Artificial Intelligence (AI), Generative AI, S...","Mountain View, California, United States",2023-01-01,Contextual AI offers a pioneering approach to ...,217,"San Francisco Bay Area, Silicon Valley, West C...",94040,—,...,6,While the website does state Contextual AI foc...,________________________\nTasks/Jobs: Content ...,"Tasks/Jobs: Content generation, Workflow autom...","Content generation, Workflow automation, Data ...",Business Automation,"Businesses, Enterprises",9,The website and description clearly indicate a...,## Contextual AI Example Uses\n\nCompany: Cont...
3,Contextual AI,Contextual AI offers a pioneering approach to ...,"Artificial Intelligence (AI), Generative AI, S...","Mountain View, California, United States",2023-01-01,Contextual AI offers a pioneering approach to ...,217,"San Francisco Bay Area, Silicon Valley, West C...",94040,—,...,6,While the website does state Contextual AI foc...,________________________\nTasks/Jobs: Content ...,"Tasks/Jobs: Content generation, Workflow autom...","Content generation, Workflow automation, Data ...",Business Automation,"Businesses, Enterprises",9,The website and description clearly indicate a...,## Contextual AI Example Uses\n\nCompany: Cont...
4,Sierra,Sierra is an AI startup that tackles essential...,"Artificial Intelligence (AI), Enterprise Softw...","San Francisco, California, United States",2023-01-01,Sierra is an AI startup that tackles essential...,273,"San Francisco Bay Area, West Coast, Western US",—,—,...,7,The information provided on their website is l...,________________________\nTasks/Jobs: Customer...,"Tasks/Jobs: Customer service, Issue resolution...","Customer service, Issue resolution, Lead gener...",Customer service,"Businesses, Enterprises",9,The description explicitly states Sierra's AI ...,## Sierra AI Example Use Cases\n\nCompany: Sie...


In [31]:

def extract_data(text):
    if pd.isnull(text): return {}
    # Normalize the text to ensure consistent whitespace and remove unwanted characters.
    text = re.sub(r'\s+', ' ', text).strip()
    text = text.replace("*", "").replace("/10", "").replace("_", "").replace("#", "")
    vals = {}
    i = 1
    while i <=3:
        # Regex pattern adjusted to handle fractions in confidence intervals like '9/10'
        pattern = rf"Example\s+{i}:\s+(.*?)\s+ONET JOB automated\s+{i}:\s+(.*?)\s+ONET JOB\s+{i}:\s+(.*?)\s+Confidence Interval\s+{i}:\s+(\d+(?:/\d+)?)\s+Reasoning\s+{i}:\s+(.*?)(?=\s*Example\s+{i + 1}:|$)"
        match = re.search(pattern, text, re.DOTALL)
        if not match:
            print(text+"\n\n")
            print(f"No matches found for Example {i}")  # Debug if no examples are found
            return {}
            break

        example_text, onet_job_automated, onet_job, confidence_interval, reasoning = match.groups()
        example_key = f"Example{i}"
        vals[example_key] = example_text.strip()
        vals[f"Job{i}"] = onet_job_automated.strip()
        vals[f"Job{i}_title"] = onet_job.strip()
        vals[f"situation{i}_conf_interval"] = confidence_interval.strip()
        vals[f"situation{i}_conf_interval_reasoning"] = reasoning.strip()

        i += 1  # Prepare to search for the next example
    return vals

for col in cols:
    if col in list(df.columns): df=df.drop(columns=[col])

results_df = pd.DataFrame(list(df.apply(lambda x: extract_data(x["examples_llm"]),axis=1)))
df = pd.concat([df, results_df],axis=1)


 Brzzy Examples Company: Brzzy Website: brzzy.co Current Description: Brzzy is a weather app that utilizes GIFs, music, and quirky forecasts to make the weather engaging and entertaining. Their goal is to transform how people interact with weather information by making it more fun and interactive. Tasks/Jobs: Weather information presentation, GIF creation, Music selection, Forecast personalization Industry: Weather information Customers: Individuals, Casual users, Social media users  Example 1: A social media manager uses Brzzy to find the perfect GIF to illustrate the weather conditions in their region, adding a touch of humor and engaging their audience. ONET JOB automated 1: Social media specialists who select and post digital content to promote a company's brand. ONET JOB 1: Social Media Specialists Confidence Interval 1: 7 Reasoning 1: While the ONET task is broad, it reflects the core action of selecting media to share, which aligns with Brzzy's focus on GIF-based weather present

In [35]:
l = pd.read_csv('../output/df_with_examples.csv',index_col=0)
len(l)

3251

Unnamed: 0_level_0,full_description,industries,headquarters location,founded date,description,CB rank,headquarters region,postal code,estimated revenue,operating status,...,generated_description_conf_interval,generated_description_conf_interval_reasoning,parsed_description_llm,parsed_description,Tasks/Jobs,Industry,Customers,parsed_description_conf_interval,parsed_description_conf_interval_reasoning,examples_llm
organization name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Pika,Pika is a startup that is developing an AI-pow...,"Artificial Intelligence (AI), Generative AI, G...","Palo Alto, California, United States",2023-01-01,Pika is a startup that develops an AI-powered ...,71,"San Francisco Bay Area, Silicon Valley, West C...",94301,—,Active,...,8,While Pika's website doesn't go into deep deta...,________________________\nTasks/Jobs: Video cr...,"Tasks/Jobs: Video creation, Video editing, Ima...","Video creation, Video editing, Image animation...",Video Production,"Marketers, Content creators, Social media mana...",9,Pika's website and description clearly state i...,## Pika Examples:\n\n**Example 1:** A social m...
Pika,Pika is a startup that is developing an AI-pow...,"Artificial Intelligence (AI), Generative AI, G...","Palo Alto, California, United States",2023-01-01,Pika is a startup that develops an AI-powered ...,71,"San Francisco Bay Area, Silicon Valley, West C...",94301,—,Active,...,8,While Pika's website doesn't go into deep deta...,________________________\nTasks/Jobs: Video cr...,"Tasks/Jobs: Video creation, Video editing, Ima...","Video creation, Video editing, Image animation...",Video Production,"Marketers, Content creators, Social media mana...",9,Pika's website and description clearly state i...,## Pika Examples:\n\n**Example 1:** A social m...
Contextual AI,Contextual AI offers a pioneering approach to ...,"Artificial Intelligence (AI), Generative AI, S...","Mountain View, California, United States",2023-01-01,Contextual AI offers a pioneering approach to ...,217,"San Francisco Bay Area, Silicon Valley, West C...",94040,—,Active,...,6,While the website does state Contextual AI foc...,________________________\nTasks/Jobs: Content ...,"Tasks/Jobs: Content generation, Workflow autom...","Content generation, Workflow automation, Data ...",Business Automation,"Businesses, Enterprises",9,The website and description clearly indicate a...,## Contextual AI Example Uses\n\nCompany: Cont...
Contextual AI,Contextual AI offers a pioneering approach to ...,"Artificial Intelligence (AI), Generative AI, S...","Mountain View, California, United States",2023-01-01,Contextual AI offers a pioneering approach to ...,217,"San Francisco Bay Area, Silicon Valley, West C...",94040,—,Active,...,6,While the website does state Contextual AI foc...,________________________\nTasks/Jobs: Content ...,"Tasks/Jobs: Content generation, Workflow autom...","Content generation, Workflow automation, Data ...",Business Automation,"Businesses, Enterprises",9,The website and description clearly indicate a...,## Contextual AI Example Uses\n\nCompany: Cont...
Sierra,Sierra is an AI startup that tackles essential...,"Artificial Intelligence (AI), Enterprise Softw...","San Francisco, California, United States",2023-01-01,Sierra is an AI startup that tackles essential...,273,"San Francisco Bay Area, West Coast, Western US",—,—,Active,...,7,The information provided on their website is l...,________________________\nTasks/Jobs: Customer...,"Tasks/Jobs: Customer service, Issue resolution...","Customer service, Issue resolution, Lead gener...",Customer service,"Businesses, Enterprises",9,The description explicitly states Sierra's AI ...,## Sierra AI Example Use Cases\n\nCompany: Sie...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Solatis,Solatis is an innovative AI technology tool de...,"Artificial Intelligence (AI), Information Tech...","New York, New York, United States",2024-06-05,Solatis is an AI technology workforce tool tha...,205522,"Greater New York Area, East Coast, Northeaster...",—,—,Active,...,8,The provided text gives a good overview of Sol...,________________________\nCompany: Solatis\nWe...,"Tasks/Jobs: Task automation, Workflow optimiza...","Task automation, Workflow optimization, Data a...",Business process automation,"Businesses, Customer support teams, Sales team...",8,The website and description clearly indicate S...,## Solatis Examples\n\n**Company: Solatis**\n*...
Degrees of Interest,Harnessing the power of AI to help law enforce...,"Artificial Intelligence (AI), Law Enforcement","Arlington, Virginia, United States",2024-06-06,Visual analytics AI for criminal investigations.,—,"Washington DC Metro Area, East Coast, Southern US",22201,—,Active,...,8,The website clearly states the purpose of the ...,________________________\nTasks/Jobs: Data ana...,"Tasks/Jobs: Data analysis, Pattern recognition...","Data analysis, Pattern recognition, Suspect id...",Law Enforcement,"Police detectives, Investigators, Forensic ana...",9,The company's website explicitly describes its...,## Example 1: \n\nA police detective uses Degr...
AI Compliance,Many potential users of AI are held back by th...,"Artificial Intelligence (AI), Internet, Legal ...","Newark, Delaware, United States",2024-06-06,Online Audit tools for AI compliance regulations,—,"Greater Philadelphia Area, East Coast, Souther...",19713,—,Active,...,8,The provided description clearly outlines the ...,________________________\nCompany: AI Complian...,"Tasks/Jobs: Compliance auditing, Risk assessme...","Compliance auditing, Risk assessment, AI regul...",AI ethics and governance,"AI developers, Tech companies, Government agen...",9/10,The company's description clearly focuses on c...,## AI Compliance Examples\n\n**Company: AI Com...
Robozaps,Robozaps is a marketplace to buy and sell huma...,"Artificial Intelligence (AI), Marketplace, Rob...","San Francisco, California, United States",2024-06-16,The world's #1 marketplace for humanoid robots,—,"San Francisco Bay Area, West Coast, Western US",94103,—,Active,...,9,The website clearly states that Robozaps is a ...,________________________\nCompany: Robozaps\nW...,"Tasks/Jobs: Robot search and filtering, Purcha...","Robot search and filtering, Purchase and sale ...",Robotics marketplace,"Businesses, Researchers, Individuals",9,The company's website and description clearly ...,## Robozaps Examples\n\n________________\n**Ex...


In [37]:
df = df.drop_duplicates(subset=['organization name'], keep='first')
df = df.reset_index(drop=True)


In [41]:
len(df)

2188

In [40]:
df = df.dropna(subset=["Example1"],axis=0)
df = df.drop_duplicates(subset=['organization name'], keep='first')
df = df.reset_index(drop=True)

In [42]:
df.to_csv('../output/df_with_examples.csv', index=False)

In [20]:
#RERUN STOPPED IN MIDDLE
# results_df = pd.read_csv("result.csv",index_col=0)
# df = pd.read_csv('../output/df_with_parsed_description.csv',index_col=0)
# prompting_class.set_current_results_df(results_df)
# df = df.merge(results_df, on='organization name', how='left')

In [16]:
prompting_class.results_df

Unnamed: 0,organization name,parsed_description_llm
0,Pika,________________________\nTasks/Jobs: Video cr...
1,Contextual AI,________________________\nTasks/Jobs: Content ...
2,Sierra,________________________\nTasks/Jobs: Customer...
3,Essential AI,________________________\nTasks/Jobs: Content ...
4,Liquid AI,________________________\nTasks/Jobs: Content ...
...,...,...
2188,Solatis,________________________\nCompany: Solatis\nWe...
2189,Degrees of Interest,________________________\nTasks/Jobs: Data ana...
2190,AI Compliance,________________________\nCompany: AI Complian...
2191,Robozaps,________________________\nCompany: Robozaps\nW...
