In [2]:
import pandas as pd
import numpy as np
from dotenv import load_dotenv
import os
from os import getenv
from openai import OpenAI
import pathlib
import textwrap
import google.generativeai as genai
import time
import requests
import ast
import json
from sklearn.metrics.pairwise import cosine_similarity
import re


pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.width', 1000)
pd.set_option('display.precision', 2)

load_dotenv("../../.env",override=True)
GOOGLE_API_KEY = getenv("GEMINI_API_KEY")
OPENAI_API_KEY = getenv("OPENAI_API_KEY")


In [2]:

genai.configure(api_key=GOOGLE_API_KEY)
class gemini():
    def __init__(self):
        self.model = genai.GenerativeModel('gemini-1.5-flash')
    def request(self,prompt):
        url = 'https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash-latest:generateContent'
        headers = {
            'Content-Type': 'application/json',
        }
        data = {
            "contents": [
                {
                    "parts": [
                        {
                            "text": prompt
                        }
                    ]
                }
            ]
        }
        params = {
            'key': GOOGLE_API_KEY
        }
        
        response = requests.post(url, headers=headers, json=data, params=params)
        return json.loads(response.text)

    def ask(self,prompt):
        #response = self.model.generate_content(prompt)
        response = self.request(prompt)
        if response["candidates"][0]["finishReason"] == 'SAFETY': return "N/A"
        response = response["candidates"][0]["content"]["parts"][0]["text"]
        return response

x_gemini = gemini()
x_gemini.ask("hi")



'Hi there! How can I help you today? \n'

In [3]:

class chatGPT():
    def __init__(self):
        self.client = OpenAI(api_key=OPENAI_API_KEY)
        
    # def ask(self, q):
    #     stream = self.client.chat.completions.create(
    #         model="gpt-4",
    #         messages=[{"role": "user", "content": q}],
    #         stream=True,
    #         temperature=0
    #     )
    #     response = ""
    #     for chunk in stream:
    #         if chunk.choices[0].delta.content is not None:
    #             response += chunk.choices[0].delta.content

    #     self.response = response
    #     return response

    def get_embedding(self,text, model="text-embedding-3-large"):
       text = text.replace("\n", " ")
       return self.client.embeddings.create(input = [text], model=model).data[0].embedding

x_chat = chatGPT()


In [4]:
codes = pd.read_csv("../output/soc_codes.csv",index_col=0)
codes.head()

Unnamed: 0,Major Group,Minor Group,Broad Group,Detailed Occupation,Title,Major Group Name,Minor Group Name,Broad Group Name
3,11-0000,11-1000,11-1010,11-1011,Chief Executives,Management Occupations,Top Executives,Chief Executives
5,11-0000,11-1000,11-1020,11-1021,General and Operations Managers,Management Occupations,Top Executives,General and Operations Managers
7,11-0000,11-1000,11-1030,11-1031,Legislators,Management Occupations,Top Executives,Legislators
10,11-0000,11-2000,11-2010,11-2011,Advertising and Promotions Managers,Management Occupations,"Advertising, Marketing, Promotions, Public Rel...",Advertising and Promotions Managers
12,11-0000,11-2000,11-2020,11-2021,Marketing Managers,Management Occupations,"Advertising, Marketing, Promotions, Public Rel...",Marketing and Sales Managers


In [20]:
onet_df = pd.read_csv("../output/onet_df.csv")
onet_df = onet_df[(onet_df.task_similarity > .2) & (onet_df.job_title_similarity > .35)]
len(onet_df)

5236

In [11]:
for x, row in onet_df.iterrows():
    print("\n\n")
    for col in row.index:
        print(row[col])




Advoria
A lawyer uses Advoria to set up an automated booking system, allowing clients to schedule consultations at any time, day or night. This frees up the lawyer's time to focus on client work.
Legal Secretary schedules client appointments.
Legal Secretaries and Administrative Assistants
Schedule and make appointments.
Legal Secretary
0.7280972380294118
0.5293727830875479



HireGuardian
An HR manager uses the platform to conduct exit interviews with departing employees, gathering valuable insights into reasons for leaving and areas for improvement within the company. This feedback helps inform future hiring and retention strategies.
An HR manager conducts manual exit interviews and analyzes responses to identify trends.
Human Resources Managers
Conduct exit interviews to identify reasons for employee termination.
HR manager
0.747127197286384
0.4778633243299524



Aether
A solar installer uses Aether to manage the installation process, tracking materials, scheduling technicians, a

In [22]:
onet_df[onet_df["organization_name"]=="Aether"].values

array([['Aether',
        'A solar installer uses Aether to quickly create detailed project plans for rooftop solar installations, including system size, panel placement, and wiring diagrams.',
        'A solar installer has to plan, design, and visualize the installation of a rooftop solar panel.',
        'Solar Photovoltaic Installers',
        'Diagram layouts and locations for photovoltaic (PV) arrays and equipment, including existing building or site features.',
        'solar installer', 0.7181484600263808, 0.6428405301690214],
       ['Aether',
        'A solar installer uses Aether to quickly create detailed project plans for rooftop solar installations, including system size, panel placement, and wiring diagrams.',
        'A solar installer has to plan, design, and visualize the installation of a rooftop solar panel.',
        'Solar Photovoltaic Installers',
        'Determine photovoltaic (PV) system designs or configurations based on factors such as customer needs, expect

In [17]:
y = sample_df[sample_df["organization name"]=="Aether"]
for x, row in y.iterrows():
    print("\n\n")
    for col in row.index:
        print(row[col])




20392
Aether
1-10
2023-07-01
—
Artificial Intelligence (AI), Renewable Energy, SaaS, Solar
San Francisco, California, United States
Aether Energy is an AI-powered end-to-end platform for rooftop solar installers
—
—
San Francisco Bay Area, West Coast, Western US
aetherenergie.com/
—
View on LinkedIn
—
support@aetherenergy.me
—
—
—
Artificial Intelligence (AI), Data and Analytics, Energy, Natural Resources, Science and Engineering, Software, Sustainability
$3,000,000
For Profit
—
— Aether Energy is an AI-powered end-to-end platform for rooftop solar installers
['Renewable Energy', 'SaaS', 'Solar']
Aether Energy is an AI-driven platform designed to simplify the process of rooftop solar installation for businesses, providing comprehensive support from project planning and financing to installation and ongoing maintenance. This platform aims to streamline and optimize the entire solar energy journey for installers. 

Aether

Tasks/Jobs: Project Planning, Financing, Installation, Mainten

In [62]:
onet_weights = onet_df.groupby("organization_name")["onet_title"].count().apply(lambda x: 1/x).reset_index().rename({"onet_title":"onet_weight"},axis=1)
onet_df = onet_df.merge(onet_weights, on="organization_name")
onet_titles = onet_df.groupby("onet_title")["onet_weight"].sum().round(2).sort_values(ascending=False).reset_index()

onet_titles.head(100)


Unnamed: 0,onet_title,onet_weight
0,Search Marketing Strategists,46.41
1,Data Scientists,35.36
2,Market Research Analysts and Marketing Special...,30.95
3,Customer Service Representatives,29.47
4,Financial and Investment Analysts,25.2
5,Graphic Designers,18.49
6,Film and Video Editors,16.77
7,Writers and Authors,16.0
8,Video Game Designers,15.92
9,Investment Fund Managers,14.53


In [63]:
example_job_titles_df = onet_df.example_job_title.value_counts().apply(lambda x: x/3).reset_index()
example_job_titles_df.head(100)

Unnamed: 0,example_job_title,count
0,Data analyst,63.0
1,customer service representative,34.33
2,Financial analyst,34.0
3,travel agent,24.67
4,data scientist,23.33
5,marketing analyst,21.0
6,sales representative,21.0
7,marketing specialist,20.33
8,graphic designer,20.0
9,data analyst,19.67


In [64]:
top_100_tasks = onet_df['onet_task'].value_counts().reset_index().head(100)
top_100_tasks.columns = ['Task', 'Frequency']
for index, row in top_100_tasks.iterrows():
    print(f"Task = {row['Task']}, Frequency = {row['Frequency']}\n")


Task = Collect and analyze data on customer demographics, preferences, needs, and buying habits to identify potential markets and factors affecting product demand., Frequency = 71

Task = Forecast and track marketing and sales trends, analyzing collected data., Frequency = 62

Task = Inform investment decisions by analyzing financial information to forecast business, industry, or economic conditions., Frequency = 39

Task = Create content strategies for digital media., Frequency = 39

Task = Interpret data on price, yield, stability, future investment-risk trends, economic influences, and other factors affecting investment programs., Frequency = 38

Task = Confer with customers by telephone or in person to provide information about products or services, take or enter orders, cancel accounts, or obtain details of complaints., Frequency = 37

Task = Execute or manage social media campaigns to inform search marketing tactics., Frequency = 34

Task = Keep records of customer interactions o

In [65]:
top_100_tasks

Unnamed: 0,Task,Frequency
0,Collect and analyze data on customer demograph...,71
1,"Forecast and track marketing and sales trends,...",62
2,Inform investment decisions by analyzing finan...,39
3,Create content strategies for digital media.,39
4,"Interpret data on price, yield, stability, fut...",38
5,Confer with customers by telephone or in perso...,37
6,Execute or manage social media campaigns to in...,34
7,Keep records of customer interactions or trans...,33
8,"Conduct online marketing initiatives, such as ...",30
9,Refer unresolved customer grievances to design...,27


In [14]:
sample_df = pd.read_csv("../output/df_with_onet.csv")

In [22]:
sample_df["actively hiring"].unique()

array(['—'], dtype=object)

In [69]:
cols = ['organization name', 'num employees', 'founded date','website','description_all', 'industries_parsed', 'generated_description', 'Company', 'parsed_description', 'Tasks/Jobs', 'Industry', 'People Using Tool']
sample_df = sample_df[cols]
sample_df.head()

Unnamed: 0,organization name,num employees,founded date,website,description_all,industries_parsed,generated_description,Company,parsed_description,Tasks/Jobs,Industry,People Using Tool
0,Advoria,1-10,2023-08-29,advoria.de,Advoria's online appointment booking for law f...,"['Legal', 'Legal Tech', 'SaaS', 'Software']","Advoria is a German-made, GDPR-compliant onlin...",Advoria,"\nTasks/Jobs: Appointment scheduling, Client i...","Appointment scheduling, Client intake, Secreta...",Legal,"Lawyers, Legal Secretaries"
1,Petville Global,1-10,2023-05-01,petville.co/pricing/biz,Petville Global: a B2B CRM SaaS platform + vet...,"['Information Technology', 'Pet', 'Veterinary']",Petville Global is a business-to-business (B2B...,Petville Global,\nTasks/Jobs: Customer Relationship Management...,"Customer Relationship Management, Marketing Au...",Veterinary Technology,"Veterinarians, Pet Businesses"
2,InputAI,1-10,2023-01-01,inputai.com/,— 1000+ AI templates with OpenAI GPT and ChatGPT,[],InputAI is an online platform that provides ov...,InputAI,"\nTasks/Jobs: Writing, Summarizing, Translatin...","Writing, Summarizing, Translating, Code Genera...",No-code AI platform,"Everyone, Professionals, Businesses"
3,Thunder,1-10,2024-02-02,thundercompute.com,Thunder is a Cloud computing to democratize ac...,"['Cloud Computing', 'Data Center', 'Informatio...","Thunder offers decentralized, peer-to-peer GPU...",Thunder,"\nTasks/Jobs: GPU resource allocation, Distrib...","GPU resource allocation, Distributed computing...",Cloud Computing,"Developers, Researchers, Businesses, Individuals"
4,Tail and Skew,1-10,2023-04-01,tailskew.com,Tail & Skew builds an AI agent to automate tas...,"['Financial Services', 'FinTech', 'Machine Lea...",Tail and Skew provides a platform where users ...,Tail and Skew,"\nTasks/Jobs: Task automation, Workflow optimi...","Task automation, Workflow optimization, Data a...",Business process automation,"Businesses, Developers"


In [70]:
df = sample_df.merge(onet_df,left_on="organization name",right_on="organization_name")
df[df.task_similarity.isnull()] #Check

Unnamed: 0,organization name,num employees,founded date,website,description_all,industries_parsed,generated_description,Company,parsed_description,Tasks/Jobs,Industry,People Using Tool,organization_name,example,job,onet_title,onet_task,example_job_title,task_similarity,job_title_similarity,onet_weight


In [71]:
df.head(20)

Unnamed: 0,organization name,num employees,founded date,website,description_all,industries_parsed,generated_description,Company,parsed_description,Tasks/Jobs,Industry,People Using Tool,organization_name,example,job,onet_title,onet_task,example_job_title,task_similarity,job_title_similarity,onet_weight
0,Advoria,1-10,2023-08-29,advoria.de,Advoria's online appointment booking for law f...,"['Legal', 'Legal Tech', 'SaaS', 'Software']","Advoria is a German-made, GDPR-compliant onlin...",Advoria,"\nTasks/Jobs: Appointment scheduling, Client i...","Appointment scheduling, Client intake, Secreta...",Legal,"Lawyers, Legal Secretaries",Advoria,A lawyer uses Advoria to set up an automated b...,Legal Secretary schedules client appointments.,Legal Secretaries and Administrative Assistants,Schedule and make appointments.,Legal Secretary,0.73,0.53,0.14
1,Advoria,1-10,2023-08-29,advoria.de,Advoria's online appointment booking for law f...,"['Legal', 'Legal Tech', 'SaaS', 'Software']","Advoria is a German-made, GDPR-compliant onlin...",Advoria,"\nTasks/Jobs: Appointment scheduling, Client i...","Appointment scheduling, Client intake, Secreta...",Legal,"Lawyers, Legal Secretaries",Advoria,A lawyer uses Advoria to set up an automated b...,Legal Secretary schedules client appointments.,Medical Secretaries and Administrative Assistants,Schedule and confirm patient diagnostic appoin...,Legal Secretary,0.59,0.44,0.14
2,Advoria,1-10,2023-08-29,advoria.de,Advoria's online appointment booking for law f...,"['Legal', 'Legal Tech', 'SaaS', 'Software']","Advoria is a German-made, GDPR-compliant onlin...",Advoria,"\nTasks/Jobs: Appointment scheduling, Client i...","Appointment scheduling, Client intake, Secreta...",Legal,"Lawyers, Legal Secretaries",Advoria,A lawyer uses Advoria to set up an automated b...,Legal Secretary schedules client appointments.,Legal Secretaries and Administrative Assistants,"Attend legal meetings, such as client intervie...",Legal Secretary,0.59,0.53,0.14
3,Advoria,1-10,2023-08-29,advoria.de,Advoria's online appointment booking for law f...,"['Legal', 'Legal Tech', 'SaaS', 'Software']","Advoria is a German-made, GDPR-compliant onlin...",Advoria,"\nTasks/Jobs: Appointment scheduling, Client i...","Appointment scheduling, Client intake, Secreta...",Legal,"Lawyers, Legal Secretaries",Advoria,A law firm uses Advoria to collect necessary c...,Legal Secretary collects and organizes client ...,Legal Secretaries and Administrative Assistants,Assist attorneys in collecting information suc...,Legal Secretary,0.66,0.55,0.14
4,Advoria,1-10,2023-08-29,advoria.de,Advoria's online appointment booking for law f...,"['Legal', 'Legal Tech', 'SaaS', 'Software']","Advoria is a German-made, GDPR-compliant onlin...",Advoria,"\nTasks/Jobs: Appointment scheduling, Client i...","Appointment scheduling, Client intake, Secreta...",Legal,"Lawyers, Legal Secretaries",Advoria,A law firm uses Advoria to collect necessary c...,Legal Secretary collects and organizes client ...,Legal Secretaries and Administrative Assistants,"Organize and maintain law libraries, documents...",Legal Secretary,0.61,0.55,0.14
5,Advoria,1-10,2023-08-29,advoria.de,Advoria's online appointment booking for law f...,"['Legal', 'Legal Tech', 'SaaS', 'Software']","Advoria is a German-made, GDPR-compliant onlin...",Advoria,"\nTasks/Jobs: Appointment scheduling, Client i...","Appointment scheduling, Client intake, Secreta...",Legal,"Lawyers, Legal Secretaries",Advoria,A law firm uses Advoria to collect necessary c...,Legal Secretary collects and organizes client ...,Legal Secretaries and Administrative Assistants,"Complete various forms, such as accident repor...",Legal Secretary,0.61,0.55,0.14
6,Advoria,1-10,2023-08-29,advoria.de,Advoria's online appointment booking for law f...,"['Legal', 'Legal Tech', 'SaaS', 'Software']","Advoria is a German-made, GDPR-compliant onlin...",Advoria,"\nTasks/Jobs: Appointment scheduling, Client i...","Appointment scheduling, Client intake, Secreta...",Legal,"Lawyers, Legal Secretaries",Advoria,A paralegal uses Advoria to manage their own c...,Paralegal manages own calendar and appointment...,Legal Secretaries and Administrative Assistants,Schedule and make appointments.,Paralegal,0.62,0.42,0.14
7,Petville Global,1-10,2023-05-01,petville.co/pricing/biz,Petville Global: a B2B CRM SaaS platform + vet...,"['Information Technology', 'Pet', 'Veterinary']",Petville Global is a business-to-business (B2B...,Petville Global,\nTasks/Jobs: Customer Relationship Management...,"Customer Relationship Management, Marketing Au...",Veterinary Technology,"Veterinarians, Pet Businesses",Petville Global,A veterinarian uses the software to automatica...,A veterinary receptionist manually sends out r...,Veterinary Technologists and Technicians,Schedule appointments and procedures for animals.,Veterinary receptionist,0.48,0.35,0.25
8,Petville Global,1-10,2023-05-01,petville.co/pricing/biz,Petville Global: a B2B CRM SaaS platform + vet...,"['Information Technology', 'Pet', 'Veterinary']",Petville Global is a business-to-business (B2B...,Petville Global,\nTasks/Jobs: Customer Relationship Management...,"Customer Relationship Management, Marketing Au...",Veterinary Technology,"Veterinarians, Pet Businesses",Petville Global,A pet grooming business uses the platform to a...,A pet grooming business manager uses data to t...,Spa Managers,Maintain client databases.,pet grooming,0.54,0.42,0.25
9,Petville Global,1-10,2023-05-01,petville.co/pricing/biz,Petville Global: a B2B CRM SaaS platform + vet...,"['Information Technology', 'Pet', 'Veterinary']",Petville Global is a business-to-business (B2B...,Petville Global,\nTasks/Jobs: Customer Relationship Management...,"Customer Relationship Management, Marketing Au...",Veterinary Technology,"Veterinarians, Pet Businesses",Petville Global,A veterinary clinic leverages the software to ...,A veterinary receptionist schedules appointmen...,Veterinary Technologists and Technicians,Schedule appointments and procedures for animals.,Veterinary receptionist,0.63,0.38,0.25


In [72]:
df = df.merge(codes,left_on="onet_title",right_on="Title")
df.head(50)

Unnamed: 0,organization name,num employees,founded date,website,description_all,industries_parsed,generated_description,Company,parsed_description,Tasks/Jobs,Industry,People Using Tool,organization_name,example,job,onet_title,onet_task,example_job_title,task_similarity,job_title_similarity,onet_weight,Major Group,Minor Group,Broad Group,Detailed Occupation,Title,Major Group Name,Minor Group Name,Broad Group Name
0,Advoria,1-10,2023-08-29,advoria.de,Advoria's online appointment booking for law f...,"['Legal', 'Legal Tech', 'SaaS', 'Software']","Advoria is a German-made, GDPR-compliant onlin...",Advoria,"\nTasks/Jobs: Appointment scheduling, Client i...","Appointment scheduling, Client intake, Secreta...",Legal,"Lawyers, Legal Secretaries",Advoria,A lawyer uses Advoria to set up an automated b...,Legal Secretary schedules client appointments.,Legal Secretaries and Administrative Assistants,Schedule and make appointments.,Legal Secretary,0.73,0.53,0.14,43-0000,43-6000,43-6010,43-6012,Legal Secretaries and Administrative Assistants,Office and Administrative Support Occupations,Secretaries and Administrative Assistants,Secretaries and Administrative Assistants
1,Advoria,1-10,2023-08-29,advoria.de,Advoria's online appointment booking for law f...,"['Legal', 'Legal Tech', 'SaaS', 'Software']","Advoria is a German-made, GDPR-compliant onlin...",Advoria,"\nTasks/Jobs: Appointment scheduling, Client i...","Appointment scheduling, Client intake, Secreta...",Legal,"Lawyers, Legal Secretaries",Advoria,A lawyer uses Advoria to set up an automated b...,Legal Secretary schedules client appointments.,Legal Secretaries and Administrative Assistants,"Attend legal meetings, such as client intervie...",Legal Secretary,0.59,0.53,0.14,43-0000,43-6000,43-6010,43-6012,Legal Secretaries and Administrative Assistants,Office and Administrative Support Occupations,Secretaries and Administrative Assistants,Secretaries and Administrative Assistants
2,Advoria,1-10,2023-08-29,advoria.de,Advoria's online appointment booking for law f...,"['Legal', 'Legal Tech', 'SaaS', 'Software']","Advoria is a German-made, GDPR-compliant onlin...",Advoria,"\nTasks/Jobs: Appointment scheduling, Client i...","Appointment scheduling, Client intake, Secreta...",Legal,"Lawyers, Legal Secretaries",Advoria,A law firm uses Advoria to collect necessary c...,Legal Secretary collects and organizes client ...,Legal Secretaries and Administrative Assistants,Assist attorneys in collecting information suc...,Legal Secretary,0.66,0.55,0.14,43-0000,43-6000,43-6010,43-6012,Legal Secretaries and Administrative Assistants,Office and Administrative Support Occupations,Secretaries and Administrative Assistants,Secretaries and Administrative Assistants
3,Advoria,1-10,2023-08-29,advoria.de,Advoria's online appointment booking for law f...,"['Legal', 'Legal Tech', 'SaaS', 'Software']","Advoria is a German-made, GDPR-compliant onlin...",Advoria,"\nTasks/Jobs: Appointment scheduling, Client i...","Appointment scheduling, Client intake, Secreta...",Legal,"Lawyers, Legal Secretaries",Advoria,A law firm uses Advoria to collect necessary c...,Legal Secretary collects and organizes client ...,Legal Secretaries and Administrative Assistants,"Organize and maintain law libraries, documents...",Legal Secretary,0.61,0.55,0.14,43-0000,43-6000,43-6010,43-6012,Legal Secretaries and Administrative Assistants,Office and Administrative Support Occupations,Secretaries and Administrative Assistants,Secretaries and Administrative Assistants
4,Advoria,1-10,2023-08-29,advoria.de,Advoria's online appointment booking for law f...,"['Legal', 'Legal Tech', 'SaaS', 'Software']","Advoria is a German-made, GDPR-compliant onlin...",Advoria,"\nTasks/Jobs: Appointment scheduling, Client i...","Appointment scheduling, Client intake, Secreta...",Legal,"Lawyers, Legal Secretaries",Advoria,A law firm uses Advoria to collect necessary c...,Legal Secretary collects and organizes client ...,Legal Secretaries and Administrative Assistants,"Complete various forms, such as accident repor...",Legal Secretary,0.61,0.55,0.14,43-0000,43-6000,43-6010,43-6012,Legal Secretaries and Administrative Assistants,Office and Administrative Support Occupations,Secretaries and Administrative Assistants,Secretaries and Administrative Assistants
5,Advoria,1-10,2023-08-29,advoria.de,Advoria's online appointment booking for law f...,"['Legal', 'Legal Tech', 'SaaS', 'Software']","Advoria is a German-made, GDPR-compliant onlin...",Advoria,"\nTasks/Jobs: Appointment scheduling, Client i...","Appointment scheduling, Client intake, Secreta...",Legal,"Lawyers, Legal Secretaries",Advoria,A paralegal uses Advoria to manage their own c...,Paralegal manages own calendar and appointment...,Legal Secretaries and Administrative Assistants,Schedule and make appointments.,Paralegal,0.62,0.42,0.14,43-0000,43-6000,43-6010,43-6012,Legal Secretaries and Administrative Assistants,Office and Administrative Support Occupations,Secretaries and Administrative Assistants,Secretaries and Administrative Assistants
6,Fifty One Ai,1-10,2024-04-01,fiftyoneai.com,"At 51AI, a public benefit company, we're dedic...","['CivicTech', 'GovTech', 'Legal Tech']",Fifty One AI is a public benefit company using...,Fifty One Ai,"\nTasks/Jobs: Report generation, Task automati...","Report generation, Task automation, Workflow o...",Legal,"Lawyers, Legal Aid Organizations",Fifty One Ai,A lawyer uses AI agents to automatically colle...,A legal assistant collects and organizes evide...,Legal Secretaries and Administrative Assistants,Assist attorneys in collecting information suc...,Legal assistant,0.63,0.47,0.2,43-0000,43-6000,43-6010,43-6012,Legal Secretaries and Administrative Assistants,Office and Administrative Support Occupations,Secretaries and Administrative Assistants,Secretaries and Administrative Assistants
7,SPEED AI,1-10,2023-09-01,speedintake.com/,The Legal Industry's First and ONLY AI-Powered...,"['Legal', 'Legal Tech', 'Software']",SPEED AI is a revolutionary AI-powered platfor...,SPEED AI,"\nTasks/Jobs: Intake process automation, Prosp...","Intake process automation, Prospect data analy...",Legal,"Lawyers, Legal professionals",SPEED AI,A lawyer uses SPEED AI to automate the initial...,A legal assistant collects client information ...,Legal Secretaries and Administrative Assistants,Assist attorneys in collecting information suc...,legal assistant,0.61,0.46,0.17,43-0000,43-6000,43-6010,43-6012,Legal Secretaries and Administrative Assistants,Office and Administrative Support Occupations,Secretaries and Administrative Assistants,Secretaries and Administrative Assistants
8,GenPen AI,1-10,2023-02-01,genpen.ai,— Artifical General Intelligence,"['Developer Tools', 'SaaS']",GenPen AI is an artificial general intelligenc...,GenPen AI,"\nTasks/Jobs: Content creation, Research, Pro...","Content creation, Research, Problem-solving, ...",Artificial General Intelligence,"Everyone, Professionals",GenPen AI,A lawyer uses GenPen AI to conduct legal resea...,A paralegal researches legal precedents.,Legal Secretaries and Administrative Assistants,Review legal publications and perform database...,paralegal,0.56,0.36,0.17,43-0000,43-6000,43-6010,43-6012,Legal Secretaries and Administrative Assistants,Office and Administrative Support Occupations,Secretaries and Administrative Assistants,Secretaries and Administrative Assistants
9,csky.ai,1-10,2023-05-01,csky.ai/,csky.ai is specializing in the augmentation of...,"['Cyber Security', 'Health Care', 'Information...",csky.ai uses edge AI to transform private meet...,csky.ai,"\nTasks/Jobs: Meeting transcription, Data anal...","Meeting transcription, Data analysis, Meeting ...",Meeting transcription and analytics,"Businesses, Organizations",csky.ai,A legal team uses csky.ai to transcribe and an...,A legal paralegal transcribes and analyzes con...,Legal Secretaries and Administrative Assistants,"Attend legal meetings, such as client intervie...",legal paralegal,0.62,0.44,0.11,43-0000,43-6000,43-6010,43-6012,Legal Secretaries and Administrative Assistants,Office and Administrative Support Occupations,Secretaries and Administrative Assistants,Secretaries and Administrative Assistants


In [86]:
%%capture captured_output

group_sums = df.groupby('Minor Group Name')['onet_weight'].sum().sort_values(ascending=False)
print("1000 Startups Founded in 2023/2024 with 1-10 employees")
print("Decomposition of effect on labor market")
for group_name in group_sums.index:
    group_data = df[df['Minor Group Name'] == group_name]
    print("*****************")
    print(f"Group: {group_name}")
    onet_titles = group_data.groupby('onet_title')['onet_weight'].sum().round(2).sort_values(ascending=False).reset_index()
    onet_tasks = group_data.groupby('onet_task')['onet_weight'].sum().round(2).sort_values(ascending=False).reset_index()
    
    print("________________")
    print("10 Most Highly Weighted Titles:")
    for index, row in onet_titles.iterrows():
        print(f"Title: {row['onet_title']}, Weight: {row['onet_weight']}")
    print("\n")  

    print("10 Most Highly Weighted Tasks:")
    for index, row in list(onet_tasks.iterrows())[:10]:
        print(f"Task: {row['onet_task']}, Weight: {row['onet_weight']}")
    print("\n")  

    print("10 Example Startups:")
    startups = group_data.groupby(['organization_name','website'])['onet_weight'].sum().round(2).sort_values(ascending=False).reset_index()
    for index, row in list(startups.iterrows())[:10]:
        print(f"Startup: {row['organization_name']}, Website: {row['website']}")
    print("\n")  
    
with open('output.txt', 'w') as f:
    f.write(captured_output.stdout)

In [88]:
len(df.organization_name.unique())

736

In [39]:
onet_df

Unnamed: 0,organization_name,example,job,onet_title,onet_task,example_job_title,task_similarity,job_title_similarity,Major Group,Minor Group,Broad Group,Detailed Occupation,Title,Major Group Name,Minor Group Name,Broad Group Name
0,Advoria,A lawyer uses Advoria to set up an automated b...,Legal Secretary schedules client appointments.,Legal Secretaries and Administrative Assistants,Schedule and make appointments.,Legal Secretary,0.73,0.53,43-0000,43-6000,43-6010,43-6012,Legal Secretaries and Administrative Assistants,Office and Administrative Support Occupations,Secretaries and Administrative Assistants,Secretaries and Administrative Assistants
1,Advoria,A lawyer uses Advoria to set up an automated b...,Legal Secretary schedules client appointments.,Legal Secretaries and Administrative Assistants,"Attend legal meetings, such as client intervie...",Legal Secretary,0.59,0.53,43-0000,43-6000,43-6010,43-6012,Legal Secretaries and Administrative Assistants,Office and Administrative Support Occupations,Secretaries and Administrative Assistants,Secretaries and Administrative Assistants
2,Advoria,A law firm uses Advoria to collect necessary c...,Legal Secretary collects and organizes client ...,Legal Secretaries and Administrative Assistants,Assist attorneys in collecting information suc...,Legal Secretary,0.66,0.55,43-0000,43-6000,43-6010,43-6012,Legal Secretaries and Administrative Assistants,Office and Administrative Support Occupations,Secretaries and Administrative Assistants,Secretaries and Administrative Assistants
3,Advoria,A law firm uses Advoria to collect necessary c...,Legal Secretary collects and organizes client ...,Legal Secretaries and Administrative Assistants,"Organize and maintain law libraries, documents...",Legal Secretary,0.61,0.55,43-0000,43-6000,43-6010,43-6012,Legal Secretaries and Administrative Assistants,Office and Administrative Support Occupations,Secretaries and Administrative Assistants,Secretaries and Administrative Assistants
4,Advoria,A law firm uses Advoria to collect necessary c...,Legal Secretary collects and organizes client ...,Legal Secretaries and Administrative Assistants,"Complete various forms, such as accident repor...",Legal Secretary,0.61,0.55,43-0000,43-6000,43-6010,43-6012,Legal Secretaries and Administrative Assistants,Office and Administrative Support Occupations,Secretaries and Administrative Assistants,Secretaries and Administrative Assistants
5,Advoria,A paralegal uses Advoria to manage their own c...,Paralegal manages own calendar and appointment...,Legal Secretaries and Administrative Assistants,Schedule and make appointments.,Paralegal,0.62,0.42,43-0000,43-6000,43-6010,43-6012,Legal Secretaries and Administrative Assistants,Office and Administrative Support Occupations,Secretaries and Administrative Assistants,Secretaries and Administrative Assistants
6,Fifty One Ai,A lawyer uses AI agents to automatically colle...,A legal assistant collects and organizes evide...,Legal Secretaries and Administrative Assistants,Assist attorneys in collecting information suc...,Legal assistant,0.63,0.47,43-0000,43-6000,43-6010,43-6012,Legal Secretaries and Administrative Assistants,Office and Administrative Support Occupations,Secretaries and Administrative Assistants,Secretaries and Administrative Assistants
7,SPEED AI,A lawyer uses SPEED AI to automate the initial...,A legal assistant collects client information ...,Legal Secretaries and Administrative Assistants,Assist attorneys in collecting information suc...,legal assistant,0.61,0.46,43-0000,43-6000,43-6010,43-6012,Legal Secretaries and Administrative Assistants,Office and Administrative Support Occupations,Secretaries and Administrative Assistants,Secretaries and Administrative Assistants
8,GenPen AI,A lawyer uses GenPen AI to conduct legal resea...,A paralegal researches legal precedents.,Legal Secretaries and Administrative Assistants,Review legal publications and perform database...,paralegal,0.56,0.36,43-0000,43-6000,43-6010,43-6012,Legal Secretaries and Administrative Assistants,Office and Administrative Support Occupations,Secretaries and Administrative Assistants,Secretaries and Administrative Assistants
9,csky.ai,A legal team uses csky.ai to transcribe and an...,A legal paralegal transcribes and analyzes con...,Legal Secretaries and Administrative Assistants,"Attend legal meetings, such as client intervie...",legal paralegal,0.62,0.44,43-0000,43-6000,43-6010,43-6012,Legal Secretaries and Administrative Assistants,Office and Administrative Support Occupations,Secretaries and Administrative Assistants,Secretaries and Administrative Assistants


Unnamed: 0,organization name,num employees,founded date,website,description_all,industries_parsed,generated_description,Company,parsed_description,Tasks/Jobs,Industry,People Using Tool,organization_name,example,job,onet_title,onet_task,example_job_title,task_similarity,job_title_similarity
