In [462]:
import pandas as pd
import numpy as np
from dotenv import load_dotenv
import os
from os import getenv
from openai import OpenAI
import pathlib
import textwrap
import google.generativeai as genai
import time

load_dotenv("../.env",override=True)
GOOGLE_API_KEY = getenv("GEMINI_API_KEY")
OPENAI_API_KEY = getenv("OPENAI_API_KEY")


In [463]:
import json
genai.configure(api_key=GOOGLE_API_KEY)
class gemini():
    def __init__(self):
        self.model = genai.GenerativeModel('gemini-1.5-flash')
    def request(self,prompt):
        url = 'https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash-latest:generateContent'
        headers = {
            'Content-Type': 'application/json',
        }
        data = {
            "contents": [
                {
                    "parts": [
                        {
                            "text": prompt
                        }
                    ]
                }
            ]
        }
        params = {
            'key': GOOGLE_API_KEY
        }
        
        response = requests.post(url, headers=headers, json=data, params=params)
        return json.loads(response.text)

    def ask(self,prompt):
        #response = self.model.generate_content(prompt)
        response = self.request(prompt)
        response = response["candidates"][0]["content"]["parts"][0]["text"]
        return response

x_gemini = gemini()
x_gemini.ask("hi")

'Hi! How can I help you today? \n'

In [464]:
class chatGPT():
    def __init__(self):
        self.client = OpenAI(api_key=OPENAI_API_KEY)
        
    def ask(self, q):
        stream = self.client.chat.completions.create(
            model="gpt-4",
            messages=[{"role": "user", "content": q}],
            stream=True,
            temperature=0
        )
        response = ""
        for chunk in stream:
            if chunk.choices[0].delta.content is not None:
                response += chunk.choices[0].delta.content

        self.response = response
        return response

    def get_embedding(self,text, model="text-embedding-3-large"):
       text = text.replace("\n", " ")
       return self.client.embeddings.create(input = [text], model=model).data[0].embedding

x_chat = chatGPT()


In [474]:
sample_df = pd.read_csv("./output/sample_df_current.csv")
sample_df = sample_df.sample(n=200)

In [475]:
prompt = """
Your role is to give me 3 two sentence example that would be using the product of $company. Do not use name of company in description. Keep it broad.
The goal is to give the a detailed description of the job that the tool automates and who performs that job and when.

IF THE PERSON AUTOMATED IS NOT DOING A JOB THAT IS NOT DONE AT WORK THEN WRITE "NOT_ONET" IN FRONT OF IT.

EXAMPLE: 
________________
Company: DreamGenerator.ai
Website: DreamGenerator.ai
DreamGenerator.ai is an innovative platform that utilizes generative AI to transform user ideas into stunning and diverse images. It offers a unique creative experience, encouraging users to experiment with prompts and share their AI-generated art, while also providing an opportunity to receive recognition through likes and shares.
Tasks/Jobs: Image generation, Artistic exploration, Prompt engineering, Social media sharing
Industry: Creative arts
People Using Tool: Artists, Designers, Everyone
**************************
Example 1: A writer uses generates images that will inspire ideas for their new book by typing in fun prompts into the website.
Job automated 1: Writer has to generate create ideas for book.
Example 2: A social media marketer creates eye-catching visuals for their summer fitness campaign, like "people doing yoga on a sunrise beach," then uses them for engaging social media posts.
Job automated 2: Graphic designer createsn images for summer fitness brand campaign.
Example 3: An architect generates images that spark ideas to for the new building. They type prompts like "skyscraper covered in living walls" and "underwater hotel with transparent pods," generating visuals to inspire their sustainable architecture concepts.
Job autoamted 3: Architect generates ideas for new building.
**************************

Company: Cozy Ventures
Wesbite: https://cozy.ventures/
Current Description: Cozy Ventures is a boutique software development company that provides startups with innovative digital solutions to accelerate their growth. Composed of a team of seasoned engineers and designers, they specialize in creating custom software tailored to meet the unique needs of each client.
Tasks/Jobs: Software development,  UI/UX design,  Project management,  Technical consulting
Industry:  Software development
People Using Tool:  Startups,  Entrepreneurs
**************************
Example 1: A food delivery startup develop a custom app that integrates real-time tracking, route optimization, and communication features. 
Person automated 1: A food deliver manager is in charge of real-time tracking, route optimization, and communication for food delivery employees.
Example 2: An e-commerce company build a custom recommendation engine, powered by AI, that analyzes user behavior and preferences to offer relevant product suggestions.
Person automated 2: A market research analyst for a ecommerce company analyzes user behavior and preferences to offer relevant product suggestions
Example 3: A social media platform develops an app a sophisticated content moderation system that leverages machine learning algorithms to identify and flag inappropriate content in real-time. 
Person automated 3: A social media content moderator identifies and flag inappropriate content in real-time.
**************************


Company: PnPAI
Website: www.pnpai.co
PnPAI is an AIaaS platform that standardizes the implementation of AI across various industries, providing pre-built solutions that can be easily integrated without the need for in-house AI expertise. Catering to businesses of all sizes, PnPAI offers industry-focused solutions for sectors like ecommerce, retail, healthcare, and finance, making AI an accessible and integral part of decision-making and growth strategies.
Tasks/Jobs:  Data analysis, Predictive modeling, Automation, Optimization
Industry: AIaaS (AI as a Service)
People Using Tool: Businesses, Professionals 
**************************
Example 1: A retail manager uses PnPAI to analyze customer purchasing patterns and predict future demand for specific products. They then adjust inventory levels and optimize product placement in stores based on the AI-generated insights.
Person automated 1: A data analyst for a retail manager performs inventory management assestment to predict future custumer demand.
Example 2: An insurance company uses PnPAI to automate the risk assessment process for new clients. The AI system analyzes various data points, including credit history, driving records, and health information, to quickly determine insurance premiums and coverage options. 
Person automated 2: A risk assesser for an insurance agency uses data to perform risk assement for new clients. 
Example 3: A healthcare provider leverages PnPAI to personalize treatment plans for patients with chronic diseases. The AI analyzes patient medical history, current symptoms, and lifestyle factors to recommend optimal medication dosages and treatment strategies. 
Person automated 3: A data analysts for a healthcare company uses patient data to form personalized treatment plan creation. 
**************************
________________
Now your turn:
Company:$company
Website: $website
Current Description: $generated_description
$parsed_description
YOUR TURN:
**************************
Example 1: 
Person automated 1:
Example 2: 
Person automated 2:
Example 3:
Person automated 3:
**************************




"""
websites = list(sample_df['website'])
companies = list(sample_df['organization name'])
descriptions = list(sample_df['generated_description'])
industries = list(sample_df['parsed_description'])
labels = list(sample_df['generated_cluster_label'])
results = []
n = len(sample_df)
for i in range(0,n):
    while True:
        try:
            print(f"Processing {str(i)}: {companies[i]}")
            print(websites[i])
            print(descriptions[i])
            print(industries[i])
            print(f"Label Cluster: {labels[i]}")
            p1 = prompt.replace("$website", websites[i])\
                       .replace("$company", companies[i])\
                       .replace("$generated_description", descriptions[i])\
                       .replace("$parsed_description",industries[i])
            result = x_gemini.ask(p1)
            result = result.replace("*","").replace("Product: ","").replace("\n\n","\n")
    
            print(f"Result: {result}\n")
            results.append(result)
            break
    
        
        except Exception as e:
            print(f"Error processing {str(i)}, {companies[i]}: {e}")
            results.append(None)
            time.sleep(20)

sample_df = sample_df[:len(results)]
sample_df['examples'] = results



Processing 0: BLOVO Animal Health
www.blovo.org
BLOVO Animal Health is a cutting-edge platform that utilizes artificial intelligence and predictive data analytics to anticipate and prevent health problems in animals. The service is designed to revolutionize animal healthcare by providing proactive solutions and insights to ensure optimal animal health and wellbeing.

Tasks/Jobs: Health risk prediction, Disease prevention, Early intervention, Animal health management
Industry: Animal healthcare 
People Using Tool: Veterinarians, Animal owners
 

Label Cluster: Veterinary and Pet Care Technologies
Result: 
Example 1: A veterinarian uses a platform to analyze a pet's medical history and current health data to predict potential future health issues. This allows them to proactively recommend preventive measures and early interventions, such as diet adjustments or specific medications.
Person automated 1: A veterinarian analyzes medical and health data to predict and diagnose health issues.


In [476]:
def extract_data(row, pattern):
    row = row.replace("\n"," ").replace("  "," ")
    pattern = r"(Example \d+: .*?)(Person automated \d+: .*?)(?= Example \d+|$)"
    matches = re.findall(pattern, row, flags=re.DOTALL)
    matches = [[m.split(":")[1].strip() for m in match] for match in matches]
    results = []
    for m in matches: 
        for l in m: results.append(l)
    return results


if "JOB1_ONET" not in sample_df.columns:
    sample_df[["Job1_ONET","Job2_ONET","Job3_ONET"]] = 'N/A'
    sample_df = sample_df.reset_index()

sample_df[['Example1','Job1','Example2','Job2','Example3','Job3']] = list(sample_df['examples'].apply(lambda x: extract_data(x, pattern)))



In [478]:
# sample_df.to_csv("sample_df_with_examples.csv")

In [455]:
sample_df

Unnamed: 0.1,level_0,index,Unnamed: 0,organization name,num employees,founded date,description,industries,headquarters location,description.1,...,Job1_ONET,Job2_ONET,Job3_ONET,examples,Example1,Job1,Example2,Job2,Example3,Job3
0,0,135,135,AIrtizen,1-10,2023-09-05,Solopreneur that creates no to low-code automa...,"Artificial Intelligence (AI), Developer Tools,...",—,AI artisan that creates efficient tools,...,"[[Loan Officers, Analyze applicants' financial...","[[Financial Quantitative Analysts, Develop met...","[[Financial Quantitative Analysts, Assess the ...",\nExample 1: A solopreneur uses AIrtizen to au...,A solopreneur uses AIrtizen to automate their ...,A solopreneur performs email marketing by send...,A small business owner leverages AIrtizen to c...,A small business owner is in charge of providi...,An entrepreneur uses AIrtizen to analyze custo...,An entrepreneur analyzes customer data and ide...
1,1,40,40,ICAI Icelandic Center for Artificial Intelligence,1-10,2023-02-03,—,Artificial Intelligence (AI),"Reykjavík, Gullbringusysla, Iceland",Elevating Artificial Intelligence in Iceland t...,...,,,,\nExample 1: A researcher uses ICAI's AI tools...,A researcher uses ICAI's AI tools to analyze l...,A data analyst for a researcher performs analy...,A software developer utilizes ICAI's AI platfo...,A software engineer builds a custom AI model f...,An entrepreneur uses ICAI's resources to devel...,NOT_ONET An entrepreneur working on an AI-powe...


In [481]:
# sample_df[['Example1', 'Job1', 'Example2', 'Job2', 'Example3', 'Job3']] = sample_df['examples'].str.extract(r'Example 1: (.*?)\nPerson automated 1: (.*?)\nExample 2: (.*?)\nPerson automated 2: (.*?)\nExample 3: (.*?)\nPerson automated 3: (.*)', flags=re.DOTALL)

results = []
for i, x in sample_df.reset_index().iterrows():
    name = x["organization name"]
    print(f"Processing {str(i), len(results)}: {name}")
    job1_embedding = x_chat.get_embedding(x["Job1"])
    job2_embedding = x_chat.get_embedding(x["Job2"])
    job3_embedding = x_chat.get_embedding(x["Job3"])
    results.append([name,job1_embedding, job2_embedding, job3_embedding])

df = pd.DataFrame(results, columns=['name', "Job1_embedding","Job2_embedding","Job3_embedding"])
sample_df = pd.concat([sample_df.reset_index(), df], axis=1)

Processing ('0', 0): BLOVO Animal Health
Processing ('1', 1): Enkrypt AI
Processing ('2', 2): Almo Chat
Processing ('3', 3): AsFIn
Processing ('4', 4): AI WebAssist Inc.
Processing ('5', 5): Doowii, Inc
Processing ('6', 6): SimLoop AI
Processing ('7', 7): Pacerr
Processing ('8', 8): Thunder
Processing ('9', 9): Medicod AI
Processing ('10', 10): AiToolHunt
Processing ('11', 11): AIrtizen
Processing ('12', 12): fyby.io
Processing ('13', 13): Adhesible
Processing ('14', 14): Ycotek
Processing ('15', 15): Locusive
Processing ('16', 16): Embriot
Processing ('17', 17): Tellen
Processing ('18', 18): AI Generative Lab - AIGL
Processing ('19', 19): Go Boldly
Processing ('20', 20): AppJust
Processing ('21', 21): Gigly
Processing ('22', 22): SendBlue.ai
Processing ('23', 23): WriteAI.me
Processing ('24', 24): WOWie
Processing ('25', 25): Palatine Growth Credit
Processing ('26', 26): Rivio
Processing ('27', 27): doctor Z
Processing ('28', 28): Taxly.Ai
Processing ('29', 29): Resume Ninja
Processin

In [482]:
sample_df.head()

Unnamed: 0.1,level_0,index,Unnamed: 0,organization name,num employees,founded date,description,industries,headquarters location,description.1,...,Example1,Job1,Example2,Job2,Example3,Job3,name,Job1_embedding,Job2_embedding,Job3_embedding
0,0,274,274,BLOVO Animal Health,1-10,2023-03-20,—,—,—,BLOVO is focused on innovative methods to pred...,...,A veterinarian uses a platform to analyze a pe...,A veterinarian analyzes medical and health dat...,An animal owner uses a platform to monitor the...,An animal owner analyzes and tracks pet's acti...,A farmer uses a platform to analyze herd data ...,A farmer analyzes herd data to identify animal...,BLOVO Animal Health,"[-0.048964887857437134, 0.015215005725622177, ...","[-0.05107363685965538, 0.012268440797924995, -...","[-0.034672852605581284, -0.01153380423784256, ..."
1,1,50,50,Enkrypt AI,1-10,2023-03-31,Enabling faster and secure adoption of Gen AI ...,"Artificial Intelligence (AI), Asset Management...","Boston, Massachusetts, United States",Enabling faster and secure adoption of Gen AI ...,...,A software developer integrates a pre-trained ...,A developer who programs chatbots for customer...,A data scientist uses Enkrypt AI to securely i...,NOT_ONET A data scientist who secures and prog...,A business analyst uses Enkrypt AI to streamli...,NOT_ONET A business analyst manually manages t...,Enkrypt AI,"[-0.005359786096960306, 0.0005912086926400661,...","[-0.00414356030523777, -0.0055502853356301785,...","[-0.0015187064418569207, 0.0013835820136591792..."
2,2,250,250,Almo Chat,1-10,2023-05-01,Almo is an innovative AI-powered chatbot that ...,"Customer Service, SaaS, Software","Hyderabad, Andhra Pradesh, India","SaaS, Customer Support",...,A customer service representative uses Almo Ch...,A customer service representative answers cust...,A marketing team uses Almo Chat to collect lea...,A marketing manager collects leads and qualifi...,A tech support team utilizes Almo Chat to prov...,A tech support specialist provides immediate a...,Almo Chat,"[-0.04567105695605278, -0.019649511203169823, ...","[0.00018257819465361536, -0.014048346318304539...","[-0.014262042939662933, -0.002772251144051552,..."
3,3,319,319,AsFIn,1-10,2023-01-27,"AsFIn brings, thru its best-in-class digital p...","Financial Services, FinTech, Predictive Analytics","Madrid, Madrid, Spain",The Digital CFO for SMEs. Digital platform del...,...,A small business owner uses AsFIn to analyze t...,A CFO for a small business would be responsibl...,A startup uses AsFIn to create detailed financ...,A financial analyst for a startup would be res...,A company uses AsFIn to automate their monthly...,A financial reporting specialist would normall...,AsFIn,"[-0.016614355146884918, 0.011362160556018353, ...","[-0.046705733984708786, 0.019734270870685577, ...","[-0.023972684517502785, 0.01975742168724537, -..."
4,4,297,297,AI WebAssist Inc.,1-10,2023-05-17,We are currently combining different machine l...,—,—,Artificial Intelligent Web Assistant,...,A college student uses AI WebAssist to analyze...,The student is performing their own website ev...,A busy professional utilizes AI WebAssist's ti...,The professional is performing their own onlin...,A senior citizen relies on AI WebAssist's cont...,NOT_ONET The senior citizen is performing thei...,AI WebAssist Inc.,"[-0.0022539575584232807, 0.03838570415973663, ...","[0.006720120087265968, -0.019216211512684822, ...","[0.022356342524290085, 0.00433908449485898, -0..."


In [483]:
sample_df.to_csv("sample_df_with_examples_embeddings.csv")

In [383]:
import nltk
from nltk.stem import WordNetLemmatizer
from nltk import pos_tag
from nltk.corpus import wordnet

# Download the required resources
nltk.download('averaged_perceptron_tagger')
nltk.download('wordnet')
def get_wordnet_pos(treebank_tag):
    if treebank_tag.startswith('J'):
        return wordnet.ADJ
    elif treebank_tag.startswith('V'):
        return wordnet.VERB
    elif treebank_tag.startswith('N'):
        return wordnet.NOUN
    elif treebank_tag.startswith('R'):
        return wordnet.ADV
    else:
        return None

sentence = "A voice actor produces voiceovers for podcast introductions."
tokens = nltk.word_tokenize(sentence)
tagged_tokens = pos_tag(tokens)

def get_person(text):
    text = text.replace("A ","")
    lemmatizer = WordNetLemmatizer()
    for word, tag in tagged_tokens:
        wntag = get_wordnet_pos(tag)
        if wntag == wordnet.VERB:
            first_verb = lemmatizer.lemmatize(word, pos=wntag)
            break
    return text.split(" "+first_verb)[0]
get_person("A voice actor produces voiceovers for podcast introductions.")

[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /Users/juliasusser/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package wordnet to
[nltk_data]     /Users/juliasusser/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


'voice actor'

In [491]:
np.vstack(job_embeddings)

array([[-0.04896489,  0.01521501, -0.0172018 , ...,  0.01338203,
        -0.01152982, -0.01339485],
       [-0.05107364,  0.01226844, -0.00913018, ...,  0.00093119,
        -0.00062929, -0.02295237],
       [-0.03467285, -0.0115338 , -0.01705066, ...,  0.00600107,
         0.00746562, -0.02033696]])

In [492]:
import numpy as np
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity

for index, row in sample_df.iterrows():
    print("***********************")
    print(row["organization name"])
    print(row["generated_description"])
    jobs = row[["Job1","Job2","Job3"]].values
    job_embeddings = row[["Job1_embedding","Job2_embedding","Job3_embedding"]].values
    startup_results = []
    for i in range(len(jobs)):
        job = jobs[i]
        job_embedding = np.vstack(job_embeddings)
        print(f"\nProcessing Job: {job}")
        cosine_sim = cosine_similarity(job_embedding, embeddings, 'cosine')
        person = get_person(job)
        person_embedding = np.array([x_chat.get_embedding(person)])
        top3 = sorted(zip(cosine_sim[0], df_exp[['Title', 'Task']].values), reverse=True)[:3]
        job_results = []
        for x,y in top3:
            
            onet_person = y[0]
            onet_person_embedding = np.array([x_chat.get_embedding(onet_person)])
            cosine_sim = cosine_similarity(person_embedding, onet_person_embedding, 'cosine')[0][0]
            print(f"{y}\n Cosine Similarity: {x}, Person Cosine Similarity: {cosine_sim}")
            job_results.append([y[0],y[1],x,cosine_sim])
        startup_results.append(job_results)
    sample_df.loc[index, ["Job1_ONET", "Job2_ONET", "Job3_ONET"]] = startup_results
        
    print("***********************\n\n")
        

        
    


***********************
BLOVO Animal Health
BLOVO Animal Health is a cutting-edge platform that utilizes artificial intelligence and predictive data analytics to anticipate and prevent health problems in animals. The service is designed to revolutionize animal healthcare by providing proactive solutions and insights to ensure optimal animal health and wellbeing.

Processing Job: A veterinarian analyzes medical and health data to predict and diagnose health issues.
['Veterinarians'
 'Research diseases to which animals could be susceptible.']
 Cosine Similarity: 0.5644843187073701, Person Cosine Similarity: 0.513404694847263
['Veterinarians'
 'Operate diagnostic equipment, such as radiographic or ultrasound equipment, and interpret the resulting images.']
 Cosine Similarity: 0.560867075190992, Person Cosine Similarity: 0.513404694847263
['Veterinary Technologists and Technicians'
 'Perform laboratory tests on blood, urine, or feces, such as urinalyses or blood counts, to assist in the di

In [495]:
sample_df.to_csv("sample_df_with_onet_similarity.csv")

In [439]:

sample_df.loc[0, ["Job1_ONET", "Job2_ONET", "Job3_ONET"]] = startup_results


In [442]:
sample_df.iloc[0].Job1

AttributeError: 'Series' object has no attribute 'Job1'

In [441]:
sample_df.iloc[0].Job1_ONET

[['Loan Officers',
  "Analyze applicants' financial status, credit, and property evaluations to determine feasibility of granting loans.",
  0.576983110392953,
  0.632143777461692],
 ['Financial Managers',
  'Examine, evaluate, or process loan applications.',
  0.545331505006758,
  0.29368268617098986],
 ['Loan Officers',
  'Review loan agreements to ensure that they are complete and accurate according to policy.',
  0.5395771574671153,
  0.6321613063587334]]

In [360]:
 cosine_similarity = cosine_similarity(single_embedding, embeddings, 'cosine')


TypeError: 'numpy.ndarray' object is not callable

In [348]:
df["Embeddings"] = df["Embeddings"].apply(lambda x: np.array(x))
embeddings = df["Embeddings"]

In [359]:
embeddings = np.array(embeddings)
embeddings = np.vstack(embeddings)
embeddings.shape

(11582, 3072)

In [354]:
import numpy as np
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
print(job3)
single_embedding = np.array([job3_embedding])
cosine_similarity = cosine_similarity(single_embedding, embeddings, 'cosine')
print("Top 3 Cosine similarities and details:", sorted(zip(cosine_similarity[0], df_exp[['Title', 'Task']].values), reverse=True)[:3])

max_cosine_sim = np.max(cosine_similarity)
max_index = np.argmax(cosine_similarity)

print("Max Cosine similarity:", max_cosine_sim)
print("Index:", df_exp.iloc[max_index][["Title","Task"]].values)


A freelance writer manually reading and summarizing long email threads from clients.
Top 3 Cosine similarities and details: [(0.4142085959362561, array(['Digital Forensics Analysts',
       'Write technical summaries to report findings.'], dtype=object)), (0.39795679378787047, array(['Writers and Authors', 'Present drafts and ideas to clients.'],
      dtype=object)), (0.39394958290327164, array(['Legal Secretaries and Administrative Assistants',
       'Submit articles and information from searches to attorneys for review and approval for use.'],
      dtype=object))]
Max Cosine similarity: 0.4142085959362561
Index: ['Digital Forensics Analysts'
 'Write technical summaries to report findings.']


In [351]:
print("Top 3 Cosine similarities and details:", sorted(zip(cosine_similarity[0], df_exp[['Title', 'Task']].values), reverse=True)[:3])


Top 3 Cosine similarities and details: [(0.44830755574065523, array(['Web Developers',
       'Respond to user email inquiries, or set up automated systems to send responses.'],
      dtype=object)), (0.4343264557668538, array(['Web and Digital Interface Designers',
       'Respond to user email inquiries, or set up automated systems to send responses.'],
      dtype=object)), (0.41100481834492647, array(['Correspondence Clerks',
       'Type acknowledgment letters to persons sending correspondence.'],
      dtype=object))]


In [153]:
min_cosine_distance

0.6529335707236348

In [38]:
from sklearn.metrics.pairwise import cosine_similarity
e = np.array(e)
e2 = np.array(e2)
similarity_score = cosine_similarity(e.reshape(1, -1), e2.reshape(1, -1))

# Output the similarity score
print(similarity_score)

[[0.33176111]]


In [91]:
e3 = x_chat.get_embedding("An AI tool that replaces a Food Service Managers by scheduling and receive food and beverage deliveries, checking delivery contents to verify product quality and quantity.")





In [40]:
df_exp = pd.read_csv("./input/gpts_labels_new.csv")

In [41]:
df_exp = df_exp[(df_exp.human_labels!='E0') | (df_exp.gpt_4_exposure!='E0')]

    
df_exp["description"] = df_exp.apply(lambda x: f"{x.Title.capitalize()} that {x.Task.lower()}",axis=1)

In [42]:
df_exp[(df_exp.gpt_4_exposure=='EO')]
df_exp.gpt_4_exposure.unique()

array(['E2', 'E0', 'E1'], dtype=object)

In [43]:
for i, row in df_exp[df_exp.Title == "Sales Managers"].iterrows():
    print(row.description)

Sales managers that resolve customer complaints regarding sales and service.
Sales managers that monitor customer preferences to determine focus of sales efforts.
Sales managers that determine price schedules and discount rates.
Sales managers that review operational records and reports to project sales and determine profitability.
Sales managers that confer or consult with department heads to plan advertising services and to secure information on equipment and customer specifications.
Sales managers that advise dealers and distributors on policies and operating procedures to ensure functional effectiveness of business.
Sales managers that prepare budgets and approve budget expenditures.
Sales managers that plan and direct staffing, training, and performance evaluations to develop and control sales and service programs.
Sales managers that visit franchised dealers to stimulate interest in establishment or expansion of leasing programs.
Sales managers that oversee regional and local sal

In [48]:
# results = []
# for i, x in df_exp.reset_index().iterrows():
#     description = x["description"]
#     print(f"Processing {str(i), len(results)}: {description}")
#     result = x_chat.get_embedding(description)
#     results.append([description,result])


Processing ('0', 0): Sales managers that resolve customer complaints regarding sales and service.
Processing ('1', 1): Sales managers that monitor customer preferences to determine focus of sales efforts.
Processing ('2', 2): Sales managers that determine price schedules and discount rates.
Processing ('3', 3): Sales managers that review operational records and reports to project sales and determine profitability.
Processing ('4', 4): Sales managers that confer or consult with department heads to plan advertising services and to secure information on equipment and customer specifications.
Processing ('5', 5): Sales managers that advise dealers and distributors on policies and operating procedures to ensure functional effectiveness of business.
Processing ('6', 6): Sales managers that prepare budgets and approve budget expenditures.
Processing ('7', 7): Sales managers that plan and direct staffing, training, and performance evaluations to develop and control sales and service programs.


ValueError: setting an array element with a sequence. The requested array has an inhomogeneous shape after 2 dimensions. The detected shape was (11582, 2) + inhomogeneous part.

In [61]:
# df = pd.DataFrame(results, columns=['Description', 'Embeddings'])
# df_exp = pd.concat([df_exp.reset_index(), df], axis=1)

In [67]:
# df_exp.to_csv("gpt_exposure_embeddings.csv")