In [186]:
import pandas as pd
import numpy as np
from dotenv import load_dotenv
import os
from os import getenv
from openai import OpenAI
import pathlib
import textwrap
import google.generativeai as genai
import time
import requests
import ast
import json
from sklearn.metrics.pairwise import cosine_similarity
import re


# Notebook-wide pandas display settings: no row/column truncation, a wide
# console, and two digits of float precision.
for _option, _value in {
    'display.max_rows': None,
    'display.max_columns': None,
    'display.width': 1000,
    'display.precision': 2,
}.items():
    pd.set_option(_option, _value)

# Load secrets from the shared .env file; override=True lets the file win over
# variables already present in the process environment.
load_dotenv("../../.env", override=True)
GOOGLE_API_KEY = os.environ.get("GEMINI_API_KEY")
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")


In [187]:

# Configure the google.generativeai SDK with the key read from the environment.
genai.configure(api_key=GOOGLE_API_KEY)
class gemini():
    """Minimal client for the Gemini ``generateContent`` REST endpoint.

    ``ask`` is the entry point: it posts a prompt and returns the reply text,
    or the sentinel string ``"N/A"`` when the response carries no usable text.
    """

    def __init__(self):
        # SDK model handle. ask() does not use it (it goes through request()),
        # but it is kept so code that reads ``.model`` keeps working.
        # NOTE(review): this handle names 'gemini-1.5-flash' while request()
        # targets 'gemini-1.5-flash-latest' -- confirm which one is intended.
        self.model = genai.GenerativeModel('gemini-1.5-flash')

    def request(self, prompt, timeout=60):
        """POST ``prompt`` to the generateContent endpoint and return the decoded JSON.

        Args:
            prompt: Text sent as the single content part of the request.
            timeout: Seconds to wait for the server before giving up.

        Returns:
            The response body parsed from JSON.

        Raises:
            requests.HTTPError: The API answered with a 4xx/5xx status.
            requests.Timeout: No response arrived within ``timeout`` seconds.
        """
        url = 'https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash-latest:generateContent'
        headers = {
            'Content-Type': 'application/json',
            # The key travels in a header instead of the query string so it is
            # not part of the URL that HTTP error messages and logs echo back.
            'x-goog-api-key': GOOGLE_API_KEY,
        }
        data = {
            "contents": [
                {
                    "parts": [
                        {
                            "text": prompt
                        }
                    ]
                }
            ]
        }

        # Without a timeout a stalled connection would hang the cell forever.
        response = requests.post(url, headers=headers, json=data, timeout=timeout)
        # Fail loudly on HTTP errors instead of surfacing a KeyError later in ask().
        response.raise_for_status()
        return response.json()

    def ask(self, prompt):
        """Send ``prompt`` and return the text of the first candidate's first part.

        Returns ``"N/A"`` when the candidate finished for SAFETY reasons, when
        the response holds no candidates (presumably a blocked prompt, which
        the API reports via ``promptFeedback`` -- verify against the API docs),
        or when the first candidate carries no text part.
        """
        payload = self.request(prompt)

        candidates = payload.get("candidates") or []
        if not candidates:
            return "N/A"

        first = candidates[0]
        if first.get("finishReason") == 'SAFETY':
            return "N/A"

        # Candidates stopped for other reasons may come back without content.
        parts = first.get("content", {}).get("parts") or []
        if not parts or "text" not in parts[0]:
            return "N/A"
        return parts[0]["text"]

# Build the Gemini client and send a trivial prompt as a smoke test
# (ask() goes through request(), so this performs a live HTTP call).
x_gemini = gemini()
x_gemini.ask("hi")



'Hi there! How can I help you today? \n'

In [171]:

class chatGPT():
    """Thin wrapper around an ``OpenAI`` client that exposes embedding lookups."""

    def __init__(self):
        self.client = OpenAI(api_key=OPENAI_API_KEY)

    # Disabled streaming chat helper, kept for reference:
    # def ask(self, q):
    #     stream = self.client.chat.completions.create(
    #         model="gpt-4",
    #         messages=[{"role": "user", "content": q}],
    #         stream=True,
    #         temperature=0
    #     )
    #     response = ""
    #     for chunk in stream:
    #         if chunk.choices[0].delta.content is not None:
    #             response += chunk.choices[0].delta.content

    #     self.response = response
    #     return response

    def get_embedding(self, text, model="text-embedding-3-large"):
        """Return the embedding of ``text`` produced by ``model``.

        Newlines in ``text`` are replaced by single spaces, the result is sent
        as a one-item input list, and the embedding of that item is returned.
        """
        single_line = " ".join(text.split("\n"))
        reply = self.client.embeddings.create(input=[single_line], model=model)
        first_item = reply.data[0]
        return first_item.embedding

# Notebook-level chatGPT wrapper; its constructor builds an OpenAI client from OPENAI_API_KEY.
x_chat = chatGPT()


In [242]:
# Load the O*NET matches and keep only rows that clear both similarity cut-offs.
onet_df = pd.read_csv("../output/onet_df.csv").loc[
    lambda frame: (frame["task_similarity"] > .47) & (frame["job_title_similarity"] > .35)
]

# Per-organization weight: the reciprocal of that organization's onet_title count.
onet_weights = (
    onet_df.groupby("organization_name")["onet_title"]
    .count()
    .apply(lambda n_titles: 1 / n_titles)
    .reset_index()
    .rename(columns={"onet_title": "onet_weight"})
)

# Attach the weight to every row of its organization.
onet_df = onet_df.merge(onet_weights, on="organization_name")

# Total weight per O*NET title, rounded to two decimals, heaviest first.
onet_titles = (
    onet_df.groupby("onet_title")["onet_weight"]
    .sum()
    .round(2)
    .sort_values(ascending=False)
    .reset_index()
)

onet_titles.head()


Unnamed: 0,onet_title,onet_weight
0,Search Marketing Strategists,46.41
1,Data Scientists,35.36
2,Market Research Analysts and Marketing Special...,30.95
3,Customer Service Representatives,29.47
4,Financial and Investment Analysts,25.2


In [173]:
# Display the example_job_title column of onet_df.
# NOTE(review): display.max_rows is set to None above, so this renders every row -- consider .head().
onet_df.example_job_title

0                             Legal Secretary
1                             Legal Secretary
2                             Legal Secretary
3                             Legal Secretary
4                             Legal Secretary
5                             Legal Secretary
6                                   Paralegal
7                     Veterinary receptionist
8                                pet grooming
9                     Veterinary receptionist
10                    Veterinary receptionist
11                      social media marketer
12                      social media marketer
13                           Customer service
14                           Customer service
15                           Customer service
16                         Software developer
17                         Software developer
18                             data scientist
19                           graphic designer
20                           graphic designer
21                           graph

In [174]:
# First five onet_task values by row count, shown as a two-column frame.
onet_df.onet_task.value_counts().reset_index().head()

Unnamed: 0,onet_task,count
0,Collect and analyze data on customer demograph...,49
1,"Forecast and track marketing and sales trends,...",43
2,Inform investment decisions by analyzing finan...,34
3,"Interpret data on price, yield, stability, fut...",31
4,Create content strategies for digital media.,28


In [189]:
# Number of rows currently held in onet_df.
len(onet_df)

3994

array(['Legal, Business operations,  platform',
       'Digital marketing, Software ment ',
       'Agriculture, Data Analytics, Financial services, Ewaste recycling, Decentralized Identity,  Recruitment, Language education',
       'Home Design, Film distribution, Creative ',
       'Game ment, IT Services,  Legacy storytelling, Video production, nology Blogging, Content marketing, Legal services, Aviation,  platform, Customer service, Content creation,  Cryptocurrency Trading, Machine Learning, Artificial Intelligence Information',
       'Customer relationship  (CRM), Workflow , Cybersecurity,  content creation , Cloud , Ecommerce ,  assistants , Education, Software Testing, Real Estate, Marketing , Accounting, Nonprofit , Recruitment, Wearable nology, Home Services ,  Education, Facility  , Health  Fitness, Blockchn nology, Mental health care, Customer service, Meeting Productivity, Media  Entertnment, Geospatial , Gaming , Data analytics, Cybersecurity , Personalized healthcare,  