In [3]:
# Kor!
from kor.extraction import create_extraction_chain
from kor.nodes import Object, Text, Number

# LangChain Models
from langchain_google_genai import ChatGoogleGenerativeAI

# Standard Helpers
import pandas as pd
import requests
import time
import json
from datetime import datetime

# Text Helpers
from bs4 import BeautifulSoup
from markdownify import markdownify as md

# For token counting
from langchain.callbacks import get_openai_callback

# Project Hepers
import getpass
import os
from dotenv import load_dotenv, find_dotenv


def printOutput(output):
    print(json.dumps(output,sort_keys=True, indent=3))

In [4]:
load_dotenv(find_dotenv(), override=True)

True

In [5]:
if 'GOOGLE_API_KEY' not in os.environ:
    os.environ['GOOGLE_API_KEY'] = getpass.getpass('Provide your Google API Key: ')

In [6]:
llm = ChatGoogleGenerativeAI(model='gemini-1.5-flash-002', temperature=0.9)

In [7]:
def pull_from_greenhouse(board_token):

    url = f'https://boards-api.greenhouse.io/v1/boards/{board_token}/jobs?content=true'
    
    try:
        response = requests.get(url)
    except:
        print ("Whoops, error")
        return
        
    status_code = response.status_code
    
    jobs = response.json()['jobs']
    
    print (f"{board_token}: {status_code}, Found {len(jobs)} jobs")
    
    return jobs

In [8]:
jobs = pull_from_greenhouse("okta")
# omnivacorpsandbox
# juaai
# eyecarecenter
# teachforall
# matic

okta: 200, Found 174 jobs


In [13]:
print ("Preview:\n", json.dumps(jobs[0]))

Preview:
 {"absolute_url": "https://www.okta.com/company/careers/opportunity/6297980?gh_jid=6297980", "data_compliance": [{"type": "gdpr", "requires_consent": false, "requires_processing_consent": false, "requires_retention_consent": false, "retention_period": null, "demographic_data_consent_applies": false}], "internal_job_id": 2976449, "location": {"name": "United States"}, "metadata": null, "id": 6297980, "updated_at": "2024-12-09T21:49:15-05:00", "requisition_id": "P17421_2976449", "title": "Anaplan Manager", "content": "&lt;div class=&quot;content-intro&quot;&gt;&lt;p&gt;&lt;span style=&quot;color: #000000;&quot;&gt;&lt;strong&gt;Get to know Okta&lt;br&gt;&lt;br&gt;&lt;/strong&gt;&lt;/span&gt;Okta is The World\u2019s Identity Company. We free everyone to safely use any technology\u2014anywhere, on any device or app. Our Workforce and Customer Identity Clouds enable secure yet flexible access, authentication, and automation that transforms how people move through the digital world,

In [14]:
def describeJob(job_description):
    print(f"Job ID: {job_description['id']}")
    print(f"Link: {job_description['absolute_url']}")
    print(f"Updated At: {datetime.fromisoformat(job_description['updated_at']).strftime('%B %d, %Y')}")
    print(f"Title: {job_description['title']}\n")
    print(f"Content:\n{job_description['content'][:550]}")

In [15]:
job_id = 6297980

job_description = [item for item in jobs if item['id'] == job_id][0]
	
describeJob(job_description)

Job ID: 6297980
Link: https://www.okta.com/company/careers/opportunity/6297980?gh_jid=6297980
Updated At: December 09, 2024
Title: Anaplan Manager

Content:
&lt;div class=&quot;content-intro&quot;&gt;&lt;p&gt;&lt;span style=&quot;color: #000000;&quot;&gt;&lt;strong&gt;Get to know Okta&lt;br&gt;&lt;br&gt;&lt;/strong&gt;&lt;/span&gt;Okta is The World’s Identity Company. We free everyone to safely use any technology—anywhere, on any device or app. Our Workforce and Customer Identity Clouds enable secure yet flexible access, authentication, and automation that transforms how people move through the digital world, putting Identity at the heart of business security and growth.&amp;nbsp;&lt;br&gt;&lt;br&g


In [16]:
soup = BeautifulSoup(job_description['content'], 'html.parser')

In [18]:
text = soup.get_text()

text = md(text)

print (text)



**Get to know Okta**Okta is The World’s Identity Company. We free everyone to safely use any technology—anywhere, on any device or app. Our Workforce and Customer Identity Clouds enable secure yet flexible access, authentication, and automation that transforms how people move through the digital world, putting Identity at the heart of business security and growth.   
  
At Okta, we celebrate a variety of perspectives and experiences. We are not looking for someone who checks every single box - we’re looking for lifelong learners and people who can make us better with their unique experiences.   
  
Join our team! We’re building a world where Identity belongs to you.

**The GTM Strategy and Operations Team**

We are a dynamic team within the GTM Strategy and Operations department, dedicated to optimizing and enhancing our business processes through innovative planning and analytics solutions. We build and manage foundational models on Anaplan to support our most critical planning proc

In [51]:
tools = Object(
    id="tools",
    description="""
        A tool, application, or other company that is listed in a job description.
        Analytics, eCommerce and GTM are not tools
    """,
    attributes=[
        Text(
            id="tool",
            description="The name of a tool or company"
        )
    ],
    examples=[
        (
            "Experience in working with Netsuite, or Looker a plus.",
            [
                {"tool": "Netsuite"},
                {"tool": "Looker"},
            ],
        ),
        (
           "Experience with Microsoft Excel",
            [
               {"tool": "Microsoft Excel"}
            ] 
        ),
        (
           "You must know AWS to do well in the job",
            [
               {"tool": "AWS"}
            ] 
        ),
        (
           "Troubleshooting customer issues and debugging from logs (Splunk, Syslogs, etc.) ",
            [
               {"tool": "Splunk"},
            ] 
        )
    ],
    many=True,
)

In [52]:
chain = create_extraction_chain(llm, tools, input_formatter="triple_quotes")

In [53]:
output = chain.invoke(input=text)["data"]

printOutput(output)

{
   "tools": [
      {
         "tool": "Anaplan"
      },
      {
         "tool": "Hyperion"
      },
      {
         "tool": "Tableau"
      }
   ]
}


In [19]:
salary_range = Object(
    id="salary_range",
    description="""
        The range of salary offered for a job mentioned in a job description
    """,
    attributes=[
        Number(
            id="low_end",
            description="The low end of a salary range"
        ),
        Number(
            id="high_end",
            description="The high end of a salary range"
        )
    ],
    examples=[
        (
            "This position will make between $140 thousand and $230,000.00",
            [
                {"low_end": 140000, "high_end": 230000},
            ]
        )
    ]
)

In [20]:
jobs = pull_from_greenhouse("cruise")

cruise: 200, Found 31 jobs


In [84]:
print("Preview:")
print(json.dumps(jobs[1])[:400])

Preview:
{"absolute_url": "https://boards.greenhouse.io/cruise/jobs/6126427?gh_jid=6126427", "data_compliance": [{"type": "gdpr", "requires_consent": false, "requires_processing_consent": false, "requires_retention_consent": false, "retention_period": null, "demographic_data_consent_applies": false}], "education": "education_optional", "internal_job_id": 2925388, "location": {"name": "San Francisco, CA"}, 


In [22]:
job_id = 6126427

job_description = [item for item in jobs if item['id'] == job_id][0]
    
describeJob(job_description)

soup = BeautifulSoup(job_description['content'], 'html.parser')
text = soup.get_text()

text = md(text)

Job ID: 6126427
Link: https://boards.greenhouse.io/cruise/jobs/6126427?gh_jid=6126427
Updated At: December 10, 2024
Title: Manager II, Operations Data Science

Content:
&lt;div class=&quot;content-intro&quot;&gt;&lt;p&gt;&lt;span style=&quot;font-weight: 400;&quot;&gt;We&#39;re Cruise, a self-driving service designed for the cities we love.&lt;/span&gt;&lt;/p&gt;
&lt;p&gt;&lt;span style=&quot;font-weight: 400;&quot;&gt;We’re building the world’s most advanced self-driving vehicles to safely connect people to the places, things, and experiences they care about. We believe self-driving vehicles will help save lives, reshape cities, give back time in transit, and restore freedom of movement for many.&lt;/span&gt;


In [23]:
print(text)



We're Cruise, a self-driving service designed for the cities we love.

We’re building the world’s most advanced self-driving vehicles to safely connect people to the places, things, and experiences they care about. We believe self-driving vehicles will help save lives, reshape cities, give back time in transit, and restore freedom of movement for many.

In our cars, you’re free to be yourself. It’s the same here at Cruise. We’re creating a culture that values the experiences and contributions of all of the unique individuals who collectively make up Cruise, so that every employee can do their best work. 

Cruise is committed to building a diverse, equitable, and inclusive environment, both in our workplace and in our products. If you are looking to play a part in making a positive impact in the world by advancing the revolutionary work of self-driving cars, come join us. Even if you might not meet every requirement, we strongly encourage you to apply. You might just be the right cand

In [66]:
chain = create_extraction_chain(llm, salary_range)
output = chain.invoke(input=text)["data"]

printOutput(output)

{
   "salary_range": [
      {
         "high_end": "252000",
         "low_end": "171400"
      }
   ]
}
