In [43]:
# Load some jobs into a pandas dataframe

import pandas as  pd
pd.set_option('display.max_colwidth', 300)
pd.options.mode.chained_assignment = None


# replace with student spreadsheet

jobs = pd.read_csv('https://docs.google.com/spreadsheets/d/e/2PACX-1vRWmeWZs5NHptaUiotzWf8Qf9evBzdDToTvkMbn1v0ug5uqu32vPd4BwTU7DsogP9cU7xKov0ku8gYV/pub?output=csv')
jobs = jobs[['year', 'date', 'description']]
jobs.head(5)


In [44]:
# let's try to extract information from the job description
jobs.loc[0, 'description']

'Administrative Specialist II (Elections Specialist-Chinese):\xa0 The Department of Elections – is searching for energetic and resourceful professionals who like to “get stuff done”. The Administrative Specialist II position in the Voter Services Department combines an exciting, fast-paced environment with the opportunity to cultivate talents and apply a variety of skills.\xa0 The ideal candidate will have a desire to help ensure the democratic process through public service.\xa0 They will thrive in an innovative environment and will not hesitate to roll up both sleeves, work hard, have fun, and get the job done. Salary: $25.59 – $32.58 Hourly. Deadline: October 31.\xa0 Application: For the complete job listing and to apply, click here.\xa0'

In [3]:
# let's get the salary.
# let's get the location
# let's get whether it's a nonprofit or a government position


def get_title(job_description):
    return job_description.split('–')[0]

def get_salary(job_description):
    return job_description.split('Salary: ')[1]

get_salary(jobs.loc[0, 'description'])

# this isn't gonna work!

'$25.59 – $32.58 Hourly. Deadline: October 31.\xa0 Application: For the complete job listing and to apply, click here.\xa0'

In [4]:
# Weather API

from pyowm import OWM
from pyowm.utils import config
from pyowm.utils import timestamps

# ---------- FREE API KEY examples ---------------------

owm = OWM('c721ef57a321d8ae12fdaa16de2870d8')
mgr = owm.weather_manager()


# Search for current weather in London (Great Britain) and get details
observation = mgr.weather_at_place('London,GB')
w = observation.weather

w.detailed_status         # 'clouds'
w.wind()                  # {'speed': 4.6, 'deg': 330}
w.humidity                # 87
w.temperature('celsius')  # {'temp_max': 10.5, 'temp': 9.7, 'temp_min': 9.0}
w.rain                    # {}
w.heat_index              # None
w.clouds                  # 75

w.temperature('fahrenheit')['temp']

54.72

In [12]:
# Structured Outputs API

from pydantic import BaseModel
from openai import OpenAI

client = OpenAI()

class ResearchPaperExtraction(BaseModel):
    title: str
    authors: list[str]
    abstract: str
    keywords: list[str]

completion = client.beta.chat.completions.parse(
    model="gpt-4o-2024-08-06",
    messages=[
        {"role": "system", "content": "You are an expert at structured data extraction. You will be given unstructured text from a research paper and should convert it into the given structure."},
        {"role": "user", "content": "..."}
    ],
    response_format=ResearchPaperExtraction,
)

research_paper = completion.choices[0].message.parsed

research_paper.authors

['Jane Doe', 'John Smith', 'Alex Johnson']

In [16]:
# sample description string
description = jobs.loc[1, 'description']
description

'Campaign Manager/Director, Supreme Court Reform– The Brennan Center for Justice at NYU School of Law is a nonpartisan law and policy institute that seeks to improve our systems of democracy and justice. We work to hold our political institutions and laws accountable to the twin American ideals of democracy and equal justice for all. The Brennan Center’s work ranges from voting rights to court reform, from ending mass incarceration to preserving constitutional protections in the fight against terrorism. Part think tank, part advocacy group, part cutting-edge communications hub, we start with rigorous research. We craft innovative policies. And we fight for them — in the courts, in Congress and the states, and in the court of public opinion.The Brennan Center’s Kohlberg Center on the U.S. Supreme Court was established in 2024 to advocate for reform of the Supreme Court to prevent ethical abuses, reduce partisanship and extremism, and ensure that the Court plays a more\xa0 balanced and a

In [62]:
# modify schema to include the fields we want to extract

from pydantic import BaseModel
from openai import OpenAI

client = OpenAI()

class JobDescriptionExtraction(BaseModel):
    job_title: str
    job_location: str
    employer: str
    salary_low_end: float
    responsible_for_administering_elections: bool
    nonprofit_or_government: str
    # explanation_for_nonprofit_or_government: str

def parse_job_description(job_description: str) -> JobDescriptionExtraction:
    completion = client.beta.chat.completions.parse(
        model="gpt-4o-2024-08-06",
        messages=[
            {"role": "system", "content": "You are an expert at structured data extraction. You will be given a job description and should convert it into the given structure."},
            {"role": "user", "content": job_description}
        ],
        response_format=JobDescriptionExtraction,
    )

    return completion.choices[0].message.parsed

parse_job_description(jobs.loc[1, 'description'])

JobDescriptionExtraction(job_title='Campaign Manager/Director, Supreme Court Reform', job_location='New York, NY', employer='The Brennan Center for Justice at NYU School of Law', salary_low_end=90000.0, responsible_for_administering_elections=False, nonprofit_or_government='nonprofit', explanation_for_nonprofit_or_government='The Brennan Center for Justice is a nonpartisan law and policy institute at NYU School of Law.')

In [48]:
shortened = jobs.head(3)

# loop through df, parsing each job description in the 'description' column, and adding the parsed data to new columns

for i, row in shortened.iterrows():
    description = row['description']
    # print(i, description)
    
    parsed = parse_job_description(description)
    
    shortened.loc[i, 'job_title'] = parsed.job_title
    shortened.loc[i, 'job_location'] = parsed.job_location
    shortened.loc[i, 'employer'] = parsed.employer
    shortened.loc[i, 'salary_low_end'] = parsed.salary_low_end
    shortened.loc[i, 'responsible_for_administering_elections'] = parsed.responsible_for_administering_elections
    shortened.loc[i, 'nonprofit_or_government'] = parsed.nonprofit_or_government

    

In [63]:
shortened = jobs.head(3)

# loop through df, parsing each job description in the 'description' column, and adding the parsed data to new columns


for i, row in shortened.iterrows():
    description = row['description']
    # print(i, description)
    
    parsed = parse_job_description(description)

    for field in parsed:
        shortened.loc[i, field[0]] = field[1]
    
    # shortened.loc[i, 'job_title'] = parsed.job_title
    # shortened.loc[i, 'job_location'] = parsed.job_location
    # shortened.loc[i, 'employer'] = parsed.employer
    # shortened.loc[i, 'salary_low_end'] = parsed.salary_low_end
    # shortened.loc[i, 'responsible_for_administering_elections'] = parsed.responsible_for_administering_elections
    # shortened.loc[i, 'nonprofit_or_government'] = parsed.nonprofit_or_government

shortened

Unnamed: 0,year,date,description,job_title,job_location,employer,salary_low_end,responsible_for_administering_elections,nonprofit_or_government,explanation_for_nonprofit_or_government
0,2024,10-17,"Administrative Specialist II (Elections Specialist-Chinese): The Department of Elections – is searching for energetic and resourceful professionals who like to “get stuff done”. The Administrative Specialist II position in the Voter Services Department combines an exciting, fast-paced environme...",Administrative Specialist II (Elections Specialist-Chinese),Department of Elections,Department of Elections,25.59,True,government,The Department of Elections is a government entity responsible for managing electoral processes.
1,2024,10-17,"Campaign Manager/Director, Supreme Court Reform– The Brennan Center for Justice at NYU School of Law is a nonpartisan law and policy institute that seeks to improve our systems of democracy and justice. We work to hold our political institutions and laws accountable to the twin American ideals o...","Campaign Manager/Director, Supreme Court Reform","NYU School of Law, New York, NY",The Brennan Center for Justice,90000.0,False,Nonprofit,The Brennan Center for Justice is a nonpartisan law and policy institute that operates as a nonprofit organization within NYU School of Law.
2,2024,10-17,"Deputy County Clerk, III, Boone County, Missouri– This position provides general supervision in the voter registration department, manages the recruitment, training, payroll, and assignment of election judges for polling places and early voting in Boone County, and provides election information ...","Deputy County Clerk, III","Boone County, Missouri",Boone County Government,18.04,True,government,Boone County Government is a public sector entity responsible for local governmental operations including administering elections.
