# Extract keys from parsed data
We asked the LLM to create new keys whenever the ones it already had were not sufficient to categorize the data. We will now examine the keys it produced, clean up any redundancies, and create guidelines for their use.

In [1]:
#| default_exp keys

In [2]:
from hn_jobs_chat.util import loadJSON

posts = loadJSON("../previousData/postData.json")

# Collect every distinct key that appears in any parsed post, in first-seen
# order. dict.fromkeys() de-duplicates while preserving insertion order, which
# replaces the manual loop and its O(n) `key not in keys` scan per key.
# `.keys()` is kept on purpose so a non-dict entry in `posts` still fails loudly.
keys = list(dict.fromkeys(key for post in posts for key in post.keys()))

print(keys)

['company', 'company_goal', 'location', 'job_title', 'job_requirements', 'tech_stack', 'contact_url', 'additional_notes', 'company_stage', 'company_state', 'remote/local/hybrid details', 'work_schedule', 'urls', 'application_process', 'job_description', 'remote/local details', 'compensation', 'isPosting', 'company_status', 'contact_email', 'additional_urls']


```
['company', 'company_goal', 'location', 'job_title', 'job_requirements', 'tech_stack', 'contact_url', 'additional_notes', 'company_stage', 'company_state', 'remote/local/hybrid details', 'work_schedule', 'urls', 'application_process', 'job_description', 'remote/local details', 'compensation', 'isPosting', 'company_status', 'contact_email', 'additional_urls']
```

In [3]:
#| export

# Cleaned-up, ordered list of the keys used to categorize a job post.
# The order is significant: entries are grouped by topic, company first.
keys = [
    # company
    'company', 'location', 'timezone', 'industry', 'target_market',
    'company_status', 'company_description', 'company_goal', 'company_stage',
    'more_company_info',
    # role
    'employment_type', 'remote_or_local_details', 'job_title',
    'job_description', 'job_requirements', 'job_soft_skills',
    'product_description', 'tech_stack',
    # applying
    'application_process', 'contact_email', 'application_url',
    'information_urls',
    # everything else
    'additional_notes', 'compensation',
]

In [1]:
#| export

# Each key carries a description that tells the LLM what the key is for, plus
# its value type ('string' or 'array') and an 'embed' flag.
#
# The entries live in a list (not only a dict) because their order matters for
# readability when the results are formatted.
#
# NOTE(review): the earlier comment here said "Array keys are not embedded, but
# will be handled separately", yet most array entries below have embed=True
# (only information_urls is an array with embed=False) -- confirm which is intended.
described_keys = [
    dict(zip(('key', 'description', 'type', 'embed'), row))
    for row in (
        # (key, description, type, embed)
        ('company', 'name of the company', 'string', False),
        # Location will not be embedded, but will be handled separately
        ('location', 'country, city or location otherwise stated', 'string', False),
        # Timezone will not be embedded, but will be handled separately
        ('timezone', 'timezones that are referred to in the job post, if any', 'string', False),
        ('industry', 'the industry vertical that the company is part of', 'string', True),
        ('target_market', 'the type of consumer or businesses that are targeted by the company venture', 'string', True),
        ('company_status', 'the current situation that company is in', 'string', True),
        ('company_description', 'description of the company, as described in the job post', 'string', True),
        ('company_goal', 'primary focus of the company', 'string', True),
        ('company_stage', 'venture stage of the company, if applicable', 'string', True),
        ('more_company_info', 'an array. any company information in the listing not otherwise covered by company_goal, company_status, company_stage, company_description, industry or location', 'array', True),
        ('employment_type', 'full-time, part-time, contract, or as otherwise described', 'string', True),
        ('remote_or_local_details', 'details about the remote, local, hybrid or otherwise work arrangement as described', 'string', True),
        ('job_title', 'job title for the position. Use the singular tense', 'string', True),
        ('job_description', 'description of the job as described in the job post', 'string', True),
        ('job_requirements', 'an array of the specific job requirements, listed in the job posting', 'array', True),
        ('job_soft_skills', 'an array of any soft skills required by the job, as outlined in the posting', 'array', True),
        ('product_description', 'a description of the product that job is related to, if any', 'string', True),
        ('tech_stack', 'an array. It is list of the names of technologies used by the company. This can include programming languages, frameworks, libraries, databases, and other tools.', 'array', True),
        ('application_process', 'any description of the application processes that the person applying will undergo', 'string', True),
        ('contact_email', 'a single email. the contact email provided in the job post. if multiple emails are presented, add the additional as an item in the additional notes array', 'string', True),
        ('application_url', 'a single url. The url used to apply for the job. information about the company other than the job listing should be placed in the information_urls key', 'string', False),
        ('information_urls', 'an array. if the posting states that more information about the company or the job is available at a URL other than the job posting, recored those urls in the array here', 'array', False),
        ('additional_notes', 'an array. any additional information about the company, role or position, not covered by the other keys', 'array', True),
        ('compensation', 'any compensation and/or benefits described in the job post', 'string', True),
    )
]

# Lookup by key name; the values are the very same dict objects held in described_keys.
described_keys_dict = {entry['key']: entry for entry in described_keys}


{'company': {'key': 'company',
  'description': 'name of the company',
  'type': 'string',
  'embed': False},
 'location': {'key': 'location',
  'description': 'country, city or location otherwise stated',
  'type': 'string',
  'embed': False},
 'timezone': {'key': 'timezone',
  'description': 'timezones that are referred to in the job post, if any',
  'type': 'string',
  'embed': False},
 'industry': {'key': 'industry',
  'description': 'the industry vertical that the company is part of',
  'type': 'string',
  'embed': True},
 'target_market': {'key': 'target_market',
  'description': 'the type of consumer or businesses that are targeted by the company venture',
  'type': 'string',
  'embed': True},
 'company_status': {'key': 'company_status',
  'description': 'the current situation that company is in',
  'type': 'string',
  'embed': True},
 'company_description': {'key': 'company_description',
  'description': 'description of the company, as described in the job post',
  'type': 'str

In [2]:
#| hide
from nbdev import nbdev_export
# Run nbdev's export step. Presumably this writes the cells marked `#| export`
# to the module named by the `#| default_exp keys` directive at the top of the
# notebook -- confirm against the nbdev documentation.
nbdev_export()