In [1]:
from llama_index import PromptTemplate
from llama_index.llms import Bedrock, ChatMessage
from llama_index.program import LLMTextCompletionProgram
from llama_index.output_parsers import PydanticOutputParser
from pydantic import BaseModel, Field
from typing import List
import pandas as pd

In [2]:
# AWS credentials must already be available to this process, for example
# via a previously configured ~/.aws/credentials file.
bedrock = Bedrock(
    model="anthropic.claude-v2",
    max_tokens=8000,
)

In [7]:
# Smoke test: send one user message to confirm the Bedrock client responds.
bedrock.chat(
    [
        ChatMessage(role="user", content="What is pi"),
    ]
)

ChatResponse(message=ChatMessage(role=<MessageRole.ASSISTANT: 'assistant'>, content=" Pi (π) is a mathematical constant that represents the ratio of a circle's circumference to its diameter. It is an irrational number that begins with 3.14159... and continues infinitely without repeating. Some key facts about pi:\n\n- Pi is approximately equal to 3.14 or 22/7. However, pi is an irrational number, meaning its digits go on forever without repeating.\n\n- Pi is often represented by the Greek letter π. This notation was introduced by William Jones in 1706. \n\n- The value of pi cannot be expressed exactly as a fraction. However, 22/7 and other fractional approximations are commonly used. \n\n- Pi is defined as the ratio of a circle's circumference C to its diameter d. Mathematically, π = C/d.\n\n- Many formulas in mathematics, science and engineering involve pi. It appears in equations dealing with circles, spheres, trigonometry, waves and many other areas. \n\n- Pi is a transcendental num

In [10]:
# Read the raw Indeed scraper export and keep the description column as a plain list.
job_df = pd.read_csv(
    "../data/bronze/apify/indeed/dataset_indeed-scraper_2023-10-23_07-07-15.csv"
)
descriptions = job_df["description"].tolist()

In [9]:
# Create the pydantic class to output into

class Skill(BaseModel):
    """One skill found in a job description, with the text that supports it."""

    # Field descriptions become part of the model's schema, which gives the LLM
    # explicit guidance on what each value should contain.
    name: str = Field(
        ...,
        description="Short name of the skill, e.g. 'ETL' or 'Team Leadership'.",
    )
    # The notebook later checks `reference_text in description` with an exact
    # substring match, so the model must be told to quote verbatim.
    reference_text: str = Field(
        ...,
        description=(
            "Exact, verbatim quote copied from the job description that "
            "mentions this skill. Do not paraphrase, shorten, or alter it."
        ),
    )

# Top-level output model: this is the class passed to PydanticOutputParser below,
# so one parsed LLM response corresponds to one Job.
class Job(BaseModel):
    # NOTE(review): documented with `#` comments on purpose; a docstring or
    # Field(description=...) would presumably alter the schema generated from
    # this model. Confirm before changing.
    #original_description: str  (disabled field, kept for reference)
    # All skills extracted from a single job description.
    skills: List[Skill]

# Build the extraction program: the prompt template receives one job
# description, the Bedrock LLM completes it, and the parser targets `Job`.
skill_extraction_prompt = "Extract the Job skills, with the reference text to that skill, from the following description: {description}"
job_output_parser = PydanticOutputParser(Job)

job_program = LLMTextCompletionProgram.from_defaults(
    llm=bedrock,
    prompt_template_str=skill_extraction_prompt,
    output_parser=job_output_parser,
    verbose=True,
)

In [18]:
# Spot check: run the extraction program on the first description only.
test_output = job_program(
    description=descriptions[0],
)

In [34]:
# Grounding check: a skill is printed in full only when its reference text
# appears verbatim in the first description; otherwise it is reported as missing.
for extracted_skill in test_output.skills:
    if extracted_skill.reference_text not in descriptions[0]:
        print(extracted_skill.name, "is not in the description")
        continue
    print(extracted_skill)

name='ETL' reference_text='We are seeking an experienced ETL (Extract, Transform, and Load) Data Engineer with expertise in Google Cloud Platform (GCP) to join our client data engineering team.'
name='Google Cloud Platform' reference_text='We are seeking an experienced ETL (Extract, Transform, and Load) Data Engineer with expertise in Google Cloud Platform (GCP) to join our client data engineering team.'
name='Data Pipeline Design' reference_text='· Design, develop, and maintain ETL pipelines on Google Cloud Platform (GCP) to ensure efficient data extraction, transformation, and loading processes.'
name='Data Extraction' reference_text='· Extract data from various sources, including databases, APIs, and cloud storage, and ensure data quality and consistency.'
name='Data Transformation' reference_text='· Implement data transformations, including cleaning, aggregation, and enrichment, to prepare data for analysis and reporting.'
name='GCP Services' reference_text='· Design, build, and ma

In [14]:
# Try the same pipeline on the newer LinkedIn export.
# NOTE(review): this rebinds `descriptions`, which earlier held the Indeed
# descriptions; any cell indexing `descriptions[0]` sees whichever load ran last.
marco_csvs = pd.read_csv(
    "../data/jobs_Head_of_Product_-_European_Union_-_Remote_-_LinkedIn.csv"
)
descriptions = marco_csvs["job_description"].tolist()
print(descriptions[0])

About the job
            
 
About RevolutPeople deserve more from their money. More visibility, more control, more freedom. And since 2015, Revolut has been on a mission to deliver just that. With an arsenal of aweso﻿me products that span spending, saving, travel, transfers, investing, exchanging and more, our super app has helped 35+ million customers get more from their money. And we're not done yet.  As we continue our lightning-fast growth,‌ two things are essential to continuing our success: our people and our culture. We've been officially certified as a Great Place to Work™ in recognition of our outstanding employee experience! So far, we have 7,500+﻿ people working around the world, from our great offices or remotely, on our mission. And we're looking for more. We want brilliant people that love building great products, love redefining success, and love turning the complexity of a chaotic world into the simplicity of a beautiful solution.About The RoleOn the Revolut rocket shi

In [10]:
# Run the extraction program on the first entry of `descriptions` and show
# the parsed skill list as-is.
marco_test_output = job_program(
    description=descriptions[0],
)
print(marco_test_output.skills)

In [13]:
# Same grounding check for the second extraction: keep a skill only when its
# reference text is an exact substring of the first description.
for candidate in marco_test_output.skills:
    is_grounded = candidate.reference_text in descriptions[0]
    if is_grounded:
        print(candidate)
    else:
        print(candidate.name, "is not in the description")

name='Crypto Exchange Product Management' reference_text='Completely owning and building our Crypto Exchange product, increasing the number of tokens tradable in the application and providing the tools retail users expect from exchanges'
name='Team Leadership' reference_text="Setting your team's goals, success metrics, and roadmap to align with Revolut’s mission and drive maximum impact based on data analysis, market research, and company strategy"
name='Crypto Product Development' reference_text='Working with our Core Crypto team to expand and improve our suite of crypto products, including deposits, withdrawals, and staking, making them more accessible for our retail users'
name='UX Design' reference_text='Working closely with Design and UX Research to define the customer journey and create an amazing user experience'
name='Engineering Collaboration' reference_text='Liaising with Engineering to ensure effective delivery of the product'
name='Stakeholder Collaboration' reference_text=