In [1]:
from langchain.chat_models import ChatOpenAI
from langchain.prompts import ChatPromptTemplate
from langchain.chains import LLMChain
from dotenv import load_dotenv

# Load variables from a local .env file into the environment before any client
# is created (presumably this supplies the OpenAI API key — confirm).
load_dotenv()

True

In [2]:
# Import ChatOpenAI from the dedicated `langchain_openai` package (this notebook
# already depends on it for OpenAIEmbeddings). The `langchain.chat_models`
# export is deprecated and produces the `warn_deprecated` notice recorded under
# this cell; this import rebinds the name to the maintained class.
from langchain_openai import ChatOpenAI

model = "gpt-4o"

# temperature=0 asks for the least random sampling the model offers.
llm = ChatOpenAI(temperature=0, model=model)

  warn_deprecated(


In [3]:
from langchain.document_loaders import DirectoryLoader, TextLoader
from langchain.vectorstores import DocArrayInMemorySearch
from langchain.indexes import VectorstoreIndexCreator
from langchain_openai import OpenAIEmbeddings


# Load the *.txt files matched under data/ as plain text, letting TextLoader
# auto-detect each file's encoding and showing a progress bar while reading.
loader = DirectoryLoader(
    "data/",
    glob="*.txt",
    loader_cls=TextLoader,
    loader_kwargs={"autodetect_encoding": True},
    show_progress=True,
)

# Embed the loaded documents with OpenAI embeddings and hold them in an
# in-memory DocArray vector store; `index` is the object later cells query.
index = VectorstoreIndexCreator(
    vectorstore_cls=DocArrayInMemorySearch,
    embedding=OpenAIEmbeddings(),
).from_loaders([loader])

100%|██████████| 1110/1110 [00:12<00:00, 88.57it/s] 







In [4]:
# Ask the vector index for the Product Management material, answered by `llm`.
# The resulting text is reused further down as the role description.
resp = index.query(
    "all the requirements and description of Product Management",
    llm=llm,
)

In [5]:
# Show the answer text returned by the index query.
print(resp)

Based on the provided context, here are the requirements and descriptions related to Product Management:

### Responsibilities and Activities:
1. **Consultation and Coordination:**
   - Consult with product development personnel on product specifications such as design, color, or packaging.
   - Confer with organizational members to accomplish work activities.
   - Coordinate or participate in promotional activities or trade shows, working with developers, advertisers, or production managers, to market products or services.
   - Coordinate special events or programs.

2. **Sales and Market Analysis:**
   - Use sales forecasting or strategic planning to ensure the sale and profitability of products, lines, or services.
   - Analyze business developments and monitor market trends.
   - Analyze market research data.
   - Analyze forecasting data to improve business decisions.

3. **Contract and Distribution Management:**
   - Negotiate contracts with vendors or distributors to manage prod

In [6]:
from langchain.output_parsers import PydanticOutputParser
from pydantic import BaseModel 
from typing import List 

# Output schema for the clustering steps: `plan` is a list of keyword groups,
# each group being a list of strings.
# NOTE(review): documented with `#` comments on purpose — a class docstring
# would likely be copied into the JSON schema that feeds the format
# instructions, changing the prompts; confirm before adding one.
class Plan(BaseModel):
    plan: List[List[str]]

# Parser bound to the `Plan` schema. Its format instructions are printed here
# for inspection and are also injected into the clustering prompts below.
parser = PydanticOutputParser(pydantic_object=Plan)
print(parser.get_format_instructions())

The output should be formatted as a JSON instance that conforms to the JSON schema below.

As an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}
the object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.

Here is the output schema:
```
{"properties": {"plan": {"items": {"items": {"type": "string"}, "type": "array"}, "title": "Plan", "type": "array"}}, "required": ["plan"]}
```


In [29]:
from langchain.prompts import PromptTemplate
from langchain.chains import SequentialChain

# Stage 1 prompt: asks the model for `num_keywords` ATS keywords for `role`,
# with `description` given as extra context about the role.
# The template text is sent to the model as written, so its wording and
# whitespace are part of the runtime behavior.
prompt_1 = PromptTemplate(
    template='''
    Create a list of {num_keywords} ATS
keywords associated with the given role. The keywords should be as distinct 
and unique as possible, and one element in the list of keyword should not be the extension 
of another keyword. Do not have duplicates and do not repeat any keyword. 
Do not create keyword which are extension of other keywords like - "Brand Strategy", "Brand Strategy Development".
Use the given description for the role to get a better understanding 
of the role.

Role: {role}

Description: {description}
    ''', 
    input_variables=['num_keywords', 'role', 'description']
)

# Stage 2 prompt: asks the model to group the stage-1 keywords into nested
# lists ("Plan" of "Item"s) where each inner list holds same-meaning keywords.
# `format_instructions` is pre-filled from `parser`, so only `role` and
# `keywords` have to be supplied when the prompt is rendered.
# NOTE(review): the recorded stage-2 output in this notebook still shows the
# same keyword repeated inside a group, so the "but not duplicates" instruction
# is not reliably followed by the model.
prompt_2 = PromptTemplate(
    template=''' 
    From the given list of ATS keywords and role, cluster 
    the keywords with the exact same or highly similar in context of role meaning in the same list. 
    Cluster all the given keywords, but not duplicates
    and each nested list should represent distinct meaning. 
    For the given list of keywords, create 'Plan' which is a list of 'Item' where 
    each Item represents a list of keywords with exact same meaning. 
    for example, given list [l1, l2, l3, l4] if l1 and l2 have same exact meaning and 
    l3 and l4 also have same exact meaning, the generated list will be Plan: [Item_1: [l1, l2], Item_2: [l3, l4]]. 
    Instead of eliminating the keyword, merge the keywords with same meaning 
    in the same list. If and only if a keyword have a distinct meaning than all the other keywords, it can be alone 
    in the list.
    
    Role: {role}
    
    Keywords: {keywords}
    
    Format Instructions: {format_instructions}
    ''', 
    input_variables=['role', 'keywords'], 
    partial_variables={"format_instructions": parser.get_format_instructions()}
)

# Unused alternative wording for a refinement prompt (kept for reference only):
#     From the given nested list of ATS keywords for the given role, make sure that
#     the ATS keywords in the same nested list have the exact same meaning. If the ATS keywords
#     in an Item do not have the same meaning, create a new Item and append it to the Plan.
#     Use as many keywords as possible,
#     and each Item should represent a distinct meaning.
#     For example, given the nested list - Plan: [Item_1: [l1, l2], Item_2: [l3, l4], Item_3: [l5]],
#     if l1 and l2 do not have the same
#     meaning, the refined list will be - Plan: [Item_1: [l1], Item_2: [l3, l4], Item_3: [l5], Item_4: [l2]].
#     Instead of eliminating a keyword, merge the keywords with the same meaning
#     in the same list. If and only if a keyword has a distinct meaning from all the other keywords, it can be alone
#     in the list.

# Stage 3 prompt: asks the model to re-check each keyword group from stage 2
# and move any keyword that does not fit its group into a new list, leaving
# the other groups unchanged.
# `format_instructions` is pre-filled from `parser`, so only `role` and
# `curated_keywords` have to be supplied when the prompt is rendered.
prompt_3 = PromptTemplate(
    template=''' 
    Given the Nested Lists of ATS keywords associated with the given role. Evaluate 
    each list of keywords, if the keyword is not similar in meaning (for the given role) create a new list 
    for that keyword, otherwise leave the list unchanged. Your work is to evaluate each list for their similar 
    meaning, and make changes only if necessary.
    
    The input and output format is same. 
    
    Role: {role}
    
    Keywords: {curated_keywords}
    
    Format Instructions: {format_instructions}
    ''',
    input_variables=['role', 'curated_keywords'],
    partial_variables={"format_instructions": parser.get_format_instructions()}
)

# Build one LLMChain per stage, in pipeline order. Each stage stores its model
# response under its own output key so later stages can read it by name.
chain_1, chain_2, chain_3 = (
    LLMChain(llm=llm, prompt=stage_prompt, output_key=stage_key)
    for stage_prompt, stage_key in (
        (prompt_1, "keywords"),
        (prompt_2, "curated_keywords"),
        (prompt_3, "refined_keywords"),
    )
)

# Run the three stages back to back: the output key of one stage is an input
# variable of the next. All three stage outputs are exposed in the result so
# each step can be inspected afterwards.
overall_chain = SequentialChain(
    verbose=True,
    input_variables=["role", "num_keywords", "description"],
    chains=[chain_1, chain_2, chain_3],
    output_variables=["keywords", "curated_keywords", "refined_keywords"],
)

In [30]:
# Run the whole pipeline for the Product Management role, using the index
# answer (`resp`) as the role description.
# `invoke` replaces the direct `overall_chain({...})` call: calling a chain
# object directly is deprecated in LangChain, while `invoke` returns the same
# result dictionary.
resp_chain = overall_chain.invoke(
    {"role": 'product management', "num_keywords": 500, "description": resp}
)



[1m> Entering new SequentialChain chain...[0m

[1m> Finished chain.[0m


In [31]:
# Inspect the stage-2 result: keywords grouped by meaning (raw model text).
print(resp_chain["curated_keywords"])

```json
{
  "plan": [
    ["Product Development"],
    ["Market Analysis", "Market Trends", "Market Research"],
    ["Sales Forecasting"],
    ["Strategic Planning", "Business Decisions"],
    ["Business Development"],
    ["Data Analysis", "Data Analysis Methods", "Data Analysis", "Data Analysis", "Data Analysis", "Data Analysis"],
    ["Vendor Negotiation", "Vendor Contracts", "Vendor Management", "Vendor Negotiation", "Vendor Negotiation"],
    ["Distribution Management", "Distribution Networks", "Distribution Networks", "Distribution Networks", "Distribution Networks"],
    ["Regulatory Documentation"],
    ["Project Reporting", "Progress Reporting", "Project Reporting", "Project Reporting", "Project Reporting", "Project Reporting"],
    ["Team Supervision", "Employee Supervision", "Employee Management", "Employee Management", "Employee Management", "Employee Management"],
    ["Customer Service", "Customer Service Principles", "Customer Service", "Customer Service"],
    ["Technic

In [32]:
# Inspect the stage-3 result: the groups after the refinement pass (raw model text).
print(resp_chain["refined_keywords"])

```json
{
  "plan": [
    ["Product Development"],
    ["Market Analysis", "Market Trends", "Market Research"],
    ["Sales Forecasting"],
    ["Strategic Planning", "Business Decisions"],
    ["Business Development"],
    ["Data Analysis", "Data Analysis Methods"],
    ["Vendor Negotiation", "Vendor Contracts", "Vendor Management"],
    ["Distribution Management", "Distribution Networks"],
    ["Regulatory Documentation"],
    ["Project Reporting", "Progress Reporting"],
    ["Team Supervision", "Employee Supervision", "Employee Management"],
    ["Customer Service", "Customer Service Principles"],
    ["Technical Project Management"],
    ["Financial Systems", "Financial Information Systems", "Financial Systems Development"],
    ["Technical Source Data"],
    ["Data Specifications"],
    ["Model Validation"],
    ["Scientific Models"],
    ["Data Requirements"],
    ["Data Quality"],
    ["Statistical Analysis", "Statistical Tests", "Statistical Methods"],
    ["Operational Problems

In [33]:
# Inspect the stage-1 result: the flat keyword list as returned by the model.
print(resp_chain["keywords"])

Certainly! Here is a list of 500 distinct and unique ATS keywords associated with the role of Product Management:

1. Product Development
2. Market Analysis
3. Sales Forecasting
4. Strategic Planning
5. Business Development
6. Market Trends
7. Market Research
8. Data Analysis
9. Vendor Negotiation
10. Distribution Management
11. Regulatory Documentation
12. Project Reporting
13. Team Supervision
14. Customer Service
15. Technical Project Management
16. Financial Systems
17. Technical Source Data
18. Data Specifications
19. Data Analysis Methods
20. Model Validation
21. Scientific Models
22. Data Requirements
23. Data Quality
24. Statistical Analysis
25. Operational Problems
26. Product Specifications
27. Design Consultation
28. Packaging Consultation
29. Promotional Activities
30. Trade Shows
31. Special Events
32. Program Coordination
33. Contract Negotiation
34. Distribution Networks
35. Transportation Contracts
36. Logistics Services
37. Operational Documentation
38. Project Plans
3

In [34]:
import json

# Parse and validate the refined output BEFORE opening the destination file:
# opening with "w" truncates it immediately, so a malformed model response
# would otherwise leave an empty file behind (and wipe any earlier result).
refined_plan = parser.parse(resp_chain['refined_keywords'])

# Write explicitly as UTF-8 so the saved JSON does not depend on the
# platform's default text encoding.
with open("result-500.json", "w", encoding="utf-8") as file:
    file.write(refined_plan.model_dump_json())