In [1]:
import pandas as pd
import numpy as np
import json
import os
import openai
from tqdm.notebook import tqdm
from dotenv import load_dotenv
load_dotenv()

True

In [2]:
# initialize openai
openai.api_key = os.environ["OPENAI_API_KEY"]

client = openai.OpenAI()

In [3]:
df = pd.read_csv("../data/Resume.csv")
df.shape

(2484, 4)

In [4]:
df.Category.unique()

array(['HR', 'DESIGNER', 'INFORMATION-TECHNOLOGY', 'TEACHER', 'ADVOCATE',
       'BUSINESS-DEVELOPMENT', 'HEALTHCARE', 'FITNESS', 'AGRICULTURE',
       'BPO', 'SALES', 'CONSULTANT', 'DIGITAL-MEDIA', 'AUTOMOBILE',
       'CHEF', 'FINANCE', 'APPAREL', 'ENGINEERING', 'ACCOUNTANT',
       'CONSTRUCTION', 'PUBLIC-RELATIONS', 'BANKING', 'ARTS', 'AVIATION'],
      dtype=object)

In [6]:
df.loc[df['Category']=='CHEF']

Unnamed: 0,ID,Resume_str,Resume_html,Category
1357,15180322,CHEF Career Focus I am a nu...,"<div class=""fontsize fontface vmargins hmargin...",CHEF
1358,24221960,CHEF Summary Customer-o...,"<div class=""fontsize fontface vmargins hmargin...",CHEF
1359,24673903,CHEF Career Overview De...,"<div class=""fontsize fontface vmargins hmargin...",CHEF
1360,19007667,CHEF Summary Experienced ca...,"<div class=""fontsize fontface vmargins hmargin...",CHEF
1361,30311202,RM Roxanne Mejia Summary ...,"<div class=""RNA skn-mng3 fontsize fontface vma...",CHEF
...,...,...,...,...
1470,15354126,OWNER/CHEF Summary Chef wit...,"<div class=""fontsize fontface vmargins hmargin...",CHEF
1471,12155206,KINDERGARTEN TEACHER Professi...,"<div class=""fontsize fontface vmargins hmargin...",CHEF
1472,27298953,SUBSTITUTE PARA PROFESSIONAL Su...,"<div class=""fontsize fontface vmargins hmargin...",CHEF
1473,68338341,EXECUTIVE CHEF-PARTNER Summ...,"<div class=""fontsize fontface vmargins hmargin...",CHEF


In [7]:
with open("../data/resume_info_extracted.json", 'r') as file:
    data = json.load(file)

with open("../data/resume_info_extracted_emb.json", 'r') as file:
    emb_data = json.load(file)

In [8]:
data[0]

{'skills': ['CPR/AED Certified', 'CPI Certified', 'THSCA Member'],
 'work experience (years)': '7',
 'summary': ['Worked as Fitness Consultant from 09/2013 to 03/2014, doing tours, wellness advice, outside marketing, and calls to prospective members.',
  'Worked as DAEP/Special Education/Coach from 08/2012 to 05/2013, doing teaching in DAEP, working with Autism and Down Syndrome children in Special Education, and coaching Football, Basketball, and Strength and Conditioning programs.',
  'Worked as PE/Special Education/Coach from 08/2009 to 05/2012, doing teaching in Middle School PE and Content Mastery programs, working with Special Education at various levels, and coaching Football, Basketball, Baseball, and Track & Field.',
  'Worked as Sales from 07/2014 to Current, doing business growth, maintaining detailed records of jobs and potential clients, and maintaining client relations.'],
 'ID': '17576030',
 'title': 'FITNESS CONSULTANT'}

In [9]:
emb_data[0]['ID']

'17576030'

### Connect to Pinecone

In [10]:
from pinecone import Pinecone

pc = Pinecone(api_key=os.environ["PINECONE_API_KEY"])
index = pc.Index("llm-study")

In [11]:
pc.describe_index("llm-study")

{'dimension': 1536,
 'host': 'llm-study-etttk3j.svc.aped-4627-b74a.pinecone.io',
 'metric': 'cosine',
 'name': 'llm-study',
 'spec': {'serverless': {'cloud': 'aws', 'region': 'us-east-1'}},
 'status': {'ready': True, 'state': 'Ready'}}

### Upsert data

In [12]:
data[0]

{'skills': ['CPR/AED Certified', 'CPI Certified', 'THSCA Member'],
 'work experience (years)': '7',
 'summary': ['Worked as Fitness Consultant from 09/2013 to 03/2014, doing tours, wellness advice, outside marketing, and calls to prospective members.',
  'Worked as DAEP/Special Education/Coach from 08/2012 to 05/2013, doing teaching in DAEP, working with Autism and Down Syndrome children in Special Education, and coaching Football, Basketball, and Strength and Conditioning programs.',
  'Worked as PE/Special Education/Coach from 08/2009 to 05/2012, doing teaching in Middle School PE and Content Mastery programs, working with Special Education at various levels, and coaching Football, Basketball, Baseball, and Track & Field.',
  'Worked as Sales from 07/2014 to Current, doing business growth, maintaining detailed records of jobs and potential clients, and maintaining client relations.'],
 'ID': '17576030',
 'title': 'FITNESS CONSULTANT'}

- 필요한 데이터:
    - skills & work summary
- 문제점:
    - pinecone에 저장을 하기 위해서는 한 row당 index 하나 밖에 만들지 못 함
- 해결 방법:
    - meta data에 field를 추가하여 이력서 ID 등과 같은 데이터를 저장
- 기존 dataframe을 사용할 때와 다른 점 :
    - 각 data point가 하나의 embedding vector가 되어야 함

Desired input format : 
```python
work_experience = {
    "id" : "1234-work0",
    "values" : [0.23432, 0.22149, ...],
    "metadata" : {
        "summary" : "Worked as a head chef in ...",
        "ID" : "1234"
    }
}

skill = {
    "id" : "1234-skill0",
    "values" : [0.92372, 0.678234, ...],
    "metadata" : {
        "skill" : "Cooking",
        "ID" : "1234"
    }
}

```

#### Indexing
- Custom Index

In [13]:
data[0]

{'skills': ['CPR/AED Certified', 'CPI Certified', 'THSCA Member'],
 'work experience (years)': '7',
 'summary': ['Worked as Fitness Consultant from 09/2013 to 03/2014, doing tours, wellness advice, outside marketing, and calls to prospective members.',
  'Worked as DAEP/Special Education/Coach from 08/2012 to 05/2013, doing teaching in DAEP, working with Autism and Down Syndrome children in Special Education, and coaching Football, Basketball, and Strength and Conditioning programs.',
  'Worked as PE/Special Education/Coach from 08/2009 to 05/2012, doing teaching in Middle School PE and Content Mastery programs, working with Special Education at various levels, and coaching Football, Basketball, Baseball, and Track & Field.',
  'Worked as Sales from 07/2014 to Current, doing business growth, maintaining detailed records of jobs and potential clients, and maintaining client relations.'],
 'ID': '17576030',
 'title': 'FITNESS CONSULTANT'}

In [14]:
# Flatten every resume into upsert-ready records: one record per work-experience
# summary and one per skill, each carrying its source text and resume ID as metadata.
exp_vectors = []
skill_vectors = []

for resume, resume_emb in zip(data, emb_data):
    resume_id = resume['ID']
    # Text entries and embedding entries are paired by position, so their IDs must agree.
    assert resume_id == resume_emb['ID'], "You should check your embeddings again"

    # Record ids have the form "<resume ID>-work<position>".
    exp_vectors.extend(
        {
            "id": f"{resume_id}-work{pos}",
            "values": vector,
            "metadata": {'summary': resume['summary'][pos], 'ID': resume_id},
        }
        for pos, vector in enumerate(resume_emb['summary'])
    )

    # Record ids have the form "<resume ID>-skill<position>".
    skill_vectors.extend(
        {
            "id": f"{resume_id}-skill{pos}",
            "values": vector,
            "metadata": {"skill": resume['skills'][pos], "ID": resume_id},
        }
        for pos, vector in enumerate(resume_emb['skills'])
    )

In [15]:
def create_batches(lst, n):
    """Split a sequence into consecutive slices of at most ``n`` items.

    Args:
        lst: a sequence supporting ``len()`` and slicing (list, tuple, str, ...).
        n: maximum number of items per batch; must be at least 1.

    Returns:
        A generator yielding ``lst[0:n]``, ``lst[n:2n]``, ... in order. The last
        batch may be shorter; an empty ``lst`` yields nothing.

    Raises:
        ValueError: if ``n`` is smaller than 1. Checked at call time so that a bad
            batch size fails loudly instead of silently producing no batches
            (negative ``n``) or failing only once iteration starts (``n == 0``).
    """
    if n < 1:
        raise ValueError(f"batch size n must be >= 1, got {n!r}")
    return (lst[start:start + n] for start in range(0, len(lst), n))

index upsert시 확인 필요 - [Pinecone Quotas and limits](https://docs.pinecone.io/reference/quotas-and-limits)

Batch upsert

In [16]:
# Upsert the work-experience records into the "work_exp" namespace, 50 records per
# request. The batches are materialised into a list before being wrapped in tqdm.
exp_batches = [*create_batches(exp_vectors, 50)]

for vector_chunk in tqdm(exp_batches):
    index.upsert(namespace="work_exp", vectors=vector_chunk)

  0%|          | 0/23 [00:00<?, ?it/s]

In [17]:
len(exp_vectors)

1122

In [18]:
# Upsert the skill records into the "skill" namespace, 50 records per request.
# The batches are materialised into a list before being wrapped in tqdm.
skill_batches = [*create_batches(skill_vectors, 50)]

for vector_chunk in tqdm(skill_batches):
    index.upsert(namespace="skill", vectors=vector_chunk)

  0%|          | 0/108 [00:00<?, ?it/s]

In [19]:
# index.delete(delete_all=True, namespace='work_exp')
# index.delete(delete_all=True, namespace='skill')

---

### Search & retrieval (test)

In [20]:
from text_utils import create_embeddings

현재 예시에 hybrid search가 적합하지 않은 이유 : 
- 우리의 'skills'들이 사전에 정해진 단어들로만 이루어져 있으면 가능하겠지만, 그렇지 않은 상황
- 사용자의 input을 통해 얻는 query가 우리가 갖고 있는 skill의 리스트에 국한되지 않음
- hybrid search에 많이 쓰이는, ranking function 중 하나인 BM25는 training 과정에서 보지 못 한 단어는 처리하지 못함 (sparse vector 상으로 표현 X)

hybrid search가 적합한 경우:
- Skill의 list를 활용하여 후보군들을 선정
- 만약 이력서 데이터셋에 있는 skill들의 variation이 정해져 있었다면 아주 적합
- 전문 용어들을 활용하여 document search를 하는 경우 (semantic representation으로 나타내기 어려운 전문용어의 경우에도 활용 가능)

In [21]:
data[0]

{'skills': ['CPR/AED Certified', 'CPI Certified', 'THSCA Member'],
 'work experience (years)': '7',
 'summary': ['Worked as Fitness Consultant from 09/2013 to 03/2014, doing tours, wellness advice, outside marketing, and calls to prospective members.',
  'Worked as DAEP/Special Education/Coach from 08/2012 to 05/2013, doing teaching in DAEP, working with Autism and Down Syndrome children in Special Education, and coaching Football, Basketball, and Strength and Conditioning programs.',
  'Worked as PE/Special Education/Coach from 08/2009 to 05/2012, doing teaching in Middle School PE and Content Mastery programs, working with Special Education at various levels, and coaching Football, Basketball, Baseball, and Track & Field.',
  'Worked as Sales from 07/2014 to Current, doing business growth, maintaining detailed records of jobs and potential clients, and maintaining client relations.'],
 'ID': '17576030',
 'title': 'FITNESS CONSULTANT'}

In [22]:
search_skill = data[0]['skills'][0]
search_emb = create_embeddings(search_skill)
print(search_skill)

CPR/AED Certified


In [23]:
index.query(
    top_k=50,
    vector=search_emb,
    namespace='skill',
    include_metadata=True
    )

{'matches': [{'id': '17163375-skill2',
              'metadata': {'ID': '17163375', 'skill': 'CPR/AED Certified'},
              'score': 0.999999464,
              'values': []},
             {'id': '70603826-skill2',
              'metadata': {'ID': '70603826', 'skill': 'CPR/AED Certified'},
              'score': 0.999999464,
              'values': []},
             {'id': '32636041-skill15',
              'metadata': {'ID': '32636041', 'skill': 'CPR/AED Certified'},
              'score': 0.999999464,
              'values': []},
             {'id': '17576030-skill0',
              'metadata': {'ID': '17576030', 'skill': 'CPR/AED Certified'},
              'score': 0.999999464,
              'values': []},
             {'id': '63282405-skill14',
              'metadata': {'ID': '63282405', 'skill': 'CPR Certified'},
              'score': 0.904459953,
              'values': []},
             {'id': '20457611-skill11',
              'metadata': {'ID': '20457611', 'skill': 'CPR Cer

---

### Search & retrieval + postprocessing

In [24]:
data[0]

{'skills': ['CPR/AED Certified', 'CPI Certified', 'THSCA Member'],
 'work experience (years)': '7',
 'summary': ['Worked as Fitness Consultant from 09/2013 to 03/2014, doing tours, wellness advice, outside marketing, and calls to prospective members.',
  'Worked as DAEP/Special Education/Coach from 08/2012 to 05/2013, doing teaching in DAEP, working with Autism and Down Syndrome children in Special Education, and coaching Football, Basketball, and Strength and Conditioning programs.',
  'Worked as PE/Special Education/Coach from 08/2009 to 05/2012, doing teaching in Middle School PE and Content Mastery programs, working with Special Education at various levels, and coaching Football, Basketball, Baseball, and Track & Field.',
  'Worked as Sales from 07/2014 to Current, doing business growth, maintaining detailed records of jobs and potential clients, and maintaining client relations.'],
 'ID': '17576030',
 'title': 'FITNESS CONSULTANT'}

In [25]:
skills = ['Menu Development', 'Catering', 'Inventory Management']
exp = 'As the Executive Chef at Le Gourmet Quatre, a Michelin-starred fine dining restaurant, I led a team of 20 chefs in developing innovative French-Asian fusion menus, while also managing kitchen operations efficiently to uphold the highest standards of food safety and cost control'

#### search & retrieval
- Top-K retrieval
- Metadata replacement

In [26]:
emb_skills = create_embeddings(skills)
emb_exp = create_embeddings(exp)[0]

In [27]:
def search_vdb(vdb_index, query_emb, top_k, namespace):
    """Query one namespace of a vector index and return the list of matches.

    Args:
        vdb_index: object exposing ``query(namespace=..., top_k=..., vector=...,
            include_metadata=...)``.
        query_emb: embedding vector to search with.
        top_k: number of results requested from the index.
        namespace: namespace to search in.

    Returns:
        The ``'matches'`` entry of the query response; metadata is requested
        for every match.
    """
    query_kwargs = {
        "namespace": namespace,
        "top_k": top_k,
        "vector": query_emb,
        "include_metadata": True,
    }
    response = vdb_index.query(**query_kwargs)
    return response['matches']

In [28]:
# search
skill_outputs = {s:search_vdb(index, i, 10, 'skill') for s,i in zip(skills, emb_skills)}
exp_outputs = search_vdb(index, emb_exp, 10, 'work_exp')

In [29]:
skill_outputs.keys()

dict_keys(['Menu Development', 'Catering', 'Inventory Management'])

In [30]:
skill_outputs['Catering']

[{'id': '34452806-skill15',
  'metadata': {'ID': '34452806', 'skill': 'Catering'},
  'score': 0.999999583,
  'values': []},
 {'id': '25128608-skill17',
  'metadata': {'ID': '25128608', 'skill': 'Catering'},
  'score': 0.999999583,
  'values': []},
 {'id': '29775391-skill13',
  'metadata': {'ID': '29775391', 'skill': 'Catering'},
  'score': 0.999999583,
  'values': []},
 {'id': '21611637-skill11',
  'metadata': {'ID': '21611637', 'skill': 'catering'},
  'score': 0.928126335,
  'values': []},
 {'id': '37231163-skill5',
  'metadata': {'ID': '37231163', 'skill': 'catering style'},
  'score': 0.76884979,
  'values': []},
 {'id': '34452806-skill39',
  'metadata': {'ID': '34452806', 'skill': 'Private catering'},
  'score': 0.749412596,
  'values': []},
 {'id': '29449419-skill26',
  'metadata': {'ID': '29449419', 'skill': 'Strong Catering abilities'},
  'score': 0.745458901,
  'values': []},
 {'id': '65373280-skill1',
  'metadata': {'ID': '65373280', 'skill': 'Banquets and catering'},
  'score

In [31]:
exp_outputs

[{'id': '19285236-work0',
  'metadata': {'ID': '19285236',
               'summary': 'Worked as Executive Chef from February 2014, '
                          'overseeing kitchen and Café activities, managing a '
                          'significant budget, and leading a staff through peak '
                          'seasons, achieving improved service and safety '
                          'standards.'},
  'score': 0.660633683,
  'values': []},
 {'id': '13212436-work2',
  'metadata': {'ID': '13212436',
               'summary': 'Worked as Executive Chef/Director of Culinary from '
                          '01/2015 to 03/2018, focusing on high quality, '
                          'innovative food so clients and customers feel like '
                          'they are not in a convention center atmosphere, '
                          'overseeing 100+ employees and 6 chefs daily.'},
  'score': 0.657844484,
  'values': []},
 {'id': '22561438-work0',
  'metadata': {'ID': '22561438',
 

#### postprocessing
- Custom processing

In [32]:
# Turn the per-skill search results into a single table.
# One frame is built per query skill and all frames are concatenated once at the end,
# instead of re-copying a growing DataFrame with pd.concat on every iteration.
skill_frames = []

for query_skill, matches in skill_outputs.items():
    # Keep matches scoring above 0.5, then take the first three; everything needed
    # downstream is stored in each match's metadata.
    kept = [match['metadata'] for match in matches if match['score'] > 0.5][:3]
    # A DataFrame is easier to work with than a list of dicts.
    frame = pd.DataFrame(kept)
    frame['query_skill'] = query_skill
    skill_frames.append(frame)

# pd.concat raises on an empty list, so fall back to an empty frame when there are no queries.
skill_df = pd.concat(skill_frames, axis=0) if skill_frames else pd.DataFrame()

In [33]:
skill_df

Unnamed: 0,ID,skill,query_skill
0,19285236,Menu Development,Menu Development
1,13095891,Menu Development,Menu Development
2,28092317,Menu Development,Menu Development
0,34452806,Catering,Catering
1,25128608,Catering,Catering
2,29775391,Catering,Catering
0,19285236,Inventory Management,Inventory Management
1,10333299,Inventory Management,Inventory Management
2,35468363,Inventory Management,Inventory Management


In [34]:
# Keep only the work-experience matches scoring above 0.5 and reduce the first
# three of them to their metadata dicts.
# NOTE: this rebinds exp_outputs from raw matches to metadata dicts, so the search
# cell that produced exp_outputs has to be re-run before running this cell again.
exp_outputs = [match['metadata'] for match in exp_outputs if match['score'] > 0.5][:3]

In [35]:
exp_outputs

[{'ID': '19285236',
  'summary': 'Worked as Executive Chef from February 2014, overseeing kitchen and Café activities, managing a significant budget, and leading a staff through peak seasons, achieving improved service and safety standards.'},
 {'ID': '13212436',
  'summary': 'Worked as Executive Chef/Director of Culinary from 01/2015 to 03/2018, focusing on high quality, innovative food so clients and customers feel like they are not in a convention center atmosphere, overseeing 100+ employees and 6 chefs daily.'},
 {'ID': '22561438',
  'summary': 'Worked as Chef De Cuisine from 04/2016 to Current, doing seasonal menu development, maintaining food quality with low cost, managing inventory and ordering, hiring and firing, and training new employees to uphold standards.'}]

In [36]:
pd.DataFrame(exp_outputs)

Unnamed: 0,ID,summary
0,19285236,"Worked as Executive Chef from February 2014, o..."
1,13212436,Worked as Executive Chef/Director of Culinary ...
2,22561438,Worked as Chef De Cuisine from 04/2016 to Curr...


하나의 function으로 변환

In [37]:
def search(index, skills, exp, top_k=10, threshold=0.5, max_results=3):
    """Retrieve the best-matching skills and work experiences for a query.

    Each skill and the experience text are embedded with ``create_embeddings``,
    searched in the 'skill' / 'work_exp' namespaces via ``search_vdb``, filtered
    by score, and reduced to the metadata of the leading matches.

    Args:
        index: vector index handle passed through to ``search_vdb``.
        skills: list of skill strings; each one is searched separately.
        exp: work-experience description to search with.
            NOTE(review): ``create_embeddings(exp)[0]`` presumes the helper
            returns a list of embeddings even for one string - confirm in text_utils.
        top_k: number of candidates requested from the index per query.
        threshold: only matches with ``score > threshold`` are kept.
        max_results: maximum number of matches kept per query after filtering
            (previously hard-coded to 3); ``None`` keeps every match above the threshold.

    Returns:
        ``(skill_df, exp_df)``: ``skill_df`` holds the kept skill metadata plus a
        ``query_skill`` column naming the skill that produced each row (row labels
        restart at 0 for every query skill); ``exp_df`` holds the kept
        work-experience metadata.
    """
    def _leading_metadata(matches):
        # Score filter first, then cut to the requested number of results.
        return [m['metadata'] for m in matches if m['score'] > threshold][:max_results]

    emb_skills = create_embeddings(skills)
    emb_exp = create_embeddings(exp)[0]

    skill_outputs = {
        skill: search_vdb(index, skill_emb, top_k, 'skill')
        for skill, skill_emb in zip(skills, emb_skills)
    }
    exp_matches = search_vdb(index, emb_exp, top_k, 'work_exp')

    # Build one frame per query skill and concatenate once, rather than
    # re-copying a growing DataFrame inside the loop.
    skill_frames = []
    for query_skill, matches in skill_outputs.items():
        frame = pd.DataFrame(_leading_metadata(matches))
        frame['query_skill'] = query_skill
        skill_frames.append(frame)

    # pd.concat raises on an empty list; an empty skills list yields an empty frame.
    skill_df = pd.concat(skill_frames, axis=0) if skill_frames else pd.DataFrame()
    exp_df = pd.DataFrame(_leading_metadata(exp_matches))

    return skill_df, exp_df

In [38]:
s, e = search(index, skills, exp)

In [39]:
s

Unnamed: 0,ID,skill,query_skill
0,19285236,Menu Development,Menu Development
1,13095891,Menu Development,Menu Development
2,28092317,Menu Development,Menu Development
0,34452806,Catering,Catering
1,25128608,Catering,Catering
2,29775391,Catering,Catering
0,19285236,Inventory Management,Inventory Management
1,10333299,Inventory Management,Inventory Management
2,35468363,Inventory Management,Inventory Management


In [40]:
e

Unnamed: 0,ID,summary
0,19285236,"Worked as Executive Chef from February 2014, o..."
1,13212436,Worked as Executive Chef/Director of Culinary ...
2,22561438,Worked as Chef De Cuisine from 04/2016 to Curr...


---

### Query transformation
- Custom transformation
- Sub query generation
- Query rewriting

In [41]:
from text_utils import normal_chat_completion

In [42]:
skills = ['Menu Development', 'Catering', 'Inventory Management']
exp = 'As the Executive Chef at Le Gourmet Quatre, a Michelin-starred fine dining restaurant, I led a team of 20 chefs in developing innovative French-Asian fusion menus, while also managing kitchen operations efficiently to uphold the highest standards of food safety and cost control'

1. Query rewriting
- 사용자의 쿼리를 rewrite하여 search에 최적화된 형태로 변형

In [43]:
# Prompt template that asks the model to rewrite a free-form user request as a
# resume-style sentence plus a list of matching skills, returned as JSON with the
# keys 'output' and 'skills'. The single "{}" placeholder is filled with the user
# request via str.format.
rewriting_prompt = """Convert the [user input] into a format as if it was written in a resume.
Example: 
    - [user input] : 내가 벤치 프레스, 스쿼트, 그리고 데드리프트를 총 500kg을 들 수 있는데 도와줄 수 있는 사람을 추천해줘.
    - [output] : Worked as a personal trainer, helping people to achieve their personal fitness goals in various fields such as weight lifting and losing weight.
    - [skills] : [Strength training, weight lifting, coaching, anatomy]

Desired output format:
    - json format with 'output' and 'skills'
    - the value of 'output' should be a sentence string in a format of 'Worked as a <job title>, <job description>'
    - the value of 'skills' should be a list of 5 strings. Each element in a list should be a realistic skill that matches with the job description
    
The [user input] : {}
[output] : 
"""

In [44]:
input = "3대 500을 할 수 있도록 도와줄 수 있는 사람을 찾아줘"

a = normal_chat_completion(rewriting_prompt.format(input))

In [45]:
json.loads(a.choices[0].message.content)

{'output': 'Worked as a strength and conditioning coach, specializing in coaching individuals to achieve their powerlifting goals, particularly in bench press, squat, and deadlift with a total lift of 500kg.',
 'skills': ['Powerlifting coaching',
  'Strength training',
  'Fitness assessment',
  'Nutritional guidance',
  'Motivational coaching']}

2. Breakdown multiple requests
- 사용자의 쿼리 내에 여러개의 요구사항이 있다면, 해당 요구사항들을 개별적으로 분리
- 분리된 요구사항들을 개별적으로 처리

In [46]:
sub_query_prompt = """The user will request a talent recommendation.
If the user's request contains mentions of multiple talent recommendation,
divide them into a separate but full sentences.

Example 1 :
    - [user input] : 내가 벤치 프레스, 스쿼트, 그리고 데드리프트를 총 500kg을 들 수 있는데 도와줄 수 있는 사람을 추천해주고, 그에 맞는 식단을 만들어줄 수 있는 사람도 추천해줘.
    - [output] : [내가 벤치 프레스, 스쿼트, 그리고 데드리프트를 총 500kg을 들 수 있는데 도와줄 수 있는 사람을 추천해줘, 
                          내가 벤치 프레스, 스쿼트, 그리고 데드리프트를 총 500kg을 드는데 도움을 줄 식단을 만들어 줄 수 있는 사람을 추천해줘]

If the user's request only mentions of one talent recommendation,
provide the exact same input as output.

Example 2 :
    - [user input] : 내가 벤치 프레스, 스쿼트, 그리고 데드리프트를 총 500kg을 들 수 있는데 도와줄 수 있는 사람을 추천해줘
    - [output] : [내가 벤치 프레스, 스쿼트, 그리고 데드리프트를 총 500kg을 들 수 있는데 도와줄 수 있는 사람을 추천해줘]

If the user's request does not mention clear descriptions about each talent,
provide 'False' as output.

Example 3 :
    - [user input] : 내 이사를 도와줄 사람 5명을 추천해줘
    - [output] : [False]

Desired output format :
    - json format with 'original_input' and 'output' as keys.
    - the 'original_input' should be the input I provide you.
    - the 'output' is the rewritten input by you.

[user input] : {}
"""

In [47]:
input = "파인다이닝 음식점에서 음식을 구상할 수 있는 사람 한 명, 그리고 그 음식을 서빙할 수 있는 사람을 추천해줘"

a = normal_chat_completion(sub_query_prompt.format(input))

json.loads(a.choices[0].message.content)

{'original_input': '파인다이닝 음식점에서 음식을 구상할 수 있는 사람 한 명, 그리고 그 음식을 서빙할 수 있는 사람을 추천해줘',
 'output': ['파인다이닝 음식점에서 음식을 구상할 수 있는 사람 한 명을 추천해줘',
  '파인다이닝 음식점에서 음식을 서빙할 수 있는 사람을 추천해줘']}

In [48]:
input = "내가 이번에 음식점을 오픈하는데, 거기에 들어갈 인원 5명을 추천해줘"

a = normal_chat_completion(sub_query_prompt.format(input))

json.loads(a.choices[0].message.content)

{'original_input': '내가 이번에 음식점을 오픈하는데, 거기에 들어갈 인원 5명을 추천해줘', 'output': [False]}

In [49]:
def query_transformation(input, sub_query_prompt, rewriting_prompt):
    """Split a user request into sub-requests and rewrite each as a search query.

    Steps:
      1. ``sub_query_prompt`` is filled with ``input`` and sent through
         ``normal_chat_completion``; the reply is parsed as JSON and its
         ``'output'`` value is taken as the sub-requests.
      2. If the reply marks the request as too vague (the prompt asks the model
         for ``[False]`` in that case), a fixed message asking for a more
         specific input is returned.
      3. Otherwise every sub-request is filled into ``rewriting_prompt``, sent
         through ``normal_chat_completion`` and the JSON reply is collected.

    Args:
        input: raw user request. The name shadows the builtin; it is kept so
            existing keyword callers keep working.
        sub_query_prompt: template with one ``{}`` placeholder for the request.
        rewriting_prompt: template with one ``{}`` placeholder for a sub-request.

    Returns:
        Either the Korean message string asking for more detail, or a list with
        one parsed JSON reply per sub-request.

    Raises:
        json.JSONDecodeError: if a model reply is not valid JSON.
        KeyError: if the sub-query reply has no ``'output'`` key.
    """
    sub_query_response = normal_chat_completion(sub_query_prompt.format(input))
    sub_queries = json.loads(sub_query_response.choices[0].message.content)['output']

    # The model may answer with a bare value instead of a list; normalise so both
    # shapes take the same path (the old non-list branch crashed by calling
    # .choices on a list).
    if not isinstance(sub_queries, list):
        sub_queries = [sub_queries]

    def _is_rejection(item):
        # Accept JSON false as well as the literal string "False" as the vague-request marker.
        return item is False or (isinstance(item, str) and item.strip().lower() == "false")

    # An empty list carries nothing to search for, so treat it like a rejection
    # instead of failing on sub_queries[0].
    if not sub_queries or _is_rejection(sub_queries[0]):
        return "조금 더 구체적으로 인풋을 작성해주세요"

    search_queries = []
    for sub_query in sub_queries:
        rewritten = normal_chat_completion(rewriting_prompt.format(sub_query))
        search_queries.append(json.loads(rewritten.choices[0].message.content))
    return search_queries

In [52]:
# 정보 부족시 chat 생성 example
input = "내가 이번에 음식점을 오픈하는데, 거기에 들어갈 인원 5명을 추천해줘"

query_transformation(input, sub_query_prompt, rewriting_prompt)

'조금 더 구체적으로 인풋을 작성해주세요'

In [51]:
input = "내 사업을 성공시켜줄 사람을 추천해줘"

query_transformation(input, sub_query_prompt, rewriting_prompt)

[{'output': 'Worked as a Business Development Manager, focusing on strategizing and executing plans to drive business success and growth.',
  'skills': ['Strategic planning',
   'Market research and analysis',
   'Financial modeling',
   'Leadership and team management',
   'Sales and marketing strategies']}]

---

In [None]:
input = "파인다이닝 음식점에서 음식을 구상할 수 있는 사람 한 명, 그리고 그 음식을 서빙할 수 있는 사람을 추천해줘"

queries = query_transformation(input, sub_query_prompt, rewriting_prompt)

In [None]:
queries

In [None]:
retrieved = [search(index, query['skills'], query['output']) for query in queries]

In [None]:
retrieved[0]

In [None]:
input = "보디빌딩 대회에서 우승을 할 수 있게끔 도와줄 수 있는 사람을 추천해줘"

queries = query_transformation(input, sub_query_prompt, rewriting_prompt)
retrieved = [search(index, query['skills'], query['output']) for query in queries]

In [None]:
retrieved[0][0]

In [None]:
retrieved[0][1]