In [1]:
##### LLAMAPARSE #####
from llama_parse import LlamaParse
from dotenv import load_dotenv

from groq import Groq
from langchain_groq import ChatGroq

from src.schemas import Topics, Subjetivas

from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser

import os

import nest_asyncio  # noqa: E402
# Allow nested asyncio event loops inside the already-running notebook kernel
# (presumably required by the async parsing done via LlamaParse — TODO confirm).
nest_asyncio.apply()

# Load variables from a local .env file into the process environment;
# OPENAI_API_KEY is read back later with os.getenv.
load_dotenv()

True

In [2]:
from src.utils import generate_questions, generate_profile
from langchain_openai import ChatOpenAI, OpenAIEmbeddings

In [8]:
import random

from uuid import uuid4
from langchain_core.documents import Document

In [10]:
# Candidate personas. One is drawn at random per résumé so the synthetic
# interview answers vary in tone from candidate to candidate.
personalities = [
    "supportive, calm, and patient",
    "confident, assertive, and competitive",
    "creative, imaginative, and innovative",
    "analytical, logical, and detail-oriented",
    "enthusiastic, outgoing, and energetic",
    "organized, efficient, and reliable",
    "friendly, warm, and approachable",
    "diplomatic, tactful, and respectful",
    "adaptable, flexible, and open-minded",
    "independent, self-motivated, and goal-oriented",
]

# Directory holding one résumé file per candidate.
base_path = "data/curriculos/"

# Loop-invariant objects are built once, outside the loop. `embeddings` is
# also consumed by the FAISS cell further down, so it has to exist even when
# `base_path` contains no files.
embeddings = OpenAIEmbeddings(model="text-embedding-3-large")
output_parser = StrOutputParser()

# The persona is a template variable rather than an f-string so the template
# can be built once and so persona text is never parsed as template syntax.
answer_prompt = [
    ("system",
     "You are a candidate for a job position. You have to answer the following questions based on your profile: {questions}"),
    ("system", "You are a candidate that is {personality}"),
    ("system", "Your answers must depict your personality and skills."),
]
answer_template = ChatPromptTemplate.from_messages(answer_prompt)

# One Document per résumé: page_content is the generated answers,
# metadata["source"] is the résumé path.
documents = []

# sorted() gives a deterministic processing order; os.listdir order is arbitrary.
for curr in sorted(os.listdir(base_path)):
    path = os.path.join(base_path, curr)
    if not os.path.isfile(path):
        # Skip sub-directories; only résumé files are handed to generate_questions.
        continue

    # generate_questions returns a sequence of indexable items; element 1 of
    # each item is used as the question text (presumably (topic, question)
    # pairs — confirm against src.utils).
    raw_questions = generate_questions(path)
    questions = [q[1] for q in raw_questions]

    personality = random.choice(personalities)

    # A fresh model per candidate because the temperature is randomised:
    # randrange(10, 20) / 100 yields a value from 0.10 to 0.19.
    openai = ChatOpenAI(
        api_key=os.getenv("OPENAI_API_KEY"),
        model="gpt-4o-mini",
        temperature=random.randrange(10, 20) / 100,
    )

    chain = answer_template | openai | output_parser

    responses = chain.invoke(
        {"questions": "\n".join(questions), "personality": personality}
    )

    documents.append(Document(page_content=responses, metadata={"source": path}))

print(len(documents))

    
    




Started parsing the file under job_id ad64f9ce-9039-4f1b-b30b-ddb0c7ac53cf
Started parsing the file under job_id 3ba5c501-506e-41d4-a3e6-7bea350d0b96
Started parsing the file under job_id 12ff89ac-108e-4785-8b0b-98ae13a30549
Started parsing the file under job_id 39250661-f2f7-483f-812f-85aa90047997
Started parsing the file under job_id f77b779e-f436-43f2-b670-9c0d18017704
Started parsing the file under job_id c54adc68-b4ef-4570-a35e-eabdc2ce63fc
Started parsing the file under job_id c138e691-cc23-4a5a-85ce-1155570dfa75
Started parsing the file under job_id f17443f5-5866-4566-88b3-7c0c6bd21a16
Started parsing the file under job_id 34972cd3-0b06-4bc4-b38e-83cd7cbcc922
Started parsing the file under job_id 8aee17c1-7f00-4293-af8e-744a81bf085b
Started parsing the file under job_id f6086319-5072-4926-b249-df25228a003f
Started parsing the file under job_id ff2d3633-cc59-4974-b0c9-5a5dba9ab05c
Started parsing the file under job_id fdb3584d-cbd1-420b-b334-85dc22c5c965
Started parsing the file 

In [11]:
%pip install faiss-cpu

Collecting faiss-cpu
  Using cached faiss_cpu-1.9.0-cp310-cp310-win_amd64.whl.metadata (4.5 kB)
Using cached faiss_cpu-1.9.0-cp310-cp310-win_amd64.whl (14.9 MB)
Installing collected packages: faiss-cpu
Successfully installed faiss-cpu-1.9.0
Note: you may need to restart the kernel to use updated packages.


In [12]:
import faiss
from langchain_community.docstore.in_memory import InMemoryDocstore
from langchain_community.vectorstores import FAISS

# Embed a throwaway string once to learn the embedding dimensionality, which
# the flat L2 index needs at construction time.
probe_vector = embeddings.embed_query("hello world")
index = faiss.IndexFlatL2(len(probe_vector))

# Start from an empty store: no documents and no index-to-id mapping yet.
empty_docstore = InMemoryDocstore()
vector_store = FAISS(
    embedding_function=embeddings,
    index=index,
    docstore=empty_docstore,
    index_to_docstore_id={},
)

# One random UUID string per document, used as its id in the store.
uuids = [str(uuid4()) for _document in documents]

# Embed and insert the documents. As the last expression of the cell, the
# returned list of ids is what the notebook displays.
vector_store.add_documents(documents=documents, ids=uuids)

['cb634a38-4ce9-4610-b936-c353c243ae97',
 'd5d3b620-7e72-4ec9-b585-765b6b05fd2e',
 'a633e02c-deda-40bf-addc-17fbdbda7f70',
 '25b49986-3977-42f2-aa01-97359b9b1edc',
 'f2b523ae-d00d-4787-8e39-d35307407121',
 '405d0ad3-102c-4bec-a321-c13c783b0e72',
 '42cce91b-7475-4ae5-b62c-ecba62143439',
 '4a3a056c-6d7b-4351-99bb-cbbc482d1142',
 '9fff4f43-2738-4dea-9ec3-887719ab3119',
 'ebe855bd-8e35-4b3c-864b-7484788d6114',
 '6a75555e-b4fc-4514-9344-f1e3a1c62ad4',
 'ce12e713-7712-40f9-8990-927172622749',
 'b65c2c22-1204-449c-b80c-ad04721cdea0',
 'dd3aa272-7b7a-4b4a-9390-08afc9f1d64a',
 'afbf7ccd-a902-4ccb-918e-fe9ba09f3af8',
 '085221a8-b41b-4bad-b58e-e8da7069bc0e',
 '50ab358e-8256-4db9-8002-87e92c6cbaf3',
 '9a1cd42c-ba8d-43fd-928c-7048d28a1ec8',
 '3fbe156b-f15b-4bf7-b3af-5e7f0bac70ba',
 '09a663aa-0a38-47c4-88bf-0178a22ab4ba',
 'f149f496-0908-4473-b85e-2872171e5458',
 '648d220a-4d0d-4adf-9970-ef60a4f30f8a',
 'fd20a949-77cd-4601-9974-61b6f3f57e6a',
 'c3e45e8e-00df-44b4-87ea-b1b3b839fd9e',
 '51a9e284-2a60-

In [13]:
# Persist the populated vector store under the relative path "faiss_index"
# (resolved against the kernel's working directory) so it can be reused
# without re-embedding the documents.
vector_store.save_local("faiss_index")