In [15]:
from langchain.chains import ConversationalRetrievalChain
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.llms import LlamaCpp
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.memory import ConversationBufferMemory
from langchain.document_loaders import PyPDFLoader

In [16]:
import os

# Location of the resume PDF to load. Set the RESUME_PDF environment variable to
# point at a different file; when it is unset, the original hard-coded local path
# is used so existing behavior is unchanged.
RESUME_PDF = os.environ.get(
    "RESUME_PDF",
    "/Users/dloader/Documents/GitHub/lazy-cv/references/Dylan-Loader-Resume-October-2023.pdf",
)
loader = PyPDFLoader(file_path=RESUME_PDF)

In [17]:
# Run the PDF loader and keep what it returns in `data` for the later cells.
data = loader.load()

In [18]:
# Dump the loaded documents as plain text to inspect what was extracted.
# NOTE(review): the captured output contains personal contact details (email,
# phone number) -- consider clearing it before sharing the notebook.
print(data)

[Document(page_content='\tDylan Loader DylanLoader@gmail.com | (780) 293-1570 | Edmonton, AB | linkedin.com/in/Dylan-loader/  SKILLS  Data Science and Analytics: Applied various data extraction and cleaning approaches to real-world financial time-series data, including transaction and financial asset data Supervised Machine Learning: Designed logistic regression algorithms for classification and tree-based models for classification and regression Unsupervised Machine Learning: Designed Euclidean and Density-Based clustering algorithms as preprocessing work for downstream tasks Programming and Model Construction: Utilized Git platforms such as GitLab and GitHub to facilitate collaboration with team members for code tracking and documentation, Erwin Data Modeller Communication: Wrote documentation for project tracking and presented results to many internal and external stakeholders across various organizations  EDUCATION   University of Calgary                                         Apr

In [19]:
# Split the loaded documents into chunks (chunk_size=10000, chunk_overlap=20).
splitter = RecursiveCharacterTextSplitter(chunk_size=10000, chunk_overlap=20)
text_chunk = splitter.split_documents(data)

In [20]:
# Number of chunks the splitter produced.
len(text_chunk)

1

In [21]:
# Inspect the first chunk.
text_chunk[0]

Document(page_content='Dylan Loader DylanLoader@gmail.com | (780) 293-1570 | Edmonton, AB | linkedin.com/in/Dylan-loader/  SKILLS  Data Science and Analytics: Applied various data extraction and cleaning approaches to real-world financial time-series data, including transaction and financial asset data Supervised Machine Learning: Designed logistic regression algorithms for classification and tree-based models for classification and regression Unsupervised Machine Learning: Designed Euclidean and Density-Based clustering algorithms as preprocessing work for downstream tasks Programming and Model Construction: Utilized Git platforms such as GitLab and GitHub to facilitate collaboration with team members for code tracking and documentation, Erwin Data Modeller Communication: Wrote documentation for project tracking and presented results to many internal and external stakeholders across various organizations  EDUCATION   University of Calgary                                         April 

In [22]:
# Set up the sentence-transformers embedding model used to embed the chunks.
embedding_model_name = "sentence-transformers/all-MiniLM-L6-v2"
embeddings = HuggingFaceEmbeddings(model_name=embedding_model_name)

In [23]:
# Build a FAISS vector store from the chunks, embedding them with `embeddings`.
vector_store = FAISS.from_documents(text_chunk, embedding=embeddings)

In [24]:
# Display the store object's repr to confirm it was created.
vector_store

<langchain.vectorstores.faiss.FAISS at 0x2d0ac5290>

In [31]:
# Path to the GGUF model file passed to LlamaCpp in the next cell.
# NOTE(review): this is a relative path, so it presumably depends on the
# directory the notebook is run from -- confirm before moving the notebook.
model_path = "../ollama/text-generation-webui/models/mistral-11b-omnimix-bf16.Q5_K_M.gguf"

In [37]:
# Load the local model at `model_path` through the LlamaCpp wrapper.
# The settings are gathered in one mapping so they are easy to scan and tweak.
llama_settings = {
    "model_path": model_path,
    "n_ctx": 4096,
    "temperature": 0.7,
    "top_p": 1,
    "streaming": True,
    "verbose": True,
}
llm = LlamaCpp(**llama_settings)

llama_model_loader: loaded meta data with 20 key-value pairs and 435 tensors from ../ollama/text-generation-webui/models/mistral-11b-omnimix-bf16.Q5_K_M.gguf (version GGUF V2 (latest))
llama_model_loader: - tensor    0:                token_embd.weight q5_K     [  4096, 32000,     1,     1 ]
llama_model_loader: - tensor    1:           blk.0.attn_norm.weight f32      [  4096,     1,     1,     1 ]
llama_model_loader: - tensor    2:            blk.0.ffn_down.weight q6_K     [ 14336,  4096,     1,     1 ]
llama_model_loader: - tensor    3:            blk.0.ffn_gate.weight q5_K     [  4096, 14336,     1,     1 ]
llama_model_loader: - tensor    4:              blk.0.ffn_up.weight q5_K     [  4096, 14336,     1,     1 ]
llama_model_loader: - tensor    5:            blk.0.ffn_norm.weight f32      [  4096,     1,     1,     1 ]
llama_model_loader: - tensor    6:              blk.0.attn_k.weight q5_K     [  4096,  1024,     1,     1 ]
llama_model_loader: - tensor    7:         blk.0.attn_outpu

In [38]:
from langchain import document_loaders


In [39]:
from langchain.chains import RetrievalQA
# Wrap the vector store in a retriever configured with k=2, then build a
# "stuff"-type RetrievalQA chain on top of the local LLM.
retriever = vector_store.as_retriever(search_kwargs={"k": 2})
qa = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=retriever)

In [45]:
# Ask the retrieval QA chain a question; the result is kept in `returned_q`
# and printed in a later cell.
query = "Based on my last jobs what are 3 jobs I should apply for next?"
returned_q = qa.run(query)

Llama.generate: prefix-match hit

llama_print_timings:        load time =  3954.02 ms
llama_print_timings:      sample time =    58.48 ms /    87 runs   (    0.67 ms per token,  1487.56 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =  7800.63 ms /    87 runs   (   89.66 ms per token,    11.15 tokens per second)
llama_print_timings:       total time =  7960.48 ms


In [47]:
# Show the answer returned by the QA chain.
print(returned_q)

 Here are some job titles you may want to consider based on your experience and skills:
1. Data Scientist
2. Machine Learning Engineer
3. Data Analyst

These roles align well with your background in data science, programming, machine learning, and analytics, as well as your experience using statistical software such as R and Python. Additionally, your project management and communication skills would be valuable in these positions.


In [None]:
# Build a cover-letter prompt from the user's inputs, send it to a hosted
# Llama 2 chat model via Replicate, and render the result in Streamlit.
#
# NOTE(review): this cell has no execution count and relies on names that are
# not defined anywhere in the visible notebook: `st`, `replicate`,
# `generate_cover_letter`, `prompt_input`, `user_name`, `role`, `manager`,
# `referral` and `temp`. It presumably comes from a Streamlit app -- confirm
# where these are meant to be defined before running it here.
if generate_cover_letter:
    # Instruction preamble placed in front of the user-specific prompt.
    pre_prompt = "You are a helpful assistant. You do not respond as 'User' or pretend to be 'User'. You only respond once as 'Assistant'."
    # Create a prompt for LLM: include user inputs and job description.
    # Each field sits on its own line; the last field now ends with a plain
    # newline (the original "\n." put a stray period at the start of the
    # final instruction line).
    prompt = (
        f"The job description is: {prompt_input}\n"
        f"The candidate's name to include on the cover letter: {user_name}\n"
        f"The job title/role: {role}\n"
        f"The hiring manager is: {manager}\n"
        f"How I heard about the opportunity: {referral}\n"
        "Generate a cover letter"
    )
    # Generate LLM response
    with st.spinner("Generating response"):
        response = replicate.run(
            'a16z-infra/llama13b-v2-chat:df7690f1994d94e96ad9d568eac121aecf50684a0b0963b25a41cc40061269e5',  # Llama 2 model
            input={
                "prompt": f"{pre_prompt} {prompt} Assistant:",
                "temperature": temp,
            }
        )
        # The response is iterated as a sequence of text pieces; concatenate
        # them directly. Joining with " " would insert an extra space between
        # every streamed piece and break words apart.
        generated_cover_letter = "".join(response)

    # Display the LLM-generated cover letter.
    st.subheader("Generated Cover Letter:")
    st.write(generated_cover_letter)
    # Offer a download link for the generated cover letter; give the file an
    # explicit .txt name to match the button label.
    st.subheader("Download Generated Cover Letter:")
    st.download_button(
        "Download Cover Letter as TXT",
        generated_cover_letter,
        file_name="cover_letter.txt",
        key="cover_letter",
    )