In [1]:
from dotenv import load_dotenv
from pathlib import Path
import os

from langchain.vectorstores.neo4j_vector import Neo4jVector
from langchain_community.document_loaders import PyPDFLoader

from langchain.text_splitter import RecursiveCharacterTextSplitter
from transformers import AutoTokenizer

In [2]:
# Load settings from the local .env file and re-export them under the names
# used later in this notebook: Neo4jGraph() is constructed without arguments
# (so it is expected to pick up the NEO4J_* variables), and the Groq clients
# read os.environ["GROQ_API_KEY"] directly.
dotenv_path = Path('./.env')
load_dotenv(dotenv_path=dotenv_path)

# Target environment variable -> name of the variable that holds its value.
_ENV_SOURCES = {
    "NEO4J_URI": "uri",
    "NEO4J_USERNAME": "user_name",
    "NEO4J_PASSWORD": "password",
    "GROQ_API_KEY": "GROQ_API_KEY",
}

# Assigning None into os.environ fails with an opaque
# "str expected, not NoneType"; check everything first and report every
# missing name in one message instead.
_missing = [source for source in _ENV_SOURCES.values() if os.getenv(source) is None]
if _missing:
    raise RuntimeError(
        f"Missing required settings {_missing}; define them in {dotenv_path} "
        "or in the process environment."
    )

for _target, _source in _ENV_SOURCES.items():
    os.environ[_target] = os.environ[_source]

#### Loading a PDF

In [None]:
# # Load a sample PDF
# loader = PyPDFLoader(file_path="Project Proposal - Career Guide and Recommender System.pdf")
# pages = loader.load_and_split()
# raw_pages = [pages[i].page_content for i in range(len(pages))]

#### Loading an Embedding Model

In [None]:
# from langchain_huggingface import HuggingFaceEmbeddings

# # Load an embedding model to create embeddings
# embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

# # Load tokenizer
# tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")

# def bert_len(text):
#     tokens = tokenizer.encode(text)
#     return len(tokens)

#### Loading and using Text Splitter

In [None]:
# # Split document into chunks
# text_splitter = RecursiveCharacterTextSplitter(
#           chunk_size = 200,
#           chunk_overlap  = 20,
#           length_function = bert_len,
#           separators=['\n\n', '\n', ' ', ''],
#       )

# documents = text_splitter.create_documents(raw_pages)

In [None]:
# # Store vectorized documents in Neo4j database as vectors
# neo4j_vector = Neo4jVector.from_documents( documents, embedding_model )

In [None]:
# # Perform a vector similarity search and retrieve the top 2 most similar documents
# query = "Who are the authors of this paper?"

# vector_results = neo4j_vector.similarity_search(query, k=2)

# for i, res in enumerate(vector_results):
#     print(res.page_content)
#     if i != len(vector_results)-1:
#         print()

# vector_result = vector_results[0].page_content

#### Testing GraphCypherQAChain

In [15]:
### Query Knowledge Graph
from langchain.chains import GraphCypherQAChain
from langchain.graphs import Neo4jGraph
from langchain_groq import ChatGroq

# Connect to the graph. No connection arguments are passed here.
graph = Neo4jGraph()

# Create LLM instance (temperature 0 for repeatable Cypher generation).
llm = ChatGroq(
    temperature=0,
    groq_api_key=os.environ["GROQ_API_KEY"],
    model_name="llama3-8b-8192",
)

graph_schema = f"Context of the graph's schema: {graph.schema}"

# Each returned record exposes the matched node under key 'n'; keep only the
# node's `title` property so the titles can be listed in a prompt.
trait_records = graph.query("MATCH (n:Personality_Trait) return n")
personality_traits = [record['n']['title'] for record in trait_records]

# Three labelled slices of the structured schema, reused by later cells.
label_and_prop = f"Context of the Label and Properties of nodes: {graph.get_structured_schema['node_props']}"
rel = f"Context of the properties of the Relationships between Nodes: {graph.get_structured_schema['rel_props']}"
# The ": " separator was missing here, which fused the label with the data
# and was inconsistent with the two strings above.
rel_and_prop = f"Context of the Relationships between Nodes: {graph.get_structured_schema['relationships']}"

In [20]:
# Natural-language request handed to the Cypher QA chain. It embeds the graph
# schema and the available Personality_Trait titles so the model can pick
# valid values. "note that" replaces the original typo "not that", which
# turned the job_zone explanation into a negation.
prompt = (
    "Your role is to recommend career paths for me. "
    "You have access to a Neo4j Graph Database. "
    f"Here is the graph's schema: {graph_schema} "
    f"and here the Personality_Trait titles that you can use: {personality_traits} "
    "and note that job_zone resembles the level of experience required for the job from 1 till 5 as integers. "
    "Use the graph to supplement your answer. "
    "Here are some details about myself: I love people and I am a good listener. "
    "I enjoy observation and analysis. what careers would you recommend?"
)

# verbose=True prints the generated Cypher and the retrieved context while
# the chain runs.
chain = GraphCypherQAChain.from_llm(
    graph=graph,
    llm=llm,
    verbose=True,
    validate_cypher=True,
)
response = chain.invoke({"query": prompt})



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH p=(o:Occupation)-[:need_for_personality_trait]->(pt:Personality_Trait) WHERE pt.title IN ['Social', 'Artistic'] OR pt.title IN ['Realistic', 'Investigative'] RETURN o.title AS Career, o.job_zone AS Job_Zone ORDER BY o.job_zone ASC;[0m
Full Context:
[32;1m[1;3m[{'Career': 'Career/Technical_Education_Teachers,_Middle_School', 'Job_Zone': None}, {'Career': 'Choreographers', 'Job_Zone': None}, {'Career': 'Broadcast_Announcers_and_Radio_Disc_Jockeys', 'Job_Zone': None}, {'Career': 'Dancers', 'Job_Zone': None}, {'Career': 'Actors', 'Job_Zone': None}, {'Career': 'Librarians_and_Media_Collections_Specialists', 'Job_Zone': None}, {'Career': 'Loan_Officers', 'Job_Zone': None}, {'Career': 'Interpreters_and_Translators', 'Job_Zone': None}, {'Career': 'Credit_Counselors', 'Job_Zone': None}, {'Career': 'New_Accounts_Clerks', 'Job_Zone': None}][0m

[1m> Finished chain.[0m


In [21]:
# Show only the final answer text stored under the 'result' key of the
# chain's response.
print(response['result'])

Based on your personality, I would recommend the following careers: Broadcast Announcers and Radio Disc Jockeys, Librarians and Media Collections Specialists, and Interpreters and Translators.


In [14]:
# Run the trait lookup directly against the graph: titles of occupations
# linked to the 'Social' or 'Artistic' personality traits.
graph.query(
    "MATCH (o:Occupation)-[:need_for_personality_trait]->(pt:Personality_Trait) "
    "WHERE pt.title IN ['Social', 'Artistic'] "
    "RETURN o.title AS CareerPath"
)

[{'CareerPath': 'Career/Technical_Education_Teachers,_Middle_School'},
 {'CareerPath': 'Choreographers'},
 {'CareerPath': 'Broadcast_Announcers_and_Radio_Disc_Jockeys'},
 {'CareerPath': 'Dancers'},
 {'CareerPath': 'Actors'},
 {'CareerPath': 'Librarians_and_Media_Collections_Specialists'},
 {'CareerPath': 'Loan_Officers'},
 {'CareerPath': 'Interpreters_and_Translators'},
 {'CareerPath': 'Credit_Counselors'},
 {'CareerPath': 'New_Accounts_Clerks'},
 {'CareerPath': 'Morticians,_Undertakers,_and_Funeral_Arrangers'},
 {'CareerPath': 'Medical_Transcriptionists'},
 {'CareerPath': 'Medical_Assistants'},
 {'CareerPath': 'Loan_Interviewers_and_Clerks'},
 {'CareerPath': 'Library_Technicians'},
 {'CareerPath': 'Interviewers,_Except_Eligibility_and_Loan'},
 {'CareerPath': 'Human_Resources_Assistants,_Except_Payroll_and_Timekeeping'},
 {'CareerPath': 'Executive_Secretaries_and_Executive_Administrative_Assistants'},
 {'CareerPath': 'Eligibility_Interviewers,_Government_Programs'},
 {'CareerPath': 'De

In [None]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_groq import ChatGroq
from langchain_core.pydantic_v1 import BaseModel
from graph_functions import connect_to_database

class QueryKnowledgeGraph(BaseModel):
    # Python never treats an f-string as a docstring, so the original
    # f'''...''' literal was evaluated and discarded, leaving __doc__ empty.
    # Assigning __doc__ explicitly attaches the schema context to the class.
    # The three schema strings are joined with newlines instead of being
    # interpolated as a tuple, whose repr adds quotes and escapes.
    __doc__ = (
        "The graph contains details about Occupations and their related demands. "
        "This is the schema of the graph as context:\n"
        f"{label_and_prop}\n{rel}\n{rel_and_prop}\n"
        "In your cypher query do not use any words that are not found in the schema. "
        "Use the graphs relations, labels, and properties to supplement your answer. "
        "Return an answer to the question, approach you used, the node_id that you "
        "referred to, and the query you wrote."
    )

    # Answer to the user's question.
    answer: str
    # Description of the approach that was used.
    approach_used: str
    # Identifier of the node that was referred to.
    node_id: str
    # The Cypher query that was written.
    cypher_query: str

    @staticmethod
    def query_neo4j(query):
        """Run ``query`` on the notebook-level ``graph`` and return its result.

        Declared as a static method: the original definition took no ``self``,
        so calling it on an instance would have passed the instance as
        ``query``.
        """
        return graph.query(query)


# Chat model for the structured-output experiment.
chat = ChatGroq(
    model_name="mixtral-8x7b-32768",
    groq_api_key=os.environ["GROQ_API_KEY"],
    temperature=1,
)

# Bind the QueryKnowledgeGraph schema to the model. include_raw=True is
# requested, and the result is read through answer['parsed'] in the next cell.
structured_llm = chat.with_structured_output(
    QueryKnowledgeGraph,
    include_raw=True,
)

answer = structured_llm.invoke(
    "I love people and I love to listen, what would you recommend as a career?"
)

In [None]:
# `answer` was produced with include_raw=True, so 'parsed' can be None when
# the model output did not match the schema. Report that case instead of
# failing with AttributeError on None.
parsed = answer['parsed']
if parsed is None:
    print("Structured output could not be parsed:", answer.get('parsing_error'))
    print(answer.get('raw'))
else:
    # Same fields, in the same order, as the original four print calls.
    for field_name in ("answer", "approach_used", "node_id", "cypher_query"):
        print(getattr(parsed, field_name))

In [None]:
# NOTE(review): this driver is opened but never used or closed in this cell
# (the session-based variant below is commented out) — confirm whether it is
# still needed, and close it if not.
driver = connect_to_database(uri=os.getenv('uri'), username = os.getenv('user_name'), password = os.getenv('password'))

# Occupations connected (in either direction) to the 'Artistic' trait.
# The label is spelled `Personality_Trait` in every other query of this
# notebook; the original `Personality Trait` (with a space) did not match it.
# A plain string is used because nothing is interpolated.
query = "MATCH (n:Occupation)-[]-(m:Personality_Trait {title: 'Artistic'}) RETURN n"
# with driver.session() as session:
#     result = session.run(query)
#     result = result.value()
#     driver.close()
result = graph.query(query)
result

In [None]:
## Testing Groq alone (direct SDK call, no LangChain)
from groq import Groq

client = Groq(api_key=os.environ["GROQ_API_KEY"])

# Single-turn request consisting of one user message.
chat_completion = client.chat.completions.create(
    model="llama3-groq-70b-8192-tool-use-preview",
    messages=[{"role": "user", "content": "Introduce yourself"}],
)

# Print the message text of the first returned choice.
print(chat_completion.choices[0].message.content)

In [None]:
#https://download.llamameta.net/*?Policy=eyJTdGF0ZW1lbnQiOlt7InVuaXF1ZV9oYXNoIjoienhzcW5qdnJwdWYyZDRwMnB2N2Jydzg4IiwiUmVzb3VyY2UiOiJodHRwczpcL1wvZG93bmxvYWQubGxhbWFtZXRhLm5ldFwvKiIsIkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTcyMzc4OTY5NX19fV19&Signature=SndIBApuNRCYZknzXVDyzXwL083ASH6kq42bpq-j8IHM6LRwPwK8456EmYa-XNjedbJNeDPpjdpxo%7EZtMe%7EVoDz4MUep-AwmhnyRlIim9-nwOZOGTpp%7El4bNUBRB%7Et8ARUZiQ%7EMdQa59BgNvmxQuNSAiwkYx0kr5pxV%7EUmqcTdHLM5B2FfGvCVfQrwIMwcXGYJI5SSwMH-4byp6ZSgbJM50WtykKpnyukLsa6kybY38AbUHxI5BdMeYpIPM9rIGlknWOEPaZE8oTnCeIf0NwEyN8fG3F4sFThEKN6KA2r0ilV3z2TkGKu6MpcnvvtdIMPldSlG-ewyUdHRGUWK98kg__&Key-Pair-Id=K15QRJLYKIFSLZ&Download-Request-ID=2562051127315960

In [None]:
# Display the node label/property context string built earlier in the notebook.
label_and_prop

### Back to videos

In [None]:
from langchain_groq import ChatGroq
from langchain.prompts import ChatPromptTemplate

chat = ChatGroq(temperature=0, groq_api_key=os.environ["GROQ_API_KEY"], model_name="mixtral-8x7b-32768")

# Instructions plus the graph schema context for the model.
prompt = f"The neo4j graph database contains details about Occupations and their related demands. This is the schema of the graph as context:\n{label_and_prop}\n{rel}\n{rel_and_prop}\nIn your cypher query do not use any words that are not found in the schema. Use the graphs relations, labels, and properties to supplement your answer. Return an answer to the question, appraoch you used, the node_id that you referred to, and the query you wrote."

# The schema text may contain literal '{' / '}' characters, which a prompt
# template would try to parse as variables. It is therefore passed as a
# variable *value* rather than used as the template text itself.
prompt_template = ChatPromptTemplate.from_messages(
    [
        ("system", "{instructions}"),
        ("human", "{question}"),
    ]
)

# Previously the template was built but never used and the model received
# only the bare question; now the schema instructions are actually sent.
response = (prompt_template | chat).invoke(
    {
        "instructions": prompt,
        "question": "I love people and I love to listen, what would you recommend as a career?",
    }
)

In [None]:
# Print the text content of the chat model's reply.
# NOTE(review): `response` is also assigned by the GraphCypherQAChain cell
# earlier, where it is indexed like a dict — this line relies on the chat
# cell having run most recently.
print(response.content)

In [None]:
## Adding memory to a conversation

from langchain.chains import ConversationChain  # Deprecated
from langchain_core.runnables.history import RunnableWithMessageHistory
from langchain_core.runnables import RunnableSequence
from langchain.memory import ConversationBufferMemory, ConversationSummaryMemory

llm = ChatGroq(
    model_name="mixtral-8x7b-32768",
    groq_api_key=os.environ["GROQ_API_KEY"],
    temperature=0,
)

# Summary-style memory, built with the same LLM that drives the conversation.
memory = ConversationSummaryMemory(llm=llm)

# Conversation chain wired to that memory, with verbose output enabled.
conversation = ConversationChain(
    verbose=True,
    memory=memory,
    llm=llm,
)

# To manually load previous memory
# memory.save_context({"input": "Hi"}, {"output": "What's up"})
# memory.load_memory_variables({})
# conversation.predict(input="Limit your answers to one line. Give me a  love poem.")
# print(memory.buffer)

In [None]:
## A chain of prompts returning one output from one input
from langchain.chains import SequentialChain, LLMChain

llm = ChatGroq(
    model_name="mixtral-8x7b-32768",
    groq_api_key=os.environ["GROQ_API_KEY"],
    temperature=0.9,
)

# Step 1: translate the review to English (Review -> English_Review).
first_prompt = ChatPromptTemplate.from_template(
    "Translate the following review to english:\n\n{Review}"
)
chain_one = LLMChain(prompt=first_prompt, llm=llm, output_key="English_Review")

# Step 2: one-sentence summary (English_Review -> summary).
second_prompt = ChatPromptTemplate.from_template(
    "Can you summarize the following review in 1 sentence:\n\n{English_Review}"
)
chain_two = LLMChain(prompt=second_prompt, llm=llm, output_key="summary")

# Step 3: detect the language of the original review (Review -> language).
third_prompt = ChatPromptTemplate.from_template(
    "What language is the following review:\n\n{Review}"
)
chain_three = LLMChain(prompt=third_prompt, llm=llm, output_key="language")

# Step 4: follow-up reply in the detected language
# (summary, language -> followup_message).
fourth_prompt = ChatPromptTemplate.from_template(
    "Write a follow up response to the following summary in the specified language:\n\nSummary: {summary}\n\nLanguage: {language}"
)
chain_four = LLMChain(prompt=fourth_prompt, llm=llm, output_key="followup_message")

# Whole pipeline: takes Review; exposes English_Review, summary and
# followup_message as outputs.
overall_chain = SequentialChain(
    chains=[chain_one, chain_two, chain_three, chain_four],
    input_variables=["Review"],
    output_variables=["English_Review", "summary", "followup_message"],
    verbose=True,
)

In [None]:
# Run the whole pipeline on a single sample review.
output = overall_chain.invoke("Ana b7ebbak men kel albe ya kbir")

# Print each stage as "<label> <value> \n", in pipeline order.
for label, key in (
    ("review => ", 'Review'),
    ("english review => ", 'English_Review'),
    ("summary => ", 'summary'),
    ("followup message => ", 'followup_message'),
):
    print(label, output[key], '\n')