In [None]:
!pip install langchain langchain_community neo4j langchain_groq

Collecting langchain_community
  Downloading langchain_community-0.3.17-py3-none-any.whl.metadata (2.4 kB)
Collecting neo4j
  Downloading neo4j-5.28.0-py3-none-any.whl.metadata (5.9 kB)
Collecting langchain_groq
  Downloading langchain_groq-0.2.4-py3-none-any.whl.metadata (3.0 kB)
Collecting langchain-core<0.4.0,>=0.3.33 (from langchain)
  Downloading langchain_core-0.3.34-py3-none-any.whl.metadata (5.9 kB)
Collecting langchain
  Downloading langchain-0.3.18-py3-none-any.whl.metadata (7.8 kB)
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain_community)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting pydantic-settings<3.0.0,>=2.4.0 (from langchain_community)
  Downloading pydantic_settings-2.7.1-py3-none-any.whl.metadata (3.5 kB)
Collecting httpx-sse<1.0.0,>=0.4.0 (from langchain_community)
  Downloading httpx_sse-0.4.0-py3-none-any.whl.metadata (9.0 kB)
Collecting langchain-text-splitters<1.0.0,>=0.3.6 (from langchain)
  Downloading langchain_tex

In [5]:
import pandas as pd
df = pd.read_csv("dataset.csv")
print(df.head())

   number      id    imdb_id  popularity     budget     revenue  \
0       0  135397  tt0369610   32.985763  150000000  1513528810   
1       1   76341  tt1392190   28.419936  150000000   378436354   
2       2  262500  tt2908446   13.112507  110000000   295238201   
3       3  140607  tt2488496   11.173104  200000000  2068178225   
4       4  168259  tt2820852    9.335014  190000000  1506249360   

                 original_title  \
0                Jurassic World   
1            Mad Max: Fury Road   
2                     Insurgent   
3  Star Wars: The Force Awakens   
4                     Furious 7   

                                                cast  \
0  Chris Pratt|Bryce Dallas Howard|Irrfan Khan|Vi...   
1  Tom Hardy|Charlize Theron|Hugh Keays-Byrne|Nic...   
2  Shailene Woodley|Theo James|Kate Winslet|Ansel...   
3  Harrison Ford|Mark Hamill|Carrie Fisher|Adam D...   
4  Vin Diesel|Paul Walker|Jason Statham|Michelle ...   

                                            homep

In [4]:
from google.colab import userdata
import os
os.environ['NEO4J_USERNAME'] = userdata.get('NEO4J_USERNAME')
os.environ['NEO4J_PASSWORD'] = userdata.get('NEO4J_PASSWORD')
os.environ['GROQ_API_KEY'] = userdata.get('GROQ_API_KEY')

In [7]:
from re import M
from langchain_community.graphs import Neo4jGraph

graph = Neo4jGraph(url="neo4j+s://75bd7571.databases.neo4j.io")

movies_query = """
LOAD CSV WITH HEADERS FROM
'https://raw.githubusercontent.com/ShivaniBandapalli/Movie-Recommendation/main/dataset.csv'
AS row
WITH row WHERE row.id IS NOT NULL AND row.id <> ""
MERGE (m:Movie {id: row.id})
SET m.imdb_id = row.imdb_id,
    m.originalt_title = row.original_title,
    m.popularity = toFloat(row.popularity),
    m.budget = toInteger(row.budget),
    m.revenue = toInteger(row.revenue),
    m.vote_average = toFloat(row.vote_average),
    m.release_year = toInteger(row.release_year)

FOREACH (director IN split(row.director, '|') |
  MERGE (p:Person {name: trim(director)})
  MERGE (p)-[:DIRECTED]->(m))

FOREACH (actor IN split(row.cast, '|') |
  MERGE (p:Person {name: trim(actor)})
  MERGE (p)-[:ACTED_IN]->(m))

FOREACH (genre IN split(row.genres, '|') |
  MERGE (g:Genre {name: trim(genre)})
  MERGE (m)-[:IN_GENRE]->(g))

FOREACH (company IN split(row.production_companies, '|') |
  MERGE (c:Company {name: trim(company)})
  MERGE (m)-[:PRODUCED_BY]->(c))
"""

graph.query(movies_query)

[]

In [13]:
graph.refresh_schema()
graph_schema = graph.schema
print(graph_schema)

Node properties:
Movie {id: STRING, imdb_id: STRING, title: STRING, popularity: FLOAT, budget: INTEGER, revenue: INTEGER, vote_average: FLOAT, release_year: INTEGER, originalt_itle: STRING, originalt_title: STRING}
Person {name: STRING}
Genre {name: STRING}
Company {name: STRING}
Relationship properties:

The relationships:
(:Movie)-[:IN_GENRE]->(:Genre)
(:Movie)-[:PRODUCED_BY]->(:Company)
(:Person)-[:DIRECTED]->(:Movie)
(:Person)-[:ACTED_IN]->(:Movie)


In [33]:
!pip install groq



In [48]:
from groq import Groq
from langchain_core.runnables import Runnable

client = Groq(
    api_key=userdata.get('GROQ_API_KEY')
)

class Agent(Runnable):

    def __init__(self, client, system):
        self.client = client
        self.system = system
        self.messages = []
        if self.system:
            self.messages.append({"role": "system", "content": self.system})

    def invoke(self, input_message, config=None, **kwargs):
        if not isinstance(input_message, str):
            input_message = str(input_message)

        self.messages.append({"role": "user", "content": input_message})
        response = self.execute()
        self.messages.append({"role": "assistant", "content": response})
        return response

    def execute(self):
        completions = self.client.chat.completions.create(
            messages=self.messages,
            model="gemma2-9b-it",
            temperature=0.2,
        )
        return completions.choices[0].message.content


In [55]:
system_prompt = """
You are a Neo4j expert who generates Cypher queries and responds with natural language explanations.
"""
agent = Agent(client, system_prompt)

In [22]:
"""from langchain_groq import ChatGroq

llm = ChatGroq(
    model = "gemma2-9b-it",
    temperature = 0.01,
)"""

In [18]:
from langchain_core.prompts import FewShotPromptTemplate, PromptTemplate


examples = [
    {
        "question": "How many artists are there?",
        "query": "MATCH (a:Person) RETURN count(a) AS total_artists"
    },
    {
        "question": "How many movies were released in the year 2000?",
        "query": "MATCH (m:Movie) WHERE m.release_year = 2000 RETURN count(m) AS total_movies"
    },
]

example_prompt = PromptTemplate.from_template("User input: {question}\nCypher query: {query}")

prompt_with_few_shot = FewShotPromptTemplate(
    examples=examples,
    example_prompt=example_prompt,
    prefix="You are a Neo4j expert. Given an input question, create a syntactically correct Cypher query\n Here is the schema information",
    suffix="User input: {question}\nCypher query:",
    input_variables=["question", "schema"]
)

formatted_prompt = prompt_with_few_shot.format(question="How many artists are there?", schema="foo")
print(formatted_prompt)


You are a Neo4j expert. Given an input question, create a syntactically correct Cypher query
 Here is the schema information

User input: How many artists are there?
Cypher query: MATCH (a:Person) RETURN count(a) AS total_artists

User input: How many movies were released in the year 2000?
Cypher query: MATCH (m:Movie) WHERE m.release_year = 2000 RETURN count(m) AS total_movies

User input: How many artists are there?
Cypher query:


In [54]:
from langchain.chains import GraphCypherQAChain

chain = GraphCypherQAChain.from_llm(
    graph=graph,
    llm=agent,
    cypher_prompt=prompt_with_few_shot,
    verbose=True,
    return_intermediate_steps=True,
    allow_dangerous_requests=True
)

response = chain.invoke({"query": "colin trevorrow directed which movie?"})
response



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mcypher
MATCH (d:Person {name: "Colin Trevorrow"})-[:DIRECTED]->(m:Movie) RETURN m.title AS movie_title
[0m
Full Context:
[32;1m[1;3m[{'movie_title': 'Jurassic World'}][0m

[1m> Finished chain.[0m


{'query': 'colin trevorrow directed which movie?',
 'result': 'Colin Trevorrow directed Jurassic World. \n',
 'intermediate_steps': [{'query': 'cypher\nMATCH (d:Person {name: "Colin Trevorrow"})-[:DIRECTED]->(m:Movie) RETURN m.title AS movie_title\n'},
  {'context': [{'movie_title': 'Jurassic World'}]}]}