In [7]:
!pip install --upgrade --quiet  langchain langchain-community langchain-groq neo4j


[notice] A new release of pip is available: 23.0.1 -> 25.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [38]:
## Graphdb configuration
# Connection settings are read from environment variables so that credentials
# never have to be typed into (and saved with) the notebook. Each value falls
# back to an empty string when its variable is not set.
import os

NEO4J_URI = os.environ.get("NEO4J_URI", "")
NEO4J_USERNAME = os.environ.get("NEO4J_USERNAME", "")
NEO4J_PASSWORD = os.environ.get("NEO4J_PASSWORD", "")

In [9]:
from langchain_community.graphs import Neo4jGraph
# Build the Neo4jGraph wrapper from the connection settings defined above.
graph = Neo4jGraph(url=NEO4J_URI, username=NEO4J_USERNAME, password=NEO4J_PASSWORD)

In [10]:
# Echo the object as the cell output to confirm it was created.
graph

<langchain_community.graphs.neo4j_graph.Neo4jGraph at 0x27be37d42b0>

In [22]:
import pandas as pd

In [29]:
# Load CSV locally
# The location can be overridden with the VEHICLE_CSV_PATH environment variable;
# the original machine-specific path is kept only as the fallback default.
import os

VEHICLE_CSV_PATH = os.environ.get(
    "VEHICLE_CSV_PATH",
    'C:/Users/LAKSHYA PALIWAL/Vehicle-Insurance-Agent/data/car_dataset_combined.csv',
)
df = pd.read_csv(VEHICLE_CSV_PATH)

# Load data programmatically 
def _to_float(value):
    """Return ``value`` as a finite float, or ``None`` if it is missing or not numeric."""
    import math  # local import so this cell does not depend on another cell's imports

    if pd.isna(value):
        return None
    try:
        number = float(value)
    except (TypeError, ValueError):
        # Text such as "n/a", "1,197" or "1.2.3" is treated as "no value".
        return None
    return number if math.isfinite(number) else None


def _to_int(value):
    """Return ``value`` truncated to an int, or ``None`` if it is missing or not numeric."""
    number = _to_float(value)
    return None if number is None else int(number)


def _to_text(value):
    """Return ``str(value)``, or ``None`` when the cell is missing (NaN/None)."""
    return None if pd.isna(value) else str(value)


def load_vehicle_data(graph, df):
    """Upsert every row of ``df`` into the graph as a Vehicle with its category nodes.

    For each row one parameterised Cypher statement is sent through
    ``graph.query``. It MERGEs the Manufacturer, BodyType, FuelType and Vehicle
    nodes, SETs the vehicle's properties, and links the vehicle to its categories
    and to one Feature node per entry of the comma-separated ``features`` column.

    Args:
        graph: Object exposing ``query(cypher, params)``.
        df: DataFrame with the columns manufacturer, model_name, variant_name,
            manufacturing_year, showroom_price, fuel_type, engine_capacity_cc,
            body_type, seating_capacity, transmission, power_bhp, torque_nm,
            mileage_kmpl, safety_rating and features.

    Returns:
        None. Progress is printed every 100 rows and once at the end.

    Notes:
        * Numeric columns are parsed with ``float()``; values that are missing or
          cannot be parsed are sent as ``None`` instead of raising.
        * Optional text properties are sent as ``None`` when the cell is missing,
          so the literal string "nan" is no longer stored for them.
        * NOTE(review): manufacturer, model_name, variant_name, body_type and
          fuel_type are still passed through ``str()`` because they are used as
          MERGE keys, and Cypher MERGE rejects null property values. A missing
          cell in those columns therefore still becomes the text "nan"; decide
          whether such rows should be skipped instead.
    """
    # The statement is identical for every row, so build it once outside the loop.
    cypher_query = """
        // Step 1: Find or create the nodes for categories
        MERGE (m:Manufacturer {name: $manufacturer})
        MERGE (bt:BodyType {name: $body_type})
        MERGE (ft:FuelType {name: $fuel_type})

        // Step 2: Find or create the unique Vehicle node and set its properties
        MERGE (v:Vehicle {
            manufacturer: $manufacturer,
            model_name: $model_name,
            variant_name: $variant_name
        })
        SET v.manufacturing_year = $manufacturing_year,
            v.showroom_price = $showroom_price,
            v.engine_capacity_cc = $engine_capacity_cc,
            v.body_type = $body_type,
            v.fuel_type = $fuel_type,
            v.seating_capacity = $seating_capacity,
            v.transmission = $transmission,
            v.power_bhp = $power_bhp,
            v.torque_nm = $torque_nm,
            v.mileage_kmpl = $mileage_kmpl,
            v.safety_rating = $safety_rating,
            v.features = $features // Storing the list directly can still be useful

        // Step 3: Connect the Vehicle to its main categories
        MERGE (v)-[:MANUFACTURED_BY]->(m)
        MERGE (v)-[:HAS_BODY_TYPE]->(bt)
        MERGE (v)-[:USES_FUEL]->(ft)

        // Step 4: Unwind the features list and connect each feature to the vehicle
        WITH v // Pass the vehicle 'v' to the next part of the query
        UNWIND $features AS feature_name
        MERGE (f:Feature {name: feature_name})
        MERGE (v)-[:HAS_FEATURE]->(f)
        """

    count = 0
    for _, row in df.iterrows():
        # Split the comma-separated feature text; blanks and the "nan" placeholder
        # are dropped, and a missing cell yields an empty list.
        raw_features = row['features']
        if pd.isna(raw_features):
            features = []
        else:
            stripped = (part.strip() for part in str(raw_features).split(','))
            features = [name for name in stripped if name and name != 'nan']

        params = {
            # MERGE keys: always strings (see NOTE(review) in the docstring).
            'manufacturer': str(row['manufacturer']),
            'model_name': str(row['model_name']),
            'variant_name': str(row['variant_name']),
            'body_type': str(row['body_type']),
            'fuel_type': str(row['fuel_type']),
            # Optional properties: None when the cell is missing or unparsable.
            'manufacturing_year': _to_int(row['manufacturing_year']),
            'showroom_price': _to_text(row['showroom_price']),
            'engine_capacity_cc': _to_float(row['engine_capacity_cc']),
            'seating_capacity': _to_float(row['seating_capacity']),
            'transmission': _to_text(row['transmission']),
            'power_bhp': _to_text(row['power_bhp']),
            'torque_nm': _to_text(row['torque_nm']),
            'mileage_kmpl': _to_float(row['mileage_kmpl']),
            'safety_rating': _to_float(row['safety_rating']),
            'features': features,
        }

        graph.query(cypher_query, params)
        count += 1
        if count % 100 == 0:
            print(f"Loaded {count} vehicles...")

    print(f"✅ Loaded {count} vehicles total")

# Run the data loading: sends one upsert query per DataFrame row to the graph
# and prints progress every 100 rows plus a final total.
load_vehicle_data(graph, df)

Loaded 100 vehicles...
Loaded 200 vehicles...
✅ Loaded 237 vehicles total


In [30]:
# Refresh the schema held by the graph wrapper, then print it.
graph.refresh_schema()
print(graph.schema)

Node properties:
Vehicle {model_name: STRING, variant_name: STRING, manufacturer: STRING, manufacturing_year: INTEGER, showroom_price: STRING, fuel_type: STRING, engine_capacity_cc: FLOAT, body_type: STRING, seating_capacity: FLOAT, transmission: STRING, power_bhp: STRING, torque_nm: STRING, features: LIST, mileage_kmpl: FLOAT}
Manufacturer {name: STRING}
BodyType {name: STRING}
FuelType {name: STRING}
Feature {name: STRING}
Relationship properties:

The relationships:
(:Vehicle)-[:MANUFACTURED_BY]->(:Manufacturer)
(:Vehicle)-[:HAS_BODY_TYPE]->(:BodyType)
(:Vehicle)-[:USES_FUEL]->(:FuelType)
(:Vehicle)-[:HAS_FEATURE]->(:Feature)


In [None]:
# Read the Groq API key from the environment instead of pasting it into the
# notebook. Set GROQ_API_KEY before starting Jupyter; falls back to an empty string.
import os

GROQ_API_KEY = os.environ.get("GROQ_API_KEY", "")

In [34]:
from langchain_groq import ChatGroq
# Groq-hosted chat model that is handed to the question-answering chain.
llm = ChatGroq(
    model_name="meta-llama/llama-4-scout-17b-16e-instruct",
    api_key=GROQ_API_KEY,
)
llm

ChatGroq(client=<groq.resources.chat.completions.Completions object at 0x0000027BE3E8D660>, async_client=<groq.resources.chat.completions.AsyncCompletions object at 0x0000027BE6273100>, model_name='meta-llama/llama-4-scout-17b-16e-instruct', model_kwargs={}, groq_api_key=SecretStr('**********'))

In [35]:
from langchain.chains import GraphCypherQAChain
# Chain that asks the LLM to write a Cypher statement for a question and runs it
# against the graph. NOTE(review): allow_dangerous_requests=True means
# model-generated Cypher is executed on the database — confirm the Neo4j user
# configured for `graph` has narrowly scoped (ideally read-only) permissions.
chain = GraphCypherQAChain.from_llm(
    llm=llm,
    graph=graph,
    verbose=True,
    allow_dangerous_requests=True,
)
chain

GraphCypherQAChain(verbose=True, graph=<langchain_community.graphs.neo4j_graph.Neo4jGraph object at 0x0000027BE37D42B0>, cypher_generation_chain=LLMChain(verbose=False, prompt=PromptTemplate(input_variables=['question', 'schema'], input_types={}, partial_variables={}, template='Task:Generate Cypher statement to query a graph database.\nInstructions:\nUse only the provided relationship types and properties in the schema.\nDo not use any other relationship types or properties that are not provided.\nSchema:\n{schema}\nNote: Do not include any explanations or apologies in your responses.\nDo not respond to any questions that might ask anything else than for you to construct a Cypher statement.\nDo not include any text except the generated Cypher statement.\n\nThe question is:\n{question}'), llm=ChatGroq(client=<groq.resources.chat.completions.Completions object at 0x0000027BE3E8D660>, async_client=<groq.resources.chat.completions.AsyncCompletions object at 0x0000027BE6273100>, model_name=

In [36]:
# Send a natural-language question through the chain and show what it returns.
response = chain.invoke(
    {"query": "I have the Ghost Series II model tell me about it"}
)
response



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mcypher
MATCH (v:Vehicle {model_name: 'Ghost Series II'})
OPTIONAL MATCH (v)-[:MANUFACTURED_BY]->(m:Manufacturer)
OPTIONAL MATCH (v)-[:HAS_BODY_TYPE]->(bt:BodyType)
OPTIONAL MATCH (v)-[:USES_FUEL]->(ft:FuelType)
OPTIONAL MATCH (v)-[:HAS_FEATURE]->(f:Feature)
RETURN 
    v.model_name AS modelName,
    v.variant_name AS variantName,
    v.manufacturer AS manufacturer,
    v.manufacturing_year AS manufacturingYear,
    v.showroom_price AS showroomPrice,
    v.fuel_type AS fuelType,
    v.engine_capacity_cc AS engineCapacityCc,
    v.body_type AS bodyType,
    v.seating_capacity AS seatingCapacity,
    v.transmission AS transmission,
    v.power_bhp AS powerBhp,
    v.torque_nm AS torqueNm,
    v.features AS features,
    v.mileage_kmpl AS mileageKmpl,
    m.name AS manufacturerName,
    bt.name AS bodyTypeName,
    ft.name AS fuelTypeName,
    collect(DISTINCT f.name) AS featuresList
[0m
Full Context:
[32

{'query': 'I have the Ghost Series II model tell me about it',
 'result': 'The Ghost Series II is a Sedan model manufactured by Rolls-Royce, with a manufacturing year of 2025. It has a petrol engine with a capacity of 6750.0 cc, producing 563 bhp of power. The car features an automatic transmission and has a seating capacity of 5.0. Some of its notable features include a luxury interior, advanced safety features, and a high-end infotainment system. The showroom price of the Ghost Series II ranges from ₹8.95 to ₹10.52 Cr.'}