In [15]:
import requests
import json

def extract_triples(report_text: str) -> list[tuple[str, str, str]]:
    # 1) Prompt with clear instructions
    prompt = f"""Extract all (Subject,Relation,Object) triples from the following report.
Only return lines in the exact format (X,REL,Y), one per line, no extra text.

Report:
\"\"\"{report_text}\"\"\"

Triples:
"""

    # 2) Request payload
    payload = {
        "model": "llama3.1:8b",
        "prompt": prompt,
        "temperature": 0.0,
        "max_tokens": 512,
        "stop": ["\n\n"]
    }

    # 3) Call Ollama
    res = requests.post("http://localhost:11434/api/generate", json=payload)
    if res.status_code != 200:
        raise RuntimeError(f"Ollama error {res.status_code}: {res.text}")

    # 4) Handle multi-line JSON response from Ollama
    try:
        response_lines = res.text.strip().splitlines()
        response_parts = [json.loads(line)["response"] for line in response_lines if line.strip()]
        text = "".join(response_parts)
    except json.JSONDecodeError as e:
        raise RuntimeError(f"JSON decode error at char {e.pos}: {e.msg}\nRaw:\n{res.text[:300]}")

    # 5) Debug output
    print("[DEBUG] Raw extract_triples response:")
    print(text)
    print("----- end response -----")

    # 6) Extract (S,R,O) triples
    triples = []
    for line in text.splitlines():
        line = line.strip()
        if line.startswith("(") and line.endswith(")"):
            parts = line[1:-1].split(",")
            if len(parts) == 3:
                s, r, o = (p.strip() for p in parts)
                triples.append((s, r, o))
            else:
                print(f"[WARN] Skipping malformed line: {line}")
    return triples


In [18]:
if __name__ == "__main__":
    test_report = """
    On July 10th, intelligence sources confirmed that Zahoor met with Irfan Khalid at a safe house near Reshipora.
    Irfan Khalid, who also goes by the alias Naseer, is suspected to be planning coordinated attacks.
    A cache of weapons was recovered from a hideout allegedly controlled by Babar in Sopore district.
    Tanveer was seen surveilling the army base using drones, possibly under the instructions of Babar.
    According to intercepted communication, Zahoor and Tanveer discussed logistics related to an upcoming operation.
    The group is believed to be funded by foreign handlers operating from across the border.
    A recent intel brief links Zahoor to a smuggling ring using forest routes along the LoC.
    Police sources say that Irfan Khalid and Tanveer were previously detained in 2022 for suspicious activities.
    """
    
    triples = extract_triples(test_report)
    print("\n[INFO] Parsed Triples:")
    for t in triples:
        print(t)


[DEBUG] Raw extract_triples response:
Here are the extracted triples:

Zahoor,met with,Irfan Khalid
Irfan Khalid,planning coordinated attacks,
A cache of weapons,was recovered from,Babar 
Tanveer,was surveilling the army base using drones,Babar 
Zahoor,discussed logistics related to an upcoming operation,Tanveer 
The group,is believed to be funded by,foreign handlers operating from across the border
Zahoor,is linked to a smuggling ring using forest routes along the LoC,
Irfan Khalid and Tanveer,were previously detained in 2022 for,suspicious activities
----- end response -----

[INFO] Parsed Triples:


In [19]:
from py2neo import Graph, Node, Relationship

# 1. Connect to Neo4j
graph = Graph("bolt://localhost:7687", auth=("neo4j", "password"))

# 2. Push a triple to Neo4j
def push_triple(s, r, o):
    subj = Node("Entity", name=s)
    obj = Node("Entity", name=o)
    graph.merge(subj, "Entity", "name")
    graph.merge(obj, "Entity", "name")
    rel = Relationship(subj, r.upper(), obj)
    graph.merge(rel)

# 3. Read and process each report entry
with open("/Users/inviforce/Downloads/VS_CODE(C++,PYTHON,JUPYTER,DEV)/tracknet/test_files/test.txt") as f:
    raw_reports = f.read().split("**SSP INT")[1:]  # Skip preamble

for report_text in raw_reports:
    try:
        # Extract title (before next "**" or full text fallback)
        title = report_text.split("**")[0].strip()
        body = report_text.strip()

        # Extract triples
        triples = extract_triples(body)

        # Merge report node
        report_node = Node("Report", title=title)
        graph.merge(report_node, "Report", "title")

        # Push triples and link subjects to the report
        for s, r, o in triples:
            push_triple(s, r, o)
            
            # Link Report → Subject entity (if not already linked)
            subj = Node("Entity", name=s)
            graph.merge(subj, "Entity", "name")
            mention = Relationship(report_node, "MENTIONS", subj)
            graph.merge(mention)

    except Exception as e:
        print(f"[ERROR] Failed to process entry:\n{report_text[:80]}...\nReason: {e}")


[DEBUG] Raw extract_triples response:
Here are the extracted triples:

Subject, Relation, Object
Naseer, is no longer communicating via, known proxy numbers
Kunan-Poshpora tower dump, indicates encrypted short-range communication from, LoRa devices
Sopore madrasa, passed a coded message mentioning, G72 arrival via walnut grove
Reshipora upper orchards, confirmed multiple night-stay signatures of, Naseer (alias Mudasir)
Zubair, matched known biometric estimates to, two silhouettes
Zahoor, was earlier presumed neutralized in the, Eidgah shootout
----- end response -----
[DEBUG] Raw extract_triples response:
Here are the extracted triples:

Falcon-12, reported, Feroza
Feroza, delivered, inhalers and codeine syrups
Feroza, delivered, "Cache 5-Delta"
Iqra, delivered, inhalers and codeine syrups
Iqra, delivered, "Cache 5-Delta"
Waleed, matched to, madrasa teacher in Achabal
Zubair, mentioned, "Delta leak — switch to Faizan route — Ghulam’s contact burnt"
"Faizan route", refers to, the left s

In [21]:
from py2neo import Graph
from langchain.schema import Document
from langchain_community.embeddings import OllamaEmbeddings
from langchain.vectorstores import FAISS

# 1. Connect to Neo4j
graph = Graph("bolt://localhost:7687", auth=("neo4j", "password"))

# 2. Gather triples and construct LangChain documents
docs = []
query = """
MATCH (e:Entity)-[r]->(o:Entity)
RETURN e.name AS subject, type(r) AS relation, o.name AS object
"""
for record in graph.run(query):
    subj, rel, obj = record
    sentence = f"{subj} {rel.replace('_', ' ').lower()} {obj}"
    docs.append(Document(page_content=sentence, metadata={"entity": subj}))

print(f"[INFO] Loaded {len(docs)} documents from Neo4j.")

# 3. Create Ollama embedding function
embedding_fn = OllamaEmbeddings(model="nomic-embed-text")

# 4. Build FAISS index from documents
faiss_index = FAISS.from_documents(docs, embedding=embedding_fn)

# 5. Save index to disk
faiss_index.save_local("faiss_kg_index")
print("[INFO] FAISS index saved to 'faiss_kg_index'")

# 🔁 To reload the FAISS index later:
# from langchain.vectorstores import FAISS
# faiss_index = FAISS.load_local("faiss_kg_index", embedding=embedding_fn)


[INFO] Loaded 15 documents from Neo4j.
[INFO] FAISS index saved to 'faiss_kg_index'


In [23]:
from langchain_community.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import OllamaEmbeddings
from langchain.vectorstores import FAISS

# 1. Load raw reports (one big string or multiple files)
loader = TextLoader("/Users/inviforce/Downloads/VS_CODE(C++,PYTHON,JUPYTER,DEV)/tracknet/test_files/test.txt")  # or loop over a folder
raw_docs = loader.load()

# 2. Chunk documents
splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
chunks = splitter.split_documents(raw_docs)

# 3. Embed using Ollama
embedding_fn = OllamaEmbeddings(model="nomic-embed-text")

# 4. Build FAISS index for semantic RAG
semantic_index = FAISS.from_documents(chunks, embedding=embedding_fn)

# 5. Save locally
semantic_index.save_local("faiss_semantic_index")


In [24]:
def kg_lookup(entity_name):
    q = f"""
    MATCH (e:Entity {{name: $name}})-[r]->(o)
    RETURN e.name AS from, type(r) AS rel, o.name AS to
    """
    return graph.run(q, name=entity_name).data()


In [32]:
from langchain_community.llms import Ollama
from langchain.prompts import (
    ChatPromptTemplate,
    SystemMessagePromptTemplate,
    HumanMessagePromptTemplate,
)
from langchain.chains import LLMChain
from langchain.vectorstores import FAISS
from langchain_community.embeddings import OllamaEmbeddings

# ——————————————————————
# 1) Load FAISS Indexes
# ——————————————————————

embedding_fn = OllamaEmbeddings(model="nomic-embed-text")

# Load from saved folders (make sure they exist)
faiss_kg_index = FAISS.load_local(
    "/Users/inviforce/Downloads/VS_CODE(C++,PYTHON,JUPYTER,DEV)/tracknet/TrackNet-/llm/faiss_kg_index", 
    embeddings=embedding_fn,
    allow_dangerous_deserialization=True
)

faiss_semantic_index = FAISS.load_local(
    "/Users/inviforce/Downloads/VS_CODE(C++,PYTHON,JUPYTER,DEV)/tracknet/TrackNet-/llm/faiss_semantic_index", 
    embeddings=embedding_fn,
    allow_dangerous_deserialization=True
)


# ——————————————————————
# 2) Set up Chat-Style Prompt
# ——————————————————————

system = SystemMessagePromptTemplate.from_template(
    """You are an elite intelligence analyst. 
You will be given two sources of context:
1) KG Facts (high‐precision triples extracted from internal intelligence KG)
2) Semantic Passages (raw intelligence snippets retrieved by semantic similarity)

Your job is to synthesize both to answer the question as accurately as possible."""
)

human = HumanMessagePromptTemplate.from_template(
    """KG Facts:
{kg_facts}

Semantic Passages:
{sem_passages}

Question:
{question}

Answer in a concise, structured form. Cite KG facts by prefixing “(KG)” and semantic passages by “(SEM)”."""
)

chat_prompt = ChatPromptTemplate.from_messages([system, human])

# ——————————————————————
# 3) Load the LLM
# ——————————————————————

llm = Ollama(model="llama3.1:8b")

# Chain
chain = LLMChain(llm=llm, prompt=chat_prompt)

# ——————————————————————
# 4) Final Hybrid Answer Function
# ——————————————————————

def hybrid_answer(question: str, entity: str, k_kg: int = 3, k_sem: int = 3) -> str:
    # KG-based retrieval
    kg_hits = faiss_kg_index.similarity_search(entity, k=k_kg)
    kg_text = "\n".join(f"(KG) {d.page_content}" for d in kg_hits)

    # Semantic retrieval
    sem_hits = faiss_semantic_index.similarity_search(question, k=k_sem)
    sem_text = "\n".join(f"(SEM) {d.page_content}" for d in sem_hits)

    # Answer generation
    return chain.run(
        question=question,
        kg_facts=kg_text or "None",
        sem_passages=sem_text or "None",
    )

# ——————————————————————
# 5) Usage Example
# ——————————————————————

if __name__ == "__main__":
    response = hybrid_answer(
        "Who is Irfan Khalid and what is his current status or sightings?",
        "Irfan Khalid",
        k_kg=5,
        k_sem=5
    )
    print(response)

    #partially correct as the confuction betweern iqra and ifran and name confuion most probalby due to similar names


**Subject: Irfan Khalid (alias Naseer)**

**Status:** Flagged as suspicious at post-Hajin Eid Mela crowd.

**Possible Associations:**

* Associated with RF disruption at 446.2 MHz.
* Listed in notebook from Chitibandi as "IK/Healer."
* Detainee notes from 2022 mentioned "Healer picks up after snowfall."

**Current Sightings:** None explicitly mentioned, but:

* Iqra's phone last pinged near Rajan Top (SEM).
* A local boy overheard a conversation where the individual asked for Aadhaar (SEM).

**Connections to Other Subjects:**

* Mentioned in context with "Cache 5-Delta" and Feroza/Iqra delivery operations (SEM).
* Linked to suspected Pakistani national, handler Waleed, at Achabal madrasa (SEM).
* Possibly related to encrypted short-range communication (LoRa devices) near Kunan-Poshpora (SEM).

**Note:** No explicit confirmation of Irfan Khalid's current status or exact location. The flagging at the post-Hajin Eid Mela crowd indicates potential suspicious activity, but no concrete evide

In [33]:
if __name__ == "__main__":
    response = hybrid_answer(
        "Is Zahoor dead or active? Provide evidence",
        "Zahoor",
        k_kg=5,
        k_sem=5
    )
    print(response)

    #good

**Assessment:**

Zahoor's status is uncertain, but based on available intelligence, there are indications that suggest he may be active.

**Evidence:**

* (SEM) * Zahoor heard over ham radio: “Split is fake. Zubair leads. Faizan loop bait only. Alpha meet under shrine.”
	+ This passage suggests that Zahoor was involved in a communication that implies he is still operational.
* (KG) Zahoor heard ham radio
	+ This KG fact confirms that Zahoor was indeed heard on the ham radio, but it does not provide information about his current status.

**Contradictory Evidence:**

* (SEM) * Two silhouettes matched known biometric estimates for Zubair and Zahoor. The third remains unknown.
	+ This passage implies that there may be some discrepancy in identifying Zahoor, which could suggest he might be compromised or dead.
* (KG) Ghulam was compromised
	+ This KG fact suggests that at least one person connected to Zahoor has been compromised, but it does not directly imply Zahoor's status.

**Conclusion

In [31]:
if __name__ == "__main__":
    response = hybrid_answer(
        "Trace Zubair’s movements from July 2 to July 13.",
        "Zubair",
        k_kg=5,
        k_sem=5
    )
    print(response)

    #### bad at tmeporal and location based but valid as the neo4j currently doe snot store inoformation in that format

**Movement Trace for Zubair**

* **July 2-3**: (SEM) No explicit movement information available
* **July 4-5**: (SEM) Signal relay atop Point K-217 began emitting modified AIS signals, indicating possible reactivation of Faizan route.
* **July 6-7**: (SEM) Shepherd overheard phrases: “Hidayat marker” and “Noor route.” This suggests Zubair may have been operating near Hidayat's location.
* **July 8**:
	+ (KG) Matches triggers/from 2024 Waleed intercepts
	+ (SEM) Intercepted fragments from Zubair's satphone logs mentioned: "Delta leak — switch to Faizan route — Ghulam’s contact burnt."
* **July 9-10**: (SEM) Coded message passed via a Qari at Sopore madrasa mentioned "G72 arrival via walnut grove." This may indicate Zubair's movement towards the G72 location.
* **July 11**:
	+ (KG) Faizan activity covers
	+ (SEM) Drone thermal traces confirmed multiple night-stay signatures near the upper orchards of Reshipora (grid 43-B).
* **July 12-13**: (SEM) Female informant (codename: Falcon-12) re

In [35]:
if __name__ == "__main__":
    response = hybrid_answer(
        "What routes were used to access Cache 5-Delta?",
        "Cache 5-Delta",
        k_kg=5,
        k_sem=5
    )
    print(response)

    #seconday loop is far fetched 

    #gave dffent output each time lickly not much properly made neo4j graph
    #multiple answers and repeptitive answer need fine tuning and more elaborated extraction

**Route Analysis: Access to Cache 5-Delta**

Based on the provided KG Facts and Semantic Passages, we can deduce the following routes were used to access Cache 5-Delta:

1. **Faizan Route**
	* (KG) Whisper intercept from Faizan route repeated "Code broken. Ghulam compromised."
	* (SEM) * Faizan route likely refers to the left spur ridge from Chitibandi to Tragbal.
2. **Left Ridge**
	* (SEM) * Female informant (codename: Falcon-12) reported Feroza and Iqra delivered inhalers and codeine syrups to “Cache 5-Delta.”
	* (SEM) * Intercepted fragments from Zubair's satphone logs mentioned: "Delta leak — switch to Faizan route — Ghulam’s contact burnt."
3. **Loop Route**
	* (SEM) **SSP INT 03** (AOR: 21 RR | Dated: 06 July 2025)
		+ Local OGW from Chuntimulla (code: “Deer-5”) surrendered and revealed GPS trails.
		+ Trails traced a loop: Sangrampora -> Mirgund -> Pethkoot -> Chuntimulla, between 18–25 June.
4. **GPS Trail**
	* (SEM) **SSP INT 03** (AOR: 21 RR | Dated: 06 July 2025)
		+ Deer-5 

In [37]:
if __name__ == "__main__":
    response = hybrid_answer(
        "What is meant by “Shift crescent before 2nd moon”?",
        "Shift crescent",
        k_kg=5,
        k_sem=5
    )
    print(response)

    #went too much in depth related to the literal definintion needs teemperature relations as well

**Analysis**

To determine the meaning of "Shift crescent before 2nd moon," we'll synthesize both KG Facts and Semantic Passages.

The phrase is mentioned in two KG Facts:

1. (KG) Spectral analysis decoded ”Shift crescent before 2nd moon”
2. (KG) Matches triggers/from 2024 Waleed intercepts

In the semantic passages, there are several relevant mentions:

* (SEM) Spectral analysis decoded: “Shift crescent before 2nd moon” and “Farmer reads from left ridge.”
* (SEM) Captured USB contained mp3s with ultrasonic modulations.
	+ This suggests a possible connection to audio signals or messages.
* (SEM) Timeline aligned with audio spikes from FM 87.7 MHz.
	+ This implies that the phrase might be related to radio communications.

Given these connections, it's likely that "Shift crescent before 2nd moon" refers to a specific astronomical event or timing signal used for communication or coordination. The mention of "crescent" and "moon" suggests a lunar-based calendar or timing system.

To furth

In [38]:
if __name__ == "__main__":
    response = hybrid_answer(
        "Which locations are most frequently linked with Zahoor?",
        "Zahoor",
        k_kg=5,
        k_sem=5
    )
    print(response)

Based on the provided context, I have identified the following locations as being most frequently linked to Zahoor:

1. **Rajan Top** - Mentioned in (KG) "Iqra near Rajan Top"
2. **Ham Radio transmission area** - Although not a specific location, Zahoor's involvement with ham radio is mentioned in (SEM) "* Zahoor heard over ham radio: “Split is fake. Zubair leads. Faizan loop bait only. Alpha meet under shrine.”"

Additionally, locations that are indirectly linked to Zahoor through his associates or activities include:

1. **Shrine area** - Mentioned as a possible regroup point in (SEM) "* Alpha meet reference suggests regroup point near shrine area."
2. **Bandipora Ridge** - Referenced as the location of an FM node, which was part of a sleeper tech setup involving Zahoor's alias "Farhan-ul-Islam" in (SEM) "**SSP INT 04**"

Note that these locations are not exhaustively listed, and further analysis may reveal additional connections to Zahoor.


In [39]:
if __name__ == "__main__":
    response = hybrid_answer(
        "What does “Healer” refer to? Where is Healer expected to operate?",
        "Healer",
        k_kg=5,
        k_sem=5
    )
    print(response)

**Synthesized Answer**

* **Entity:** "Healer"
	+ (KG) Whisper intercept mentions "Code broken. Ghulam compromised," suggesting Healer's involvement in communication.
	+ (SEM) Notebook from Chitibandi lists "446" next to "IK/Healer," indicating a connection between Healer and Irfan Khalid (alias Naseer).
	+ (SEM) Detainee notes mention "Healer picks up after snowfall," implying Healer's communication is weather-dependent.
* **Location:** Healer is expected to operate in the Tragbal area, specifically along the Faizan route left spur ridge from Chitibandi.
	+ (KG) Faizan activity covers indicate Healer may be operating under cover of Faizan's activities.
	+ (SEM) Intercepted fragments mention "Faizan route" and "Ghulam’s contact burnt," suggesting a connection between Healer, Ghulam, and the Faizan route.

**Conclusion:** "Healer" refers to a key communication hub or operative involved in encrypted communication, likely operating along the Faizan route left spur ridge from Chitibandi.
