In [None]:
from google.colab import drive
import os

# Mount Google Drive at /content/drive.
drive.mount('/content/drive')

# Working folder for the RAG artefacts; created on first run.
RAG_PATH = '/content/drive/MyDrive/RAG'
os.makedirs(RAG_PATH, exist_ok=True)

print("RAG folder:", RAG_PATH)
print("Contents now:")
# One directory entry per line.
print(*os.listdir(RAG_PATH), sep="\n")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
RAG folder: /content/drive/MyDrive/RAG
Contents now:
phi3_rag_chunks_structured.json
faiss_index.index
faiss_id_map.pkl
phi3_rag_chunks_structured (1).json


In [None]:
from google.colab import files
import os

RAG_PATH = '/content/drive/MyDrive/RAG'

# Open the Colab upload widget; the result maps each file name to its bytes.
uploaded = files.upload()

# Copy every uploaded file into the RAG folder under the name Colab reported.
for fname, content in uploaded.items():
    dest = os.path.join(RAG_PATH, fname)
    with open(dest, 'wb') as out_file:
        out_file.write(content)
    print("Saved:", dest)

print("\nFolder now contains:")
print(*os.listdir(RAG_PATH), sep="\n")


Saving phi3_rag_chunks_structured.json to phi3_rag_chunks_structured (2).json
Saved: /content/drive/MyDrive/RAG/phi3_rag_chunks_structured (2).json

Folder now contains:
phi3_rag_chunks_structured.json
faiss_index.index
faiss_id_map.pkl
phi3_rag_chunks_structured (1).json
phi3_rag_chunks_structured (2).json


In [None]:
!pip install -q faiss-cpu sentence-transformers tqdm

In [None]:
import os, json, glob
RAG_PATH = '/content/drive/MyDrive/RAG'

# Candidate file patterns, in priority order.
CHUNK_FILE_PATTERNS = ('*structured*.json', '*.jsonl', '*rag_ready*.txt', '*.json')


def _unescape_newlines(text):
    """Turn literal backslash-n sequences back into real newlines; None becomes ''."""
    return (text or '').replace('\\n', '\n')


# glob gives no ordering guarantee, so every priority tier is sorted explicitly:
# newest file first, file name as tie-breaker. With several copies of one upload
# ("name.json", "name (1).json", ...) the most recently written copy is used and
# the choice is the same on every run.
candidates = []
for pattern in CHUNK_FILE_PATTERNS:
    matches = glob.glob(os.path.join(RAG_PATH, pattern))
    candidates += sorted(matches, key=lambda path: (-os.path.getmtime(path), path))
# A structured file matches both the first and the last pattern; keep its first position.
candidates = list(dict.fromkeys(candidates))

if not candidates:
    raise FileNotFoundError(f"No structured/jsonl/rag_ready files found in {RAG_PATH}. Put them there and re-run the upload cell.")

file_path = candidates[0]
print("Using file:", file_path)

# Load chunks into a list of dicts: [{'chunk_id':..., 'text':..., 'meta':...}, ...]
chunks = []

if file_path.endswith('.json'):
    # Any .json file is read as the structured format (data['sections'] -> section data).
    # Previously a .json without "structured" in its name was fed to the
    # pipe-delimited text parser below and silently produced zero chunks.
    with open(file_path, 'r', encoding='utf-8') as f:
        data = json.load(f)
    sections = data.get('sections', {}) if isinstance(data, dict) else {}
    for section, secdata in sections.items():
        # Parents first, then children, for each section.
        for level, key in (('parent', 'parent_chunks'), ('child', 'child_chunks')):
            for item in secdata.get(key, []):
                chunks.append({
                    'chunk_id': item.get('chunk_id'),
                    'text': _unescape_newlines(item.get('content')),
                    'meta': {'section': section, 'level': level},
                })

elif file_path.endswith('.jsonl'):
    with open(file_path, 'r', encoding='utf-8') as f:
        for line in f:
            if not line.strip():
                continue  # tolerate blank lines (e.g. a trailing newline at end of file)
            obj = json.loads(line)
            chunks.append({
                'chunk_id': obj.get('chunk_id') or obj.get('id'),
                'text': _unescape_newlines(obj.get('content')),
                'meta': obj,
            })

else:  # rag_ready.txt: chunk_id|level|section|parent|tokens|hash|content
    with open(file_path, 'r', encoding='utf-8') as f:
        for line in f:
            if line.startswith('#') or not line.strip():
                continue
            # Split on the first six pipes only, so '|' characters inside the
            # content field are kept instead of truncating the text.
            # Assumes content is the last field of the record.
            parts = line.strip().split("|", 6)
            if len(parts) < 7:
                continue  # malformed record
            chunk_id, level, section, parent, _tokens, _hash, content = parts
            chunks.append({
                'chunk_id': chunk_id,
                'text': _unescape_newlines(content),  # restore newlines if they were escaped
                'meta': {'section': section, 'level': level, 'parent': parent},
            })

print(f"Loaded {len(chunks)} chunks. Example:")
print(chunks[0] if chunks else "NO CHUNKS")


Using file: /content/drive/MyDrive/RAG/phi3_rag_chunks_structured.json
Loaded 106 chunks. Example:
{'chunk_id': 'section_47_P1', 'text': 'Overall QoS concept\n4.7.1 PDN (Packet Data Network (internet)) connectivity service\nThe Evolved Packet System provides connectivity between a UE (User Equipment (mobile device)) and a PLMN external packet data network. This is\nreferred to as PDN Connectivity Service. The IP PDN Connectivity Service supports the transport of traffic flow aggregate(s), consisting of one or more Service\nData Flows (SDFs). NOTE: The concept of SDF is defined in the context of PCC (Policy And Charging Control), TS 23.203 [6], and is not explicitly visible in the\nNAS (NoN-Access-Stratum) signalling. A PDN connection to an SCEF has the following characteristics:\n- It is only supported for WB-EUTRA (Evolved Universal Terrestrial Radio Access), LTE-M and NB-IoT (Internet of Things) RAT types;\n- It applies only when Control Plane CIoT EPS (Evolved Packet System (LTE cor

In [None]:
import os, pickle
from sentence_transformers import SentenceTransformer
import numpy as np
import faiss
from tqdm import tqdm

RAG_PATH = '/content/drive/MyDrive/RAG'
EMBED_MODEL = 'sentence-transformers/all-MiniLM-L6-v2'  # sentence-transformers model used for the embeddings
BATCH = 64  # number of texts encoded per call

# Load SBERT
model = SentenceTransformer(EMBED_MODEL)
print("Embedding model loaded.")

# Parallel lists: position n in both refers to chunks[n].
texts = [chunk['text'] for chunk in chunks]
ids = [chunk['chunk_id'] for chunk in chunks]

# Encode BATCH texts at a time; each batch becomes one float32 array.
emb_list = [
    model.encode(
        texts[start:start + BATCH],
        convert_to_numpy=True,
        show_progress_bar=False,
    ).astype('float32')
    for start in tqdm(range(0, len(texts), BATCH), desc="Embedding batches")
]

# With no chunks there is nothing to stack: fall back to an empty (0, dim) matrix.
embeddings = (
    np.vstack(emb_list)
    if emb_list
    else np.zeros((0, model.get_sentence_embedding_dimension()), dtype='float32')
)

print("Embeddings shape:", embeddings.shape)

# Flat L2 index over all chunk embeddings.
d = embeddings.shape[1]
index = faiss.IndexFlatL2(d)
index.add(embeddings)
print("FAISS index built - total vectors:", index.ntotal)

# Persist the index and the position -> chunk mapping next to the source files.
faiss_index_path = os.path.join(RAG_PATH, 'faiss_index.index')
id_map_path = os.path.join(RAG_PATH, 'faiss_id_map.pkl')

faiss.write_index(index, faiss_index_path)
with open(id_map_path, 'wb') as id_map_file:
    pickle.dump({'ids': ids, 'chunks': chunks}, id_map_file)

print("Saved FAISS index to:", faiss_index_path)
print("Saved id map to:", id_map_path)


Embedding model loaded.


Embedding batches: 100%|██████████| 2/2 [00:11<00:00,  5.76s/it]

Embeddings shape: (106, 384)
FAISS index built - total vectors: 106
Saved FAISS index to: /content/drive/MyDrive/RAG/faiss_index.index
Saved id map to: /content/drive/MyDrive/RAG/faiss_id_map.pkl





In [None]:
def rag_search(query, k=3):
    """Return up to ``k`` indexed chunks that are closest to ``query``.

    Relies on the notebook globals ``model`` (embedder), ``index`` (FAISS index),
    ``ids`` and ``chunks``; position n of ``ids``/``chunks`` corresponds to
    vector n in ``index``.

    Args:
        query: Text to embed and search for.
        k: Maximum number of results. Clamped to the number of indexed vectors.

    Returns:
        A list of dicts with the keys ``score`` (the distance reported by
        ``index.search``; for the IndexFlatL2 built in this notebook a lower
        value means a closer match), ``chunk_id``, ``text_snippet`` (first 500
        characters of the chunk text) and ``meta``. The list is empty when the
        index is empty or ``k`` is not positive.
    """
    k = min(k, index.ntotal)
    if k <= 0:
        return []

    qv = model.encode([query], convert_to_numpy=True).astype('float32')
    D, I = index.search(qv, k)

    results = []
    for dist, idx in zip(D[0], I[0]):
        # FAISS reports "no neighbour" as label -1. Indexing ids/chunks with -1
        # would silently return the LAST chunk, so skip such slots.
        if idx < 0:
            continue
        idx = int(idx)
        results.append({
            'score': float(dist),
            'chunk_id': ids[idx],
            'text_snippet': chunks[idx]['text'][:500],
            'meta': chunks[idx]['meta'],
        })
    return results

# Try the retriever on a sample question and show the top three hits.
q = "explain to me in detail the LTE attachment Procudeure end-to-end"
res = rag_search(q, k=3)

for rank, hit in enumerate(res, start=1):
    header = "--- Result {} (score {:.4f}) ---".format(rank, hit['score'])
    print(header)
    print(hit['chunk_id'], hit['meta'])
    # The snippet is followed by one blank line to separate results.
    print(hit['text_snippet'], end="\n\n")


--- Result 1 (score 0.6641) ---
section_53_P1_C1 {'section': 'section_53', 'level': 'child'}
Initial Attachment LTE E-UTRAN
A UE (User Equipment (mobile device))/user needs to register with the network to receive services that require registration. This registration is described
as Network Attachment. The always-on IP connectivity for UE/users of the EPS (Evolved Packet System (LTE core)) may be enabled by establishing a
default EPS bearer during Network Attachment.

--- Result 2 (score 0.7281) ---
section_53_P1 {'section': 'section_53', 'level': 'parent'}
Initial Attachment LTE E-UTRAN
A UE (User Equipment (mobile device))/user needs to register with the network to receive services that require registration. This registration is described
as Network Attachment. The always-on IP connectivity for UE/users of the EPS (Evolved Packet System (LTE core)) may be enabled by establishing a
default EPS bearer during Network Attachment. The PCC (Policy And Charging Control) rules applied to the 

In [None]:

# Create the local model directory; -p makes this a no-op when it already exists.
!mkdir -p /content/models/phi3

# Download the Q4_0 GGUF build of Phi-3-mini-128k-instruct from Hugging Face and
# store it under the file name given with -O.
!wget -O /content/models/phi3/phi-3-mini-128k.gguf \
https://huggingface.co/eccheng/Phi-3-mini-128k-instruct-Q4_0-GGUF/resolve/main/phi-3-mini-128k-instruct-q4_0.gguf


--2025-09-15 13:09:32--  https://huggingface.co/eccheng/Phi-3-mini-128k-instruct-Q4_0-GGUF/resolve/main/phi-3-mini-128k-instruct-q4_0.gguf
Resolving huggingface.co (huggingface.co)... 3.170.185.33, 3.170.185.25, 3.170.185.35, ...
Connecting to huggingface.co (huggingface.co)|3.170.185.33|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://cas-bridge.xethub.hf.co/xet-bridge-us/6668cbcb4a8417ea20302af9/92598a92fa413f3573df29b033caba4cfd9b8040a14d6b3a5c7cf7b7f7735dfe?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Content-Sha256=UNSIGNED-PAYLOAD&X-Amz-Credential=cas%2F20250915%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20250915T130933Z&X-Amz-Expires=3600&X-Amz-Signature=d6ebe56bf4fb41f958780e4f32e429545b1e1b4f64b7771974d8f0cd91eea874&X-Amz-SignedHeaders=host&X-Xet-Cas-Uid=public&response-content-disposition=inline%3B+filename*%3DUTF-8%27%27phi-3-mini-128k-instruct-q4_0.gguf%3B+filename%3D%22phi-3-mini-128k-instruct-q4_0.gguf%22%3B&x-id=GetObject&Expires=175794537

In [None]:
!pip install llama-cpp-python



In [None]:
from llama_cpp import Llama
import multiprocessing

MODEL_PATH = "/content/models/phi3/phi-3-mini-128k.gguf"

# Load the GGUF model with llama-cpp-python.
#
# Sampling and streaming options (repeat_penalty, stream, ...) belong to the
# per-call API, i.e. llm(prompt, repeat_penalty=..., stream=True). The constructor
# accepts unknown keywords without using them, so the `repetition_penalty=1.1`
# and `stream=True` that used to be passed here had no effect; they were removed
# to avoid suggesting otherwise.
llm = Llama(
    model_path=MODEL_PATH,
    n_ctx=8192,                            # context window in tokens; matches current chunking setup
    n_threads=multiprocessing.cpu_count(), # one thread per CPU reported by the OS
    n_batch=64,                            # prompt tokens processed per batch
    logits_all=False,
    embedding=False,
    verbose=False,
)

print("✅ Phi-3 Mini 128k Q4 loaded with llama-cpp-python (CPU-only)")


llama_context: n_ctx_per_seq (8192) < n_ctx_train (131072) -- the full capacity of the model will not be utilized


✅ Phi-3 Mini 128k Q4 loaded with llama-cpp-python (CPU-only)


In [None]:
def generate_with_rag(question, k=5, max_tokens=512, repeat_penalty=1.1):
    """
    Generate a detailed answer using Phi-3 Mini with FAISS RAG, streaming output.

    Relies on the notebook globals ``model`` (embedder), ``index`` (FAISS index),
    ``chunks`` (chunk dicts aligned with the index) and ``llm`` (llama-cpp model).

    Args:
        question: The user question; it is embedded for retrieval and inserted
            into the prompt.
        k: Maximum number of chunks to retrieve as context. Clamped to the
            number of indexed vectors.
        max_tokens: Upper bound on the number of generated tokens.
        repeat_penalty: Repetition penalty forwarded to the ``llm(...)`` call.
            It has to be passed per call; setting it on the ``Llama``
            constructor has no effect.

    Returns:
        The generated answer with surrounding whitespace stripped. The same
        text is printed piece by piece while it is being generated.
    """
    # 1. Embed query & retrieve top-k chunks
    k = min(k, index.ntotal)
    retrieved_chunks = []
    if k > 0:
        qv = model.encode([question], convert_to_numpy=True).astype('float32')
        _, I = index.search(qv, k)
        # FAISS reports "no neighbour" as label -1; chunks[-1] would silently
        # pull in the last chunk, so such slots are dropped.
        retrieved_chunks = [chunks[int(idx)]['text'] for idx in I[0] if idx >= 0]

    # 2. Collect retrieved context
    context = "\n\n".join(retrieved_chunks)

    # 3. Build prompt
    prompt = f"""You are a senior  QoS engineer.
Answer based ONLY on the provided context.
Try to answer in a detailed, step-by-step manner.

Context:
{context}

Question: {question}
Answer:"""

    # 4. Stream generation
    print("💬 Answer (streaming):\n")
    pieces = []
    stream = llm(
        prompt,
        max_tokens=max_tokens,
        stop=["</s>", "Question:"],
        repeat_penalty=repeat_penalty,
        stream=True,
    )
    for token in stream:
        t = token["choices"][0]["text"]
        print(t, end="", flush=True)
        pieces.append(t)  # joined once at the end instead of repeated string concatenation

    print("\n\n✅ Generation complete.")
    return "".join(pieces).strip()


In [None]:
# generate_with_rag already streams the answer to stdout while generating, so the
# returned text is kept in a variable instead of being printed a second time.
answer = generate_with_rag("explain to me in detail the LTE attachment Procudeure end-to-end, highlighting key messages on every interface and key messages exchanged", k=3)


💬 Answer (streaming):

 

Response: 
In the LTE attachment procedure, there are several key messages exchanged on different interfaces to establish a connection between the User Equipment (UE) and the network. Here is a step-by-step explanation:

1. Network Attachment Initiation: The attachment process begins when a UE attempts to register with the network for services that require registration. This process is initiated through the Attach procedure.

2. Initial Bearer Establishment: The Mobility Management Entity (MME), a critical component of the Evolved Packet Service Node (EPS), establishes a connection with the Packet Data Network Gateway (PDN) for IP connectivity. This is usually done by establishing a default EPS bearer.

3. PDN GW Address Allocation: During the attach process, the UE may request an IP address allocation. This can be done using IETF-based mechanisms supported by the PDN GW.

4. Message Exchange: The MME operator checks the Mobile Equipment Identity (ME ID) obtai