In [1]:
import os
import json

def load_json_data(root_folder):
    """Recursively load every ``*.json`` file found under ``root_folder``.

    Files are visited in a deterministic order (directories and file names
    sorted), so the returned list is reproducible between runs. A file that
    cannot be opened, decoded or parsed is reported on stdout and skipped;
    it never aborts the rest of the load.

    Args:
        root_folder: Directory to walk. A missing directory yields ``[]``
            because ``os.walk`` ignores listing errors by default.

    Returns:
        list: One parsed JSON value per successfully loaded file.
    """
    data_list = []
    for subdir, dirs, files in os.walk(root_folder):
        # Sorting in place makes os.walk descend in a stable order instead of
        # whatever order the filesystem happens to return.
        dirs.sort()
        for file in sorted(files):
            if not file.endswith('.json'):
                continue
            try:
                # JSON is UTF-8 by specification; relying on the locale default
                # breaks non-ASCII content on platforms where it is not UTF-8.
                # open() is inside the try so unreadable files are skipped too.
                with open(os.path.join(subdir, file), 'r', encoding='utf-8') as f:
                    data_list.append(json.load(f))
            except Exception as e:
                print(f"Error loading {file}: {e}")
    return data_list


# def load_json_data(root_folder):
#     data_list = []
#     count = 0
#     for subdir, _, files in os.walk(root_folder):
#         for file in files:
#             if file.endswith('.json'):
#                 with open(os.path.join(subdir, file), 'r') as f:
#                     try:
#                         data = json.load(f)
#                         data_list.append(data)
#                         count += 1
#                     except Exception as e:
#                         print(f"Error loading {file}: {e}")
#     print(f"[INFO] Loaded {count} JSON documents.")
#     return data_list



In [2]:
def flatten_json_tree(tree, parent_key=""):
    """Flatten a nested dict into a list of " → "-joined key paths.

    Only key names are recorded; leaf values are dropped. Keys that start
    with "input" are skipped at every nesting level. A value that is a
    non-empty dict is descended into; anything else (including an empty
    dict) ends the path.

    Args:
        tree: Mapping to flatten.
        parent_key: Path prefix prepended to every emitted path when non-empty.

    Returns:
        list[str]: Key paths in depth-first, insertion order.
    """

    def _walk(node, prefix):
        for name, child in node.items():
            if name.startswith("input"):
                # Input keys are handled separately by the caller.
                continue
            path = f"{prefix} → {name}" if prefix else name
            is_branch = isinstance(child, dict) and bool(child)
            if is_branch:
                yield from _walk(child, path)
            else:
                yield path

    return list(_walk(tree, parent_key))


In [3]:
from sentence_transformers import SentenceTransformer
import numpy as np
import faiss

# Sentence-embedding model; used here to embed the corpus.
model = SentenceTransformer('all-MiniLM-L6-v2')

# Load and flatten
data_list = load_json_data("samples")
all_sentences = []
# metadata[i] is the "input2" value of the document that produced
# all_sentences[i]; the two lists are kept the same length.
metadata = []

for data in data_list:
    # json.load returns a list/str/number for a JSON file whose top level is
    # not an object. Those have neither .items() nor .get(), so skip them
    # rather than crash the whole indexing run.
    if not isinstance(data, dict):
        print(f"Skipping non-object JSON document of type {type(data).__name__}")
        continue
    flat_sentences = flatten_json_tree(data)
    all_sentences.extend(flat_sentences)
    metadata.extend([data.get("input2", "")] * len(flat_sentences))  # Or any relevant note

# Embedding
embeddings = model.encode(all_sentences, show_progress_bar=True)


Batches:   0%|          | 0/160 [00:00<?, ?it/s]

In [4]:
# Build a flat L2 FAISS index whose width matches the embedding vectors,
# then add every corpus embedding to it.
dimension = embeddings.shape[1]
embedding_matrix = np.array(embeddings)
index = faiss.IndexFlatL2(dimension)
index.add(embedding_matrix)


In [5]:
def search(query, k=5):
    """Return the indexed sentences nearest to ``query``.

    Encodes ``query`` with the module-level ``model``, looks up its ``k``
    nearest neighbours in the module-level FAISS ``index``, and maps each hit
    back to its sentence and metadata.

    Args:
        query: Query text to embed.
        k: Maximum number of neighbours to return.

    Returns:
        list[tuple]: ``(sentence, metadata)`` pairs in the order FAISS
        returned them. Fewer than ``k`` pairs are returned when the index
        holds fewer than ``k`` vectors.
    """
    query_vec = model.encode([query])
    _distances, neighbor_ids = index.search(query_vec, k)
    # FAISS pads the id row with -1 when it cannot find k neighbours. Used as
    # a Python index, -1 would silently return the *last* sentence as a hit.
    return [
        (all_sentences[i], metadata[i])
        for i in neighbor_ids[0]
        if i >= 0
    ]


In [9]:
import requests
import json

def llama2_ollama_generate(prompt, model_name="gemma:2b", timeout=300):
    """Generate a completion for ``prompt`` from the local Ollama server.

    Posts to ``http://localhost:11434/api/generate`` with streaming enabled,
    reads the newline-delimited JSON chunks, and concatenates their
    ``response`` fields.

    Args:
        prompt: Text sent as the ``prompt`` field of the request.
        model_name: Ollama model tag to query. Defaults to ``"gemma:2b"``,
            the value this function previously hard-coded.
        timeout: Seconds passed to ``requests.post`` as its timeout, so an
            unreachable or stalled server cannot block the notebook forever.

    Returns:
        str: The concatenated generated text (empty if no chunk carried a
        ``response`` field).

    Raises:
        requests.RequestException: On connection failure, timeout, or a
            non-success HTTP status.
        RuntimeError: If a streamed chunk contains an ``error`` field.
    """
    # The context manager releases the streamed connection even when the
    # loop exits early via an exception.
    with requests.post(
        "http://localhost:11434/api/generate",
        json={"model": model_name, "prompt": prompt},
        stream=True,
        timeout=timeout,
    ) as response:
        # Surface HTTP failures (e.g. unknown model) instead of returning "".
        response.raise_for_status()

        pieces = []
        for line in response.iter_lines():
            if not line:
                continue
            data = json.loads(line.decode('utf-8'))
            if 'error' in data:
                raise RuntimeError(f"Ollama error: {data['error']}")
            if 'response' in data:
                pieces.append(data['response'])

    return "".join(pieces)


In [10]:
# Smoke-test the generation helper with a sample question and show the reply.
question = "What are symptoms of heart failure?"
answer = llama2_ollama_generate(question)
print(answer)


Sure, here are the symptoms of heart failure:

* Shortness of breath
* Fatigue
* Swelling in the legs, ankles, and abdomen
* Increased heart size and weight
* Irregular heartbeat
* Swelling in the face, ears, nose, and throat
* Sudden weight loss
* Nausea and vomiting
* Swelling in the hands, feet, and face
* Wheezing or coughing
* Chest pain or pressure
