# Components

In [20]:
import os
from langchain_ollama import OllamaEmbeddings
import faiss
from langchain_community.docstore.in_memory import InMemoryDocstore
from langchain_community.vectorstores import FAISS

# Embedding model served through Ollama; reused below both to size the index
# and as the vector store's embedding function.
embeddings = OllamaEmbeddings(model="mxbai-embed-large")

# Embed a throwaway query once to discover the embedding dimensionality,
# then create a flat L2 FAISS index of that width.
d = len(embeddings.embed_query("hello world"))
index = faiss.IndexFlatL2(d)

# Empty in-memory vector store (no documents, no id mapping yet).
# NOTE(review): `vector_store` is rebound by FAISS.load_local(...) in a later
# cell, so this empty store appears to be unused afterwards — confirm.
vector_store = FAISS(
    embedding_function=embeddings,
    index=index,
    docstore=InMemoryDocstore(),
    index_to_docstore_id={}
)

In [21]:
from openai import OpenAI

# Module-level OpenAI client used by ask_gpt. The API key is read from the
# OPENAI_API_KEY environment variable instead of being hardcoded; os.getenv
# yields None when the variable is unset.
# NOTE(review): confirm how the client behaves when the key is missing.
client = OpenAI(
    api_key=os.getenv("OPENAI_API_KEY"),
    # base_url="https://api.llama-api.com/"
)

def ask_gpt(prompt: str, model: str = "gpt-4o") -> str:
    """Send ``prompt`` as a single user message and return the reply text.

    Args:
        prompt: Text sent as the content of the only (user) message.
        model: Chat model identifier passed through to the API.

    Returns:
        The text content of the first choice in the response.

    Raises:
        ValueError: If the response contains no choices, or the first choice
            carries no text content. This keeps the ``-> str`` contract:
            callers never receive ``None`` in place of a string.
    """
    response = client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
    )

    if not response.choices:
        raise ValueError(f"Model {model!r} returned no choices")

    content = response.choices[0].message.content
    # The message content is nullable; fail loudly here rather than letting
    # None propagate to callers that expect a string.
    if content is None:
        raise ValueError(f"Model {model!r} returned a message without text content")
    return content

In [22]:
from pypdf import PdfReader

# Project root holding the reference PDF and the saved FAISS index. It defaults
# to the original location; set the SURGERYLLM_DIR environment variable to run
# the notebook from another checkout or machine without editing this cell.
project_dir = os.getenv(
    "SURGERYLLM_DIR",
    "/home/ngjabach/Documents/State-of-the-Art-Papers/SurgeryLLM (VIET + DONE)",
)

# PDF reader over the lab-values document; its page text is used when the
# generation prompt is built.
loader = PdfReader(os.path.join(project_dir, "ExternalDoc", "labval.pdf"))
faiss_index_path = os.path.join(project_dir, "faiss_index")

# SECURITY: allow_dangerous_deserialization=True permits pickle-based loading
# of the saved store, which can execute arbitrary code. Only point
# faiss_index_path at an index you created yourself or otherwise trust.
# This rebinds `vector_store`, replacing any store built in earlier cells.
vector_store = FAISS.load_local(faiss_index_path, embeddings, allow_dangerous_deserialization=True)

# Similarity-search retriever returning the 20 closest entries per query.
retriever = vector_store.as_retriever(search_type="similarity", search_kwargs={"k": 20})

In [23]:
# Fixed instruction block placed after the extracted lab-value text: it states
# the task, shows one example record, and pins the expected response format.
PROMPT_SUFFIX = '''
Your job is to generate another patient record using those lab values, but tweak some to make them abnormal, even out of healthy range.
Write the answer separate to the record so we could double check our model's performance.
Here is an example of a patient record:

Patient: John Smith
Age: 58, Male
Medical History: Diabetes; multivessel coronary artery disease with left anterior descending (LAD) involvement
Vital Signs: BP 140/85 mmHg, HR 80 bpm
Laboratory Findings: Hemoglobin 9.0 g/dL
Preoperative Workup: Basic clinical assessment, coronary angiography

Your response should be of format like this:
Record:...
Anomalies:...
'''

# Text of every PDF page, in page order, concatenated with no separator.
lab_values_text = "".join(pdf_page.extract_text() for pdf_page in loader.pages)

prompt = "Given the lab values: " + lab_values_text + PROMPT_SUFFIX

In [24]:
def logging(cnt: int) -> None:
    """Generate one synthetic test case and save it to ``Task1/Test{cnt}.txt``.

    Sends the module-level ``prompt`` to ``ask_gpt`` and writes the reply to
    the output file as UTF-8 text, overwriting any existing file of that name.

    The reply is fetched *before* the file is opened, so a failed request
    neither truncates a previously generated file nor leaves an empty one
    behind. The output directory is created when it does not exist yet.

    NOTE: inside the notebook this function's name shadows the
    standard-library ``logging`` module; it is kept for existing callers.

    Args:
        cnt: Index of the test case; used in the progress message and the
            output file name.
    """
    print(f"Gen Test {cnt}")

    answer = ask_gpt(prompt)

    output_path = f"Task1/Test{cnt}.txt"
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    with open(output_path, "w", encoding="utf-8") as file:
        file.write(answer)

# Number of synthetic test cases to generate; each iteration calls logging()
# once with the indices 0 .. NUM_TESTS - 1.
NUM_TESTS = 10

for test in range(NUM_TESTS):
    logging(test)

Gen Test 0
Gen Test 1
Gen Test 2
Gen Test 3
Gen Test 4
Gen Test 5
Gen Test 6
Gen Test 7
Gen Test 8
Gen Test 9
