In [8]:
import os
from pathlib import Path
from typing import Annotated, TypedDict

from dotenv import load_dotenv

# The .env file is expected two directories above the current working
# directory (i.e. in the project root when the notebook runs from a
# second-level sub-folder).
env_path = Path.cwd().resolve().parents[1] / '.env'

# Load the variables defined in that file into the process environment.
load_dotenv(dotenv_path=env_path)


def _require_env(name: str) -> str:
    """Return the value of environment variable ``name``.

    Args:
        name: Name of the environment variable to read.

    Returns:
        The non-empty value of the variable.

    Raises:
        ValueError: If the variable is missing or set to an empty string
            (an empty key would only fail later, inside the API client).
    """
    value = os.getenv(name)
    if not value:
        raise ValueError(f"{name} not set in .env")
    return value


cohere_api_key = _require_env('COHERE_API_KEY')
groq_api_key = _require_env('GROQ_API_KEY')

# Re-export explicitly so libraries that read os.environ see the keys.
os.environ['COHERE_API_KEY'] = cohere_api_key
os.environ['GROQ_API_KEY'] = groq_api_key


In [9]:
import os

# Id of the user whose documents are handled; hard-coded for now, to be
# supplied by the API later.
user_id = 1

# The media folder is resolved relative to the current working directory.
base_dir = os.getcwd()

# <cwd>/media/documents/user_<id>
media_subdirs = ("media", "documents", f"user_{user_id}")
media_path = os.path.join(base_dir, *media_subdirs)

print("Full path:", media_path)


Full path: /home/matthew/Desktop/med-assistant-project/rag/notebooks/media/documents/user_1


In [10]:
# Requires the splitter package (if not installed yet):
# pip install -qU langchain-text-splitters
from pathlib import Path

from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter

print(media_path)

# Locate the sample PDF relative to the project root (two levels above the
# working directory, the same convention used to find the .env file) instead
# of hard-coding an absolute, machine-specific path.
project_root = Path.cwd().resolve().parents[1]
pdf_path = (
    project_root
    / "backend"
    / "media"
    / "documents"
    / f"user_{user_id}"
    / "Lab0_Laravel11.pdf"
)
if not pdf_path.is_file():
    # Fail early with the resolved path so a wrong working directory is obvious.
    raise FileNotFoundError(f"Sample PDF not found: {pdf_path}")

# For testing in the notebook: load the PDF, one Document per page.
loader = PyPDFLoader(str(pdf_path))
pages = loader.load()

# Initialize the splitter
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1800,         # max characters per chunk
    chunk_overlap=300,       # overlap between consecutive chunks
    length_function=len,     # measure chunk length in characters
    separators=["\n\n", "\n", " ", ""],  # tried in order, coarse to fine
    is_separator_regex=False
)

docs = text_splitter.split_documents(pages)

# Inspect a single chunk (index 7) as a sanity check of the split.
print(docs[7])


/home/matthew/Desktop/med-assistant-project/rag/notebooks/media/documents/user_1
page_content='7 
Zintegrowany Program Rozwoju Politechniki Lubelskiej – część druga 
 
Laravel Breeze  domyślnie generuje strony za pomocą wi doków Blade, ale m ożna 
skonfigurować go do wykorzystania widoków wykorzystujących Vue, React lub Inertia. 
Najprostsza konfiguracja korzysta z domyślego silnika widoków Blade.  Ten silnik oraz 
Breeze wskazaliśmy przy tworzeniu nowego projektu. 
 
Aby poprawnie działało uwierzytelnienie użytkownika, należy wykorzystać (utworzone już 
w tym celu przez Laravel) elementy związane z zapisem danych użytkownika w bazie danych 
(domyślnie MySQL) . W celu ustawienia połączenia do bazy danych, w pliku .env 
(w głównym folderze projektu) należy podać dane autoryzujące dostęp do bazy danych. Dla 
MySQL domyślne ustawienia w pliku .env (DB_CONNECTION, DB_HOST, itd.) są już 
gotowe (Rys. 12.8). 
 
 
Rys. 12.8. Domyślne ustawienia połączenia do bazy o nazwie lab12 na serwerze My

In [19]:
# Show which class the splitter produced for its chunks.
second_chunk = docs[1]
type(second_chunk)

langchain_core.documents.base.Document

In [11]:
import cohere

# No key is passed explicitly; the client is expected to pick up the Cohere
# API key from the environment (exported in the setup cell).
co = cohere.ClientV2()

# Embed the chunk's text only. Formatting the Document object itself
# (f"{docs[7]}") would embed its string form -- "page_content='...'" plus
# metadata -- instead of the actual text.
text_inputs = [
    {
        "content": [
            {"type": "text", "text": docs[7].page_content}
        ]
    },
]

response = co.embed(
    inputs=text_inputs,
    model="embed-v4.0",
    # The chunk is a document to be indexed for retrieval, so use the
    # document-side input type rather than "classification".
    input_type="search_document",
    embedding_types=["float"],
)
print(response)




In [4]:
from groq import Groq

# No key is passed explicitly; the client is expected to read the Groq API
# key from the environment (exported in the setup cell).
client = Groq()

chat_completion = client.chat.completions.create(
    messages=[
        # Optional system message: sets the assistant's behavior and gives
        # instructions that apply throughout the conversation.
        {
            "role": "system",
            "content": "You are a doctor, explain things thoroughly, try to analyze which illness could the patient suffer from. and what's the best solution to resolve it."
        },
        # User message the assistant responds to.
        {
            "role": "user",
            "content": "Teach me how to learn quicker",
        }
    ],

    # The language model which will generate the completion.
    model="llama-3.3-70b-versatile"
)

# No `n` is passed, so a single completion is expected; print that one choice.
# (The former "Given 3 choices ..." header did not match what was requested.)
print(chat_completion.choices[0].message.content)

Given 3 choices with higher randomness: 

Learning how to learn quicker is a valuable skill that can benefit you in many areas of life. Here are some effective strategies to help you learn quicker:

1. **Set Clear Goals**: Before starting to learn, define what you want to achieve. Setting specific, measurable, and attainable goals will help you stay focused and motivated.
2. **Use Active Recall**: Instead of just re-reading your notes, actively try to recall the information from memory. Quiz yourself, test your knowledge, and try to summarize what you've learned in your own words.
3. **Spaced Repetition**: Review material at increasingly longer intervals to help solidify it in your long-term memory. This technique can help you retain information more effectively.
4. **Chunking**: Break down complex information into smaller, manageable chunks. Organize these chunks into categories, create concept maps, or use mind maps to visualize relationships between ideas.
5. **Mnemonics**: Use asso

In [None]:
# Rerank candidate passages (plain text, not embeddings) against a query.
co_rerank = cohere.ClientV2()

rerank_query = "What is the capital of the United States?"

# Dedicated names are used here so the `docs` chunks and the embed
# `response` produced by earlier cells are not overwritten.
rerank_candidates = [
    "Carson City is the capital city of the American state of Nevada.",
    "The Commonwealth of the Northern Mariana Islands is a group of islands in the Pacific Ocean. Its capital is Saipan.",
    "Capitalization or capitalisation in English grammar is the use of a capital letter at the start of a word. English usage varies from capitalization in other languages.",
    "Washington, D.C. (also known as simply Washington or D.C., and officially as the District of Columbia) is the capital of the United States. It is a federal district.",
    "Capital punishment has existed in the United States since beforethe United States was a country. As of 2017, capital punishment is legal in 30 of the 50 states.",
]

rerank_response = co_rerank.rerank(
    model="rerank-v3.5",
    query=rerank_query,
    documents=rerank_candidates,
    top_n=3,  # keep only the three best-scoring passages
)
print(rerank_response)


In [20]:
# Disabled experiment: the whole cell is a bare string literal, so none of the
# code inside it runs; the notebook merely echoes the string as cell output.
# The snippet sketches loading a web page, splitting it by token count and
# indexing the chunks in FAISS.
# NOTE(review): `embedding=co` passes the raw Cohere client; FAISS.from_documents
# presumably expects a LangChain embeddings object (such as CohereEmbeddings) --
# confirm before re-enabling this code.
"""
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import FAISS

urls = [
    "https://www.matthewlozinski.com/",
]

docs = [WebBaseLoader(url).load() for url in urls]
docs_list = [item for sublist in docs for item in sublist]

text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=512, chunk_overlap=0
)
doc_splits = text_splitter.split_documents(docs_list)

vectorstore = FAISS.from_documents(
    documents=doc_splits,
    embedding=co,
)

vectorstore_retriever = vectorstore.as_retriever()
"""

'\nfrom langchain.text_splitter import RecursiveCharacterTextSplitter\nfrom langchain_community.document_loaders import WebBaseLoader\nfrom langchain_community.vectorstores import FAISS\n\nurls = [\n    "https://www.matthewlozinski.com/",\n]\n\ndocs = [WebBaseLoader(url).load() for url in urls]\ndocs_list = [item for sublist in docs for item in sublist]\n\ntext_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(\n    chunk_size=512, chunk_overlap=0\n)\ndoc_splits = text_splitter.split_documents(docs_list)\n\nvectorstore = FAISS.from_documents(\n    documents=doc_splits,\n    embedding=co,\n)\n\nvectorstore_retriever = vectorstore.as_retriever()\n'

In [None]:
import os

# Path to shared documents inside the container
documents_path = "/app/shared_documents/user_1"

# List the files. The path refers to a location inside the container, so it
# may be absent elsewhere; fall back to an empty list instead of raising.
# Sorting makes the listing deterministic (os.listdir order is arbitrary).
if os.path.isdir(documents_path):
    files = sorted(os.listdir(documents_path))
else:
    print(f"Directory not found: {documents_path}")
    files = []

# Print the list of files
print(files)

In [None]:
# Imported here so the cell runs on its own: TypedDict must be the one from
# `typing`, and `add_messages` was not imported anywhere in the notebook.
from typing import Annotated, TypedDict

from langgraph.graph.message import add_messages


class State(TypedDict):
    """State schema with a single ``messages`` channel.

    ``messages`` is a list annotated with LangGraph's ``add_messages``
    reducer, which is intended to merge new messages into the existing list
    instead of replacing it.
    """

    messages: Annotated[list, add_messages]

In [7]:
import faiss
from langchain_cohere import CohereEmbeddings
from langchain_community.docstore.in_memory import InMemoryDocstore
from langchain_community.vectorstores import FAISS

embeddings = CohereEmbeddings(model="embed-v4.0")

# Embed a throwaway query once to learn the vector size the index must hold.
probe_vector = embeddings.embed_query("hello world")
index = faiss.IndexFlatL2(len(probe_vector))

# The docstore is kept in RAM only; persisting it (e.g. in the Postgres
# container's volume) is left for later.
vector_store = FAISS(
    index=index,
    embedding_function=embeddings,
    index_to_docstore_id={},
    docstore=InMemoryDocstore(),
)

In [None]:

from IPython.display import Image, display

#display(Image(graph.get_graph().draw_mermaid_png()))