In [1]:
import os
from dotenv import load_dotenv
# Load variables from a local .env file into the process environment so that the
# following cells can read GEMINI_API_KEY and GROQ_API_KEY from os.environ.
load_dotenv()

True

In [4]:
# Validate that the API keys used by the cells below are present.
# Copying os.getenv(name) back into os.environ[name] changes nothing when the
# variable is set and raises an opaque "str expected, not NoneType" TypeError
# when it is not, so check explicitly and report which keys are missing.
missing_api_keys = [
    key_name
    for key_name in ("GEMINI_API_KEY", "GROQ_API_KEY")
    if key_name not in os.environ
]
if missing_api_keys:
    raise EnvironmentError(
        "Missing required environment variable(s): "
        + ", ".join(missing_api_keys)
        + ". Add them to your .env file or export them before running this notebook."
    )

In [7]:
from langchain_groq import ChatGroq

# Settings for the Groq-hosted chat model; temperature 0 requests the least
# random sampling, and the key is read from the environment.
groq_options = {
    "model": "mixtral-8x7b-32768",
    "temperature": 0,
    "max_tokens": None,
    "timeout": None,
    "max_retries": 2,
    "api_key": os.environ['GROQ_API_KEY'],
}

# Chat model handle used by later cells under the name `llm`.
llm = ChatGroq(**groq_options)

In [None]:
from langchain.embeddings import OllamaEmbeddings

In [None]:
from langchain_google_genai import ChatGoogleGenerativeAI

# Settings for the Gemini chat model; temperature 0 requests the least random
# sampling, and the key is read from the environment.
gemini_options = {
    "model": "gemini-1.5-pro",
    "temperature": 0,
    "max_tokens": None,
    "timeout": None,
    "max_retries": 2,
    "api_key": os.environ["GEMINI_API_KEY"],
}

# Rebinds `llm`, replacing whichever chat model an earlier cell assigned.
llm = ChatGoogleGenerativeAI(**gemini_options)

In [10]:
# Load data/paper.pdf with LangChain's PyPDFLoader, collecting every item the
# loader yields into `pages`.
from langchain_community.document_loaders import PyPDFLoader

loader = PyPDFLoader("data/paper.pdf")
pages = []
# Top-level `async for` is not valid in a plain Python script; this cell relies
# on the notebook kernel accepting asynchronous code at cell level.
async for page in loader.alazy_load():
    pages.append(page)

In [17]:
import pdfplumber
# Open the same PDF with pdfplumber and extract a table from its first page.
# NOTE(review): `table` is not displayed here and no later cell visible in this
# notebook reads it before reassigning the name.
with pdfplumber.open("data/paper.pdf") as pdf:
    first_page = pdf.pages[0]
    table = first_page.extract_table()

In [19]:
import os
import uuid

from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain.prompts import PromptTemplate
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.vectorstores import Chroma
from langchain_text_splitters import CharacterTextSplitter
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains import create_retrieval_chain

# Baseline retrieval-augmented QA over data/paper.pdf:
# PyPDFLoader text -> sentence-ish chunks -> Gemini embeddings -> Chroma -> Gemini answer.

# Load the models
llm = ChatGoogleGenerativeAI(
    model="gemini-1.5-pro",
    temperature=0,
    max_tokens=None,
    timeout=None,
    max_retries=2,
    api_key=os.environ["GEMINI_API_KEY"],
)
embeddings = GoogleGenerativeAIEmbeddings(
    model="models/embedding-001",
    google_api_key=os.environ["GEMINI_API_KEY"],
)

# Load the PDF and create chunks (split on "." into pieces of roughly 500
# characters with a 50-character overlap)
loader = PyPDFLoader("data/paper.pdf")
text_splitter = CharacterTextSplitter(
    separator=".",
    chunk_size=500,
    chunk_overlap=50,
    length_function=len,
    is_separator_regex=False,
)
pages = loader.load_and_split(text_splitter)

# Turn the chunks into embeddings and store them in Chroma.
# A fresh collection name is used on every run: without one, all
# Chroma.from_documents calls in the same kernel session write to the same
# default collection, so re-running this cell (or running the later pipeline
# cells) would mix duplicate and unrelated chunks into the retriever.
vectordb = Chroma.from_documents(
    pages,
    embeddings,
    collection_name=f"paper-pypdf-{uuid.uuid4().hex}",
)

# Configure Chroma as a retriever with top_k=5
retriever = vectordb.as_retriever(search_kwargs={"k": 5})

# Create the retrieval chain; {context} receives the retrieved chunks and
# {input} the user question.
template = """
You are a helpful AI assistant.
Answer based on the context provided. 
context: {context}
input: {input}
answer:
"""
prompt = PromptTemplate.from_template(template)
combine_docs_chain = create_stuff_documents_chain(llm, prompt)
retrieval_chain = create_retrieval_chain(retriever, combine_docs_chain)

# Invoke the retrieval chain
response = retrieval_chain.invoke({"input": "can you recreate the tables present and show me as it is"})

# Print the answer to the question
print(response["answer"])

Based on the provided text snippets, I can partially recreate the structure of the tables, but the content is missing.  The text only describes *what* the tables are about, not the data *within* them.

**Table I:**

This table is referenced as containing information gathered from the developed hardware used on 10 people.  The data includes:

* Heart Rate
* Blood Oxygen Levels
* Limb Movement
* Body Temperature

Therefore, Table I would likely look something like this (with example data, as the real data isn't provided):

| Person | Heart Rate (bpm) | Blood Oxygen (%) | Limb Movement (description) | Body Temperature (°C) |
|---|---|---|---|---|
| 1 | 72 | 98 | Resting | 36.7 |
| 2 | 80 | 99 | Shifting | 36.5 |
| ... | ... | ... | ... | ... |
| 10 | 65 | 97 | Resting | 37.0 |


**Table II:**

This table is described as a comparison of existing studies with health vitals versus the proposed solution.  The only column header provided is "Reference No".  It's impossible to reconstruct the o

In [None]:
import os
import uuid

import pdfplumber
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain.prompts import PromptTemplate
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.vectorstores import Chroma
from langchain_text_splitters import CharacterTextSplitter
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains import create_retrieval_chain
from langchain.schema import Document

# Retrieval-augmented QA over data/paper.pdf using pdfplumber, so that table
# cells are indexed alongside the running text.

# Load the models
llm = ChatGoogleGenerativeAI(
    model="gemini-1.5-pro",
    temperature=0,
    max_tokens=None,
    timeout=None,
    max_retries=2,
    api_key=os.environ["GEMINI_API_KEY"],
)
embeddings = GoogleGenerativeAIEmbeddings(
    model="models/embedding-001",
    google_api_key=os.environ["GEMINI_API_KEY"],
)

# Load the PDF using pdfplumber for table and text extraction.
# `all_text` is the corpus that gets chunked; `pages` keeps one string per page
# (page text followed by that page's tables) for inspection.
all_text = ""
pages = []
with pdfplumber.open("data/paper.pdf") as pdf:
    for page in pdf.pages:
        # Guard against a page with no extractable text (None or empty string).
        text = page.extract_text() or ""
        all_text += text + "\n"

        # Collect this page's tables locally. Reading the inner-loop variable
        # after the loop would fail on a first page without tables and would
        # otherwise reuse a table from an earlier page.
        page_table_texts = []
        for table in page.extract_tables():
            # Rows containing a None cell are dropped, as before.
            filtered_table = [row for row in table if None not in row]
            # Tab-separated cells, one table row per line.
            table_text = "\n".join("\t".join(row) for row in filtered_table)
            all_text += "\n" + table_text + "\n"
            page_table_texts.append(table_text)

        # Add page content (text plus every table found on the page) to the list
        pages.append("\n".join([text, *page_table_texts]))

# Now, create chunks and embeddings (split on "." into pieces of roughly 500
# characters with a 50-character overlap)
text_splitter = CharacterTextSplitter(
    separator=".",
    chunk_size=500,
    chunk_overlap=50,
    length_function=len,
    is_separator_regex=False,
)
chunks = text_splitter.split_text(all_text)

# Convert text chunks to Document objects
documents = [Document(page_content=chunk, metadata={}) for chunk in chunks]

# Turn the chunks into embeddings and store them in Chroma.
# A fresh collection name is used on every run: without one, all
# Chroma.from_documents calls in the same kernel session write to the same
# default collection, so chunks indexed by other cells or by earlier runs of
# this cell would be retrieved as well.
vectordb = Chroma.from_documents(
    documents,
    embeddings,
    collection_name=f"paper-plumber-{uuid.uuid4().hex}",
)

# Configure Chroma as a retriever with top_k=5
retriever = vectordb.as_retriever(search_kwargs={"k": 5})

# Create the retrieval chain; {context} receives the retrieved chunks and
# {input} the user question.
template = """
You are a helpful AI assistant.
Answer based on the context provided. 
context: {context}
input: {input}
answer:
"""
prompt = PromptTemplate.from_template(template)
combine_docs_chain = create_stuff_documents_chain(llm, prompt)
retrieval_chain = create_retrieval_chain(retriever, combine_docs_chain)

# Invoke the retrieval chain
response = retrieval_chain.invoke({"input": "Can you recreate the tables present and show me as it is"})

# Print the answer to the question
print(response["answer"])


TABLE II.  COMPARISION TABLE OF EXISTING STUDIES WITH HEALTH VITALS VS PROPOSED SOLUTION . 

| Reference No. | Dataset | Heart rate | Sp O2 | Body Temp | Sleep hours | Limb Accuracy |
|---|---|---|---|---|---|---|
| Ref 5 | Private | α | α | α | Nan | Nan |
| Ref 6 | Private | α | α |  | Nan | Nan |
| Ref 7 | Private | α | α |  | Nan | Nan |
| Ref 8 | Private | α |  |  | Nan | Nan |
| Ref 9 | Private | α | α |  | Nan | Nan |
| Ref 10 | Private | α | α | α | α | Nan |
| Proposed solution <br> SaYo Pillow | SaYo Pillow | α | α | α | α | α | 96.7 % |


TABLE IV. (Repeated - Identical to TABLE II)  COMPARISION TABLE OF EXISTING STUDIES WITH HEALTH VITALS VS PROPOSED SOLUTION . 

| Reference No. | Dataset | Heart rate | Sp O2 | Body Temp | Sleep hours | Limb Accuracy |
|---|---|---|---|---|---|---|
| Ref 5 | Private | α | α | α | Nan | Nan |
| Ref 6 | Private | α | α |  | Nan | Nan |
| Ref 7 | Private | α | α |  | Nan | Nan |
| Ref 8 | Private | α |  |  | Nan | Nan |
| Ref 9 | Private | α |

In [None]:
import os
import uuid

import pdfplumber
from langchain_groq import ChatGroq
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain.prompts import PromptTemplate
from langchain_community.vectorstores import Chroma
from langchain_text_splitters import CharacterTextSplitter
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains import create_retrieval_chain
from langchain.schema import Document

# Retrieval-augmented QA over data/paper.pdf using pdfplumber for extraction and
# a Groq-hosted chat model for answering.

# Initialize the Groq LLM
llm = ChatGroq(
    model="mixtral-8x7b-32768",
    temperature=0,
    max_tokens=None,
    timeout=None,
    max_retries=2,
    api_key=os.environ["GROQ_API_KEY"],
)

# No Groq embedding model is configured in this notebook, so define the same
# Google embedding model the other pipeline cells use. Defining it here keeps
# the cell from depending on an `embeddings` variable left over from another cell.
embeddings = GoogleGenerativeAIEmbeddings(
    model="models/embedding-001",
    google_api_key=os.environ["GEMINI_API_KEY"],
)

# Load the PDF using pdfplumber for table and text extraction.
# `all_text` is the corpus that gets chunked; `pages` keeps one string per page
# (page text followed by that page's tables) for inspection.
all_text = ""
pages = []
with pdfplumber.open("data/paper.pdf") as pdf:
    for page in pdf.pages:
        # Guard against a page with no extractable text (None or empty string).
        text = page.extract_text() or ""
        all_text += text + "\n"

        # Collect this page's tables locally. Reading the inner-loop variable
        # after the loop would fail on a first page without tables and would
        # otherwise reuse a table from an earlier page.
        page_table_texts = []
        for table in page.extract_tables():
            # Rows containing a None cell are dropped, as before.
            filtered_table = [row for row in table if None not in row]
            # Tab-separated cells, one table row per line.
            table_text = "\n".join("\t".join(row) for row in filtered_table)
            all_text += "\n" + table_text + "\n"
            page_table_texts.append(table_text)

        # Add page content (text plus every table found on the page) to the list
        pages.append("\n".join([text, *page_table_texts]))

# Create text chunks (split on "." into pieces of roughly 500 characters with a
# 50-character overlap)
text_splitter = CharacterTextSplitter(
    separator=".",
    chunk_size=500,
    chunk_overlap=50,
    length_function=len,
    is_separator_regex=False,
)
chunks = text_splitter.split_text(all_text)

# Convert chunks into Document objects
documents = [Document(page_content=chunk, metadata={}) for chunk in chunks]

# Generate embeddings and store them in Chroma.
# A fresh collection name is used on every run: without one, all
# Chroma.from_documents calls in the same kernel session write to the same
# default collection, so chunks indexed by other cells or by earlier runs of
# this cell would be retrieved as well.
vectordb = Chroma.from_documents(
    documents,
    embeddings,
    collection_name=f"paper-groq-{uuid.uuid4().hex}",
)

# Configure Chroma as a retriever with top_k=5
retriever = vectordb.as_retriever(search_kwargs={"k": 5})

# Create the retrieval chain; {context} receives the retrieved chunks and
# {input} the user question.
template = """
You are a helpful AI assistant.
Answer based on the context provided. 
context: {context}
input: {input}
answer:
"""
prompt = PromptTemplate.from_template(template)
combine_docs_chain = create_stuff_documents_chain(llm, prompt)
retrieval_chain = create_retrieval_chain(retriever, combine_docs_chain)

# Query the chain
response = retrieval_chain.invoke({"input": "Can you recreate the tables present and show me as it is"})

# Print the answer
print(response["answer"])


Sure, here are the tables as they were provided:

Table I. Data Acquisition

| Data Type | Variables |
| --- | --- |
| Heart rate | BPM |
| Blood oxygen levels | SpO2 |
| Limb movement | Accuracy (%) |
| Body temperature | Temperature (°C) |

Table II. Comparison Table of Existing Studies with Health Vitals vs Proposed Solution

| Reference No. | Dataset | Heart rate | SpO2 | Body Temp | Sleep hours | Limb Accuracy |
| --- | --- | --- | --- | --- | --- | --- |
| Ref 5 | Private | ✓ | ✓ | ✓ | Nan |  |
| Ref 6 | Private | ✓ | ✓ | ✓ | Nan |  |
| Ref 7 | Private | ✓ | ✓ | ✓ | Nan |  |
| Ref 8 | Private | ✓ |  |  | Nan |  |
| Ref 9 | Private | ✓ | ✓ | ✓ | Nan |  |
| Ref 10 | Private | ✓ | ✓ | ✓ | ✓ | ✓ |
| Proposed solution | SaYo Pillow | ✓ | ✓ | ✓ | ✓ | 96.7% |

Note: In the tables, "Nan" represents missing data, and "✓" indicates that the data is present. The proposed solution, SaYo Pillow, has been tested and validated using the recorded database and a similar existing database called t