# RAG Prototype + Fine Tuning Phi3

# Data Preprocessing: Parsing PDF textbook to markdown

## Option 1: LlamaParse - Parsing with parsing intructions

### Install & Import libraries

In [None]:
!pip install llama-index
!pip install llama-index-core
!pip install llama-index-embeddings-openai
!pip install llama-parse
!pip install nest_asyncio



### Setup llama parse and openAI API keys

In [None]:
from llama_parse import LlamaParse
from llama_index.core import SimpleDirectoryReader, Document
import os
import nest_asyncio
nest_asyncio.apply()

# Parsing instructions for your specific documents
parsingInstruction = """The provided document is a chemical engineering book. It contains page numbers, chapter numbers,
tables, equations, and diagrams. Output any math or chemical equation in LATEX markdown (between $$).
Include page numbers and chapter numbers if available."""
# Setup the parser
parser = LlamaParse(
    api_key="llx-3RcWpehMhUliZL8wntIvy5GcmLsRmoz2bliCZSJxuhqV7vkR",
    result_type="markdown",
    parsing_instruction=parsingInstruction
)
# Set OpenAI API key
os.environ["OPENAI_API_KEY"] = #insertyourkey

### Parsing documents using llama parse and linking it each document with metadata

In [None]:
# Path to your specific document
file_path = "/content/chapter1.pdf"

# Ensure the file exists
if not os.path.isfile(file_path):
    print(f"File {file_path} does not exist. Please check the file path.")
else:
    # Use SimpleDirectoryReader for the individual file
    reader = SimpleDirectoryReader(input_files=[file_path], file_extractor={".pdf": parser})
    documents = reader.load_data()

    # Add custom metadata, if needed
    for doc in documents:
        doc.metadata["source"] = "chapter1.pdf"  # You can change the key to something more descriptive

    # Check and print the parsed content
    for i, doc in enumerate(documents):
        print(f"Document {i+1}:")
        print("Text Content:\n", doc.text[:1000])  # Print first 1000 characters of text
        print("Metadata:\n", doc.metadata)
        print("="*80)

Started parsing the file under job_id c1647450-3bd9-4d51-a9ad-e003c8e1dac0
Document 1:
Text Content:
 # Delinilions and Principles


Definitions and Principles

Eleunalenzi

Grinmn uuiv &gt; lc GTt JAL Lciu
Metadata:
 {'file_path': '/content/chapter1.pdf', 'file_name': 'chapter1.pdf', 'file_type': 'application/pdf', 'file_size': 4575768, 'creation_date': '2024-08-26', 'last_modified_date': '2024-08-26', 'source': 'chapter1.pdf'}
Document 2:
Text Content:
 # Chemical Engineering Book

Chapter 4

$$\text{Arala}$$

$$\text{Ln: A4}$$

$$3r \quad ucu \quad M$$

$$\text{untn\_Lut}$$

$$474\_ \quad aliicbc \quad uarraliuta$$
Metadata:
 {'file_path': '/content/chapter1.pdf', 'file_name': 'chapter1.pdf', 'file_type': 'application/pdf', 'file_size': 4575768, 'creation_date': '2024-08-26', 'last_modified_date': '2024-08-26', 'source': 'chapter1.pdf'}
Document 3:
Text Content:
 # Chemical Engineering Book

Chapter 4

$$LCV = \frac{445}{luib} - SUnire + \frac{40}{tin}$$
Metadata:
 {'file_path': '/c

The quality is not good, all text are garble

## Option 2: LlamaParse With Json

In [None]:
%pip install llama-index
%pip install llama-index-core
%pip install llama-index-llms-anthropic llama-index-multi-modal-llms-anthropic
%pip install llama-index-embeddings-huggingface
%pip install llama-parse



In [None]:
from llama_parse import LlamaParse
import json

# Initialize LlamaParse
parser = LlamaParse(verbose=True)

# Extract data from the PDF
file_path = "/content/chapter1.pdf"
json_objs = parser.get_json_result(file_path)

# Verify the extraction result
if json_objs:
    json_list = json_objs[0]["pages"]  # Assuming the first object in the list contains the pages

    # Print the extracted JSON data
    for page_num, page in enumerate(json_list):
        print(f"Page {page_num + 1}:")
        print("Text Content:\n", page.get('text', 'No text found'))  # Print the text content
        print("Metadata:\n", page.get('metadata', 'No metadata found'))  # Print metadata
        print("="*80)
else:
    print("No JSON objects returned from parsing.")

Started parsing the file under job_id b5462bb2-e733-4838-beb2-279449512c8b
Page 1:
Text Content:
 Delinilions and Principles          Definitions and Principlcs               Definitions and Prineiples
                                                                Eleunalenzi
        Grinmn              uuiv >
                                    lc
                                    GTt JAL
                                                                                Lciu
Metadata:
 No metadata found
Page 2:
Text Content:
                                     Jt
              Arala
          Ln: A4
3r                  ucu M
                                        atr
untn_Lut
                  474_     aliicbc
                                             uarraliuta
Metadata:
 No metadata found
Page 3:
Text Content:
                                                     Aa
        LA     SEEZS eskdai
LC V                                      Mnth
               04
                     

In [None]:
from llama_parse import LlamaParse

# Initialize the parser
parser = LlamaParse(verbose=True)

# Parse the PDF
json_objs = parser.get_json_result("/content/chapter1.pdf")

# Ensure the JSON object is correctly retrieved
if json_objs:
    json_list = json_objs[0]["pages"]

    # Process and print each page's text content and metadata
    for page_num, page_data in enumerate(json_list):
        print(f"Page {page_num + 1}:")
        print("Text Content:\n", page_data.get("text", "No text found"))
        print("Metadata:\n", page_data.get("metadata", "No metadata found"))
        print("="*80)
else:
    print("No JSON objects were returned.")

Started parsing the file under job_id d790c883-49f3-4c3d-beb3-a56106bb6497
Page 1:
Text Content:
 Delinilions and Principles          Definitions and Principlcs               Definitions and Prineiples
                                                                Eleunalenzi
        Grinmn              uuiv >
                                    lc
                                    GTt JAL
                                                                                Lciu
Metadata:
 No metadata found
Page 2:
Text Content:
                                     Jt
              Arala
          Ln: A4
3r                  ucu M
                                        atr
untn_Lut
                  474_     aliicbc
                                             uarraliuta
Metadata:
 No metadata found
Page 3:
Text Content:
                                                     Aa
        LA     SEEZS eskdai
LC V                                      Mnth
               04
                     

In [None]:
for page in json_list:
    print(page)
    break

{'page': 1, 'text': 'Delinilions and Principles          Definitions and Principlcs               Definitions and Prineiples\n                                                                Eleunalenzi\n        Grinmn              uuiv >\n                                    lc\n                                    GTt JAL\n                                                                                Lciu', 'md': 'Delinilions and Principles          Definitions and Principlcs               Definitions and Prineiples\n                                                                Eleunalenzi\n        Grinmn              uuiv >\n                                    lc\n                                    GTt JAL\n                                                                                Lciu', 'images': [{'name': 'img_p0_1.png', 'height': 1600, 'width': 958, 'x': 0, 'y': 0, 'original_width': 1165, 'original_height': 1946}], 'items': [{'type': 'text', 'value': 'Delinilions and Princi

## Option 3: Using Nougat model

In [None]:
!pip install -q pdf2image
!apt-get install poppler-utils
!pip install --upgrade torch transformers
!pip install python-Levenshtein
!pip uninstall torch torchvision transformers
!pip install torch torchvision transformers

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
poppler-utils is already the newest version (22.02.0-2ubuntu0.5).
0 upgraded, 0 newly installed, 0 to remove and 45 not upgraded.
Found existing installation: torch 2.4.0
Uninstalling torch-2.4.0:
  Would remove:
    /usr/local/bin/convert-caffe2-to-onnx
    /usr/local/bin/convert-onnx-to-caffe2
    /usr/local/bin/torchrun
    /usr/local/lib/python3.10/dist-packages/functorch/*
    /usr/local/lib/python3.10/dist-packages/torch-2.4.0.dist-info/*
    /usr/local/lib/python3.10/dist-packages/torch/*
    /usr/local/lib/python3.10/dist-packages/torchgen/*
Proceed (Y/n)? Y
  Successfully uninstalled torch-2.4.0
Found existing installation: torchvision 0.18.1+cu121
Uninstalling torchvision-0.18.1+cu121:
  Would remove:
    /usr/local/lib/python3.10/dist-packages/torchvision-0.18.1+cu121.dist-info/*
    /usr/local/lib/python3.10/dist-packages/torchvision.libs/libcudart.7ec1eba6.so.12
    /usr/local/

In [None]:
from transformers import AutoProcessor, VisionEncoderDecoderModel
import torch
import torch
import torchvision
import transformers

# Load the Nougat model and processor from the hub
processor = AutoProcessor.from_pretrained("facebook/nougat-small")
model = VisionEncoderDecoderModel.from_pretrained("facebook/nougat-small")
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


VisionEncoderDecoderModel(
  (encoder): DonutSwinModel(
    (embeddings): DonutSwinEmbeddings(
      (patch_embeddings): DonutSwinPatchEmbeddings(
        (projection): Conv2d(3, 128, kernel_size=(4, 4), stride=(4, 4))
      )
      (norm): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
      (dropout): Dropout(p=0.0, inplace=False)
    )
    (encoder): DonutSwinEncoder(
      (layers): ModuleList(
        (0): DonutSwinStage(
          (blocks): ModuleList(
            (0-1): 2 x DonutSwinLayer(
              (layernorm_before): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
              (attention): DonutSwinAttention(
                (self): DonutSwinSelfAttention(
                  (query): Linear(in_features=128, out_features=128, bias=True)
                  (key): Linear(in_features=128, out_features=128, bias=True)
                  (value): Linear(in_features=128, out_features=128, bias=True)
                  (dropout): Dropout(p=0.0, inplace=False)
           

In [None]:
!find . -name "*.pyc" -exec rm -f {} \;

In [None]:
from pdf2image import convert_from_path
import os

def convert_pdf_to_images(pdf_path):
    # Convert PDF to list of images
    pdf_images = convert_from_path(pdf_path)
    return pdf_images

file_path = "/content/3pages.pdf"
all_images = convert_pdf_to_images(file_path)

In [None]:
pixel_values = [processor(images=all_images[i] , return_tensors="pt").pixel_values for i in range(len(all_images))]

In [None]:
from tqdm import tqdm

markdown = ""
for pixel_value in tqdm(pixel_values, desc="Processing images"):
  pixel_value = pixel_value.to(device)
  outputs = model.generate(
      pixel_value,
      min_length=1,
      max_new_tokens=4096,
      bad_words_ids=[[processor.tokenizer.unk_token_id]],
  )
  sequence = processor.batch_decode(outputs, skip_special_tokens=True)[0]
  sequence = processor.post_process_generation(sequence, fix_markdown=True)
  markdown += sequence

Processing images: 100%|██████████| 3/3 [00:43<00:00, 14.37s/it]


In [None]:
from IPython.display import display, Markdown
display(Markdown(markdown))



## Chapter 1 Definitions and Principles

**C**hemical engineering has to do with industrial processes in which raw materials are changed or separated into useful products. The chemical engineer must develop, design, and engineer both the complete process and the equipment used; choose the proper raw materials; operate the plants efficiently, safely, and economically; and see to it that products meet the requirements set by the customers. Chemical engineering is both an art and a science. Whenever science helps the engineer to solve a problem, science should be used. When, as is usually the case, science does not give a complete answer, it is necessary to use experience and judgment. The professional stature of an engineer depends on skill in utilizing all sources of information to reach practical solutions to processing problems.

The variety of processes and industries that call for the services of chemical engineers is enormous. In the past, the areas of most concern to chemical engineers were ore beneficiation, petroleum refining, and the manufacture of heavy chemicals and organics such as sulfuric acid, methyl alcohol, and polyethylene. Today items such as polymeric lithographic supports for the electronics industry, high-strength composite materials, genetically modified biochemical agents in areas of food processing, and drug manufacture and drug delivery have become increasingly important. The processes described in standard treatises on chemical technology and the process and biochemical industries give a good idea of the field of chemical engineering.11

Footnote 11: [https://www.nist.gov/](https://www.nist.gov/)

Because of the variety and complexity of modern processes, it is not practicable to cover the entire subject matter of chemical engineering under a single head. The field is divided into convenient, but arbitrary, sectors. This text covers that portion of chemical engineering known as the unit operations.


## Unit Operations

An economical method of organizing much of the subject matter of chemical engineering is based on two facts: (1) Although the number of individual processes is great, each one can be broken down into a series of steps, called operations, each of which in turn appears in process after process; (2) the individual operations have common techniques and are based on the same scientific principles. For example, in most processes solids and fluids must be moved; heat or other forms of energy must be transferred from one substance to another; and tasks such as drying, size reduction, distillation, and evaporation must be performed. The unit operation concept is this: By studying systematically these operations themselves--operations that clearly cross industry and process lines--the treatment of all processes is unified and simplified.

The strictly chemical aspects of processing are studied in a companion area of chemical engineering called reaction kinetics. The unit operations are largely used to conduct the primarily physical steps of preparing the reactants, separating and purifying the products, recycling unconverted reactants, and controlling the energy transfer into or out of the chemical reactor.

The unit operations are as applicable to many physical processes as to chemical ones. For example, the process used to manufacture common salt consists of the following sequence of unit operations: transportation of solids and liquids, transfer of heat, evaporation, crystallization, drying, and screening. No chemical reaction appears in these steps. On the other hand, the cracking of petroleum, with or without the aid of a catalyst, is a typical chemical reaction conducted on an enormous scale. Here the unit operations--transportation of fluids and solids, distillation, and various mechanical separations--are vital, and the cracking reaction could not be utilized without them. The chemical steps themselves are conducted by controlling the flow of material and energy to and from the reaction zone.

Because the unit operations are a branch of engineering, they are based on both science and experience. Theory and practice must combine to yield designs for equipment that can be fabricated, assembled, operated, and maintained. A balanced discussion of each operation requires that theory and equipment be considered together. This book presents such a balanced treatment.

**Scientific foundations of unit operations**

A number of scientific principles and techniques are basic to the treatment of the unit operations. Some are elementary physical and chemical laws such as the conservation of mass and energy, physical equilibria, kinetics, and certain properties of matter. Their general use is described in the remainder of this chapter. Other special techniques important in chemical engineering are considered at the proper places in the text.

## Unit Systems

The official international system of units is SI (Systeme International d'Unites). Strong efforts are underway for its universal adoption as the exclusive system forall engineering and science, but older systems, particularly the centimeter-gram-second (cgs) and foot-pound-second (fps) engineering gravitational systems, are still in use and probably will be around for some time. The chemical engineer finds many physiochemical data given in cgs units; that many calculations are most conveniently done in fps units; and that SI units are increasingly encountered in science and engineering. Thus it becomes necessary to be expert in the use of all three systems.

In the following treatment, SI is discussed first, and then the other systems are derived from it. The procedure reverses the historical order, as the SI units evolved from the cgs system. Because of the growing importance of SI, it should logically be given a preference. If, in time, the other systems are phased out, they can be ignored and SI used exclusively.

### Physical Quantities

Any physical quantity consists of two parts: a unit, which tells what the quantity is and gives the standard by which it is measured, and a number, which tells how many units are needed to make up the quantity. For example, the statement that the distance between two points is 3 m means all this: A definite length has been measured; to measure it, a standard length, called the meter, has been chosen as a unit; and three 1-m units, laid end to end, are needed to cover the distance. If an integral number of units are either too few or too many to cover a given distance, submultiples, which are fractions of the unit, are defined by dividing the unit into fractions, so that a measurement can be made to any degree of precision in terms of the fractional units. No physical quantity is defined until both the number and the unit are given.

### SI Units

The SI system covers the entire field of science and engineering, including electromagnetics and illumination. For the purposes of this book, a subset of the SI units covering chemistry, gravity, mechanics, and thermodynamics is sufficient. The units are derivable from (1) four proportionalities of chemistry and physics; (2) arbitrary standards for mass, length, time, temperature, and the mole; and (3) arbitrary choices for the numerical values of two proportionality constants.

#### Basic equations

The basic proportionalities, each written as an equation with its own proportionality factor, are

\[F =k_{1}\frac{d}{dt}(mu)\] (1.1) \[F =k_{2}\frac{m_{a}m_{b}}{r^{2}}\] (1.2) \[Q_{c} =k_{3}W_{c}\] (1.3) \[T =k_{4}\lim_{p\to 0}\frac{pV}{m}\] (1.4) 

## Option 4: Using Gemini 1.5 Flash

In [None]:
!pip install google-generativeai



In [None]:
import google.generativeai as genai

api_key=#insertyourkey
genai.configure(api_key=api_key)

generation_config = {
  "temperature": 0.5,
  "top_p": 0.95,
  "top_k": 64,
  "max_output_tokens": 8192,
  "response_mime_type": "text/plain",
}

In [None]:
SYSTEM_PROMPT = """
You are a highly accurate document extraction model.
Your task is to extract content from PDFs while maintaining
the original structure and formatting using markdown syntax.
When you encounter a plot or image, do not include any URLs or HTML tags.
Instead, extract both the original title and description from the PDF,
and generate a detailed description. Enclose all this information
inside square brackets like this: `[Original title: Original description.]
[Detailed description]`. Ensure the content is unaltered and
free of unnecessary code.
"""

In [None]:
gemini_model = genai.GenerativeModel("models/gemini-1.5-flash",
                              system_instruction=SYSTEM_PROMPT,
                              generation_config=generation_config)

In [None]:
USER_PROMPT = """
Extract the content from this PDF without changing anything.
Preserve the original structure and use markdowns for formatting.
When you find plots or images, include both the original title and
description from the PDF along with a generated detailed description,
all enclosed inside square brackets, like `[Original title: Original
description.] [Detailed description]`. Do not include URLs or HTML tags.
"""

In [None]:
def upload_to_gemini(path, mime_type=None):
  """
  Uploads the given file to Gemini.
  """
  file = genai.upload_file(path, mime_type=mime_type)
  return file

In [None]:
def pdf2markdown(pdf_path):
    files = [ upload_to_gemini(pdf_path, mime_type="application/pdf"),]
    response = gemini_model.generate_content([files[0] , USER_PROMPT])
    return response.text

In [None]:
markdown = pdf2markdown("/content/chapter1.pdf")

In [None]:
from IPython.display import display, Markdown
display(Markdown(markdown))

# CHAPTER 1
## Definitions and Principles

Chemical engineering has to do with industrial processes in which raw materials
are changed or separated into useful products. The chemical engineer must develop,
design, and engineer both the complete process and the equipment used; choose the
proper raw materials; operate the plants efficiently, safely, and economically; and
see to it that products meet the requirements set by the customers. Chemical engi-
neering is both an art and a science. Whenever science helps the engineer to solve a
problem, science should be used. When, as is usually the case, science does not give
a complete answer, it is necessary to use experience and judgment. The professional
stature of an engineer depends on skill in utilizing all sources of information to
reach practical solutions to processing problems.

The variety of processes and industries that call for the services of chemical en-
gineers is enormous. In the past, the areas of most concern to chemical engineers
were ore beneficiation, petroleum refining, and the manufacture of heavy chemicals
and organics such as sulfuric acid, methyl alcohol, and polyethylene. Today items
such as polymeric lithographic supports for the electronics industry, high-strength
composite materials, genetically modified biochemical agents in areas of food pro-
cessing, and drug manufacture and drug delivery have become increasingly impor-
tant. The processes described in standard treatises on chemical technology and the
process and biochemical industries give a good idea of the field of chemical
engineering. It

Because of the variety and complexity of modern processes, it is not practica-
ble to cover the entire subject matter of chemical engineering under a single head.
The field is divided into convenient, but arbitrary, sectors. This text covers that por-
tion of chemical engineering known as the unit operations.

<sup>1</sup>Superscript numerals in the text correspond to the numbered references at the end of each chapter.

## SECTION 1: Introduction
### UNIT OPERATIONS

An economical method of organizing much of the subject matter of chemical engi-
neering is based on two facts: (1) Although the number of individual processes is
great, each one can be broken down into a series of steps, called operations, each of
which in turn appears in process after process; (2) the individual operations have com-
mon techniques and are based on the same scientific principles. For example, in most
processes solids and fluids must be moved; heat or other forms of energy must be
transferred from one substance to another; and tasks such as drying, size reduction,
distillation, and evaporation must be performed. The unit operation concept is this: By
studying systematically these operations themselves-operations that clearly cross
industry and process lines-the treatment of all processes is unified and simplified.

The strictly chemical aspects of processing are studied in a companion area of
chemical engineering called reaction kinetics. The unit operations are largely used
to conduct the primarily physical steps of preparing the reactants, separating and
purifying the products, recycling unconverted reactants, and controlling the energy
transfer into or out of the chemical reactor.

The unit operations are as applicable to many physical processes as to chemical
ones. For example, the process used to manufacture common salt consists of the fol-
lowing sequence of unit operations: transportation of solids and liquids, transfer of
heat, evaporation, crystallization, drying, and screening. No chemical reaction ap-
pears in these steps. On the other hand, the cracking of petroleum, with or without
the aid of a catalyst, is a typical chemical reaction conducted on an enormous scale.
Here the unit operations-transportation of fluids and solids, distillation, and vari-
ous mechanical separations-are vital, and the cracking reaction could not be uti-
lized without them. The chemical steps themselves are conducted by controlling the
flow of material and energy to and from the reaction zone.

Because the unit operations are a branch of engineering, they are based on both
science and experience. Theory and practice must combine to yield designs for
equipment that can be fabricated, assembled, operated, and maintained. A balanced
discussion of each operation requires that theory and equipment be considered to-
gether. This book presents such a balanced treatment.

#### Scientific foundations of unit operations

A number of scientific principles and techniques are basic to the treatment of
the unit operations. Some are elementary physical and chemical laws such as the
conservation of mass and energy, physical equilibria, kinetics, and certain proper-
ties of matter. Their general use is described in the remainder of this chapter. Other
special techniques important in chemical engineering are considered at the proper
places in the text.

### UNIT SYSTEMS

The official international system of units is SI (Système International d'Unités).
Strong efforts are underway for its universal adoption as the exclusive system for
all engineering and science, but older systems, particularly the centimeter-gram-
second (cgs) and foot-pound-second (fps) engineering gravitational systems, are still
in use and probably will be around for some time. The chemical engineer finds many
physiochemical data given in cgs units; that many calculations are most conveniently
done in fps units; and that SI units are increasingly encountered in science and engi-
neering. Thus it becomes necessary to be expert in the use of all three systems.

In the following treatment, SI is discussed first, and then the other systems are
derived from it. The procedure reverses the historical order, as the SI units evolved
from the cgs system. Because of the growing importance of SI, it should logically
be given a preference. If, in time, the other systems are phased out, they can be ig-
nored and SI used exclusively.

#### Physical Quantities

Any physical quantity consists of two parts: a unit, which tells what the quantity is
and gives the standard by which it is measured, and a number, which tells how many
units are needed to make up the quantity. For example, the statement that the dis-
tance between two points is 3 m means all this: A definite length has been measured;
to measure it, a standard length, called the meter, has been chosen as a unit; and
three 1-m units, laid end to end, are needed to cover the distance. If an integral num-
ber of units are either too few or too many to cover a given distance, submultiples,
which are fractions of the unit, are defined by dividing the unit into fractions, so that
a measurement can be made to any degree of precision in terms of the fractional
units. No physical quantity is defined until both the number and the unit are given.

#### SI Units

The SI system covers the entire field of science and engineering, including electro-
magnetics and illumination. For the purposes of this book, a subset of the SI units
covering chemistry, gravity, mechanics, and thermodynamics is sufficient. The units
are derivable from (1) four proportionalities of chemistry and physics; (2) arbitrary
standards for mass, length, time, temperature, and the mole; and (3) arbitrary
choices for the numerical values of two proportionality constants.

#### Basic equations

The basic proportionalities, each written as an equation with its own propor-
tionality factor, are
```
d
F = k₁(mu)
(1.1)
dt
тать
F=k2
(1.2)
r2
Qc = k3Wc
(1.3)
pV
T = k4 lim
(1.4)
p-0 m
```
where
F = force
t = time
m = mass
u = velocity
r = distance
W = work
Q = heat
p = pressure
V = volume
T = thermodynamic absolute temperature
k₁, k₂, k₃, k₄ = proportionality factors

Equation (1.1) is Newton's second law of motion, showing the proportionality
between the resultant of all the forces acting on a particle of mass m and the time
rate of increase in momentum of the particle in the direction of the resultant force.
Equation (1.2) is Newton's law of gravitation, giving the force of attraction be-
tween two particles of masses m₁ and m₁ a distance r apart.
Equation (1.3) is one statement of the first law of thermodynamics. It affirms
the proportionality between the work performed by a closed system during a cycle
and the heat absorbed by that system during the same cycle.
Equation (1.4) shows the proportionality between the thermodynamic absolute
temperature and the zero-pressure limit of the pressure-volume product of a definite
mass of any gas.

Each equation states that if means are available for measuring the values of all
variables in that equation and if the numerical value of k is calculated, then the value
of k is constant and depends only on the units used for measuring the variables in
the equation.

#### Standards

By international agreement, standards are fixed arbitrarily for the quantities of
mass, length, time, temperature, and the mole. These are five of the base units of SI.
Currently, the standards are as follows.

The standard of mass is the kilogram (kg), defined as the mass of the interna-
tional kilogram, a platinum cylinder preserved at Sèvres, France.

The standard of length is the meter (m), defined (since 1983) as the length of the
path traveled by light in vacuum during a time interval of 1/299,792,458* of a second.

The standard of time is the second (s), defined as 9,192,631.770* frequency cy-
cles of a certain quantum transition in an atom of 133Ce.

The standard of temperature is the kelvin (K), defined by assigning the value
273.16* K to the temperature of pure water at its triple point, the unique temperature
at which liquid water, ice, and steam can exist at equilibrium.

The mole (abbreviated mol) is defined as the amount of a substance compris-
ing as many elementary units as there are atoms in 12 g of '2C. The definition of
the mole is equivalent to the statement that the mass of one mole of a pure substance
in grams is numerically equal to its molecular weight calculated from the standard
table of atomic weights, in which the atomic weight of carbon is given as 12.01115.
This number differs from 12 because it applies to the natural isotopic mixture of
carbon rather than to pure 12C. In engineering calculations the terms kilogram mole
and pound mole are commonly used to designate the mass of a pure substance in
kilograms or pounds that is equal to its molecular weight.

The actual number of molecules in one gram mole is given by Avogadro's num-
ber, 6.022 x 10<sup>23</sup>.

#### Evaluation of constants

From the basic standards, values of m, m, and m₁ in Eqs. (1.1) and (1.2) are
measured in kilograms, r in meters, and u in meters per second. Constants k, and k₂
are not independent but are related by eliminating F from Eqs. (1.1) and (1.2). This
gives
```
k
k2
d(mu)/dt
mamb/r2
```
Either k₁ or k₂ may be fixed arbitrarily. Then the other constant must be found by
experiments in which inertial forces calculated by Eq. (1.1) are compared with
gravitational forces calculated by Eq. (1.2). In SI, k₁ is fixed at unity and k₂ found
experimentally. Equation (1.1) then becomes
```
F =
d
(mu)
dt
(1.5)
```
The force defined by Eq. (1.5) and also used in Eq. (1.2) is called the newton (N).
From Eq. (1.5),
```
1 N = 1 kg.m/s²
(1.6)
```
Constant k₂ is denoted by G and called the gravitational constant. Its recom-
mended value is
```
G = 6.6726 × 10-11 N. m²/kg2
(1.7)
```
#### Work, energy, and power

In SI, both work and energy are measured in newton-meters, a unit called the
joule (J), and so
```
1J=1Nm= 1 kg. m²/s²
(1.8)
```
Power is measured in joules per second, a unit called the watt (W).

#### Heat

The constant k3 in Eq. (1.3) may be fixed arbitrarily. In SI it, like k₁, is set at
unity. Equation (1.3) becomes
```
Qc = W
(1.9)
```
Heat, like work, is measured in joules.

#### Temperature

The quantity pV/m in Eq. (1.4) may be measured in (N/m²)(m³/kg), or J/kg.
With an arbitrarily chosen gas, this quantity can be determined by measuring p
and V of m kg of gas while it is immersed in a thermostat. In this experiment, only
constancy of temperature, not magnitude, is needed. Values of pV/m at various
pressures and at constant temperature can then be extrapolated to zero pressure to
obtain the limiting value required in Eq. (1.4) at the temperature of the thermostat.
For the special situation in which the thermostat contains water at its triple point,
the limiting value is designated by (pV/m). For this experiment Eq. (1.4) gives
```
273.16 = kalim (DV)
m
0
(1.10)
```
For an experiment at temperature TK, Eq. (1.4) can be used to eliminate k₁ from
Eq. (1.10), giving
```
T = 273.16 limpo(pV/m)г
limp-o(pV/m)o
(1.11)
```
Equation (1.11) is the definition of the Kelvin temperature scale from the experi-
mental pressure-volume properties of a real gas.

#### Celsius temperature

In practice, temperatures are expressed on the Celsius scale, in which the zero
point is set at the ice point, defined as the equilibrium temperature of ice and air-
saturated water at a pressure of one atmosphere. Experimentally, the ice point is
found to be 0.01K below the triple point of water, and so it is at 273.15 K. The
Celsius temperature (°C) is defined by
```
T°C = TK - 273.15
(1.12)
```
On the Celsius scale, the experimentally measured temperature of the steam point,
which is the boiling point of water at a pressure of 1 atm, is 100.00°C.

#### Decimal units

In SI, a single unit is defined for each quantity, but named decimal multiples
and submultiples also are recognized. They are listed in Table 1.1. Time may be
expressed in the nondecimal units: minutes (min), hours (h), or days (d).

#### Standard gravity

For certain purposes, the acceleration of free fall in the earth's gravitational
field is used. From deductions based on Eq. (1.2), this quantity, denoted by g, is
nearly constant. It varies slightly with latitude and height above sea level. For pre-
cise calculations, an arbitrary standard g,, has been set, defined by
```
gn = 9.80665* m/s²
(1.13)
```
[Original title: TABLE 1.1
SI and cgs prefixes for multiples and submultiples
[Detailed description: | Factor | Prefix | Abbreviation | Factor | Prefix | Abbreviation |
|---|---|---|---|---|---|
| 10<sup>12</sup> | tera | T | 10<sup>-1</sup> | deci | d |
| 10<sup>9</sup> | giga | G | 10<sup>-2</sup> | centi | c |
| 10<sup>6</sup> | mega | M | 10<sup>-3</sup> | milli | m |
| 10<sup>3</sup> | kilo | k | 10<sup>-6</sup> | micro | μ |
| 10<sup>2</sup> | hecto | h | 10<sup>-9</sup> | nano | n |
| 10<sup>1</sup> | deka | da | 10<sup>-12</sup> | pico | P |
|  |  |  | 10<sup>-15</sup> | femto | f |
|  |  |  | 10<sup>-18</sup> | atto | a |]

#### Pressure units

The natural unit of pressure in SI is the newton per square meter. This unit,
called the pascal (Pa), is inconveniently small, and a multiple, called the bar, also is
used. It is defined by
```
1 bar = 1 × 105 Pa = 1 × 105 N/m²
(1.14)
```
A more common empirical unit for pressure, used with all systems of units, is
the standard atmosphere (atm), defined by
```
1 atm = 1.01325* x 105 Pa = 1.01325 bars
(1.15)
```
### CGS Units

The older cgs system can be derived from SI by making certain arbitrary decisions.
The standard for mass is the gram (g), defined by
```
1 g = 1 x 10-3 kg
(1.16)
```
The standard for length is the centimeter (cm), defined by
```
1 cm = 1 × 10-2 m
(1.17)
```
Standards for time, temperature, and the mole are unchanged.

As in SI, constant k₁ in Eq. (1.1) is fixed at unity. The unit of force is called the
dyne (dyn), defined by
```
1 dyn = 1 g.cm/s²
(1.18)
```
The unit for energy and work is the erg, defined by
```
1 erg = 1 dyn. cm = 1 × 10-7 J
(1.19)
```
Constant k3 in Eq. (1.3) is not unity. A unit for heat, called the calorie (cal), is
used to convert the unit for heat to ergs. Constant 1/k3 is replaced by J, which
denotes the quantity called the mechanical equivalent of heat and is measured in
joules per calorie. Equation (1.3) becomes
```
W = JQc
(1.20)
```
Two calories are defined. The thermochemical calorie (cal), used in chemistry,
chemical engineering thermodynamics, and reaction kinetics, is defined by
```
1 cal = 4.1840* × 107 ergs = 4.1840* J
(1.21)
```
The international steam table calorie (cal₁₁), used in heat power engineering, is de-
fined by
```
1 cal₁т = 4.1868* × 107 ergs = 4.1868* J
(1.22)
```
The calorie is so defined that the specific heat of water is approximately 1 cal/g. °C.
The standard acceleration of free fall in cgs units is
```
&n
= 980.665 cm/s²
(1.23)
```
### FPS Engineering Units

In some countries a nondecimal gravitational unit system has long been used in
commerce and engineering. The system can be derived from SI by making the fol-
lowing decisions.

The standard for mass is the avoirdupois pound (lb), defined by
```
1 lb = 0.45359237* kg
(1.24)
```
The standard for length is the inch (in.), defined as 2.54 cm. This is equivalent
to defining the foot (ft) as
```
1 ft = 2.54 x 12 x 10-2 m = 0.3048* m
(1.25)
```
The standard for time remains the second (s).

The thermodynamic temperature scale is called the Rankine scale, in which
temperatures are denoted by degrees Rankine and defined by
```
1°R =
1
K
1.8
(1.26)
```
The ice point on the Rankine scale is 273.15 × 1.8 = 491.67°R.

The analog of the Celsius scale is the Fahrenheit scale, in which readings are
denoted by degrees Fahrenheit. It is derived from the Rankine scale by setting its
zero point exactly 32°F below the ice point on the Rankine scale, so that
```
T°F = T°R- (491.67-32) = T°R-459.67
(1.27)
```
The relation between the Celsius and Fahrenheit scales is given by the exact equation
```
T°F = 32 + 1.8°C
(1.28)
```
From this equation, temperature differences are related by
```
AT°C = 1.8 AT°F = ΔΤ Κ
(1.29)
```
The steam point is 212.00°F.

#### Pound force

The fps system is characterized by a gravitational unit of force, called the
pound force (lb). The unit is so defined that a standard gravitational field exerts a
force of one pound on a mass of one avoirdupois pound. The standard acceleration
of free fall in fps units is, to five significant figures,
```
8n
9.80665 m/s²
0.3048 m/ft
32.174 ft/s²
(1.30)
```
The pound force is defined by
```
1 lbf = 32.174 lb. ft/s²
(1.31)
```
Then Eq. (1.1) gives
```
d(mu)/dt
F lbf =
lb-ft/s²
32.174
(1.31)
```
Equation (1.1) can also be written with 1/g in place of k₁:
```
d(mu)/dt
F =
8c
(1.32)
```
```
d(mu)/dt
F =
8c
(1.33)
```
Comparison of Eqs. (1.32) and (1.33) shows that to preserve both numerical equal-
ity and consistency of units in these equations, it is necessary to define ge, called
Newton's law proportionality factor for the gravitational force unit, by
```
gc = 32.174 lb. ft/s². lbf
(1.34)
```
The unit for work and mechanical energy in the fps system is the foot-pound
force (ft.lby). Power is measured by an empirical unit, the horsepower (hp), de-
fined by
```
1 hp = 550 ft-lbf/s
(1.35)
```
The unit for heat is the British thermal unit (Btu), defined by the implicit
relation
```
1 Btu/lb. °F = 1 calıT/g °C
(1.36)
```
As in the cgs system, constant k3 in Eq. (1.3) is replaced by 1/J, where J is the me-
chanical equivalent of heat, equal to 778.17 ft lby/Btu.

The definition of the Btu requires that the numerical value of specific heat be
the same in both systems, and in each case the specific heat of water is approxi-
mately 1.0.

[Original title: TABLE 1.2
Values of the gas constant R
[Detailed description: | Temperature | Mass | Energy | R |
|---|---|---|---|
| Kelvins | kg mol | J | 8,314.47 |
|  |  | calır | 1.9859 x 10<sup>3</sup> |
|  |  | cal | 1.9873 x 10<sup>3</sup> |
|  |  | m³-atm | 82.056 x 10<sup>-3</sup> |
|  | g mol | cm³-atm | 82.056 |
| Degrees Rankine | lb mol | Btu | 1.9858 |
|  |  | ft.lby | 1,545.3 |
|  |  | hp.h | 7.8045 x 10<sup>-4</sup> |
|  |  | kWh | 5.8198 x 10<sup>-4</sup> |]

### Gas Constant

If mass is measured in kilograms or grams, constant k₁ in Eq. (1.4) differs from gas
to gas. But when the concept of the mole as a mass unit is used, k₁ can be replaced
by the universal gas constant R, which, by Avogadro's law, is the same for all gases.
The numerical value of R depends only on the units chosen for energy, temperature,
and mass. Then Eq. (1.4) is written
```
pV
lim
p→0 nT
R
(1.37)
```
where n is the number of moles. This equation applies also to mixtures of gases if n
is the total number of moles of all the molecular species that make up the volume V.

The accepted experimental value of R is
```
R = 8.31447 J/K mol = 8.31447 × 107 ergs/K. mol
(1.38)
```
Values of R in other units for energy, temperature, and mass are given in Table 1.2.

Although the mole is defined as a mass in grams, the concept of the mole is eas-
ily extended to other mass units. Thus, the kilogram mole (kg mol) is the usual mo-
lecular or atomic weight in kilograms, and the pound mole (lb mol) is that in
avoirdupois pounds. When the mass unit is not specified, the gram mole (g mol) is
intended. Molecular weight M is a pure number.

Standard molar volume. From Table 1.2, the volume of 1 kg mol of gas at stan-
dard conditions (1 atm, 0°C), is 82.056 x 10<sup>-3</sup> x 273 = 22.4 m³, or 22.4 (L/g mol).
In fps units, the standard volume at 1 atm and 32°F is 359 ft³/lb mol.

### Conversion of Units

Since three unit systems are in common use, it is often necessary to convert the
magnitudes of quantities from one system to another. This is accomplished by using
conversion factors. Only the defined conversion factors for the base units are
required since conversion factors for all other units can be calculated from them.
Interconversions between SI and the cgs system are simple. Both use the same
standards for time, temperature, and the mole, and only the decimal conversions de-
fined by Eqs. (1.16) and (1.17) are needed. Both SI and the fps system also use the
second as the standard for time; the three conversion factors defined for mass,
length, and temperature by Eqs. (1.24), (1.25), and (1.26), respectively, are suffi-
cient for all conversions of units between these two systems.

Example 1.1 demonstrates how conversion factors are calculated from the
exact numbers used to set up the definitions of units in SI and the fps system. In
conversions involving ge in fps units, the use of the exact numerical ratio
9.80665/0.3048 in place of the fps number 32.1740 is recommended to give maxi-
mum precision in the final calculation and to take advantage of possible cancella-
tions of numbers during the calculation.

**EXAMPLE 1.1.** Using only exact definitions and standards, calculate factors for con-
verting (a) newtons to pounds force, (b) British thermal units to IT calories, (c) atmo-
spheres to pounds force per square inch, and (d) horsepower to kilowatts.

**Solution**

(a) From Eqs. (1.6), (1.24), and (1.25),
```
1 lb.ft/s²
1 N = 1 kg.m/s²
2
0.45359237 x 0.3048
```
From Eq. (1.30)
```
1 lb-ft/s²
0.3048
9.80665
lbf
0.3048
```
and so
```
1 N =
=
9.80665 x 0.45359237 × 0.3048
1
9.80665 x 0.45359237
lbf
lbf = 0.224809 lbf
```
In Appendix 1 it is shown that to convert newtons to pound force, one should multiply by
0.224809. Clearly, to convert from pounds force to newtons, multiply by 1/0.224809 =
4.448221.

(b) From Eq. (1.36)
```
1 lb 1°F
1 Btu = 1 calı
1 g 1°C
1 lb 1 kg 1°F
= 1 calit
1 kg 1g 1°C
```
From Eqs. (1.16), (1.24), and (1.29)
```
1 Btu = 1 calı
0.45359237 × 1000
251.996 calit
1.8
```
(c) From Eqs. (1.6), (1.14), and (1.15)
```
1 atm = 1.01325 x 105 kg. m/s² m²
.
```
From Eqs. (1.24), (1.25), and (1.34), since 1 ft = 12 in.,
```
1 atm = 1.01325 × 105 ×
-
1 lb/s² 0.3048
0.45359237 ft
1.01325 × 105 × 0.3048
32.174 x 0.45359237 × 122
= 14.6959 lbf/in.2
```
(d) From Eqs. (1.31) and (1.35)
```
lbf/in.2
1 hp = 550 ft.lb/s = 550 x 32.174 ft². lb/s³
```
Using Eqs. (1.24) and (1.25) gives
```
1 hp = 550 x 32.174 x 0.45359237 × 0.30482
= 745.70 J/s
```
Substituting from Eq. (1.8) and dividing by 1,000,
```
1 hp = 0.74570 kW
```
Although conversion factors may be calculated as needed, it is more efficient to use
tables of the common factors. A table for the factors used in this book is given in
Appendix 1.

### Units and Equations

Although Eqs. (1.1) to (1.4) are sufficient for the description of unit systems, they
are but a small fraction of the equations needed in this book. Many such equations
contain terms that represent properties of substances, and these are introduced as
needed. All new quantities are measured in combinations of units already defined,
and all are expressible as functions of the five base units for mass, length, time, tem-
perature, and the mole.

#### Precision of calculations

In the above discussion, the values of experimental constants are given with the
maximum number of significant digits consistent with present estimates of the pre-
cision with which they are known, and all digits in the values of defined constants
are retained. In practice, such extreme precision is seldom necessary, and defined
and experimental constants can be truncated to the number of digits appropriate to
the problem at hand, although the advent of the digital computers make it possible
to retain maximum precision at small cost. The engineer should use judgment in set-
ting a suitable level of precision for the particular problem to be solved.

#### General equations

Except for the appearance of the proportionality factors go and J, the equations
for all three unit systems are alike. In this text, equations are written for SI units,
with a reminder to use g and J when working examples in cgs or fps units.

#### Dimensionless equations and consistent units

Equations derived directly from the basic laws of the physical sciences consist
of terms that either have the same units or can be written in the same units by using
the definitions of derived quantities to express complex units in terms of the five
base ones. Equations meeting this requirement are called dimensionally homoge-
neous equations. When such an equation is divided by any one of its terms, all units
in each term cancel and only numerical magnitudes remain. These equations are
called dimensionless equations.

A dimensionally homogeneous equation can be used as it stands with any set of
units provided that the same units for the five base units are used throughout. Units
meeting this requirement are called consistent units. No conversion factors are
needed when consistent units are used.

For example, consider the usual equation for the vertical distance Z traversed
by a freely falling body during time t when the initial velocity is up:
```
Z = uot + 18t²
(1.39)
```
Examination of Eq. (1.39) shows that the units in each term reduce to that for
length. Dividing the equation by Z gives
```
uot
1=+ 1 =
Z
gt2
2Z
(1.40)
```
A check of each term in Eq. (1.40) shows that the units in each term cancel and each
term is dimensionless. A combination of variables for which all dimensions cancel in
this manner is called a dimensionless group. The numerical value of a dimensionless
group for given values of the quantities contained in it is independent of the units
used, provided they are consistent. Both terms on the right-hand side of Eq. (1.40)
are dimensionless groups.

#### Dimensional equations

Equations derived by empirical methods, in which experimental results are cor-
related by empirical equations without regard to dimensional consistency, usually
are not dimensionally homogeneous and contain terms in several different units.
Equations of this type are dimensional equations, or dimensionally nonhomoge-
neous equations. In these equations there is no advantage in using consistent units,
and two or more length units, for example, inches and feet, or two or more time
units, for example, seconds and minutes, may appear in the same equation. For ex-
ample, a formula for the rate of heat loss from a horizontal pipe to the atmosphere
by conduction and convection is
```
9
= 0.50
A
ΔΤ 1.25
(D)0.25
(1.41)
```
where
q = rate of heat loss, Btu/h
A = area of pipe surface, ft²
ΔΤ = excess of temperature of pipe wall over that of ambient
(surrounding atmosphere), °F
D = outside diameter of pipe, in.

Obviously, the units of q/A are not those of the right-hand side of Eq. (1.41),
and the equation is dimensional. Quantities substituted in Eq. (1.41) must be ex-
pressed in the units as given, or the equation will give the wrong answer. If other
units are to be used, the coefficient must be changed. To express AT in degrees
Celsius, for example, the numerical coefficient must be changed to 0.50 x 1.8<sup>1.25</sup> =


## Option 5: Using MinerU

# Extracting Metadata

# Chunking by Langchain (Should use semantic chunking)

In [None]:
!pip install langchain

Collecting langchain
  Downloading langchain-0.2.14-py3-none-any.whl.metadata (7.1 kB)
Collecting langchain-core<0.3.0,>=0.2.32 (from langchain)
  Downloading langchain_core-0.2.35-py3-none-any.whl.metadata (6.2 kB)
Collecting langchain-text-splitters<0.3.0,>=0.2.0 (from langchain)
  Downloading langchain_text_splitters-0.2.2-py3-none-any.whl.metadata (2.1 kB)
Collecting langsmith<0.2.0,>=0.1.17 (from langchain)
  Downloading langsmith-0.1.104-py3-none-any.whl.metadata (13 kB)
Collecting tenacity!=8.4.0,<9.0.0,>=8.1.0 (from langchain)
  Downloading tenacity-8.5.0-py3-none-any.whl.metadata (1.2 kB)
Collecting jsonpatch<2.0,>=1.33 (from langchain-core<0.3.0,>=0.2.32->langchain)
  Downloading jsonpatch-1.33-py2.py3-none-any.whl.metadata (3.0 kB)
Collecting httpx<1,>=0.23.0 (from langsmith<0.2.0,>=0.1.17->langchain)
  Downloading httpx-0.27.0-py3-none-any.whl.metadata (7.2 kB)
Collecting orjson<4.0.0,>=3.9.14 (from langsmith<0.2.0,>=0.1.17->langchain)
  Downloading orjson-3.10.7-cp310-cp31

In [None]:
from langchain.text_splitter import MarkdownTextSplitter

markdown_splitter = MarkdownTextSplitter(chunk_size=100, chunk_overlap=0)
docs = markdown_splitter.create_documents([markdown])

In [None]:
docs

[Document(page_content='# CHAPTER 1\n## Definitions and Principles'),
 Document(page_content='Chemical engineering has to do with industrial processes in which raw materials'),
 Document(page_content='are changed or separated into useful products. The chemical engineer must develop,'),
 Document(page_content='design, and engineer both the complete process and the equipment used; choose the'),
 Document(page_content='proper raw materials; operate the plants efficiently, safely, and economically; and'),
 Document(page_content='see to it that products meet the requirements set by the customers. Chemical engi-'),
 Document(page_content='neering is both an art and a science. Whenever science helps the engineer to solve a'),
 Document(page_content='problem, science should be used. When, as is usually the case, science does not give'),
 Document(page_content='a complete answer, it is necessary to use experience and judgment. The professional'),
 Document(page_content='stature of an engineer d

# Attach Metadata with each chunk

# Embedding Text Chunks with using OpenAI (Don't chunk metadata)

In [None]:
!pip uninstall openai
!pip install openai
!pip install --upgrade openai

Found existing installation: openai 1.42.0
Uninstalling openai-1.42.0:
  Would remove:
    /usr/local/bin/openai
    /usr/local/lib/python3.10/dist-packages/openai-1.42.0.dist-info/*
    /usr/local/lib/python3.10/dist-packages/openai/*
Proceed (Y/n)? Y
  Successfully uninstalled openai-1.42.0
Collecting openai
  Using cached openai-1.42.0-py3-none-any.whl.metadata (22 kB)
Using cached openai-1.42.0-py3-none-any.whl (362 kB)
Installing collected packages: openai
Successfully installed openai-1.42.0


In [None]:
# Import the OpenAI library
import openai
from google.colab import userdata

# Set your OpenAI API key
openai.api_key = #insertyourkey
EMBEDDING_MODEL = "text-embedding-3-small"

def get_embedding(text):
    """Generate an embedding for the given text using OpenAI's API."""

    # Check for valid input
    if not text or not isinstance(text, str):
        return None

    try:
        # Call OpenAI API to get the embedding
        embedding = openai.embeddings.create(input=text, model=EMBEDDING_MODEL).data[0].embedding
        return embedding
    except Exception as e:
        print(f"Error in get_embedding: {e}")
        return None

# Prepare your text chunks
context = [row.page_content for row in docs]

# Generate embedding for each chunk
embeddings = [get_embedding(text) for text in context]


Error in get_embedding: Error code: 429 - {'error': {'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.', 'type': 'insufficient_quota', 'param': None, 'code': 'insufficient_quota'}}
Error in get_embedding: Error code: 429 - {'error': {'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.', 'type': 'insufficient_quota', 'param': None, 'code': 'insufficient_quota'}}
Error in get_embedding: Error code: 429 - {'error': {'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.', 'type': 'insufficient_quota', 'param': None, 'code': 'insufficient_quot

# Vector database setup for storing embedding chunk and metadata using ChromeDB

# Embedding Queries/ Retrieval K-Nearest Neighbors (KNN) Semantic Search

Embedding Documents and Queries, and Indexing with KNN

# Retrieve Metadata (Enhancing metadata with ranking - optional)

# Fine Tuned Phi3 for generating responses

# Prompt Template and Integration of Phi3 and RAG

# Testing and Evaluation