In [3]:
from utils import read_file, divide_docs, recover_doc, calculate_similarity
import oci
import pandas as pd
from numpy import array, max
import yaml


# Embedding a document in a vector space

In [4]:
# Load the OCI SDK configuration: profile "DEFAULT" of the local file named 'config'.
config = oci.config.from_file(file_location='config', profile_name="DEFAULT")
# Endpoint URL handed to the Generative AI inference client in the next cell.
endpoint = "https://inference.generativeai.us-chicago-1.oci.oraclecloud.com"

In [5]:
# Client configuration: build the inference client shared by every embedding and
# generation call in this notebook.
# NOTE(review): timeout is presumably (connect, read) in seconds - confirm against the OCI SDK docs.
generative_ai_inference_client = oci.generative_ai_inference.GenerativeAiInferenceClient(
    config=config,
    service_endpoint=endpoint,
    retry_strategy=oci.retry.NoneRetryStrategy(),
    timeout=(10, 240),
)

In [6]:
# Load the YAML file with secrets using PyYAML.
# `secret['compartment_id']` is read by the embedding and generation functions below,
# so a broken or incomplete file must stop the notebook here instead of surfacing
# later as a confusing NameError/TypeError.
with open("secrets.yaml", 'r') as stream:
    try:
        secret = yaml.safe_load(stream)
    except yaml.YAMLError as exc:
        raise RuntimeError("secrets.yaml could not be parsed as YAML") from exc

# An empty file makes safe_load return None; a file without the key is equally unusable.
if not isinstance(secret, dict) or 'compartment_id' not in secret:
    raise KeyError("secrets.yaml must be a mapping that contains 'compartment_id'")

In [7]:
def embed_query(query, generative_ai_inference_client, input_type="SEARCH_QUERY"):
    '''
    Request embeddings for one or more texts from the OCI Generative AI service.

    Parameters
    ----------
    query : list of str, or str
        Texts to embed. A bare string is wrapped into a one-element list so it is
        sent as a single input.
    generative_ai_inference_client
        Client object whose ``embed_text`` method receives the request.
    input_type : str, optional
        Value written to the request's ``input_type`` field. Defaults to
        "SEARCH_QUERY", the value this function previously always sent. Pass
        "SEARCH_DOCUMENT" when embedding corpus chunks rather than user queries.

    Returns
    -------
    The ``data.embeddings`` attribute of the service response. Callers in this
    notebook index it positionally, one entry per input.

    Notes
    -----
    Reads the module-level ``secret`` mapping for the compartment id.
    '''
    if isinstance(query, str):
        query = [query]

    embed_text_detail = oci.generative_ai_inference.models.EmbedTextDetails(
        # Embedding model to use.
        serving_mode=oci.generative_ai_inference.models.OnDemandServingMode(
            model_id="cohere.embed-english-v3.0"
        ),
        inputs=query,
        # Kind of text being embedded (a search query by default).
        input_type=input_type,
        truncate="NONE",
        compartment_id=secret['compartment_id'],
    )

    embed_text_response = generative_ai_inference_client.embed_text(embed_text_detail)
    return embed_text_response.data.embeddings

In [8]:
def add_chunks_to_df(df_docs, doc_title, doc_text):
    '''
    Split a document into chunks, embed them, and append one row per chunk.

    Parameters
    ----------
    df_docs : pandas.DataFrame
        Table to extend. Each appended row is
        ``[doc_title, chunk_text, chunk_embedding, chunk_number]``, so the frame
        needs four columns in that order. It is modified in place.
    doc_title : str
        Title stored on every row produced for this document.
    doc_text : str
        Full text, split with ``divide_docs``.

    Returns
    -------
    pandas.DataFrame
        The same ``df_docs`` object, with the new rows appended.

    Raises
    ------
    ValueError
        If fewer embeddings than chunks come back from ``embed_query``.

    Notes
    -----
    Uses the module-level ``generative_ai_inference_client``.
    NOTE(review): chunks are embedded through ``embed_query``, which this file
    defines with the "SEARCH_QUERY" input type - confirm that is intended for
    corpus text.
    '''
    chunks = divide_docs(doc_text)
    if len(chunks) == 0:
        # Nothing to embed: skip the service call and leave the table untouched.
        return df_docs

    embeds = embed_query(chunks, generative_ai_inference_client)
    if len(embeds) < len(chunks):
        raise ValueError(
            f"expected {len(chunks)} embeddings for {doc_title!r}, got {len(embeds)}"
        )

    # Chunk numbers are 1-based; each row is appended at the next integer label.
    for chunk_number, (chunk, embed) in enumerate(zip(chunks, embeds), start=1):
        df_docs.loc[len(df_docs.index)] = [doc_title, chunk, embed, chunk_number]
    return df_docs



In [9]:
# List every .txt file in the docs folder and add its chunks to the dataframe.
import os

docs_folder = "../docs/"
df_docs = pd.DataFrame([], columns=['doc_title', 'doc_text', 'doc_embed', 'n_chunk'])

# os.listdir returns names in arbitrary order; sorting keeps the row order (and any
# tie-breaking on equal similarity scores) reproducible across runs.
for file in sorted(os.listdir(docs_folder)):
    if file.endswith(".txt"):
        doc = read_file(os.path.join(docs_folder, file))
        df_docs = add_chunks_to_df(df_docs, file, doc)


In [10]:
# Inspect the chunk table: one row per chunk with its title, text, embedding and chunk number.
df_docs

Unnamed: 0,doc_title,doc_text,doc_embed,n_chunk
0,OML Services - Deploy.txt,Deploy Oracle Machine Learning Models\nOracle ...,"[-0.009811401, -0.033966064, -0.035125732, -0....",1
1,OML Services - Deploy.txt,ust send this token in the Authorization heade...,"[0.01676941, -0.015113831, 5.930662e-05, -0.06...",2
2,OML Services - Deploy.txt,ment of a deployed model endpoint with URI km_...,"[-0.0005927086, -0.069885254, 0.01576233, -0.0...",3
3,Overview of Generative AI Service.txt,Overview of Generative AI Service\nOracle Clou...,"[-0.018096924, -0.047912598, -0.012390137, -0....",1
4,Overview of Generative AI Service.txt,which the fine-tuned model is derived from.\n\...,"[-0.017562866, -0.026641846, -0.02432251, -0.0...",2
5,Overview of Generative AI Service.txt,"s, for example:\n\nEvaluate a list of question...","[0.00844574, -0.033172607, -0.008522034, -0.04...",3
6,Pretrained Foundational Models in Generative A...,Pretrained Foundational Models in Generative A...,"[-0.009719849, -0.021118164, -0.014373779, -0....",1
7,Pretrained Foundational Models in Generative A...,ish or multilingual.\nModel creates a 384-dime...,"[-0.04537964, -0.012008667, -0.033569336, -0.0...",2
8,Pretrained Image Analysis Models.txt,Pretrained Image Analysis Models\nVision provi...,"[0.059417725, 0.0031318665, 0.008033752, -0.02...",1
9,Pretrained Image Analysis Models.txt,the objects and identifies them.\n\nVision pr...,"[0.010192871, -0.003168106, -0.031311035, -0.0...",2


In [11]:
# Sanity check: print what recover_doc returns for one of the ingested titles
# (presumably the document text rebuilt from its chunks - see utils.recover_doc).
print(recover_doc(df_docs,'Pretrained Image Analysis Models.txt'))

Pretrained Image Analysis Models
Vision provides pretrained image analysis AI models that let you to find and tag objects, text, and entire scenes in images.

Pretrained models let you use AI with no data science experience. Provide an image to the Vision service and get back information about the objects, text, scenes, and any faces in the image without having to create your own model.

Use Cases
Here are several use cases for pretrained image analysis models.

Digital asset management
Tag digital media-like images for better indexing and retrieval.
Scene monitoring
Detect if items are on retail shelves, vegetation is growing in the surveillance image of a power line, or if trucks are available at a lot for delivery or shipment.
Face detection
Privacy: Hide identities by adding a blur to the image using face location information returned through the face detection feature.
Prerequisite for Biometrics: Use the facial quality score to decide if a face is clear and unobstructed.
Digital 

# Measuring the similarity between query and docs

In [12]:
# Example user question used to try out the similarity search below.
query = "Please help me with iot stuff"

In [13]:
# Embed the query and score it against every stored chunk embedding.
# embed_query takes a list of inputs, so the single query is wrapped in a list and
# query_embedding is the service's embeddings result for that one-element list.
query_embedding = embed_query([query], generative_ai_inference_client)
embeds_array = array(df_docs['doc_embed'])
# One score per row of df_docs, in row order (the next cell relies on that alignment).
similarities = [calculate_similarity(query_embedding, embedding) for embedding in embeds_array]

In [14]:
# Return the most similar document.
# NOTE: `max` here is numpy's max (imported at the top via `from numpy import array, max`),
# which shadows the builtin.
max_similarity = max(similarities)
# Position of the best score in the list == positional row index in df_docs.
most_similar_doc = df_docs.iloc[similarities.index(max_similarity)]
print(most_similar_doc['doc_title'], "Similaty Score:", round(max_similarity,3))
# Last expression of the cell: display recover_doc's result for the winning chunk's title.
recover_doc(df_docs, most_similar_doc['doc_title'])

Speech Overview.txt Similaty Score: 0.289


"Speech Overview\nYou can use the Speech service to convert media files to readable text that's stored in JSON and SRT format.\n\nSpeech harnesses the power of spoken language enabling you to easily convert media files containing human speech into highly exact text transcriptions. The service is an Oracle Cloud Infrastructure (OCI) native application that you can access using the Console, REST API, CLI, and SDK. In addition, you can use the Speech service in a Data Science notebook session.\n\nSpeech uses automatic speech recognition (ASR) technology to provide a grammatically correct transcription. Speech handles low-fidelity media recordings and transcribes challenging recordings such as meetings or call centers calls. Using Speech, you can turn files stored in Object Storage or a data asset into exact, normalized, timestamped, and profanity-filtered text. This functionality is only available with the Speech. For example, you could index the output of speech (a text file) using Data 

# LLM Prompting

In [15]:
def generate_text(prompt, model_id, generative_ai_inference_client = generative_ai_inference_client,
                  max_tokens=1024, temperature=0.11, frequency_penalty=0, top_p=0.2):
    '''
    Generate text from a prompt with an LLM served by OCI Generative AI.

    Parameters
    ----------
    prompt : str
        Prompt placed on the inference request.
    model_id : str
        Model identifier passed to ``OnDemandServingMode``.
    generative_ai_inference_client
        Client whose ``generate_text`` method is called. Defaults to the
        module-level client as it existed when this function was defined.
    max_tokens, temperature, frequency_penalty, top_p
        Values copied onto the ``CohereLlmInferenceRequest`` fields of the same
        name. The defaults are the values that were previously hard-coded.

    Returns
    -------
    The ``text`` attribute of the first entry in the response's ``generated_texts``.

    Notes
    -----
    Reads the module-level ``secret`` mapping for the compartment id.
    '''
    llm_inference_request = oci.generative_ai_inference.models.CohereLlmInferenceRequest()
    llm_inference_request.prompt = prompt
    llm_inference_request.max_tokens = max_tokens
    llm_inference_request.temperature = temperature
    llm_inference_request.frequency_penalty = frequency_penalty
    llm_inference_request.top_p = top_p

    generate_text_detail = oci.generative_ai_inference.models.GenerateTextDetails()
    generate_text_detail.serving_mode = oci.generative_ai_inference.models.OnDemandServingMode(model_id=model_id)
    generate_text_detail.inference_request = llm_inference_request
    generate_text_detail.compartment_id = secret['compartment_id']
    generate_text_response = generative_ai_inference_client.generate_text(generate_text_detail)

    return generate_text_response.data.inference_response.generated_texts[0].text

In [16]:
# Baseline without retrieval: send the bare question straight to the LLM.
# model_id is the identifier passed to OnDemandServingMode inside generate_text.
model_id = "ocid1.generativeaimodel.oc1.us-chicago-1.amaaaaaask7dceyafhwal37hxwylnpbcncidimbwteff4xha77n5xz4m7p6a"
prompt = "Please help me with iot stuff"
ans = generate_text(prompt, model_id)

In [17]:
# Show the generated answer as plain text.
print(ans)

 I'd be happy to help you with Internet of Things (IoT) related concepts and questions. Feel free to ask specific questions or share the area of IoT you'd like to explore further. Here's a general overview of IoT to get us started:

The Internet of Things (IoT) refers to the interconnection of devices, vehicles, buildings, and other physical objects that are embedded with electronics, software, sensors, and network connectivity, enabling them to collect and exchange data. This network of physical objects, connected to the internet, can communicate and collaborate with each other and with human users via smart devices.

Here are some key aspects and components of IoT:

1. **Devices**: These are the physical objects or systems that are connected to the internet. Examples include smart home devices like light bulbs, thermostats, and security cameras; industrial sensors and equipment; healthcare devices like wearables and medical devices; and many more. These devices often have embedded se

# RAG Assistant

In [18]:
def get_knowledge(query, df_docs, generative_ai_inference_client, 
                  threshold=0.45, verbose=False):
    '''
    Find the stored document that best matches a query.

    Parameters
    ----------
    query : str
        Question to embed and compare against the stored chunk embeddings.
    df_docs : pandas.DataFrame
        Chunk table; the 'doc_embed' and 'doc_title' columns are read.
    generative_ai_inference_client
        Client forwarded to ``embed_query``.
    threshold : float, optional
        Minimum best-chunk similarity required to return a document.
    verbose : bool, optional
        When true, print the best chunk's title and its rounded score.

    Returns
    -------
    The result of ``recover_doc`` for the title of the best-scoring chunk, or
    ``None`` when the best score is below ``threshold`` or ``df_docs`` has no rows.

    Notes
    -----
    ``max`` resolves to the module-level name, which in this notebook is numpy's
    ``max`` (it shadows the builtin).
    '''
    if len(df_docs) == 0:
        # No chunks to compare against: taking the max of an empty list would raise,
        # and embedding the query would be a wasted service call.
        return None

    query_embedding = embed_query([query], generative_ai_inference_client)
    embeds_array = array(df_docs['doc_embed'])
    # One score per row, in row order, so a list position maps directly to .iloc.
    similarities = [calculate_similarity(query_embedding, embedding) for embedding in embeds_array]
    max_similarity = max(similarities)
    most_similar_doc = df_docs.iloc[similarities.index(max_similarity)]
    if verbose:
        print(most_similar_doc['doc_title'], "Similaty Score:", round(max_similarity,3))
    if max_similarity < threshold:
        return None
    return recover_doc(df_docs, most_similar_doc['doc_title'])


In [19]:
def get_answer(query, df_docs, model_id, generative_ai_inference_client, 
               threshold=.45, verbose=False):
    '''
    Answer a query with the LLM, grounded on the best-matching stored document.

    Parameters
    ----------
    query : str
        User question.
    df_docs : pandas.DataFrame
        Chunk table forwarded to ``get_knowledge``.
    model_id : str
        Model identifier forwarded to ``generate_text``.
    generative_ai_inference_client
        Client used for BOTH the retrieval embedding and the text generation.
    threshold : float, optional
        Similarity threshold forwarded to ``get_knowledge``.
    verbose : bool, optional
        Forwarded to ``get_knowledge``.

    Returns
    -------
    str
        A fixed apology message when ``get_knowledge`` returns ``None``; otherwise
        the result of ``generate_text`` for the prompt "<document>\\n\\n<query>".
    '''
    most_similar_doc = get_knowledge(query, df_docs, generative_ai_inference_client,
                                     threshold, verbose)
    if most_similar_doc is None:
        return "I'm sorry, I don't have the information in my database."

    prompt = most_similar_doc + "\n\n" + query
    # Pass the caller's client explicitly: otherwise generate_text falls back to the
    # client bound as its default argument and the one given here would be ignored.
    return generate_text(prompt, model_id, generative_ai_inference_client)

In [24]:
# Question for the end-to-end RAG run below (overwrites the earlier example query).
query = "Which services are avaliable in the OCI vision API?"

In [25]:
# Run retrieval + generation; threshold=0.4 overrides get_answer's 0.45 default and
# verbose=True prints the matched title and score before the answer.
print(get_answer(query, df_docs, model_id, generative_ai_inference_client, 
                 threshold=0.4, verbose=True))

Vision Service Overview.txt Similaty Score: 0.633
 The Oracle Cloud Infrastructure (OCI) Vision API provides two main categories of services: Document AI and Image Analysis. 

Document AI:
- Optical Character Recognition (OCR)
- Document Classification
- Language Classification
- Table Extraction
- Key-Value Extraction

Image Analysis:
- Object Detection
- Image Classification
- Face Detection
- Custom Object Detection
- Custom Image Classification

It's important to note that according to the provided documentation, some of these features are set to be moved to a new service called Document Understanding, and will only be available in Vision until January 1, 2024. After this date, they will be available exclusively in Document Understanding. 

Please refer to the official Oracle Cloud Infrastructure Vision API documentation for the most up-to-date information regarding available services and any service changes. 

Would you like me to help you with anything else regarding the OCI Visi