# Pipeline 3: A RAG Pinecone vector store for Generative AI

Copyright 2024, Denis Rothman



# Installing the environment

In [None]:
# API key storage
# Store your key in a file and read it from there (you can type it directly in
# the notebook, but it will then be visible to anybody looking at your screen).
# Mount Google Drive at /content/drive. Later cells open the key files through
# the relative path drive/MyDrive/files/ (presumably resolved against /content;
# confirm the notebook's working directory).
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
!pip install openai==1.33.0
!pip install pinecone-client==4.1.1

Collecting openai==1.33.0
  Downloading openai-1.33.0-py3-none-any.whl (325 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m325.5/325.5 kB[0m [31m4.1 MB/s[0m eta [36m0:00:00[0m
Collecting httpx<1,>=0.23.0 (from openai==1.33.0)
  Downloading httpx-0.27.0-py3-none-any.whl (75 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m75.6/75.6 kB[0m [31m6.9 MB/s[0m eta [36m0:00:00[0m
Collecting httpcore==1.* (from httpx<1,>=0.23.0->openai==1.33.0)
  Downloading httpcore-1.0.5-py3-none-any.whl (77 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m77.9/77.9 kB[0m [31m7.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting h11<0.15,>=0.13 (from httpcore==1.*->httpx<1,>=0.23.0->openai==1.33.0)
  Downloading h11-0.14.0-py3-none-any.whl (58 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m58.3/58.3 kB[0m [31m5.8 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: h11, httpcore, httpx, openai
Successfully instal

In [None]:
# Read the Pinecone API key from the first line of the key file on the mounted drive.
# The context manager closes the file even if reading fails, and strip() removes
# the trailing newline that readline() keeps, which would otherwise become part
# of the key.
with open("drive/MyDrive/files/pinecone.txt", "r") as key_file:
    PINECONE_API_KEY = key_file.readline().strip()

In [None]:
# Read the OpenAI API key from the first line of the key file on the mounted drive.
# The context manager closes the file even if reading fails, and strip() removes
# the trailing newline that readline() keeps, which would otherwise become part
# of the key.
with open("drive/MyDrive/files/api_key.txt", "r") as key_file:
    API_KEY = key_file.readline().strip()

In [None]:
# The OpenAI key
import os

import openai

# Publish the key through the process environment, then hand the same value to
# the openai module. Later cells construct their OpenAI client without passing
# a key explicitly.
os.environ["OPENAI_API_KEY"] = API_KEY
openai.api_key = os.environ.get("OPENAI_API_KEY")

# The Pinecone index

In [None]:
import os
from pinecone import Pinecone, ServerlessSpec

# initialize connection to pinecone (get API key at app.pinecone.io)
# A key exported as the PINECONE_API_KEY environment variable takes precedence;
# otherwise the key read from the key file (PINECONE_API_KEY) is used.
api_key = os.environ.get('PINECONE_API_KEY') or PINECONE_API_KEY

pc = Pinecone(api_key=api_key)

In [None]:
from pinecone import ServerlessSpec

# Name of the index this notebook connects to
index_name = 'bank-index-2'

# Deployment target: taken from the environment, falling back to the defaults
# below when the variable is unset or empty
cloud = os.getenv('PINECONE_CLOUD') or 'aws'
region = os.getenv('PINECONE_REGION') or 'us-east-1'

spec = ServerlessSpec(region=region, cloud=cloud)

In [None]:
import time
import pinecone

# Create the index only if it does not exist yet (it shouldn't on a first run)
if index_name not in pc.list_indexes().names():
    pc.create_index(
        index_name,
        dimension=1536,  # dimensionality of text-embedding-ada-002
        metric='cosine',
        spec=spec
    )
    # Wait until the new index reports that it is ready instead of sleeping
    # for a fixed second, which may be too short for the index to initialize
    while not pc.describe_index(index_name).status['ready']:
        time.sleep(1)

# connect to index
index = pc.Index(index_name)
# view index stats (displayed as the cell output)
index.describe_index_stats()

{'dimension': 1536,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 1017581}},
 'total_vector_count': 1017581}

# RAG with GPT-4o

# Query the dataset

In [None]:
import time

import openai

# Embedding model used for the query; the index above was created with
# dimension 1536 to match text-embedding-ada-002
embedding_model = "text-embedding-ada-002"

# Initialize the OpenAI client
client = openai.OpenAI()


def get_embedding(text, model=embedding_model):
    """Return the embedding of ``text`` computed by the OpenAI embeddings endpoint.

    Args:
        text: The string to embed. Newline characters are replaced by spaces
            before the request is sent.
        model: Name of the embedding model; defaults to ``embedding_model``.

    Returns:
        The ``embedding`` attribute of the first item in the response data.
    """
    single_line = " ".join(text.split("\n"))
    result = client.embeddings.create(input=[single_line], model=model)
    return result.data[0].embedding

## Querying a target vector

In [None]:
import time
# The timer is started here, before the embedding request, and is read in the
# next cell after the query results have been printed, so the reported time
# covers the embedding call, the index query and the printing.
start_time = time.time()  # Start timing before the request
# Target vector: the customer record (as free text) whose nearest neighbours are searched for
query_text = "Customer Paulsen CreditScore 619Age 42Tenure 2Balance 0.0NumOfProducts 1HasCrCard 1IsActiveMember 1EstimatedSalary 101348.88Exited 1Complain 1Satisfaction Score 2Card Type DIAMONDPoint Earned 464"
# Embed the query text with the same model name used as get_embedding's default
query_embedding = get_embedding(query_text,model=embedding_model)

In [None]:
# Search the index for the 5 closest vectors to the query embedding
query_results = index.query(
    vector=query_embedding,
    top_k=5,
    include_metadata=True,  # Request metadata
)

# Show each match: id and score first, then its stored text when present
print("Query Results:")
for hit in query_results['matches']:
    print(f"ID: {hit['id']}, Score: {hit['score']}")
    has_text = 'metadata' in hit and 'text' in hit['metadata']
    if not has_text:
        print("No metadata available.")
        continue
    print(f"Text: {hit['metadata']['text']}")

# start_time was set in the previous cell, before the embedding request
response_time = time.time() - start_time
print(f"Querying response time: {response_time:.2f} seconds")

Query Results:
ID: 753144, Score: 0.941335917
Text: CustomerId: 15722061 CreditScore: 619 Age: 41 Tenure: 8 Balance: 142015.76 NumOfProducts: 2 HasCrCard: 1 IsActiveMember: 0 EstimatedSalary: 114323.66 Exited: 0 Complain: 0 Satisfaction Score: 5 Card Type: DIAMOND Point Earned: 222
ID: 773144, Score: 0.941335917
Text: CustomerId: 15722061 CreditScore: 619 Age: 41 Tenure: 8 Balance: 142015.76 NumOfProducts: 2 HasCrCard: 1 IsActiveMember: 0 EstimatedSalary: 114323.66 Exited: 0 Complain: 0 Satisfaction Score: 5 Card Type: DIAMOND Point Earned: 222
ID: 463144, Score: 0.941335917
Text: CustomerId: 15722061 CreditScore: 619 Age: 41 Tenure: 8 Balance: 142015.76 NumOfProducts: 2 HasCrCard: 1 IsActiveMember: 0 EstimatedSalary: 114323.66 Exited: 0 Complain: 0 Satisfaction Score: 5 Card Type: DIAMOND Point Earned: 222
ID: 453144, Score: 0.941335917
Text: CustomerId: 15722061 CreditScore: 619 Age: 41 Tenure: 8 Balance: 142015.76 NumOfProducts: 2 HasCrCard: 1 IsActiveMember: 0 EstimatedSalary: 1143

In [None]:
# Display the raw query response (ids, scores and metadata of the matches) as the cell output
query_results

{'matches': [{'id': '753144',
              'metadata': {'text': 'CustomerId: 15722061 CreditScore: 619 Age: '
                                   '41 Tenure: 8 Balance: 142015.76 '
                                   'NumOfProducts: 2 HasCrCard: 1 '
                                   'IsActiveMember: 0 EstimatedSalary: '
                                   '114323.66 Exited: 0 Complain: 0 '
                                   'Satisfaction Score: 5 Card Type: DIAMOND '
                                   'Point Earned: 222'},
              'score': 0.941335917,
              'values': []},
             {'id': '773144',
              'metadata': {'text': 'CustomerId: 15722061 CreditScore: 619 Age: '
                                   '41 Tenure: 8 Balance: 142015.76 '
                                   'NumOfProducts: 2 HasCrCard: 1 '
                                   'IsActiveMember: 0 EstimatedSalary: '
                                   '114323.66 Exited: 0 Complain: 0 '
               

## Extract Relevant Texts

In [None]:
# Collect the stored text of every match that carries a 'text' metadata entry
relevant_texts = []
for hit in query_results['matches']:
    if 'metadata' in hit and 'text' in hit['metadata']:
        relevant_texts.append(hit['metadata']['text'])

# Merge the collected texts into one string, one record per line for readability
line_break = '\n'
combined_text = line_break.join(relevant_texts)
print(combined_text)

CustomerId: 15722061 CreditScore: 619 Age: 41 Tenure: 8 Balance: 142015.76 NumOfProducts: 2 HasCrCard: 1 IsActiveMember: 0 EstimatedSalary: 114323.66 Exited: 0 Complain: 0 Satisfaction Score: 5 Card Type: DIAMOND Point Earned: 222
CustomerId: 15722061 CreditScore: 619 Age: 41 Tenure: 8 Balance: 142015.76 NumOfProducts: 2 HasCrCard: 1 IsActiveMember: 0 EstimatedSalary: 114323.66 Exited: 0 Complain: 0 Satisfaction Score: 5 Card Type: DIAMOND Point Earned: 222
CustomerId: 15722061 CreditScore: 619 Age: 41 Tenure: 8 Balance: 142015.76 NumOfProducts: 2 HasCrCard: 1 IsActiveMember: 0 EstimatedSalary: 114323.66 Exited: 0 Complain: 0 Satisfaction Score: 5 Card Type: DIAMOND Point Earned: 222
CustomerId: 15722061 CreditScore: 619 Age: 41 Tenure: 8 Balance: 142015.76 NumOfProducts: 2 HasCrCard: 1 IsActiveMember: 0 EstimatedSalary: 114323.66 Exited: 0 Complain: 0 Satisfaction Score: 5 Card Type: DIAMOND Point Earned: 222
CustomerId: 15722061 CreditScore: 619 Age: 41 Tenure: 8 Balance: 142015.76 N

## Augmented prompt

In [None]:
# Combine the retrieved texts into a single string, one record per line
combined_context = "\n".join(relevant_texts)
# Prompt (instruction part of the augmented input)
query_prompt="I have this customer bank record with interesting information on age, credit score and more and similar customers. What could I suggest to keep them in my bank in an email with an url to get new advantages based on the fields for each Customer ID:"
# Augmented input: instruction, target record and retrieved records, each on
# its own line. Plain concatenation fused the parts together
# ("...Customer ID:Customer Paulsen..." and "...Point Earned 464CustomerId: ...").
itext = "\n".join([query_prompt, query_text, combined_context])
print("Prompt for the Generative AI model:", itext)

Prompt for the Generative AI model: I have this customer bank record with interesting information on age, credit score and more and similar customers. What could I suggest to keep them in my bank in an email with an url to get new advantages based on the fields for each Customer ID:Customer Paulsen CreditScore 619Age 42Tenure 2Balance 0.0NumOfProducts 1HasCrCard 1IsActiveMember 1EstimatedSalary 101348.88Exited 1Complain 1Satisfaction Score 2Card Type DIAMONDPoint Earned 464CustomerId: 15722061 CreditScore: 619 Age: 41 Tenure: 8 Balance: 142015.76 NumOfProducts: 2 HasCrCard: 1 IsActiveMember: 0 EstimatedSalary: 114323.66 Exited: 0 Complain: 0 Satisfaction Score: 5 Card Type: DIAMOND Point Earned: 222
CustomerId: 15722061 CreditScore: 619 Age: 41 Tenure: 8 Balance: 142015.76 NumOfProducts: 2 HasCrCard: 1 IsActiveMember: 0 EstimatedSalary: 114323.66 Exited: 0 Complain: 0 Satisfaction Score: 5 Card Type: DIAMOND Point Earned: 222
CustomerId: 15722061 CreditScore: 619 Age: 41 Tenure: 8 Bala

## Augmented generation

In [None]:
import time

from openai import OpenAI

client = OpenAI()
gpt_model = "gpt-4o"

start_time = time.time()  # Start timing before the request

# Conversation sent to the model: fixed system instruction plus the augmented input
chat_messages = [
    {
        "role": "system",
        "content": "You are the community manager can write engaging email based on the text you have. Do not use a surname but simply Dear Valued Customer instead.",
    },
    {"role": "user", "content": itext},
]

# Sampling settings passed unchanged to the chat completion request
generation_settings = dict(
    temperature=0,
    max_tokens=300,
    top_p=1,
    frequency_penalty=0,
    presence_penalty=0,
)

response = client.chat.completions.create(
    model=gpt_model,
    messages=chat_messages,
    **generation_settings,
)
# Text of the first returned choice
print(response.choices[0].message.content)

response_time = time.time() - start_time  # Measure response time
print(f"Querying response time: {response_time:.2f} seconds")  # Print response time

Subject: Exclusive Benefits Await You at Our Bank!

Dear Valued Customer,

We hope this email finds you well. As a cherished member of our banking family, we are always looking for ways to enhance your experience and provide you with the best possible services.

We noticed that you have been with us for quite some time and have a DIAMOND card with impressive points earned. To show our appreciation for your loyalty, we are excited to offer you exclusive benefits tailored just for you!

Here are some of the exciting advantages you can now enjoy:

1. **Enhanced Rewards Program**: Earn more points on every transaction and redeem them for exciting gifts and offers.
2. **Personalized Financial Advice**: Get access to our expert financial advisors to help you make the most of your investments and savings.
3. **Exclusive Discounts**: Enjoy special discounts on various products and services, curated just for our DIAMOND cardholders.
4. **Priority Customer Service**: Experience faster and more e