In [1]:
# Install the third-party packages this notebook imports (openai, numpy,
# scikit-learn for `sklearn`, PyPDF2) into the kernel's environment.
# NOTE(review): versions are unpinned, so a fresh install may pull different releases.
%pip install openai numpy scikit-learn PyPDF2

Note: you may need to restart the kernel to use updated packages.


In [42]:
import os
from openai import OpenAI
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
import PyPDF2

# Read the API key from the environment so it is never hardcoded in the notebook.
# OPENAI_API_KEY is now the single source of truth for the client below (it used
# to be an unused empty string while the client read os.environ directly).
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "")
if not OPENAI_API_KEY:
    # Fail early with an actionable message instead of a bare KeyError.
    raise RuntimeError(
        "OPENAI_API_KEY is not set; export it in the environment before starting the kernel."
    )

# Shared OpenAI client used by the embedding and chat-completion cells.
client = OpenAI(api_key=OPENAI_API_KEY)

In [6]:
# Function to read text from a PDF
def read_pdf(file_path):
    """Return the extracted text of every page of a PDF as one string.

    Args:
        file_path: Path of the PDF file; it is opened in binary mode.

    Returns:
        The per-page results of ``extract_text()`` concatenated in page
        order, with no separator inserted between pages.
    """
    with open(file_path, 'rb') as pdf_file:
        pdf_reader = PyPDF2.PdfReader(pdf_file)
        # Extract while the file is still open: the reader works off the stream.
        page_texts = [page.extract_text() for page in pdf_reader.pages]
    return "".join(page_texts)

# Load text from a PDF: read_pdf returns the text of all pages as a single string.
# NOTE(review): "path_to_pdf_file.pdf" looks like a placeholder — point it at a real file.
pdf_text = read_pdf("path_to_pdf_file.pdf")

In [18]:
# Sanity-check the extraction: total character count, then the first 100 characters.
print(f"Text length:\n{len(pdf_text)}\n")
print(f"First 100 char:\n{pdf_text[:100]}")

Text length:
1952

First 100 char:
Quarintium Drift in Quantum Computing
In the field of quantum computing, researchers have recently d


In [34]:
# Function to split text into chunks with overlap
def split_text(text, chunk_size=400, overlap=20):
    """Split ``text`` into fixed-size character chunks that overlap.

    Consecutive chunks start ``chunk_size - overlap`` characters apart, so
    each chunk repeats the last ``overlap`` characters of the previous one.
    The final chunk may be shorter than ``chunk_size``.

    Args:
        text: The string to split.
        chunk_size: Maximum number of characters per chunk; must be > 0.
        overlap: Number of characters shared by neighbouring chunks; must
            satisfy ``0 <= overlap < chunk_size``.

    Returns:
        A list of chunk strings in document order (empty for empty text).

    Raises:
        ValueError: If ``chunk_size`` or ``overlap`` is out of range.
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive integer")
    # overlap >= chunk_size would make the stride zero or negative (crash or
    # silently empty result); a negative overlap would skip text between chunks.
    if not 0 <= overlap < chunk_size:
        raise ValueError("overlap must satisfy 0 <= overlap < chunk_size")

    step = chunk_size - overlap
    chunks = []
    for start in range(0, len(text), step):
        chunks.append(text[start:start + chunk_size])
        # Once a chunk reaches the end of the text, any further chunk would be
        # a pure suffix of it (only overlap characters), so stop here.
        if start + chunk_size >= len(text):
            break
    return chunks

# Split text into chunks, using split_text's defaults (chunk_size=400, overlap=20).
chunks = split_text(pdf_text)

In [35]:
# Inspect the split: how many chunks were produced, then the text of the first one.
print(f"Chunks length:\n{len(chunks)}\n")
print(f"First chunk:\n{chunks[0]}")

Chunks length:
6

First chunk:
Quarintium Drift in Quantum Computing
In the field of quantum computing, researchers have recently discovered a phenomenon they call
Quarintium Drift. This phenomenon occurs when particles within a quantum system experience an
unexpected shift in their state, resulting in unpredictable outcomes during computation. The
Quarintium Drift has perplexed scientists due to its seemingly random nature, ye


In [57]:
# Function to create embeddings using OpenAI API
def create_embeddings(chunks, model="text-embedding-ada-002", batch_size=32):
    """Embed each text in ``chunks`` with the OpenAI embeddings endpoint.

    Texts are sent in batches (one request per ``batch_size`` texts) instead
    of one request per text, which cuts the number of network round-trips.

    Args:
        chunks: Iterable of strings to embed.
        model: Name of the embedding model to request.
        batch_size: Maximum number of texts per API request; must be >= 1.
            NOTE(review): kept small by default so a request stays within the
            API's per-request limits — confirm against the current API docs.

    Returns:
        ``np.ndarray`` with one row per input text, in input order. For an
        empty input no request is made and an empty array is returned.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be at least 1")

    texts = list(chunks)
    embeddings = []
    for start in range(0, len(texts), batch_size):
        response = client.embeddings.create(
            input=texts[start:start + batch_size],
            model=model,
        )
        # Each returned item carries the index of the input it belongs to;
        # sort on it so rows line up with the inputs regardless of reply order.
        ordered = sorted(response.data, key=lambda item: item.index)
        embeddings.extend(item.embedding for item in ordered)
    return np.array(embeddings)

# Create embeddings for the chunks: one embedding per chunk, returned as a NumPy array.
chunk_embeddings = create_embeddings(chunks)

In [67]:
# Function to perform vector search
def vector_search(query, chunk_embeddings, chunks, top_k):
    """Return the ``top_k`` chunks most similar to ``query``.

    The query is embedded with ``create_embeddings`` and compared against
    every row of ``chunk_embeddings`` by cosine similarity.

    Args:
        query: The search text.
        chunk_embeddings: Embeddings of ``chunks``, one row per chunk.
        chunks: The chunk texts, in the same order as ``chunk_embeddings``.
        top_k: Maximum number of chunks to return.

    Returns:
        Up to ``top_k`` chunk strings ordered from most to least similar;
        an empty list when ``top_k`` is not positive or there are no chunks.
    """
    # Guard before embedding the query: a tail slice with top_k == 0 ([-0:])
    # would select *every* chunk, and there is no point paying for an API
    # call when nothing can be returned.
    if top_k <= 0 or len(chunks) == 0:
        return []

    query_embedding = create_embeddings([query])[0]
    similarities = cosine_similarity([query_embedding], chunk_embeddings)[0]
    # argsort is ascending; reverse for best-first, then keep the first top_k.
    ranked_indices = similarities.argsort()[::-1][:top_k]
    return [chunks[i] for i in ranked_indices]

# Example query
query = "what is Quarintium Drift in Quantum Computing?"

# Perform vector search
# top_k is the number of best-matching chunks to retrieve; the result is
# later joined into the context passed to the chat model.
top_k = 1
relevant_chunks = vector_search(query, chunk_embeddings, chunks, top_k)

In [74]:
# Build the chat messages: a system instruction, then the retrieved context
# together with the user's question.
def _text_message(role, text):
    """Return one chat message whose content is a single text part."""
    return {"role": role, "content": [{"type": "text", "text": text}]}


# Retrieved chunks are separated by newlines inside the prompt.
context = "\n".join(relevant_chunks)
prompt = [
    _text_message(
        "system",
        "You are an AI assistant that provides answer to the user's query solely based on the provided content.",
    ),
    _text_message(
        "user",
        f"Based on the following information:\n{context}\n\nAnswer the following question:\n{query}",
    ),
]
print(f"Prompt:\n{prompt}\n")

Prompt:
[{'role': 'system', 'content': [{'type': 'text', 'text': "You are an AI assistant that provides answer to the user's query solely based on the provided content."}]}, {'role': 'user', 'content': [{'type': 'text', 'text': 'Based on the following information:\nQuarintium Drift in Quantum Computing\nIn the field of quantum computing, researchers have recently discovered a phenomenon they call\nQuarintium Drift. This phenomenon occurs when particles within a quantum system experience an\nunexpected shift in their state, resulting in unpredictable outcomes during computation. The\nQuarintium Drift has perplexed scientists due to its seemingly random nature, ye\n\nAnswer the following question:\nwhat is Quarintium Drift in Quantum Computing?'}]}]



In [77]:
# Send the assembled messages to the chat model and keep the raw API response.
# max_tokens=150 limits the length of the generated answer, so long answers
# may be cut off.
response = client.chat.completions.create(
    model="gpt-4o",  # Replace with the appropriate model name
    messages=prompt,
    max_tokens=150
)

In [90]:
def format_text(text, max_words_per_line=15):
    """Re-wrap ``text`` so every line holds at most ``max_words_per_line`` words.

    The text is split on whitespace, so existing line breaks and repeated
    spaces are discarded; words are re-joined with single spaces and lines
    with newlines.

    Args:
        text: The string to re-wrap.
        max_words_per_line: Number of words placed on each output line.

    Returns:
        The re-wrapped string (empty if ``text`` contains no words).
    """
    tokens = text.split()
    line_starts = range(0, len(tokens), max_words_per_line)
    return "\n".join(
        " ".join(tokens[start:start + max_words_per_line]) for start in line_starts
    )

# Take the message text of the first returned choice and print it re-wrapped
# by format_text (15 words per line by default).
# NOTE(review): assumes message.content is a string — confirm it cannot be None here.
response_text = response.choices[0].message.content
print(format_text(response_text))

Quarintium Drift in Quantum Computing is a phenomenon where particles within a quantum system experience
an unexpected shift in their state, leading to unpredictable outcomes during computation. It is characterized
by its seemingly random nature, perplexing scientists.
