In [None]:
!pip install pymilvus openai langchain faiss-cpu python-docx pdfplumber tiktoken sentence-transformers

In [64]:
!pip install pymilvus



In [None]:
!pip install -U langchain-community

In [None]:
!pip install python-dotenv

In [None]:
!pip install langchain-openai

In [1]:
# Snapshot the currently installed package versions into requirements.txt.
!pip freeze > requirements.txt

In [51]:
import os
from pymilvus import (connections, MilvusClient, utility)
from dotenv import load_dotenv

In [54]:

# Load API keys and connection settings from a .env file into the process
# environment, then read them into module-level names.
# NOTE(review): the original note here referenced /var/tmp/.env — if the file
# lives there, pass that path to load_dotenv(); confirm the intended location.
load_dotenv()  # Loads from .env file by default
api_key = os.getenv("GOOGLE_API_KEY")
debug = os.getenv("DEBUG")
pinecone_api_key = os.getenv("PINECONE_API_KEY")
OPENAI_KEY = os.getenv("OPENAI_API_KEY")
MILVUS_USER = os.getenv("MILVUS_USERNAME")
MILVUS_PASSWORD = os.getenv("MILVUS_PASSWORD")
MILVUS_URI = os.getenv("MILVUS_URI")
MILVUS_API_TOKEN = os.getenv("MILVUS_API_TOKEN")

# os.getenv returns None for unset variables. The two settings below are the
# ones later cells pass to OpenAI and Milvus, so surface a missing value here
# instead of failing later with a less obvious error.
_required_settings = {
    "OPENAI_API_KEY": OPENAI_KEY,
    "MILVUS_API_TOKEN": MILVUS_API_TOKEN,
}
_missing_settings = [name for name, value in _required_settings.items() if not value]
if _missing_settings:
    print(f"Warning: missing required environment variables: {', '.join(_missing_settings)}")

In [56]:
import time
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Milvus
# Removed direct import: from langchain_community.retrievers import MMRRetriever
from langchain.prompts import PromptTemplate
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
from pymilvus import connections, utility
from docx import Document
import os
import pdfplumber # Import pdfplumber

# --- Step 1: Load & Chunk Text ---
# Extract the text of every page of the source PDF into `full_text`.
# On any failure `full_text` is set to a placeholder sentence so later cells still run.
# NOTE(review): this is an absolute, machine-specific path — consider making it configurable.
pdf_path = r"C:\Gajanan\data\Generative AI for Cloud Solutions Architect modern AI LLMs in secure, scalable, and ethical cloud environments (Anurag Karuparti) (Z-Library).pdf"
full_text = ""
try:
    # Use pdfplumber to extract text from the PDF
    with pdfplumber.open(pdf_path) as pdf:
        # Collect per-page text while the file is open; join once afterwards
        # instead of growing a string inside the loop.
        page_texts = [page.extract_text() for page in pdf.pages]
    # Skip pages that yielded no text and terminate every kept page with a newline.
    full_text = "".join(page_text + "\n" for page_text in page_texts if page_text)
    if not full_text.strip(): # Check if any text was actually extracted
        print("Warning: No text extracted from the PDF. Check if the PDF contains extractable text.")
        full_text = "Placeholder text as no text could be extracted from the PDF." # Use placeholder if extraction fails
    # Show only a short preview; printing the whole document floods the notebook output.
    print(full_text[:1000])
except FileNotFoundError:
    print(f"Error: PDF file not found at the specified path: {pdf_path}")
    full_text = "Placeholder text because the PDF file was not found. Please replace with actual PDF content extraction."
except Exception as e:
    print(f"Error processing PDF file {pdf_path}: {e}")
    full_text = "Placeholder text due to an error during PDF processing."











Generative AI for Cloud Solutions
Architect modern AI LLMs in secure, scalable,
and ethical cloud environments
Paul Singh
Anurag Karuparti
Generative AI for Cloud Solutions
Copyright © 2024 Packt Publishing
All rights reserved. No part of this book may be reproduced, stored in a retrieval system, or transmitted
in any form or by any means, without the prior written permission of the publisher, except in the case
of brief quotations embedded in critical articles or reviews.
Every effort has been made in the preparation of this book to ensure the accuracy of the information
presented. However, the information contained in this book is sold without warranty, either express
or implied. Neither the authors, nor Packt Publishing or its dealers and distributors, will be held liable
for any damages caused or alleged to have been caused directly or indirectly by this book.
Packt Publishing has endeavored to provide trademark information about all of the companies and
products mentioned in this 

In [57]:
# Report how many characters of text are held in `full_text`.
print(len(full_text))

591891


In [58]:
# Split the extracted text into chunks of up to 3000 characters with a 300-character overlap.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=3000, chunk_overlap=300)
chunks = text_splitter.split_text(full_text)
# Total characters over all chunks; this can exceed len(full_text) because
# neighbouring chunks share overlapping text.
sum_chunk_total_chars = sum(len(chunk) for chunk in chunks)

print(f"Total chunks: {len(chunks)}")

print("sum_chunk_total_chars",sum_chunk_total_chars)
# --- Step 2: Embedding ---
# The key comes from the OPENAI_API_KEY environment variable loaded earlier.
openai_api_key = OPENAI_KEY
# Warn only when no key was actually loaded.
if not openai_api_key:
    print("Warning: OPENAI_API_KEY is not set. Add it to your .env file or set the OPENAI_API_KEY environment variable.")

embeddings = OpenAIEmbeddings(model="text-embedding-3-small", api_key=openai_api_key,disallowed_special=()) # Use the variable


Total chunks: 220
sum_chunk_total_chars 648348


In [59]:
# --- Step 3: Connect to Milvus and Store Data ---
# Helper to create Milvus index
def store_milvus(chunks, index_name, metric_type, index_type):
    """Embed `chunks` and store them in a freshly created Milvus collection.

    Args:
        chunks: List of text chunks to embed and insert.
        index_name: Name of the Milvus collection to (re)create.
        metric_type: Value passed as "metric_type" in the index parameters.
        index_type: Value passed as "index_type" in the index parameters.

    Returns:
        The vector store returned by `Milvus.from_texts`, or None when
        `chunks` is empty.
    """
    # Check for input first: returning before any server call means an
    # existing collection is not dropped when there is nothing to replace it with.
    if not chunks:
        print(f"Warning: No chunks to store for index '{index_name}'. Skipping collection creation.")
        return None

    # Drop any existing collection with this name so it is rebuilt from scratch.
    if utility.has_collection(index_name):
        utility.drop_collection(index_name)

    return Milvus.from_texts(
        chunks,
        embedding=embeddings,
        collection_name=index_name,
        connection_args={"uri": "https://in03-c8ed7ba112ba348.serverless.gcp-us-west1.cloud.zilliz.com",
        "token": MILVUS_API_TOKEN},
        index_params={"metric_type": metric_type, "index_type": index_type})


In [65]:
# Exploratory: show the docstring of the Milvus vector store class.
print(Milvus.__doc__)


.. deprecated:: 0.2.0 Use ``:class:`~langchain_milvus.MilvusVectorStore``` instead. It will not be removed until langchain-community==1.0.

`Milvus` vector store.

You need to install `pymilvus` and run Milvus.

See the following documentation for how to run a Milvus instance:
https://milvus.io/docs/install_standalone-docker.md

If looking for a hosted Milvus, take a look at this documentation:
https://zilliz.com/cloud and make use of the Zilliz vectorstore found in
this project.

IF USING L2/IP metric, IT IS HIGHLY SUGGESTED TO NORMALIZE YOUR DATA.

Args:
    embedding_function (Embeddings): Function used to embed the text.
    collection_name (str): Which Milvus collection to use. Defaults to
        "LangChainCollection".
    collection_description (str): The description of the collection. Defaults to
        "".
    collection_properties (Optional[dict[str, any]]): The collection properties.
        Defaults to None.
        If set, will override collection existing properties.
   

In [None]:
# Vector store handles; each one stays None unless its collection is created below.
milvus_flat = None # Initialize vectorstore variables
milvus_hnsw = None
milvus_ivf = None
milvus_connected = False # Flag to indicate if Milvus connection was successful

# Connect to the Zilliz Cloud cluster and, if that succeeds, build the collections.
try:
    # Alternative for a local Milvus server:
    #connections.connect("default", host="localhost", port="19530")
    # Connect to Zilliz cloud using endpoint URI and API key TOKEN.
    CLUSTER_ENDPOINT = "https://in03-c8ed7ba112ba348.serverless.gcp-us-west1.cloud.zilliz.com"

    connections.connect(
    alias='default',
    uri=CLUSTER_ENDPOINT,
    token=MILVUS_API_TOKEN

    )
    print("Connected. Existing collections:", utility.list_collections())

    # Print the server version reported by the cluster.
    print(f"Type of server: {utility.get_server_version()}")
    print("Connected to Milvus")
    milvus_connected = True # Set flag to True on successful connection

    # Build the collections only after a successful connection.
    # The flat index is disabled, so milvus_flat stays None and the "Flat"
    # retriever test is skipped later.
    # NOTE(review): `IndexType` is not imported in the visible cells; import it
    # (or pass the string "FLAT") before re-enabling the next line.
    #milvus_flat = store_milvus(chunks,"flat_index", "L2", IndexType.FLAT)
    milvus_hnsw = store_milvus(chunks, "hnsw_index", "COSINE", "HNSW")
    milvus_ivf = store_milvus(chunks, "ivf_index", "COSINE", "IVF_FLAT")
except Exception as e:
    print(f"Error connecting to Milvus or storing data: {e}")
    # Any vector store variable not yet assigned when the error occurred remains None.

Connected to Milvus


In [67]:
# --- Step 4: Retriever + Timer + Accuracy ---
def test_retriever(name, vectorstore, query):
    if vectorstore is None: # Handle cases where collection wasn't created
        print(f"Skipping {name} retriever test as the collection was not created.")
        return []

    # Use search_type argument for different retriever types
    # Ensure the vectorstore object has the as_retriever method
    if hasattr(vectorstore, 'as_retriever'):
        if name == "MMR":
            retriever = vectorstore.as_retriever(search_type="mmr", search_kwargs={"k": 5, "lambda_mult": 0.5})
        else:
            retriever = vectorstore.as_retriever(search_kwargs={"k": 5})

        start = time.time()
        # Ensure retriever returns list of Documents; handle potential errors
        try:
            docs = retriever.get_relevant_documents(query)
        except Exception as e:
            print(f"Error during {name} retrieval: {e}")
            docs = []
            return docs

        end = time.time()
        print(f"{name} Retriever Time: {end - start:.4f}s")
        # Calculate accuracy if docs are not empty
        # Note: Simple substring match for accuracy is basic;
        # more sophisticated methods are needed for robust evaluation.
        if docs:
            # Ensure document objects have 'page_content' attribute
            hits = sum(1 for d in docs if hasattr(d, 'page_content') and query.lower() in d.page_content.lower())
            accuracy = hits / len(docs)
            print(f"{name} Accuracy: {accuracy:.2%}")
        else:
             print(f"{name} Accuracy: N/A (no documents retrieved)")

        return docs
    else:
        print(f"Error: Vectorstore object for {name} does not have 'as_retriever' method.")
        return []


In [75]:
from langchain.chat_models import ChatOpenAI
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from docx import Document

def run_prompt_tuning_pipeline(query, milvus_flat, milvus_hnsw, milvus_ivf, milvus_connected, openai_api_key):
    """Benchmark the retrievers for `query`, answer it with an MMR-backed QA chain, and export the answer.

    Args:
        query: Question to retrieve context for and to answer.
        milvus_flat: Vector store used for the "Flat" retriever test (may be None).
        milvus_hnsw: Vector store used for the "HNSW" retriever test (may be None).
        milvus_ivf: Vector store used for the "MMR" retriever test and for the
            RetrievalQA chain (may be None).
        milvus_connected: Whether the Milvus connection succeeded; when False
            no retrieval or answer generation is attempted.
        openai_api_key: API key passed to ChatOpenAI.

    Returns:
        dict with keys "flat_docs", "hnsw_docs", "mmr_docs" (lists of retrieved
        documents) and "answer" (the generated answer, or a message starting
        with "Could not" when no answer was produced).

    Side effects:
        Prints progress and results, and writes "LLM_Answer_Output.docx" to the
        working directory when an answer was generated.
    """
    docs_flat, docs_hnsw, docs_mmr = [], [], []

    if milvus_connected:
        docs_flat = test_retriever("Flat", milvus_flat, query)
        docs_hnsw = test_retriever("HNSW", milvus_hnsw, query)
        print("hnsw Top Docs:", docs_hnsw)
        # Retrieval may return fewer than two documents (or [] on failure),
        # so only show the second hit when it exists.
        if len(docs_hnsw) > 1:
            print("hnsw context",docs_hnsw[1])
        docs_mmr = test_retriever("MMR", milvus_ivf, query)
        print("MMR Top Docs:", docs_mmr)
        if len(docs_mmr) > 1:
            print("mmr ivf context",docs_mmr[1])
    else:
        print("Skipping retriever tests as Milvus connection failed.")

    prompt = PromptTemplate.from_template("""
You are an AI assistant. Use the following retrieved context to answer the question.

Context:
{context}

Question:
{question}
""")

    llm = ChatOpenAI(model="gpt-4o", temperature=0.2, api_key=openai_api_key)
    output = "Could not generate answer due to an error or Milvus connection failure."
    # Track success explicitly instead of inspecting the answer text, which
    # could itself legitimately begin with "Could not".
    answer_generated = False

    if milvus_ivf is not None and milvus_connected:
        try:
            retriever = milvus_ivf.as_retriever(search_type="mmr", search_kwargs={"k": 5, "lambda_mult": 0.5})
            chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever, chain_type_kwargs={"prompt": prompt})
            # NOTE(review): recent LangChain releases deprecate `run` in favour of
            # `invoke`; consider migrating once the installed version is confirmed.
            output = chain.run(query)
            answer_generated = True
            print("Answer:\n", output)
        except Exception as e:
            print(f"Error running RetrievalQA chain with MMR: {e}")
            output = f"Could not generate answer due to an error: {e}"
    else:
        print("Milvus IVF collection was not initialized or Milvus connection failed.")
        output = "Could not create RetrievalQA chain because the Milvus IVF collection was not initialized or Milvus connection failed."

    # Export to DOCX if successful
    if answer_generated:
        try:
            doc = Document()
            doc.add_heading("LLM Answer", level=1)
            doc.add_paragraph(output)
            doc.save("LLM_Answer_Output.docx")
            print("Output saved to LLM_Answer_Output.docx")
        except Exception as e:
            print(f"Error saving output to DOCX: {e}")
    else:
        print("DOCX file not saved due to an error in generating the answer.")

    return {
        "flat_docs": docs_flat,
        "hnsw_docs": docs_hnsw,
        "mmr_docs": docs_mmr,
        "answer": output
    }


In [98]:
# Experiment 1: ask about fine-tuning techniques; `results` holds the retrieved
# documents per retriever plus the generated answer.
# NOTE(review): "Expain" looks like a typo for "Explain"; the string is passed
# verbatim to the retrievers and the QA chain.
query = "Expain in details the techniques for fine tuning models?"
results = run_prompt_tuning_pipeline(
    query=query,
    milvus_flat=milvus_flat,
    milvus_hnsw=milvus_hnsw,
    milvus_ivf=milvus_ivf,
    milvus_connected=milvus_connected,
    openai_api_key=OPENAI_KEY
)


Skipping Flat retriever test as the collection was not created.
HNSW Retriever Time: 0.9869s
HNSW Accuracy: 0.00%
hnsw Top Docs: [Document(metadata={'pk': 458312173011974295}, page_content='Techniques for fine-tuning models\nIn this section, we’ll discuss two fine-tuning methods: the traditional full fine-tuning approach and\nadvanced techniques such as PEFT, which integrates optimizations to attain comparable results to full\nfine-tuning but with higher efficiency and reduced memory and computational expenses.\nFull fine-tuning\nFull fine-tuning refers to the approach where all parameters/weights of a pretrained model are adjusted\nusing a task-specific dataset. It’s a straightforward method and is generally effective, but it might\nrequire a considerable amount of data to avoid overfitting and compute, especially for large models.\nThe challenges with generic full fine-tuning methods include updating all the model parameters of\nthe LLMs for every downstream task. Here are some more 

In [93]:
import pandas as pd

def write_results_to_csv_df(results, query,filename="results_output.csv"):
    """Persist one pipeline run as a single-row CSV file.

    Every key of `results` becomes a column. List values are flattened into one
    string whose items are separated by a "---" line; any other value is
    stringified as-is. A "query" column holding `query` is set last.

    Args:
        results: Mapping of column name to a value or list of values.
        query: The question that produced `results`.
        filename: Destination CSV path.
    """
    def _as_cell(item):
        # Lists collapse into one delimited string; everything else is stringified.
        if isinstance(item, list):
            return "\n---\n".join(map(str, item))
        return str(item)

    # Wrapping each cell in a one-element list yields a one-row frame.
    row = {column: [_as_cell(item)] for column, item in results.items()}

    frame = pd.DataFrame.from_dict(row)
    frame["query"] = query

    frame.to_csv(filename, index=False, encoding="utf-8")
    print(f"Results saved to {filename}")


In [99]:
# Export the current `results` together with the current `query` to a CSV file.
write_results_to_csv_df(results,query, filename="results_output_finttech.csv")

Results saved to results_output_finttech.csv


In [100]:
# Experiment 2: prompt-tuning process.
# Bind the question to `query` before running so that `results` and `query`
# always describe the same run when they are exported together.
query = "Explain process of prompt tuning in detail with 500 bulletpoints?"
results = run_prompt_tuning_pipeline(
    query=query,
    milvus_flat=milvus_flat,
    milvus_hnsw=milvus_hnsw,
    milvus_ivf=milvus_ivf,
    milvus_connected=milvus_connected,
    openai_api_key=OPENAI_KEY
)


Skipping Flat retriever test as the collection was not created.
HNSW Retriever Time: 0.6683s
HNSW Accuracy: 0.00%
hnsw Top Docs: [Document(metadata={'pk': 458312173011974298}, page_content='specific approach, leading to “efficient multitask serving.”\nPrompt design (prompt engineering):\n• The focus is on designing a very specific input or prompt to guide the pre-trained model to\nproduce the desired output.\n• Like prompt tuning, the main model remains “frozen”.\n• This method is about exploiting the vast knowledge and capabilities of the pre-trained model\nby just crafting the right input. As mentioned earlier, we will cover prompt engineering in\ndetail in Chapter 5.\nIn prompt tuning and prompt design, original model weights remain frozen, whereas in model tuning\nmodel parameters are updated:\n62 Fine-Tuning – Building Domain-Specific LLM Applications\nFigure 3.10 – Model tuning, prompt tuning, and prompt design\nThe following figure demonstrates model tuning (full fine-tuning) on

In [101]:
# Set `query` to the prompt-tuning-process question so the CSV export below records it.
query="Explain process of prompt tuning in detail with 500 bulletpoints?"

In [103]:
# Export the current `results` together with the current `query` to a CSV file.
write_results_to_csv_df(results, query,filename="results_output_prompt_tuning_process2.csv")

Results saved to results_output_prompt_tuning_process2.csv


In [104]:
# Experiment 3: types of prompt tuning.
# Bind the question to `query` before running so that `results` and `query`
# always describe the same run when they are exported together.
query = "Give types of prompt tuning?"
results = run_prompt_tuning_pipeline(
    query=query,
    milvus_flat=milvus_flat,
    milvus_hnsw=milvus_hnsw,
    milvus_ivf=milvus_ivf,
    milvus_connected=milvus_connected,
    openai_api_key=OPENAI_KEY
)


Skipping Flat retriever test as the collection was not created.
HNSW Retriever Time: 1.1928s
HNSW Accuracy: 0.00%
hnsw Top Docs: [Document(metadata={'pk': 458312173011974297}, page_content='location in the embedding vector space. Hence, they are also referred to as hard prompts. On the\nother hand, soft prompts are not confined to fixed, discrete words in natural language and can assume\nany value in the multi-dimensional embedding vector space. In the following figure, words such as\n“jump,” “fox,” and others are hard prompts, whereas the unlabeled black-colored token is a soft prompt.\nPrompt tuning process\nIn prompt tuning, soft prompts, also known as virtual tokens, are concatenated with the prompts;\nit’s left to a supervised training process to determine the optimal values. As shown in the following\nfigure, these trainable soft tokens are prepended to an embedding vector representation – in this case,\n“The student learns science:”\nFigure 3.8 – Soft prompt concatenation\nThe f

In [105]:
# Set `query` to the prompt-tuning-types question so the CSV export below records it.
query="Give types of prompt tuning?"

In [106]:
# Export the current `results` together with the current `query` to a CSV file.
write_results_to_csv_df(results, query,filename="results_output_types_prompt_tuning2.csv")

Results saved to results_output_types_prompt_tuning2.csv
