<a href="https://colab.research.google.com/github/RexPersicus/ChatGPT_Prompt_Eng_01/blob/main/Claude_RAG_01.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [14]:
 #Install required packages
!pip install langchain langchain-community chromadb pypdf python-dotenv openai tiktoken

Collecting tiktoken
  Downloading tiktoken-0.8.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.6 kB)
Downloading tiktoken-0.8.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.2/1.2 MB[0m [31m12.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: tiktoken
Successfully installed tiktoken-0.8.0


Collecting langchain-community
  Downloading langchain_community-0.3.8-py3-none-any.whl.metadata (2.9 kB)
Collecting SQLAlchemy<2.0.36,>=1.4 (from langchain-community)
  Downloading SQLAlchemy-2.0.35-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.6 kB)
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain-community)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting httpx-sse<0.5.0,>=0.4.0 (from langchain-community)
  Downloading httpx_sse-0.4.0-py3-none-any.whl.metadata (9.0 kB)
Collecting langchain<0.4.0,>=0.3.8 (from langchain-community)
  Downloading langchain-0.3.9-py3-none-any.whl.metadata (7.1 kB)
Collecting langchain-core<0.4.0,>=0.3.21 (from langchain-community)
  Downloading langchain_core-0.3.21-py3-none-any.whl.metadata (6.3 kB)
Collecting pydantic-settings<3.0.0,>=2.4.0 (from langchain-community)
  Downloading pydantic_settings-2.6.1-py3-none-any.whl.metadata (3.5 kB)
Collecting marshmallow<4.0.0,>=3.18.0 (from datac

In [15]:
import os
from langchain.document_loaders import PyPDFLoader, TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.chat_models import ChatOpenAI
from langchain.chains import RetrievalQA
from dotenv import load_dotenv
import openai

In [16]:
# Load OpenAI API key
load_dotenv()

# Get API key from environment variables
OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')
if not OPENAI_API_KEY:
    raise ValueError("OPENAI_API_KEY not found in environment variables. Please check your .env file.")

# Set OpenAI API key for use with langchain
os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY

In [17]:
def load_documents(folder_path='uploads'):
    """Load documents from a folder containing PDF and text files."""
    documents = []

    if not os.path.exists(folder_path):
        raise ValueError(f"Folder '{folder_path}' not found!")

    files = os.listdir(folder_path)
    if not files:
        raise ValueError(f"No files found in '{folder_path}' directory!")

    print(f"Found {len(files)} files in uploads folder:")
    for file in files:
        file_path = os.path.join(folder_path, file)
        print(f"Processing: {file}")
        if file.lower().endswith('.pdf'):
            loader = PyPDFLoader(file_path)
            documents.extend(loader.load())
        elif file.lower().endswith('.txt'):
            loader = TextLoader(file_path)
            documents.extend(loader.load())
        else:
            print(f"Skipping {file} - not a PDF or TXT file")

    if not documents:
        raise ValueError("No valid documents (PDF/TXT) found in the uploads folder!")

    return documents

In [18]:
def create_vector_store(documents):
    """Create a vector store from the documents."""
    # Split documents into chunks
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=100
    )
    texts = text_splitter.split_documents(documents)

    print(f"Created {len(texts)} text chunks for processing")

    # Create embeddings and vector store
    embeddings = OpenAIEmbeddings()
    vectorstore = Chroma.from_documents(texts, embeddings)

    return vectorstore

In [19]:
def generate_email(vectorstore, topic, recipient_type):
    """Generate an email using RAG."""
    # Create QA chain
    llm = ChatOpenAI(temperature=0.7)
    qa_chain = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=vectorstore.as_retriever(),
        return_source_documents=True
    )

    # Create prompt
    prompt = f"""
    Using the context provided, generate a compelling marketing email for {recipient_type}.
    The email should:
    1. Focus on {topic}
    2. Be professional and persuasive
    3. Include specific details from the provided documents
    4. Have a clear call to action
    5. Be formatted properly with a subject line, greeting, body, and signature
    """

    # Generate response
    response = qa_chain({"query": prompt})
    return response['result']


In [20]:
# Main execution
def main():
    print("Welcome to the Email Generator!")
    print("Please ensure your files are uploaded to the 'uploads' folder.")

    try:
        # Load documents and create vector store
        print("\nLoading documents from uploads folder...")
        documents = load_documents()  # No need to specify path - defaults to 'uploads'
        vectorstore = create_vector_store(documents)

        while True:
            # Get user input
            print("\nWhat would you like to focus on in the email?")
            topic = input("Enter topic (or 'quit' to exit): ")

            if topic.lower() == 'quit':
                break

            recipient_type = input("Who is the target recipient? ")

            # Generate email
            print("\nGenerating email...")
            email = generate_email(vectorstore, topic, recipient_type)

            print("\nGenerated Email:")
            print("-" * 50)
            print(email)
            print("-" * 50)

            continue_gen = input("\nWould you like to generate another email? (yes/no): ")
            if continue_gen.lower() != 'yes':
                break

    except Exception as e:
        print(f"An error occurred: {str(e)}")

if __name__ == "__main__":
    main()

Welcome to the Email Generator!
Please ensure your files are uploaded to the 'uploads' folder.

Loading documents from uploads folder...
Found 2 files in uploads folder:
Processing: bell_canada.txt
Processing: royal_persicus.pdf
Created 4 text chunks for processing

What would you like to focus on in the email?
Enter topic (or 'quit' to exit): Please write a compelling email to market the services of the Royal Persicus company to Bell Canada.
Who is the target recipient? The executive management of Bell

Generating email...


  llm = ChatOpenAI(temperature=0.7)
  response = qa_chain({"query": prompt})



Generated Email:
--------------------------------------------------
Subject: Elevate Your Operational Efficiency with Royal Persicus

Dear Executive Management Team at Bell Canada,

I hope this email finds you well. In the fast-paced world of telecommunications, maintaining a competitive edge and driving operational efficiency are critical to sustained growth. That's why I am reaching out to introduce you to Royal Persicus, a leading consulting firm specializing in enterprise-level transformations.

Royal Persicus offers tailored SAFe transformation coaching, empowering organizations to adopt agile methodologies at scale for enhanced alignment, collaboration, and delivery. With certified SAFe coaches, Royal Persicus ensures a seamless transition to agile workflows, a vital step in navigating the ever-evolving tech industry landscape.

Furthermore, Royal Persicus provides comprehensive cloud adoption consulting services. By partnering with enterprises like Bell Canada, Royal Persicus d