In [1]:
import os
import dotenv
from pathlib import Path

from langchain_core.messages import AIMessage, HumanMessage
from langchain_community.document_loaders.text import TextLoader
from langchain_community.document_loaders import (
    WebBaseLoader, 
    PyPDFLoader, 
    Docx2txtLoader,
)
from langchain_community.vectorstores import Chroma
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain_anthropic import ChatAnthropic
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain.chains import create_history_aware_retriever, create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain

# Load variables via python-dotenv, then read the two provider API keys
# from the process environment (each is None when the variable is unset).
dotenv.load_dotenv()

OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
ANTHROPIC_API_KEY = os.environ.get("ANTHROPIC_API_KEY")

USER_AGENT environment variable not set, consider setting it to identify your requests.


In [2]:
# Source documents to index. These are the original files (not temporary
# copies), so this cell must never delete them.
doc_paths = [
    "docs/AboutClass2Class.pdf",
    "docs/BuyerPersonasC2Corg.pdf",
    "docs/LearningJourney.pdf",
    "docs/mini-Brand-Guidelines.pdf",
    "docs/SEOKeywords.pdf",
]

docs = []
for doc_file in doc_paths:
    file_path = Path(doc_file)
    # Choose the loader from the file extension (compared case-insensitively).
    suffix = file_path.suffix.lower()

    try:
        if suffix == ".pdf":
            loader = PyPDFLoader(file_path)
        elif suffix == ".docx":
            loader = Docx2txtLoader(file_path)
        elif suffix in (".txt", ".md"):
            loader = TextLoader(file_path)
        else:
            print(f"Document type {suffix or 'unknown'} not supported.")
            continue

        docs.extend(loader.load())

    except Exception as e:
        # Best-effort loading: report the failure and continue with the
        # remaining files instead of aborting the whole cell.
        print(f"Error loading document {file_path.name}: {e}")


# Load URLs (currently disabled; uncomment to also index a web page)

# url = "https://docs.streamlit.io/develop/quick-reference/release-notes"
# try:
#     loader = WebBaseLoader(url)
#     docs.extend(loader.load())
# except Exception as e:
#     print(f"Error loading document from {url}: {e}")

' except Exception as e:\n    print(f"Error loading document from {url}: {e}") '

In [3]:
# Bare last expression: the notebook renders the full list of loaded documents.
docs

[Document(metadata={'source': 'docs/AboutClass2Class.pdf', 'page': 0}, page_content='Here is some information about Class2ClassClass2Class.org is an educational platform designed to foster international collaboration,cultural understanding, global citizenship education, and sustainable development amongstudents. It connects classrooms globally, enabling students and teachers to co-create projectsand engage in intercultural learning experiences.The platform offers a 4-level learning journey:● The first level centers on learning about other countries and cultures, encouragingclassrooms to meet and understand diverse perspectives.● The second level is focused on the United Nations\' Sustainable Development Goals(SDGs), prompting classes to carry out activities around these goals and share theirlearning outcomes.● The third level involves co-creating solutions to identified problems, based on the designthinking model, fostering problem-solving skills.● The fourth and final level teaches st

In [4]:
# Break the loaded documents into overlapping chunks before embedding them

text_splitter = RecursiveCharacterTextSplitter(chunk_overlap=1000, chunk_size=5000)
document_chunks = text_splitter.split_documents(docs)

In [5]:
# Embed the chunks with OpenAI embeddings and index them in a Chroma vector store

vector_db = Chroma.from_documents(document_chunks, embedding=OpenAIEmbeddings())

In [6]:
def _get_context_retriever_chain(vector_db, llm):
    """Build a retriever chain that can turn the conversation into a search query.

    Args:
        vector_db: Vector store; only its ``as_retriever()`` method is used here.
        llm: Chat model passed to ``create_history_aware_retriever``.

    Returns:
        The runnable produced by ``create_history_aware_retriever``.
    """
    retriever = vector_db.as_retriever()
    # Prompt layout: prior turns ("messages"), then the latest user input,
    # then the instruction asking the model for a search query.
    prompt = ChatPromptTemplate.from_messages([
        MessagesPlaceholder(variable_name="messages"),
        ("user", "{input}"),
        ("user", "Given the above conversation, generate a search query to look up in order to get information relevant to the conversation, focusing on the most recent messages."),
    ])
    # NOTE(review): create_history_aware_retriever is documented around a
    # `chat_history` input key, while this chain is fed `messages`. The
    # query-rewrite step may therefore never run -- confirm against the
    # LangChain API reference before relying on history-aware retrieval.
    retriever_chain = create_history_aware_retriever(llm, retriever, prompt)

    return retriever_chain

In [7]:
def get_conversational_rag_chain(llm):
    """Assemble the retrieval + answer chain used to produce article outlines.

    Reads the module-level ``vector_db`` (it is not a parameter), so the cell
    that builds the vector store must have run before this is called.

    Args:
        llm: Chat model used both for the retriever chain and for answering.

    Returns:
        The runnable returned by ``create_retrieval_chain``. The prompts built
        here expect the input keys ``messages`` and ``input``.
    """
    retriever_chain = _get_context_retriever_chain(vector_db, llm)

    # System prompt, then prior turns, then the latest user input.
    # NOTE(review): the system prompt ends with a hard-coded request for an
    # outline about "online education platforms", independent of the user's
    # input -- confirm this is intended.
    # `{context}` is presumably filled with the retrieved documents by
    # create_stuff_documents_chain -- verify against the LangChain docs.
    prompt = ChatPromptTemplate.from_messages([
        ("system",
        """
        You are an expert SEO-optimized article outliner for Class2Class. You will receive a topic  and some information for a blog article, which you must create an outline for, fitting for Class2Class' blog section on the website. The outline structure must always include: Topic/article title, description, aim of the article, main points of the content, CTA, and a list of the used SEO keywords, which you must always access through the attached "SEOKeywords" file, which you have access to in your knowledge, and this should be the only source for used SEO words, which should also be in bold. Always write your outlines considering a SEO optimized format, which is described in the rules section - also available in your knowledge. 

__RULES for SEO optimized structure__
MUST ALWAYS FOLLOW AND CONSIDER THESE INSTRUCTIONS FOR THE OUTLINE:

Must directly or indirectly mention Class2Class
Must access and use the knowledge file "SEOkeywords" and mention at least 10 primary keywords, 5 secondary keywords and 3 long tail keywords in the article (marked bold in outline)
Must sure the Focus Keywords are in the SEO Title.
Must sure The Focus Keywords are in the SEO Meta Description.
Make Sure The Focus Keywords appears in the first 10% of the content.
Main content must be between 500-700 words
Must include focus Keyword in the subheading(s).
Must suggest 3 different titles.
Titles must be short. 
Must use a positive or a negative sentiment word in the Title.
Must use a Power Keyword in the Title.
Used SEO words must be written in a list
You must mimic Class2Class' writing style, tone, voice and help them write SEO optimized articles in their communication style which is all accessible in your knowledge. The outline must also be adhering to their brand guidelines. 
Your outlines focus on creating authentic, user-specific content for Class2Class website blogs and articles.

Based on the documents you have access to, create an outline for a blog post about online education platforms.
        \n
        {context}"""),
        MessagesPlaceholder(variable_name="messages"),
        ("user", "{input}"),
    ])
    stuff_documents_chain = create_stuff_documents_chain(llm, prompt)

    return create_retrieval_chain(retriever_chain, stuff_documents_chain)

In [8]:
# Both chat clients are created up front; only the one bound to `llm_stream`
# below is actually used for the response.
llm_stream_openai = ChatOpenAI(
    model="gpt-4o",  # Here you could use "o1-preview" or "o1-mini" if you already have access to them
    temperature=0.3,
    streaming=True,
)

llm_stream_anthropic = ChatAnthropic(
    model="claude-3-5-sonnet-20240620",
    temperature=0.3,
    streaming=True,
)

llm_stream = llm_stream_anthropic  # Select between OpenAI and Anthropic models for the response

# Conversation so far in role/content form, converted to LangChain message
# objects (user -> HumanMessage, anything else -> AIMessage).
messages = [
    {"role": "user", "content": "i want to talk about AI in education"},
]
messages = [HumanMessage(content=m["content"]) if m["role"] == "user" else AIMessage(content=m["content"]) for m in messages]

conversation_rag_chain = get_conversational_rag_chain(llm_stream)
response_message = "*(RAG Response)*\n"
# Earlier turns are passed as history; the newest message is the query.
# Only the "answer" field of the chain output is streamed.
for chunk in conversation_rag_chain.pick("answer").stream({"messages": messages[:-1], "input": messages[-1].content}):
    response_message += chunk
    print(chunk, end="", flush=True)

# Store the reply as an AIMessage so `messages` stays a homogeneous list of
# message objects; a plain dict here would break `messages[-1].content` on
# the next turn.
messages.append(AIMessage(content=response_message))

Certainly! I'd be happy to create an outline for a blog post about AI in education for Class2Class. Here's an SEO-optimized outline based on the guidelines and information provided:

Topic/Article Title: 
3 Suggested Titles:
1. "Revolutionizing Learning: How AI is Transforming **Education** in 2023"
2. "The Future of **Teaching**: Embracing AI in the **Classroom**"
3. "Empowering **Educators**: Harnessing the Potential of AI in **Schools**"

Description:
This article explores the impact of **artificial intelligence** on **education**, discussing its benefits, challenges, and potential future applications in **teaching** and **learning**. We'll examine how AI is reshaping the **educational landscape** and how **educators** can leverage this technology to enhance the **learning experience** for their **students**.

Aim of the Article:
To inform **educators** and **school administrators** about the current and future applications of AI in **education**, highlighting its potential to impro