In [None]:
# Prints a static banner only; no setup work is performed by this cell itself.
print('Setup complete.')

# LangChain Essentials with AskSage GPT-5-Mini - Demo

**Focus**: loaders, splitters, embeddings, retrievers, chains with AskSage GPT-5-Mini

This notebook demonstrates the core components of LangChain for building knowledge-based applications using AskSage's GPT-5-Mini model.

## Learning Objectives
- Understand document loaders and text splitting
- Work with embeddings and vector stores
- Build retrievers for semantic search
- Create chains for processing and answering questions using AskSage GPT-5-Mini

In [None]:
# Install required packages
!pip install langchain langchain-community faiss-cpu tiktoken asksageclient pandas

import os
import json
import pandas as pd
from typing import List, Dict, Any
from langchain.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.schema import Document
from langchain.embeddings.base import Embeddings
from langchain.llms.base import LLM
from langchain.chains import RetrievalQA
from asksageclient import ask_sage_client

print("✅ All packages installed and modules imported successfully!")

In [None]:
# AskSage client setup.
# Credentials are read from environment variables; set them before running, e.g.:
# os.environ["ASKSAGE_API_KEY"] = "your-api-key-here"
# os.environ["ASKSAGE_BASE_URL"] = "your-base-url-here"
# os.environ["ASKSAGE_USER_ID"] = "your-user-id-here"

# Collect the connection settings first (an unset variable yields None),
# then hand them to the client constructor in one go.
asksage_settings = {
    "api_key": os.getenv("ASKSAGE_API_KEY"),
    "base_url": os.getenv("ASKSAGE_BASE_URL"),
    "user_id": os.getenv("ASKSAGE_USER_ID"),
}
client = ask_sage_client(**asksage_settings)

print("✅ AskSage client initialized successfully!")
print("📋 Available models:")

# Show the first ten entries of the model listing when the payload carries one.
models_response = client.get_models()
if 'response' not in models_response:
    print("Could not retrieve models list")
else:
    models_df = pd.DataFrame(models_response['response'])
    print(models_df.head(10))

In [None]:
# Custom AskSage LLM wrapper for LangChain integration
class AskSageLLM(LLM):
    """LangChain ``LLM`` adapter that forwards prompts to an AskSage client.

    Args:
        client: Object exposing ``query(message=..., model=..., temperature=...)``.
        model_name: Model identifier passed to ``client.query``.
        temperature: Sampling temperature passed to ``client.query``.
        **kwargs: Extra field values forwarded to the ``LLM`` base constructor.
    """

    # Declared as class-level fields so the base model knows about them.
    client: Any
    model_name: str = "gpt-5-mini"
    temperature: float = 0

    def __init__(self, client, model_name="gpt-5-mini", temperature=0, **kwargs):
        # Hand the values to the base constructor instead of assigning them after a
        # bare ``super().__init__()``: the base class validates declared fields at
        # construction time, and ``client`` has no default to fall back on.
        super().__init__(
            client=client,
            model_name=model_name,
            temperature=temperature,
            **kwargs,
        )

    @property
    def _llm_type(self) -> str:
        """Short identifier LangChain uses to label this LLM implementation."""
        return "asksage"

    def _call(
        self,
        prompt: str,
        stop: List[str] = None,
        run_manager: Any = None,
        **kwargs: Any,
    ) -> str:
        """Send ``prompt`` to AskSage and return the generated text.

        Args:
            prompt: Text forwarded as the ``message`` of the query.
            stop: Optional stop sequences; the returned text is cut at the first
                occurrence of any of them.
            run_manager: Accepted for compatibility with LangChain's calling
                convention; not used.
            **kwargs: Accepted for compatibility; not forwarded to the client.

        Returns:
            The ``'response'`` field of the reply when its ``'status'`` is 200.
            Otherwise a string starting with ``"Error"`` describing the failure;
            failures are reported in-band rather than raised.
        """
        try:
            response = self.client.query(
                message=prompt,
                model=self.model_name,
                temperature=self.temperature
            )
            if response.get('status') != 200:
                return f"Error: {response}"

            # NOTE(review): assumes the generated text lives under 'response' —
            # confirm against the AskSage client version in use.
            text = response.get('response', '')

            # The client call takes no stop sequences, so enforce them here.
            if stop and isinstance(text, str):
                for stop_sequence in stop:
                    cut = text.find(stop_sequence)
                    if cut != -1:
                        text = text[:cut]
            return text
        except Exception as e:
            return f"Error calling AskSage: {str(e)}"

# Custom AskSage Embeddings wrapper for LangChain integration
class AskSageEmbeddings(Embeddings):
    """Demo embeddings that produce deterministic mock vectors.

    Each vector is drawn from a standard normal distribution using a generator
    seeded from the MD5 hash of the text, so the same text always maps to the
    same vector. The vectors carry no semantic information.

    Args:
        client: AskSage client, stored for a future real embedding endpoint;
            it is not called when generating the mock vectors.
        dimension: Length of each embedding vector (defaults to 1536).
    """

    client: Any

    def __init__(self, client, dimension: int = 1536):
        self.client = client
        self.dimension = dimension

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        """Return one mock embedding per entry of ``texts``.

        No request is sent to AskSage: the previous per-text query had its
        response discarded, and a failing request silently turned every
        embedding into a zero vector.
        """
        # Imported once per call instead of once per text.
        import hashlib
        import numpy as np

        embeddings = []
        for text in texts:
            try:
                # Derive a 31-bit seed from the text so the vector is reproducible.
                digest = hashlib.md5(text.encode()).hexdigest()
                seed = int(digest, 16) % (2**31)
                # A private generator yields the same values as seeding the global
                # one, without disturbing other users of ``np.random``.
                rng = np.random.RandomState(seed)
                embeddings.append(rng.normal(0, 1, self.dimension).tolist())
            except Exception as e:
                print(f"Error generating embedding: {e}")
                # Fallback to zero embedding
                embeddings.append([0.0] * self.dimension)
        return embeddings

    def embed_query(self, text: str) -> List[float]:
        """Embed a single query text with the same scheme as documents."""
        return self.embed_documents([text])[0]

# Initialize the custom LLM and embeddings
llm = AskSageLLM(client, model_name="gpt-5-mini", temperature=0)
embeddings = AskSageEmbeddings(client)

print("✅ Custom AskSage LLM and Embeddings initialized!")
# Report the model actually configured on the wrapper rather than a repeated literal.
print(f"🤖 Using model: {llm.model_name}")

## 1. Document Loaders

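Document loaders are used to load data from various sources into LangChain documents. To keep this demo self-contained, the cell below builds `Document` objects directly from in-memory strings instead of reading files with a loader.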

In [None]:
# In-memory sample texts used throughout the demo
sample_docs = [
    "LangChain is a framework for developing applications powered by language models. It enables applications that are data-aware and agentic.",
    "Document loaders are used to load data from many different sources. LangChain provides over 100 different document loaders.",
    "Text splitters are used to split large documents into smaller chunks that can fit in a model's context window.",
    "Embeddings create vector representations of text that capture semantic meaning and enable semantic search.",
    "Vector stores allow you to store and search over unstructured data by creating embeddings and enabling similarity search.",
    "Retrievers provide a generic interface to get documents given an unstructured query. They are more general than a vector store.",
    "Chains allow you to combine multiple components together to create a single, coherent application.",
    "AskSage provides powerful AI capabilities including GPT-5-Mini for advanced language understanding and generation.",
]

# Wrap every text in a LangChain Document, tagging it with a positional source id
documents = [
    Document(page_content=text, metadata={"source": f"doc_{position}"})
    for position, text in enumerate(sample_docs)
]

print(f"Loaded {len(documents)} documents")
print(f"First document: {documents[0].page_content[:100]}...")

## 2. Text Splitters

Text splitters break down large documents into smaller chunks that fit within model context windows.

In [None]:
# Configure the splitter: chunks of at most 200 characters with a 20-character
# overlap, trying paragraph, line, and word boundaries before splitting mid-word.
splitter_options = dict(
    chunk_size=200,
    chunk_overlap=20,
    length_function=len,
    separators=["\n\n", "\n", " ", ""],
)
text_splitter = RecursiveCharacterTextSplitter(**splitter_options)

# Apply the splitter to the loaded documents
split_docs = text_splitter.split_documents(documents)

print(f"Original documents: {len(documents)}")
print(f"Split documents: {len(split_docs)}")
print("\nFirst split document:")
print(f"Content: {split_docs[0].page_content}")
print(f"Metadata: {split_docs[0].metadata}")

## 3. Embeddings with AskSage

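Embeddings convert text into vector representations that capture semantic meaning. This notebook uses a custom `AskSageEmbeddings` wrapper. Note that in this demo the wrapper generates deterministic mock vectors derived from a hash of each text, so the similarity results below illustrate the workflow rather than true semantic relevance.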

In [None]:
# Example: embed a single text with the notebook's AskSageEmbeddings wrapper
sample_text = "What is LangChain?"
sample_embedding = embeddings.embed_query(sample_text)

print(f"Text: {sample_text}")
print(f"Embedding dimension: {len(sample_embedding)}")
print(f"First 5 values: {sample_embedding[:5]}")
# The wrapper returns hash-seeded random vectors, so the status line says so
# instead of claiming semantic understanding.
print("\n✅ Embedding generated (demo uses deterministic mock vectors, not true semantic embeddings)")

## 4. Vector Stores

Vector stores allow efficient storage and similarity search of embeddings.

In [None]:
# Build a FAISS index over the split documents, embedding them with `embeddings`
print("🔄 Creating vector store with AskSage embeddings...")
vectorstore = FAISS.from_documents(split_docs, embeddings)

vector_count = vectorstore.index.ntotal
print(f"✅ Vector store created with {vector_count} vectors")

# Run a sample similarity search and list the three closest chunks
query = "What are document loaders?"
similar_docs = vectorstore.similarity_search(query, k=3)

print(f"\nQuery: {query}")
print("\nTop 3 similar documents:")
i = 1
for doc in similar_docs:
    print(f"{i}. {doc.page_content}")
    i += 1

## 5. Retrievers

Retrievers provide a standard interface for fetching relevant documents given a query.

In [None]:
# Create retriever from vector store (top-2 similarity search)
retriever = vectorstore.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 2}
)

# Test retriever.
# `invoke` is the current Runnable entry point; `get_relevant_documents` is deprecated.
query = "How do text splitters work?"
retrieved_docs = retriever.invoke(query)

print(f"Query: {query}")
print(f"\nRetrieved {len(retrieved_docs)} documents:")
for i, doc in enumerate(retrieved_docs, 1):
    print(f"{i}. {doc.page_content}")
    print(f"   Source: {doc.metadata.get('source', 'unknown')}")

## 6. Chains - RetrievalQA with GPT-5-Mini

Chains combine multiple components to create end-to-end applications. We're using AskSage's GPT-5-Mini model for question answering.

In [None]:
# Assemble the RetrievalQA chain: retrieved chunks are "stuffed" into one prompt
# for the AskSage LLM, and the source documents are returned with the answer.
qa_chain_options = {
    "llm": llm,
    "chain_type": "stuff",
    "retriever": retriever,
    "return_source_documents": True,
    "verbose": True,
}
qa_chain = RetrievalQA.from_chain_type(**qa_chain_options)

print("✅ RetrievalQA chain created successfully with GPT-5-Mini!")
print("🤖 Ready to answer questions using AskSage's advanced AI capabilities")

In [None]:
# Ask questions using the chain with GPT-5-Mini
questions = [
    "What is LangChain and what does it enable?",
    "How do embeddings help with semantic search?",
    "What is the purpose of text splitters?",
    "How does AskSage integrate with LangChain?"
]

print("🚀 Asking questions using AskSage GPT-5-Mini...\n")

for question in questions:
    print(f"{'='*60}")
    print(f"❓ Question: {question}")
    print('='*60)

    try:
        # `invoke` replaces the deprecated direct call `qa_chain({...})`.
        result = qa_chain.invoke({"query": question})

        print(f"\n🤖 GPT-5-Mini Answer: {result['result']}")
        print(f"\n📚 Sources used:")
        for i, doc in enumerate(result['source_documents'], 1):
            print(f"{i}. {doc.metadata.get('source', 'unknown')}: {doc.page_content}")
    except Exception as e:
        # Keep going with the remaining questions if one of them fails.
        print(f"❌ Error processing question: {e}")

    print("\n")

## 7. Direct AskSage GPT-5-Mini Usage

Let's also demonstrate direct usage of AskSage's GPT-5-Mini model for comparison.

In [None]:
# Query AskSage GPT-5-Mini directly, bypassing the LangChain wrappers
print("🎯 Direct AskSage GPT-5-Mini Usage Examples\n")

direct_questions = [
    "Explain the benefits of using LangChain for AI applications",
    "What are the key components of a RAG (Retrieval-Augmented Generation) system?",
    "How can vector embeddings improve search and retrieval?",
]

for question in direct_questions:
    print(f"💬 Question: {question}")
    print("-" * 50)

    try:
        response = client.query(message=question, model="gpt-5-mini", temperature=0)

        # Anything other than status 200 is reported as an error with the raw payload.
        if response.get('status') != 200:
            print(f"❌ Error: {response}")
        else:
            print(f"🤖 GPT-5-Mini Response: {response.get('response', 'No response received')}")
    except Exception as e:
        print(f"❌ Error calling AskSage: {e}")

    print("\n")

## Summary

In this demo, we covered the core LangChain components integrated with AskSage's GPT-5-Mini:

1. **Document Loaders**: Load data from various sources
2. **Text Splitters**: Break large documents into manageable chunks
3. **AskSage Embeddings**: Convert text to vector representations through a custom embeddings wrapper (mocked with deterministic vectors in this demo)
4. **Vector Stores**: Store and search embeddings efficiently
5. **Retrievers**: Provide standardized document retrieval interface
6. **Chains with GPT-5-Mini**: Combine components for end-to-end applications using AskSage's advanced GPT-5-Mini model
7. **Direct AskSage Integration**: Direct usage of GPT-5-Mini for various AI tasks

### Key Advantages of Using AskSage with GPT-5-Mini:

- **Advanced Model**: GPT-5-Mini provides state-of-the-art language understanding and generation
- **Enterprise Ready**: AskSage offers enterprise-grade security and reliability
- **Flexible Integration**: Easy integration with existing LangChain workflows
- **Scalable**: Built for production-scale applications

These components work together to enable powerful RAG (Retrieval-Augmented Generation) applications that can answer questions based on your own documents, powered by AskSage's GPT-5-Mini model.