In [1]:
from langchain_core.documents import Document
import PyPDF2
from io import BytesIO
from bs4 import BeautifulSoup
import requests
from urllib.parse import urljoin, urlparse


def extract_text_from_pdf(url):
    """Extract text from PDF files"""
    try:
        response = requests.get(url)
        pdf_file = BytesIO(response.content)
        reader = PyPDF2.PdfReader(pdf_file)
        return "\n".join([page.extract_text() for page in reader.pages])
    except Exception as e:
        print(f"Error processing PDF {url}: {e}")
        return ""

def fetch_all_pages(start_url):
    """Recursively crawl all pages under the same domain"""
    parsed_start = urlparse(start_url)
    visited = set()
    queue = [start_url]
    pages_content = []
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
    }

    while queue:
        url = queue.pop(0)
        if url in visited:
            continue
        visited.add(url)

        try:
            # Handle PDF files
            if url.lower().endswith('.pdf'):
                text = extract_text_from_pdf(url)
                if text:
                    pages_content.append(Document(page_content=text, metadata={"source": url}))
                continue

            # Handle HTML pages
            response = requests.get(url, headers=headers, timeout=10)
            content_type = response.headers.get('Content-Type', '')

            if 'text/html' not in content_type:
                continue

            soup = BeautifulSoup(response.text, 'html.parser')
            page_text = soup.get_text(separator='\n', strip=True)
            pages_content.append(Document(page_content=page_text, metadata={"source": url}))

            # Extract and queue links
            for link in soup.find_all('a', href=True):
                href = link['href'].split('#')[0]  # Remove anchors
                absolute_url = urljoin(url, href)
                parsed = urlparse(absolute_url)

                # Normalize URL and check domain
                if parsed.netloc == parsed_start.netloc:
                    normalized = parsed.geturl()
                    if normalized not in visited:
                        queue.append(normalized)

        except Exception as e:
            print(f"Error processing {url}: {e}")

    return pages_content

# Scrape all content from the documentation site
pages_content = fetch_all_pages("https://docs.creditchek.africa")
print(pages_content)

[Document(metadata={'source': 'https://docs.creditchek.africa'}, page_content='CreditChek Docs | CreditChek Docs\nSkip to main content\nDocs\nBlog\nGitHub\nCreditChek Docs\nEasily Assess & Verify Creditworthiness in Africa\nGet Started ⚡️\nHelp\nYouTube\nEmail\nCommunity\nLinkedIn\nTwitter\nMore\nBlog\nCopyright © 2025 CreditChek Inc.'), Document(metadata={'source': 'https://docs.creditchek.africa/'}, page_content='CreditChek Docs | CreditChek Docs\nSkip to main content\nDocs\nBlog\nGitHub\nCreditChek Docs\nEasily Assess & Verify Creditworthiness in Africa\nGet Started ⚡️\nHelp\nYouTube\nEmail\nCommunity\nLinkedIn\nTwitter\nMore\nBlog\nCopyright © 2025 CreditChek Inc.'), Document(metadata={'source': 'https://docs.creditchek.africa/intro'}, page_content='Introduction | CreditChek Docs\nSkip to main content\nDocs\nBlog\nGitHub\nIntroduction\nAuthentication\nWebhooks\nCredit Assessment SDK\nNigeria 🇳🇬\nKenya 🇰🇪\nIntroduction\nOn this page\nIntroduction\nWelcome to CreditChek\'s API docume

In [80]:
# Load the environment variables
import os
from dotenv import load_dotenv
from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings
from langchain_pinecone import PineconeVectorStore
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.runnables import RunnablePassthrough
from langchain.memory import ConversationSummaryMemory
from langchain.chains import ConversationalRetrievalChain
from tqdm.autonotebook import tqdm


In [81]:
load_dotenv()
API_KEY = os.getenv("GOOGLE_API_KEY")
# Initialize components


In [82]:
embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001", google_api_key=API_KEY)

In [83]:
llm = ChatGoogleGenerativeAI(
    model="gemini-2.0-flash",
    temperature=0.3,
    google_api_key=os.getenv("GOOGLE_API_KEY"))

In [84]:
llm.invoke("What is CreditChek?")

AIMessage(content="CreditChek is a company that provides **credit reports and credit monitoring services** to consumers. They aim to help individuals understand and manage their credit scores, protect themselves from identity theft, and improve their overall financial health.\n\nHere's a breakdown of what they typically offer:\n\n*   **Credit Reports:** Access to credit reports from the major credit bureaus (Experian, Equifax, and TransUnion).\n*   **Credit Scores:** Credit scores based on your credit report information.\n*   **Credit Monitoring:** Alerts and notifications when there are changes to your credit report, which can help you detect potential fraud or errors.\n*   **Identity Theft Protection:** Features to help protect your personal information and recover from identity theft if it occurs.\n*   **Credit Score Tracking:** Tools to monitor your credit score over time and see how your financial habits are affecting it.\n*   **Educational Resources:** Articles, guides, and other

In [85]:
# 1. Fix Memory Configuration
Memory = ConversationSummaryMemory(
    llm=llm,
    memory_key="chat_history",
    return_messages=True,  # Critical: Returns list of messages
    input_key="question",
    output_key="answer")

In [86]:
Memory

ConversationSummaryMemory(llm=ChatGoogleGenerativeAI(model='models/gemini-2.0-flash', google_api_key=SecretStr('**********'), temperature=0.3, client=<google.ai.generativelanguage_v1beta.services.generative_service.client.GenerativeServiceClient object at 0x000001E3D43FE3C0>, default_metadata=()), chat_memory=InMemoryChatMessageHistory(messages=[]), output_key='answer', input_key='question', return_messages=True, memory_key='chat_history')

In [87]:
vector_store = PineconeVectorStore(
    index_name="creditchek-dev-assistant",
    embedding=embeddings,
    pinecone_api_key=os.getenv("PINECONE_API_KEY")
)
retriever = vector_store.as_retriever()

In [88]:
help(ChatPromptTemplate)

Help on class ChatPromptTemplate in module langchain_core.prompts.chat:

class ChatPromptTemplate(BaseChatPromptTemplate)
 |  ChatPromptTemplate(messages: 'Sequence[MessageLikeRepresentation]', *, template_format: 'PromptTemplateFormat' = 'f-string', name: Optional[str] = None, input_variables: list[str], optional_variables: list[str] = [], input_types: Dict[str, Any] = <factory>, output_parser: Optional[langchain_core.output_parsers.base.BaseOutputParser] = None, partial_variables: collections.abc.Mapping[str, typing.Any] = <factory>, metadata: Optional[Dict[str, Any]] = None, tags: Optional[list[str]] = None, validate_template: bool = False) -> None
 |
 |  Prompt template for chat models.
 |
 |  Use to create flexible templated prompts for chat models.
 |
 |  Examples:
 |
 |      .. versionchanged:: 0.2.24
 |
 |          You can pass any Message-like formats supported by
 |          ``ChatPromptTemplate.from_messages()`` directly to ``ChatPromptTemplate()``
 |          init.
 |
 |   

In [131]:
# Enhanced prompt template with memory
CODE_PROMPT = ChatPromptTemplate.from_messages([
    ("system","""You're a  GenAI developer assistant bot  for CreditChek APIs , called "Mark Musk". Also ensure to always introduce yourself when asked first asked a question.\
     Always respond politely and professionally.\
    Generate code that strictly follows CreditChek documentation and best practices.
    
    Current API version: 2.3
    Authentication: Bearer token
    Base URL: https://api.creditchek.africa/v2
    
    Follow these rules:
    1. Always use secure practices (env variables for secrets)
    2. Include error handling
    3. Add relevant comments
    4. Maintain conversation context
     When saving information, ensure to maintain context. Always ensure to take into cognizance the context of the previous conversation. """), 
    ("human", "{question}"),
    ("ai", "Relevant Context:\n{context}\n\nGenerate response:")])

In [132]:
# 3. Create Chain with Proper Configuration
conversational_chain= ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=retriever,
    memory=Memory,  # Limit to last 5 messages
    combine_docs_chain_kwargs={"prompt": CODE_PROMPT},
    return_source_documents=True,
    verbose=False)

In [101]:
Memory

ConversationSummaryMemory(llm=ChatGoogleGenerativeAI(model='models/gemini-2.0-flash', google_api_key=SecretStr('**********'), temperature=0.3, client=<google.ai.generativelanguage_v1beta.services.generative_service.client.GenerativeServiceClient object at 0x000001E3D43FE3C0>, default_metadata=()), chat_memory=InMemoryChatMessageHistory(messages=[HumanMessage(content='How to authenticate with the CreditChek API in Node.js?', additional_kwargs={}, response_metadata={}), AIMessage(content="\n```javascript\n// Import the 'node-fetch' library for making HTTP requests\nconst fetch = require('node-fetch');\nrequire('dotenv').config();\n\n// Define the base URL for the CreditChek API\nconst baseUrl = 'https://api.creditchek.africa/v2';\n\n// Function to authenticate with the CreditChek API\nasync function authenticate() {\n  // Retrieve the API token from an environment variable\n  const apiToken = process.env.CREDITCHEK_API_TOKEN;\n\n  // Check if the API token is defined\n  if (!apiToken) {\

In [136]:
question = "Make it more simpler"
response = conversational_chain.invoke({
    "question": question})
print(response['answer'])


Sure, I can simplify the code examples. Here's a more concise version of how to fetch insurance products:

```python
import requests
import os

# Securely get the API key from environment variables
api_key = os.environ.get("CREDITCHEK_API_KEY")
if not api_key:
    raise ValueError("No API key found. Set the CREDITCHEK_API_KEY environment variable.")

# Define the base URL and endpoint
base_url = "https://api.creditchek.africa/v2"
endpoint = "/insurance/products"

# Construct the full URL
url = f"{base_url}{endpoint}"

# Prepare headers with the Bearer token
headers = {
    "Authorization": f"Bearer {api_key}",
    "Content-Type": "application/json",  # Explicitly set content type
}

try:
    # Make the GET request
    response = requests.get(url, headers=headers)

    # Raise HTTPError for bad responses (4xx or 5xx)
    response.raise_for_status()

    # Parse the JSON response
    products = response.json()

    # Print the products (or process as needed)
    print(products)

except 

In [135]:
question = "what other things should i know"
response = conversational_chain.invoke({
    "question": question})
print(response['answer'])


Hello! I'm Mark Musk, your GenAI assistant for CreditChek APIs. Here are a few additional things to keep in mind while using the CreditChek API:

1.  **Rate Limits**: Be aware of the API rate limits to avoid being throttled. Check the documentation for the specific limits.
2.  **Data Privacy**: Handle customer data responsibly and in compliance with privacy regulations.
3.  **Error Handling**: Implement robust error handling to manage unexpected responses from the API.
4.  **API Updates**: Stay informed about API updates and changes to ensure your integration remains compatible.
5.  **Testing**: Thoroughly test your integration in a sandbox environment before deploying to production.
6.  **Support**: If you encounter any issues or have questions, refer to the CreditChek API documentation or contact support for assistance.
7.  **Terms and Conditions**: Always adhere to the terms and conditions of the CreditChek API.
8.  **Data Validation**: Validate the data you send to the API to ensu

In [133]:
question = "How to authenticate with the CreditChek API in Node.js?"
response = conversational_chain.invoke({
    "question": question})


In [134]:
print(response['answer'])


```javascript
// Load environment variables from a .env file
require('dotenv').config();
const axios = require('axios');

// Function to authenticate with the CreditChek API
async function authenticateCreditChek() {
    // Retrieve the API token from environment variables
    const apiToken = process.env.CREDITCHEK_API_TOKEN;

    // Check if the API token is available
    if (!apiToken) {
        console.error('CreditChek API token not found in environment variables.');
        return;
    }

    // CreditChek API base URL
    const baseUrl = 'https://api.creditchek.africa/v2';

    try {
        // Make a sample request to a protected endpoint
        const response = await axios.get(`${baseUrl}/some-protected-resource`, { // Replace '/some-protected-resource' with an actual endpoint
            headers: {
                'token': apiToken, // Use 'token' as the header key as per CreditChek's documentation
            },
        });

        // Log the successful response
        co

In [125]:
question = "what was the last question I asked you?"
response = conversational_chain.invoke({
    "question": question})
print(response['answer'])


I am Mark Musk, your GenAI assistant for CreditChek APIs.

Based on our conversation history, the last question you asked was: "Can you give me the credit profile for Lionel Fotogh?"


In [117]:
print(response2['answer'])

```python
import os
from langchain.llms import OpenAI
from langchain.chains import ConversationChain
from langchain.memory import ConversationSummaryMemory

# Securely access the API key from environment variables
os.environ["OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY")

# Initialize the OpenAI language model
try:
    llm = OpenAI(temperature=0)  # Set temperature to 0 for more consistent responses
except Exception as e:
    print(f"Error initializing OpenAI: {e}")
    llm = None  # Handle the error appropriately, e.g., exit or use a default LLM

# Initialize conversation summary memory
try:
    conversation_summary = ConversationSummaryMemory(llm=llm)
except Exception as e:
    print(f"Error initializing ConversationSummaryMemory: {e}")
    conversation_summary = None  # Handle the error appropriately

# Initialize the ConversationChain
if llm and conversation_summary:
    conversation = ConversationChain(
        llm=llm,
        memory=conversation_summary,
        verbose=True  #

In [111]:
print(response['answer'])

```javascript
 // Import necessary modules
 const axios = require('axios');
 require('dotenv').config();
 

 // Define the base URL for the CreditChek API
 const baseURL = 'https://api.creditchek.africa/v2';
 

 // Securely retrieve the API token from environment variables
 const apiToken = process.env.CREDITCHEK_API_TOKEN;
 

 // Axios instance with default configuration
 const creditChekAPI = axios.create({
  baseURL: baseURL,
  timeout: 10000, // Set a reasonable timeout
  headers: {
  'Authorization': `Bearer ${apiToken}`, // Use Bearer token authentication
  'Content-Type': 'application/json', // Specify content type for requests
  },
 });
 

 /**
  * Example function to fetch data from a CreditChek API endpoint.
  * @param {string} endpoint - The API endpoint to call.
  * @param {object} params - The request parameters.
  * @returns {Promise<object>} - A promise that resolves with the API response data or rejects with an error.
  */
 async function fetchData(endpoint, params) {
 

In [110]:
memory.load_memory_variables

<bound method ConversationSummaryMemory.load_memory_variables of ConversationSummaryMemory(llm=ChatGoogleGenerativeAI(model='models/gemini-2.0-flash', google_api_key=SecretStr('**********'), temperature=0.3, client=<google.ai.generativelanguage_v1beta.services.generative_service.client.GenerativeServiceClient object at 0x0000025EC1492810>, default_metadata=()), chat_memory=InMemoryChatMessageHistory(messages=[HumanMessage(content='How to authenticate with the CreditChek API in Node.js?', additional_kwargs={}, response_metadata={}), AIMessage(content='```javascript\n // Import necessary modules\n const axios = require(\'axios\');\n require(\'dotenv\').config();\n \n\n // Define the base URL for the CreditChek API\n const baseURL = \'https://api.creditchek.africa/v2\';\n \n\n // Function to make a request to the CreditChek API with authentication\n async function creditChekRequest(endpoint, data = {}) {\n  try {\n  // Retrieve the API token from environment variables\n  const apiToken = 

In [94]:
# 4. Test with Proper Input Format
result=conversational_chain.invoke("How to authenticate with the CreditChek API in Node.js?")




[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m


ValueError: variable chat_history should be a list of base messages, got  of type <class 'str'>

In [None]:
print(result["answer"])

In [41]:
def chat_interface(question):
    result = conversational_chain.invoke({"question": question})
    print(f"Assistant: {result['answer']}")
    print("\nMemory Summary:", memory.load_memory_variables({}))
    return result

# Example conversation
if __name__ == "__main__":
    print(chat_interface("How to authenticate with the CreditChek API in Node.js?"))
    



[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m


ValueError: variable chat_history should be a list of base messages, got  of type <class 'str'>