In [None]:
!pip install llama-index
!pip install python-dotenv
!pip install llama-index transformers torch accelerate
!pip install llama-index-llms-huggingface
!pip install chromadb
!pip install python-dotenv

In [97]:
import os
from dotenv import load_dotenv
# Load variables from a .env file (if present), then read the monday.com token.
load_dotenv()
monday_api_token = os.getenv('MONDAY_API_KEY')
if not monday_api_token:
    # Warn instead of raising so the sections that do not need monday.com still run.
    import warnings

    warnings.warn(
        "MONDAY_API_KEY is not set; the Monday reader section will not be able to authenticate."
    )

## Set up local embedding model

In [None]:
%pip install llama-index-embeddings-huggingface
%pip install llama-index-embeddings-instructor
%pip install sentence-transformers

In [101]:
from llama_index.embeddings.huggingface import HuggingFaceEmbedding

# Embedding model used when building the indexes below, selected by its Hugging Face model name.
embed_model = HuggingFaceEmbedding(
    model_name="BAAI/bge-small-en-v1.5",
)

## Web reader

In [None]:
!pip install llama-index-readers-web

In [None]:
from llama_index.readers.web import WholeSiteReader

# Scraper settings: the URL prefix and the page the crawl starts from.
site_prefix = "https://www.highspeedtraining.co.uk"
start_url = f"{site_prefix}/hub/"

# Initialize the scraper with the prefix URL and maximum depth
scraper = WholeSiteReader(prefix=site_prefix, max_depth=10)

# Start scraping from the base URL and keep the scraped pages as documents
documents = scraper.load_data(base_url=start_url)

## Monday reader

In [94]:
# Custom Monday API reader

import json
from typing import Any, Dict, List, Optional

import requests
from llama_index.core.readers.base import BaseReader
from llama_index.core.schema import Document

class MondayReader(BaseReader):
    """monday.com reader. Reads board's data by a GraphQL query.

    Every item of a board is fetched through the ``items_page`` field of the
    monday.com GraphQL endpoint (100 items per request, following the returned
    cursor until it is empty) and converted into one ``Document`` per item.
    """

    def __init__(self, api_key: str, timeout: float = 60.0) -> None:
        """Initialize monday.com reader.

        Args:
            api_key: monday.com API token; sent as the ``Authorization`` header.
            timeout: Seconds to wait for each HTTP request before giving up.
        """
        self.api_key = api_key
        self.api_url = "https://api.monday.com/v2"
        self.timeout = timeout

    def _parse_item_values(self, cv) -> Dict[str, str]:
        """Reduce one raw column value to its column title and text value."""
        # `.get("column", {})` would not help when the key is present but null.
        column = cv.get("column") or {}
        return {"title": column.get("title", ""), "value": cv.get("text", "")}

    def _parse_assets(self, assets) -> List[Dict[str, str]]:
        """Reduce raw assets to name/url pairs; a null or missing list yields []."""
        return [
            {"name": asset.get("name", ""), "url": asset.get("url", "")}
            for asset in assets or []
        ]

    def _parse_updates(self, updates) -> List[Dict[str, str]]:
        """Reduce raw updates to body/created_at pairs; a null or missing list yields []."""
        return [
            {"body": update.get("body", ""), "created_at": update.get("created_at", "")}
            for update in updates or []
        ]

    def _parse_data(self, item) -> Dict[str, Any]:
        """Convert a raw item (or subitem) into the reader's normalized dict.

        Subitems are parsed recursively with the same structure. List fields
        that are missing or null in the API response are treated as empty.
        """
        return {
            "id": item["id"],
            "name": item["name"],
            "values": [self._parse_item_values(cv) for cv in item.get("column_values") or []],
            "assets": self._parse_assets(item.get("assets")),
            "updates": self._parse_updates(item.get("updates")),
            "subitems": [self._parse_data(subitem) for subitem in item.get("subitems") or []],
        }

    def _perform_request(self, board_id: int, cursor: Optional[str] = None) -> Dict[str, Any]:
        """Request one page of board items and return the decoded JSON response.

        Args:
            board_id: Id of the board to query.
            cursor: Pagination cursor returned by the previous page, or None
                for the first page.

        Raises:
            requests.RequestException: On connection problems, timeouts, or an
                HTTP error status whose body is not JSON.
        """
        headers = {"Authorization": self.api_key}
        query = """
            query($boardId: ID!, $cursor: String) {
                boards(ids: [$boardId]) {
                    name,
                    items_page(limit: 100, cursor: $cursor) {
                        cursor
                        items {
                            id,
                            name,
                            column_values {
                                column { title }
                                text
                            }
                            assets {
                                name
                                url
                            }
                            updates {
                                body
                                created_at
                            }
                            subitems {
                                id,
                                name,
                                column_values {
                                    column { title }
                                    text
                                }
                                assets {
                                    name
                                    url
                                }
                                updates {
                                    body
                                    created_at
                                }
                            }
                        }
                    }
                }
            }
        """
        variables = {"boardId": board_id, "cursor": cursor}
        data = {"query": query, "variables": variables}
        response = requests.post(
            url=self.api_url, json=data, headers=headers, timeout=self.timeout
        )
        try:
            return response.json()
        except ValueError:
            # The body is not JSON: report the HTTP status if it is an error,
            # otherwise re-raise the decode error unchanged.
            response.raise_for_status()
            raise

    def _subitem_to_dict(self, subitem):
        """Return a JSON-serializable copy of a parsed subitem (without nested subitems)."""
        return {
            "id": subitem["id"],
            "name": subitem["name"],
            "values": [{"title": v["title"], "value": v["value"]} for v in subitem["values"]],
            "assets": [{"name": a["name"], "url": a["url"]} for a in subitem["assets"]],
            "updates": [{"body": u["body"], "created_at": u["created_at"]} for u in subitem["updates"]],
        }

    def _fetch_all_items(self, board_id: int) -> List[Dict[str, Any]]:
        """Collect the raw items of every page of the board by following the cursor."""
        all_items = []
        cursor = None
        while True:
            json_response = self._perform_request(board_id, cursor)

            if "errors" in json_response:
                raise Exception(f"API Error: {json_response['errors']}")

            # "data" may be absent or null; both count as a malformed response.
            boards = (json_response.get("data") or {}).get("boards")
            if boards is None:
                raise Exception("Unexpected API response structure")
            if not boards:
                # An empty list would otherwise surface as a bare IndexError.
                raise ValueError(
                    f"Board {board_id} was not found or is not accessible with this API key"
                )

            items_page = boards[0]["items_page"]
            all_items.extend(items_page["items"])

            cursor = items_page["cursor"]
            if not cursor:
                return all_items

    @staticmethod
    def _format_values(values) -> str:
        """Render the non-empty column values as ', title: value' fragments."""
        return "".join(f", {v['title']}: {v['value']}" for v in values if v["value"])

    def _item_to_text(self, item) -> str:
        """Build the document text for one parsed item."""
        parts = [f"name: {item['name']}", self._format_values(item["values"])]

        if item["assets"]:
            parts.append("\nAssets:")
            parts.extend(f"\n- {asset['name']}: {asset['url']}" for asset in item["assets"])

        if item["updates"]:
            parts.append("\nUpdates:")
            parts.extend(
                f"\n- {update['created_at']}: {update['body']}" for update in item["updates"]
            )

        if item["subitems"]:
            parts.append("\nSubitems:")
            for subitem in item["subitems"]:
                parts.append(f"\n- {subitem['name']}")
                parts.append(self._format_values(subitem["values"]))

        return "".join(parts)

    def load_data(self, board_id: int) -> List[Document]:
        """Load board data by board_id.

        Args:
            board_id: Id of the monday.com board to read.

        Returns:
            One Document per board item. The text holds the item name, its
            non-empty column values, assets, updates and subitems; the metadata
            holds the board id, item id, subitem count and the subitems as JSON.

        Raises:
            Exception: If the API reports errors or the response has no boards data.
            ValueError: If the response contains no board for ``board_id``.
        """
        result = []
        for raw_item in self._fetch_all_items(board_id):
            item = self._parse_data(raw_item)
            result.append(
                Document(
                    text=self._item_to_text(item),
                    extra_info={
                        "board_id": board_id,
                        "item_id": item["id"],
                        "subitems_count": len(item["subitems"]),
                        "subitems_json": json.dumps(
                            [self._subitem_to_dict(subitem) for subitem in item["subitems"]]
                        ),
                    },
                )
            )

        return result

In [98]:
# Monday API connection: the board to read and a reader authenticated with the token.
board_id = 1564566045
reader = MondayReader(api_key=monday_api_token)

# Raw JSON of the first page (no cursor), kept for inspection.
raw_response = reader._perform_request(board_id=board_id)

# All board items, one Document per item.
documents = reader.load_data(board_id=board_id)

## Local vector store

In [None]:
!pip install llama-index-llms-openai

In [23]:
import os
# Set TOKENIZERS_PARALLELISM to 'False' in this process's environment.
os.environ.update(TOKENIZERS_PARALLELISM='False')

In [20]:
# Setup database
# Imported here so the cell runs on a fresh kernel.
import chromadb

# Client persisted under ./chroma_db; the "site" collection is created on first use.
db = chromadb.PersistentClient(path="./chroma_db")
chroma_collection = db.get_or_create_collection("site")

In [28]:
# Create index
# All names used by this cell are imported here so it runs on a fresh kernel.
from llama_index.core import StorageContext, VectorStoreIndex
from llama_index.llms.openai import OpenAI
from llama_index.vector_stores.chroma import ChromaVectorStore

# Back the index with the Chroma collection.
vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
storage_context = StorageContext.from_defaults(vector_store=vector_store)

# Set up OpenAI LLM
llm = OpenAI(model="gpt-3.5-turbo")

# Embed `documents` with the local embedding model and store them in Chroma.
# NOTE(review): `llm` is forwarded as an extra keyword argument; confirm that
# index construction actually uses it.
index = VectorStoreIndex.from_documents(
    documents,
    embed_model=embed_model,
    storage_context=storage_context,
    llm=llm
)

## Supabase vector store

In [None]:
!pip install llama-index-vector-stores-supabase

In [104]:
# All names used by this cell are imported here so it runs on a fresh kernel.
import os

from llama_index.core import StorageContext, VectorStoreIndex
from llama_index.vector_stores.supabase import SupabaseVectorStore

# The Postgres connection string is read from the DB_CONNECTION environment variable.
DB_CONNECTION = os.getenv('DB_CONNECTION')
if not DB_CONNECTION:
    # Fail here with a clear message instead of passing None to the vector store.
    raise ValueError(
        "DB_CONNECTION is not set; provide the Supabase Postgres connection string "
        "through the environment or the .env file."
    )

vector_store = SupabaseVectorStore(
    postgres_connection_string=DB_CONNECTION, 
    collection_name='monday'
)
storage_context = StorageContext.from_defaults(vector_store=vector_store)
# NOTE(review): no embed_model is passed here, so the library's default embedding
# model is used rather than the local `embed_model` — confirm this is intended.
index = VectorStoreIndex.from_documents(documents, storage_context=storage_context)

In [None]:
# Imported here so this cell does not depend on imports made by an earlier, optional cell.
from llama_index.core import VectorStoreIndex
from llama_index.llms.openai import OpenAI

llm = OpenAI(model="gpt-3.5-turbo")

# Build the index over the current `storage_context`, embedding with the local model.
# NOTE(review): this ingests `documents` into the current vector store again; confirm
# that the store's vector dimension matches the output size of `embed_model`.
index = VectorStoreIndex.from_documents(
    documents,
    embed_model=embed_model,
    storage_context=storage_context
)

In [105]:
# Pass the configured LLM explicitly so answers are generated by `llm` rather than
# by whatever the library's global default happens to be.
query_engine = index.as_query_engine(llm=llm)

## Query

In [106]:
# Run a question through the query engine and print the answer text.
response = query_engine.query("What is Natasha's Law?")
print(str(response))



Natasha's Law extensively covers and explains how the new laws affect each type of business.


## Citations

In [86]:
from llama_index.core.query_engine import CitationQueryEngine

In [87]:
# Query engine that answers with numbered citations to its sources.
citation_query_engine = CitationQueryEngine.from_args(
    index=index,
    # controls how granular citation sources are; the default is 512
    citation_chunk_size=512,
    similarity_top_k=3,
)

In [88]:
# Ask the citation engine a question and print the answer with its citation markers.
response = citation_query_engine.query(
    "Who is the Level 3 Supervising Food Safety course for?"
)
print(str(response))

The Level 3 Supervising Food Safety course is designed for managers and supervisors in the catering industry [1].


In [None]:
# Print the text of every source node attached to the last response.
for source_node in response.source_nodes:
    print(source_node.node.get_text())

In [79]:
# Create a chat engine on top of the index; no arguments are passed, so the
# library's default chat settings apply.
chat_engine = index.as_chat_engine()

In [89]:
# Start an interactive chat loop in the cell output; type "exit" to leave it.
chat_engine.chat_repl()

===== Entering Chat REPL =====
Type "exit" to exit.



Human:  What is allergen awareness?


Assistant: Allergen awareness involves understanding the potential severe outcomes of hypersensitivities and the importance of addressing allergies properly. It is crucial for businesses to be transparent, inclusive, and proactive in managing allergens to ensure the safety of customers. This includes effective communication, staff training on food allergens, and providing accurate allergen information to customers to create a safe dining experience for individuals with food hypersensitivities.



Human:  please explain in more detail


Assistant: Allergen awareness is the understanding and acknowledgment of the potential severe reactions that can occur in individuals with allergies. It involves recognizing the importance of addressing allergies properly to ensure the safety and well-being of individuals with food hypersensitivities. 

Businesses play a significant role in allergen awareness by being transparent, inclusive, and proactive in managing allergens. This includes:

1. Effective Communication: Businesses need to communicate clearly with customers about the presence of allergens in their products or dishes. This can be done through menu labeling, signage, and verbal communication with customers.

2. Staff Training: It is essential for businesses to provide comprehensive training to their staff on food allergens. Staff members should be knowledgeable about common allergens, cross-contamination risks, and how to handle allergen-related inquiries from customers.

3. Providing Accurate Allergen Information: Busin

KeyboardInterrupt: Interrupted by user