In [None]:
!pip install accelerate>=0.12.0 transformers[torch]==4.25.1 
!pip install -U "ray[serve]"  # installs Ray + dependencies for Ray Serve
!pip install -U slack-bolt
!pip install faiss-cpu
!pip install atlassian-python-api
!pip install numpy




In [None]:
import torch
from transformers import pipeline
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import VisionEncoderDecoderModel, ViTImageProcessor
from transformers import AutoModelForSeq2SeqLM

from fastapi import FastAPI, Request
from ray import serve
from slack_bolt.async_app import AsyncApp
from slack_bolt.adapter.fastapi.async_handler import AsyncSlackRequestHandler
from slack_bolt.adapter.starlette.handler import SlackRequestHandler
import requests
from slack_sdk.signature import SignatureVerifier
import ray
import asyncio

import logging
# TODO: configure the logger (logging is imported above but not set up in the visible cells).

from atlassian import Confluence
import os

# Set up Confluence API connection.
# Only the base URL is passed (no credentials); presumably this relies on the
# space being readable anonymously -- TODO confirm.
confluence = Confluence(
    url='https://advendio.atlassian.net',
)

space_key = "SO"

# A single get_all_pages_from_space call returns at most `limit` pages, so the
# space is walked batch by batch until an empty batch signals the end.
page_batch_size = 50
pages = []
while True:
    page_batch = confluence.get_all_pages_from_space(
        space_key, start=len(pages), limit=page_batch_size)
    if not page_batch:
        break
    pages.extend(page_batch)

# Create a directory to store the downloaded pages
os.makedirs('advendio_pages', exist_ok=True)

# Download each page
for page in pages:
    page_id = page['id']
    page_title = page['title']
    # Path separators inside a title would make the file land outside the
    # target directory (or fail to open), so they are replaced as well.
    safe_title = page_title.replace(' ', '_').replace('/', '_').replace('\\', '_')
    page_filename = safe_title + '.html'
    page_content = confluence.get_page_by_id(page_id, expand='body.storage')['body']['storage']['value']
    try:
        # Written as UTF-8 because the pages are later read back with encoding='utf-8'.
        with open(os.path.join('advendio_pages', page_filename), 'w', encoding='utf-8') as f:
            f.write(page_content)
    except (OSError, UnicodeError) as exc:
        # Best effort: report the page that could not be stored and move on.
        print('Failed to save:', page_filename, '-', exc)
        continue
    print('Downloaded:', page_filename)

import numpy as np

import torch
from typing import List
from transformers import DPRContextEncoder, DPRContextEncoderTokenizer
from transformers import DPRQuestionEncoder, DPRQuestionEncoderTokenizer
from bs4 import BeautifulSoup
import os
import faiss

@serve.deployment(ray_actor_options={"num_gpus": 0.5})
class DocumentVectorDB:
    """In-memory document store backed by DPR embeddings and a Faiss index.

    On construction the HTML files in ``advendio_pages`` are converted to
    plain text, embedded with the DPR context encoder and added to a flat
    L2 Faiss index. Queries are embedded with the DPR question encoder and
    answered with the text of the nearest document.

    NOTE(review): the deployment reserves half a GPU, but nothing in this
    class moves the encoders or the inputs to a device -- confirm whether
    CPU inference is intended.
    """

    def __init__(self, 
                 question_encoder_model: str = "facebook/dpr-question_encoder-single-nq-base",
                 context_encoder: str = "facebook/dpr-ctx_encoder-single-nq-base"
                 ):
        """Load the documents and both encoders, then build the index.

        Args:
            question_encoder_model: model name or path for the DPR question
                encoder and its tokenizer.
            context_encoder: model name or path for the DPR context encoder
                and its tokenizer.
        """
        # Maximum number of tokens passed to either encoder.
        self.token_limit = 512

        self.documents = self.format_documents()
        self.question_encoder = DPRQuestionEncoder.from_pretrained(
            question_encoder_model)
        self.question_tokenizer = DPRQuestionEncoderTokenizer.from_pretrained(
            question_encoder_model)
        self.context_encoder = DPRContextEncoder.from_pretrained(
            context_encoder)
        self.context_tokenizer = DPRContextEncoderTokenizer.from_pretrained(
            context_encoder)
        count = self.index_documents(self.documents)
        print("document count:{}".format(count))

    def index_documents(self, documents: List[str], batch_size: int = 16) -> int:
        """Embed ``documents`` and (re)build the Faiss index from them.

        Args:
            documents: plain-text documents to index.
            batch_size: number of documents encoded per forward pass, so the
                whole corpus never has to fit into one padded batch.

        Returns:
            The number of vectors stored in the index.

        Raises:
            ValueError: if ``documents`` is empty.
        """
        documents = list(documents)
        if not documents:
            raise ValueError("no documents to index")

        # Encode the documents batch by batch; gradients are not needed for
        # inference, so autograd bookkeeping is switched off.
        embedding_batches = []
        with torch.no_grad():
            for start in range(0, len(documents), batch_size):
                encoded_documents = self.context_tokenizer(
                    documents[start:start + batch_size],
                    return_tensors="pt",
                    padding=True,
                    truncation=True,
                    max_length=self.token_limit)
                embedding_batches.append(
                    self.context_encoder(**encoded_documents).pooler_output)

        document_embeddings = np.ascontiguousarray(
            torch.cat(embedding_batches).numpy(), dtype=np.float32)

        # Create Faiss Index
        vector_dimension = document_embeddings.shape[1]
        print("vector dimension:{}".format(vector_dimension))
        self.index = faiss.IndexFlatL2(vector_dimension)
        # The embeddings are normalised in place before being added.
        faiss.normalize_L2(document_embeddings)
        self.index.add(document_embeddings)
        # Keep the text list aligned with the index rows used for look-ups.
        self.documents = documents
        return self.index.ntotal

    def insert_documents(self) -> List[str]:
        """Placeholder for persisting documents somewhere; not implemented.

        Currently does nothing and returns ``None``.
        """
        pass

    def encode_questions(self, query: str) -> np.ndarray:
        """Embed ``query`` with the question encoder.

        The query is truncated to ``self.token_limit`` tokens.

        Returns:
            A contiguous float32 array holding the pooled encoder output,
            ready to be passed to ``self.index.search``.
        """
        encoded_query = self.question_tokenizer(
            query,
            return_tensors="pt",
            truncation=True,
            max_length=self.token_limit)
        with torch.no_grad():
            query_embedding = self.question_encoder(**encoded_query).pooler_output
        return np.ascontiguousarray(query_embedding.numpy(), dtype=np.float32)

    def query_documents(self, query: str) -> str:
        """Return the text of the indexed document closest to ``query``."""
        # Encode the query
        query_embedding = self.encode_questions(query)
        # Only the best hit is used, so only one neighbour is requested.
        _, idx = self.index.search(query_embedding, 1)
        best_index = int(idx[0][0])
        doc = self.documents[best_index]
        print("query result: index={}, doc={}".format(best_index, doc))
        return doc

    def format_documents(self, pages_dir: str = 'advendio_pages'):
        """Read every file in ``pages_dir`` and return its visible text.

        Args:
            pages_dir: directory containing the downloaded HTML pages.

        Returns:
            A list with one plain-text string per file, in sorted file-name
            order.
        """
        documents = []
        for filename in sorted(os.listdir(pages_dir)):
            path = os.path.join(pages_dir, filename)
            # Sub-directories cannot be opened as pages; skip them.
            if not os.path.isfile(path):
                continue
            with open(path, 'r', encoding='utf-8') as file:
                html_content = file.read()
            soup = BeautifulSoup(html_content, "lxml")
            documents.append(soup.get_text(separator=" ", strip=True))
        return documents

@serve.deployment(route_prefix="/", ray_actor_options={"num_gpus": 0.5})
class RAGConversationBot:
    """HTTP entry point that answers a question using retrieved context.

    For each request the question is sent to the document deployment, the
    returned document text is appended to the question as context, and the
    resulting prompt is passed to a text-generation pipeline.
    """

    def __init__(self, 
                 db: DocumentVectorDB, 
                 model: str = "databricks/dolly-v2-3b"):
        """Store the document handle and load the generation pipeline once.

        Args:
            db: handle to the deployment that serves ``query_documents``.
            model: model name or path given to ``transformers.pipeline``.
        """
        self.model = model
        self.db = db
        # Loaded once per replica; building the pipeline inside the request
        # path would reload the model weights on every call.
        self.generator = pipeline(
            model=self.model,
            torch_dtype=torch.bfloat16,
            trust_remote_code=True,
            device_map="auto")

    async def prompt(self, input: str) ->str:
        """Build the generation prompt for ``input`` from the retrieved context."""
        context = await self.db.query_documents.remote(input)
        # Depending on the Ray Serve handle API in use, awaiting the call
        # yields either the final value or a reference that must itself be
        # awaited; both cases are handled.
        if not isinstance(context, str):
            context = await context
        assert isinstance(context, str)
        return "{} \n context: {}\n output:".format(input, context)

    async def generate_text(self, input_text: str) -> str:
        """Generate an answer for ``input_text``.

        Returns:
            The first element of the pipeline output.
            NOTE(review): text-generation pipelines usually return a list of
            dicts, so this is presumably a dict rather than a ``str`` --
            confirm against the response the client expects.
        """
        prompt_text = await self.prompt(input_text)
        assert isinstance(prompt_text, str)
        return self.generator(prompt_text)[0]

    async def __call__(self, http_request: Request) -> str:
        """Handle an HTTP request whose JSON body is the question string."""
        input_text: str = await http_request.json()
        return await self.generate_text(input_text)

# Deploy the application: the bot is bound to the document deployment it queries.
serve.run(RAGConversationBot.bind(DocumentVectorDB.bind()))

english_text = "How is an Ads event stored in ADvendio?"
# Query the deployment and print the result.
response = requests.post("http://127.0.0.1:8000/", json=english_text)
# Fail loudly on an HTTP error instead of printing an error body as if it were an answer.
response.raise_for_status()
answer_text = response.text
print(answer_text)


In [None]:
# Restart Ray from a clean state, then start Serve.
# NOTE(review): this shuts down any Ray runtime that is already initialised,
# which presumably includes whatever was deployed by earlier cells -- confirm
# that is intended.
if ray.is_initialized():
    ray.shutdown()
ray.init()
serve.start()
# Start deployment instances.
# NOTE(review): model_one, model_two and ComposedModel are not defined in the
# visible cells; on a fresh kernel these lines would raise NameError unless
# they are defined elsewhere -- confirm or remove this cell.
model_one.deploy()
model_two.deploy()
ComposedModel.deploy()

# Now send requests.
# Eight GET requests are sent to the /composed route and each JSON reply is printed.
for _ in range(8):
    resp = requests.get("http://127.0.0.1:8000/composed", data="Hey!")
    print(resp.json())

ray.shutdown()
