In [None]:
#Installing the necessary libraries
# NOTE(review): no versions are pinned, so a fresh install may pull releases that no
# longer match the import paths used in this notebook — consider pinning each package.
%pip install cassio datasets langchain openai tiktoken flask python-dotenv

In [None]:
from langchain.vectorstores.cassandra import Cassandra
from langchain.indexes.vectorstore import VectorStoreIndexWrapper
from langchain.llms import OpenAI
from langchain.embeddings import OpenAIEmbeddings

from cassandra.cluster import Cluster
from cassandra.auth import PlainTextAuthProvider
from cassandra.query import SimpleStatement

from datasets import load_dataset

from dotenv import load_dotenv

import os

In [3]:
#Defining the different tokens
# Every credential is read from the environment (load_dotenv also picks up a local
# .env file, which must stay out of version control) so that no secret is stored
# in the notebook itself.
# SECURITY: the Astra token and client secret that used to be hard-coded in this
# cell must be treated as leaked — revoke and regenerate them.
load_dotenv()

# Path of the secure-connect bundle. The default is relative to the working
# directory instead of an absolute path that exists on a single machine.
ASTRA_DB_SECURE_BUNDLE_PATH = os.getenv(
    "ASTRA_DB_SECURE_BUNDLE_PATH", "secure-connect-vector-database.zip"
)
ASTRA_DB_APPLICATION_TOKEN = os.getenv("ASTRA_DB_APPLICATION_TOKEN")
ASTRA_DB_CLIENT_ID = os.getenv("ASTRA_DB_CLIENT_ID")
# The Python name keeps its original "DC" spelling because the vector-store cell
# references it; only the environment variable uses the corrected name.
ASTRA_DC_CLIENT_SECRET = os.getenv("ASTRA_DB_CLIENT_SECRET")
ASTRA_DB_KEYSPACE = os.getenv("ASTRA_DB_KEYSPACE", "search")
OPENAI_API_KEY = os.getenv('OPENAI_KEY')

# Fail fast with a readable message instead of an obscure authentication error
# in a later cell. Only the settings actually consumed below are mandatory.
_missing_settings = [
    env_name
    for env_name, env_value in (
        ("ASTRA_DB_CLIENT_ID", ASTRA_DB_CLIENT_ID),
        ("ASTRA_DB_CLIENT_SECRET", ASTRA_DC_CLIENT_SECRET),
        ("OPENAI_KEY", OPENAI_API_KEY),
    )
    if not env_value
]
if _missing_settings:
    raise RuntimeError(
        "Missing required environment variable(s): "
        + ", ".join(_missing_settings)
        + ". Define them in the environment or in a local .env file."
    )

In [4]:
#Creating the Vector Store

# The cluster is reached through the secure-connect bundle configured above.
cloud_config = dict(secure_connect_bundle=ASTRA_DB_SECURE_BUNDLE_PATH)
auth_provider = PlainTextAuthProvider(
    username=ASTRA_DB_CLIENT_ID,
    password=ASTRA_DC_CLIENT_SECRET,
)
cluster = Cluster(auth_provider=auth_provider, cloud=cloud_config)
astraSession = cluster.connect()

# The completion model and the embedding model are built with the same OpenAI key.
llm = OpenAI(openai_api_key=OPENAI_API_KEY)
myEmbedding = OpenAIEmbeddings(openai_api_key=OPENAI_API_KEY)

# Vector store bound to the "embedding_demo" table of the configured keyspace.
MyCassandraVStore = Cassandra(
    table_name="embedding_demo",
    keyspace=ASTRA_DB_KEYSPACE,
    session=astraSession,
    embedding=myEmbedding,
)

In [None]:
#Deleting everything from astraDB

# Build the statement text first, then wrap and execute it.
truncate_cql = "TRUNCATE {}.{}".format(ASTRA_DB_KEYSPACE, "embedding_demo")
query = SimpleStatement(truncate_cql)
astraSession.execute(query)

In [6]:
#Loading the data

print("Loading data from huggingface")
mydataset = load_dataset("Biddls/Onion_news", split="train")
# Slice the rows first and pick the column afterwards: only the first 50 rows are
# read, instead of reading the whole "text" column just to keep 50 items.
headlines = mydataset[:50]["text"]

Loading data from huggingface


In [None]:
# Preview the headlines kept from the dataset before they are embedded.
print(headlines)

In [None]:
#Storing the Embedding in AstraDB

print("\nGenerating Embedding and storing in AstraDB")
# Called for its side effect on the store; the return value is not kept.
MyCassandraVStore.add_texts(headlines)

# The reported figure is the number of headlines submitted.
headline_count = len(headlines)
print("Insert %i headlines.\n" % headline_count)

# Wrapper used by the question-answering cells to query the store.
vectorIndex = VectorStoreIndexWrapper(vectorstore=MyCassandraVStore)

In [None]:
#View the content of the Vector store

# Build the statement text first, then wrap and execute it.
select_cql = "SELECT * FROM {}.{}".format(ASTRA_DB_KEYSPACE, "embedding_demo")
query = SimpleStatement(select_cql)
rows = astraSession.execute(query)

# Print every returned row as-is, one per line.
for row in rows:
    print(row)

In [None]:
"""#Question loop on the embedded llm

while True:
    query_text = input("\nEnter a question (or type 'quit' to exit):")

    if query_text.lower() == "quit":
        break

    print("QUESTION: \"%s\"\n" % query_text)

    # Answer without embeddings
    answer_without_embeddings = llm.generate([query_text])
    print("ANSWER WITHOUT EMBEDDINGS: \"%s\"\n" % answer_without_embeddings)

    # Answer with embeddings
    answer_with_embeddings  = vectorIndex.query(query_text, llm=llm).strip()
    print("ANSWER WITH EMBEDDINGS: \"%s\"\n" % answer_with_embeddings)

    #Most relevant documents
    print("DOCUMENTS BY RELEVANCE:")
    for doc, score in MyCassandraVStore.similarity_search_with_score(query_text, k=4):
        print("  %0.4f \"%s ...\"" % (score, doc.page_content[:60]))
        """

In [10]:
#expose a webservice instead
# NOTE(review): these imports sit in this cell rather than in the import cell at the
# top of the notebook; consider moving them there.

import threading
from flask import Flask, request, jsonify

# Flask application on which the /ask route is registered.
app = Flask(__name__)

@app.route('/ask', methods=['POST'])
def ask():
    """Answer a question both with and without the vector store.

    Expects a JSON body of the form ``{"question": "<text>"}``.

    Returns:
        A JSON response holding the question, the answer generated by ``llm``
        alone, the answer obtained through ``vectorIndex``, and the 4 documents
        returned by the similarity search (score plus the first 60 characters
        of each). Responds with HTTP 400 and an ``error`` message when the body
        is not a JSON object or has no non-empty string under ``question``.
    """
    # silent=True yields None instead of raising on a missing or malformed JSON
    # body, so every kind of bad request goes through the single 400 branch below
    # rather than surfacing as an unhandled KeyError/TypeError (HTTP 500).
    payload = request.get_json(silent=True)
    query_text = payload.get('question') if isinstance(payload, dict) else None
    if not isinstance(query_text, str) or not query_text.strip():
        return jsonify({
            "error": "Request body must be a JSON object with a non-empty 'question' string.",
        }), 400

    # Answer without embeddings
    llm_result = llm.generate([query_text])
    answer_without_embeddings = llm_result.generations[0][0].text.strip()

    # Answer with embeddings
    answer_with_embeddings = vectorIndex.query(query_text, llm=llm).strip()

    # Documents by relevance
    # float() guards against a score type that jsonify cannot serialize
    # (NOTE(review): presumably a no-op when the store already returns plain floats).
    documents_by_relevance = [
        {"score": float(score), "content": doc.page_content[:60]}
        for doc, score in MyCassandraVStore.similarity_search_with_score(query_text, k=4)
    ]

    return jsonify({
        "question": query_text,
        "answer_without_embeddings": answer_without_embeddings,
        "answer_with_embeddings": answer_with_embeddings,
        "documents_by_relevance": documents_by_relevance,
    })

def run_flask(host=None, port=None):
    """Run the Flask application's built-in server.

    Args:
        host: Address to bind to. When omitted, the ``FLASK_RUN_HOST``
            environment variable is used, falling back to ``'192.168.0.10'``.
        port: TCP port to listen on. When omitted, the ``FLASK_RUN_PORT``
            environment variable is used, falling back to ``5000``.
    """
    # The historical address/port stay as the last-resort defaults so that calling
    # run_flask() with no argument and no environment override behaves as before.
    if host is None:
        host = os.getenv('FLASK_RUN_HOST', '192.168.0.10')
    if port is None:
        port = int(os.getenv('FLASK_RUN_PORT', '5000'))
    app.run(host=host, port=port)

# The server is started in its own thread so that this cell returns.
threading.Thread(target=run_flask).start()

 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on http://192.168.0.10:5000
Press CTRL+C to quit
