In [4]:
# Credentials and connection settings for Astra DB and OpenAI.
#
# Each value is read from the environment variable of the same name so real
# secrets never need to be typed into (and saved with) this notebook. When a
# variable is unset the value falls back to the empty-string placeholder.
#
# `os` is imported here rather than in the imports cell because this cell
# runs before it.
import os

# Used below as the password / username for PlainTextAuthProvider.
ASTRA_DB_CLIENT_SECRET = os.environ.get("ASTRA_DB_CLIENT_SECRET", "")
# NOTE(review): not referenced by any cell visible here - confirm whether it is still needed.
ASTRA_DB_APPLICATION_TOKEN = os.environ.get("ASTRA_DB_APPLICATION_TOKEN", "")
ASTRA_DB_CLIENT_ID = os.environ.get("ASTRA_DB_CLIENT_ID", "")
# Passed to the Cassandra driver as the 'secure_connect_bundle' cloud option.
ASTRA_DB_SECURE_BUNDLE_PATH = os.environ.get("ASTRA_DB_SECURE_BUNDLE_PATH", "")
# Keyspace handed to the Cassandra vector store.
ASTRA_DB_KEYSPACE = os.environ.get("ASTRA_DB_KEYSPACE", "")
# Key used for both the OpenAI LLM and the OpenAI embeddings client.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "")

In [5]:
from langchain.vectorstores.cassandra import Cassandra
from langchain.indexes.vectorstore import VectorStoreIndexWrapper
from langchain.llms import OpenAI
from langchain.embeddings import OpenAIEmbeddings

In [6]:
from cassandra.cluster import Cluster
from cassandra.auth import PlainTextAuthProvider 

In [7]:
from datasets import load_dataset

In [8]:
# Describe how to reach the Astra cluster: the secure-connect bundle path
# goes in the driver's `cloud` options, the client id / secret pair is used
# as the plain-text username / password.
cloud_config = dict(secure_connect_bundle=ASTRA_DB_SECURE_BUNDLE_PATH)
auth_provider = PlainTextAuthProvider(
    ASTRA_DB_CLIENT_ID,
    ASTRA_DB_CLIENT_SECRET,
)
cluster = Cluster(
    cloud=cloud_config,
    auth_provider=auth_provider,
)


In [9]:
# Open a session on the cluster configured in the previous cell.
astraSession = cluster.connect()

# One OpenAI client answers questions, the other computes embeddings;
# both use the same API key.
llm = OpenAI(openai_api_key=OPENAI_API_KEY)
myEmbedding = OpenAIEmbeddings(openai_api_key=OPENAI_API_KEY)

# Vector store backed by the "qa_mini_demo" table in the configured keyspace.
myCassandraVStore = Cassandra(
    embedding = myEmbedding,
    session = astraSession,
    keyspace = ASTRA_DB_KEYSPACE,
    table_name = "qa_mini_demo"
)

# How many headlines to embed and store; kept small for the demo.
HEADLINE_COUNT = 50

print("Loading data from HF")
myDataset = load_dataset("Biddls/Onion_News", split="train")
# Slice rows first, then select the column: this reads only the first
# HEADLINE_COUNT rows instead of materialising the whole 'text' column
# and discarding most of it.
headlines = myDataset[:HEADLINE_COUNT]['text']

print("Generating embeddings and storing in astra")
# NOTE(review): the ids returned here look randomly generated, so re-running
# this cell presumably stores the same headlines again - confirm.
myCassandraVStore.add_texts(headlines)

Loading data from HF


Downloading readme: 100%|██████████| 463/463 [00:00<00:00, 233kB/s]
Downloading data: 100%|██████████| 23.5M/23.5M [00:06<00:00, 3.44MB/s]
Downloading data files: 100%|██████████| 1/1 [00:06<00:00,  6.85s/it]
Extracting data files: 100%|██████████| 1/1 [00:00<00:00, 40.33it/s]
Generating train split: 33880 examples [00:00, 39395.76 examples/s]


Generating embeddings and storing in astra


['14775e0cdf5d4e8eb1cc6f332bc48988',
 '4dba1cd7f47d4447b195cb73c7c47e7e',
 '7cb88c3648a54ea1973884f600a34761',
 '62e0ac5835214a33a451c15c93766c64',
 '628afd13fcd549ff8346c6edbf79dbfe',
 '8cf8889b7a1a453c91a425587f142d29',
 '1adbf28f509d4df1a033e62d6fce1eff',
 'af1e179f7c4c44b2b0dbab56f8dc0222',
 'e9651f82da374122a01c866784a565a2',
 '595307b8f07f4676b511a589f7104a70',
 '613e7c43929b48b5bdcee31b81326217',
 'a04c5a37613b449ca5842ca9fa4cc4a3',
 'abc9229025a341a3bad68e647147b65f',
 '9490e1b0ecc9468dad5eea158f94a609',
 'a2cad2379a864d0b8ed97280d07e9c5a',
 'f1fefc64eefe455a80bf38eff5f6d811',
 '06570eef5d5041cfb1612be9912a867d',
 '7e58c9171b784257a6f3a8ca35f068ed',
 '8e1200199b52458d9a8d62b11cf8449e',
 'bfbd9484a96645dd9787050be2374cef',
 '82a9504ab29b474bbbf7bb2560bf41c7',
 'c41369a7c915463990856835d7ec7f81',
 '91f78bfe3c844c1ab41c13057aa2bc5b',
 '7ea2d8be307d4f95af8056439b2b726f',
 '164e69c8134a4c80834e95513dfeedd4',
 '2be0f8cd0d784c6f8752de7d2b146073',
 '7749b2474cf14961b13c8fc821106489',
 

In [10]:
# Interactive question-answering loop over the vector store.
#
# Prompts until the user types 'quit'. For every question it prints the
# LLM answer followed by the four most similar stored documents with their
# similarity scores.
vectorIndex = VectorStoreIndexWrapper(vectorstore= myCassandraVStore)

first_question = True
while True:
    # The wording of the prompt differs only for the very first question.
    if first_question:
        prompt = "\nEnter your question (or type 'quit' to exit):"
    else:
        prompt = "\nWhat's your next question (or type 'quit' to exit):"
    query_text = input(prompt)
    first_question = False

    # Decide what to do on a normalised copy so that stray whitespace
    # (e.g. "quit ") still exits; the query itself is sent exactly as typed.
    command = query_text.strip().lower()
    if command == 'quit':
        break
    if not command:
        # Nothing was typed: ask again instead of sending a blank query
        # to the embedding and LLM calls.
        continue

    print("Question: \"%s\"" % query_text)
    answer = vectorIndex.query(query_text, llm=llm).strip()
    print("Answer: \"%s\"" % answer)

    print("Documents by Relevance:")
    for doc, score in myCassandraVStore.similarity_search_with_score(query_text, k = 4):
        # Show only the first 60 characters of each matching document.
        print(" %0.4f \"%s ...\"" % (score, doc.page_content[:60]))

Question: "how to find happenis "
Answer: "I'm sorry, I don't know what 'happenis' is."
Documents by Relevance:
 0.8725 "‘No Way To Prevent This,’ Says Only Nation Where This Regula ..."
 0.8673 "Man Buys Slice Of Honey-Roasted Ham For Attractive Woman At  ..."
 0.8648 "Report: Minority Of Murders Committed By Someone Victim Didn ..."
 0.8648 "Report: Everyone Laughing At What Is A Very Silly Misunderst ..."
Question: "howto find happeniss"
Answer: "I'm sorry, I don't know."
Documents by Relevance:
 0.8670 "‘No Way To Prevent This,’ Says Only Nation Where This Regula ..."
 0.8511 "Report: Everyone Laughing At What Is A Very Silly Misunderst ..."
 0.8501 "Report: Minority Of Murders Committed By Someone Victim Didn ..."
 0.8498 "Things To Never Google After You Commit A Crime #~# If you’r ..."
Question: "how to find happiness"
Answer: "It is different for everyone. Happiness is found in different ways for different people."
Documents by Relevance:
 0.8849 "Relaxed Marie Kondo Now Says S