In [35]:
import os
import weaviate
# from weaviate.classes.init import Auth
from dotenv import load_dotenv
load_dotenv()

weaviate_url = os.getenv("WEAVIATE_URL") 
weaviate_api_key = os.getenv("WEAVIATE_API_KEY")
HF_TOKEN = os.getenv("HF_TOKEN")



client = weaviate.Client(
    url=weaviate_url , auth_client_secret=weaviate.AuthApiKey(weaviate_api_key),
    additional_headers={
         "X-HuggingFace-Api-Key": HF_TOKEN
    },
)

print(client.is_ready())


True


In [36]:

client.schema.get()

{'classes': []}

In [37]:

client.schema.delete_all()

In [38]:
schema = {
    "classes": [
        {
            "class": "RAG",
            "description": "Documents for RAG",
            "vectorizer": "text2vec-huggingface",
            "moduleConfig": {"text2vec-huggingface": {"model": "sentence-transformers/all-MiniLM-L6-v2", "type": "text"}},
            "properties": [
                {
                    "dataType": ["text"],
                    "description": "The content of the paragraph",
                    "moduleConfig": {
                        "text2vec-huggingface": {
                            "skip": False,
                            "vectorizePropertyName": False,
                        }
                    },
                    "name": "content",
                },
            ],
        },
    ]
}

     

client.schema.create(schema)
     

client.schema.get()

{'classes': [{'class': 'RAG',
   'description': 'Documents for RAG',
   'invertedIndexConfig': {'bm25': {'b': 0.75, 'k1': 1.2},
    'cleanupIntervalSeconds': 60,
    'stopwords': {'additions': None, 'preset': 'en', 'removals': None}},
   'moduleConfig': {'text2vec-huggingface': {'model': 'sentence-transformers/all-MiniLM-L6-v2',
     'type': 'text',
     'useCache': True,
     'useGPU': False,
     'vectorizeClassName': True,
     'waitForModel': False}},
   'multiTenancyConfig': {'autoTenantActivation': False,
    'autoTenantCreation': False,
    'enabled': False},
   'properties': [{'dataType': ['text'],
     'description': 'The content of the paragraph',
     'indexFilterable': True,
     'indexRangeFilters': False,
     'indexSearchable': True,
     'moduleConfig': {'text2vec-huggingface': {'skip': False,
       'vectorizePropertyName': False}},
     'name': 'content',
     'tokenization': 'word'}],
   'replicationConfig': {'asyncEnabled': False,
    'deletionStrategy': 'NoAutomate

In [39]:
from langchain.retrievers.weaviate_hybrid_search import WeaviateHybridSearchRetriever
     

retriever = WeaviateHybridSearchRetriever(
    alpha = 0.5,               # defaults to 0.5, which is equal weighting between keyword and semantic search
    client = client,           # keyword arguments to pass to the Weaviate client
    index_name = "RAG",  # The name of the index to use
    text_key = "content",         # The name of the text key to use
    attributes = [], # The attributes to return in the results
    create_schema_if_missing=True,
)

In [40]:
from langchain_community.document_loaders import PyPDFLoader
     

loader = PyPDFLoader("data.pdf")
docs = loader.load()
print(docs)

# from langchain.text_splitter import RecursiveCharacterTextSplitter
     

# text_splitter = RecursiveCharacterTextSplitter(chunk_size=500,chunk_overlap=100)
     

# docs = text_splitter.split_documents(docs)


print(len(docs))
     

[Document(metadata={'producer': 'Adobe PDF Library 10.0.1', 'creator': 'Adobe InDesign CS6 (Windows)', 'creationdate': '2023-08-03T16:08:03+05:30', 'moddate': '2023-08-03T16:08:53+05:30', 'source': 'data.pdf', 'total_pages': 18, 'page': 0, 'page_label': '1'}, page_content='UNIT 5 INTRODUCTION TO ONLINE ANALYTICAL  \nPROCESSING\nStructure         \n5.0 Introduction \n5.1 \n Objectives \n5.2 \n OLAP and its Need\n5.3 \n Characteristics of O\nLAP\n5.4 \n OLAP and Multidim\nensional Analysis\n 5.4.1\n Multidimens\nional Logical Data Modeling and its Users\n 5.4.2\n Multidimens\nional Structure\n 5.4.3\n Multidimens\nional Operations \n \n5.5 \n OLAP Functions\n5.6 \n Data Warehouse an\nd OLAP: Hypercube and Multicubes\n5.7 \n Applications of OL\nAP\n5.8 \n Steps in the OLAP Creation Process\n5.9 \n Advantages of OLAP\n5.10\n OLAP Architecture\ns - MOLAP, ROLAP, HOLAP, DOLAP \n5.11\n \nSummary\n \n5.12\n Solutions/Answers   \n \n5.13\n Further Readings\n \n5.0 INTRODUCTION \nIn the earlier 

In [41]:
retriever.add_documents(docs)

['34b85dfa-aaf2-4056-b10f-3e4bbd9d1b83',
 'f5831ab3-f60a-4d06-a358-b29ff9b6396a',
 '82f86943-df5a-4cdd-9472-881d79291e76',
 '459cfa5a-c961-4ec3-b824-02b456b14cf4',
 '6dfeb124-e42a-4b4c-84b0-92c21e652b90',
 '0f2b5b95-e332-4eed-b9dc-64e1836131fb',
 'f2f30967-74a2-47c8-a591-82b7271f8489',
 '8389ce49-443f-4e9d-9775-4c7fdb452802',
 '719d1801-5421-490d-a0e8-14fd9befe39e',
 'c40f15b3-3b41-4b7a-bf12-64bc595dd5a5',
 '06e838e1-2928-4a48-9e57-9e1958da8b59',
 'db367324-f83d-4ebb-96fa-bcc0f79ae21a',
 '74d460f8-dc43-4971-b404-cdc4721f2b06',
 '22b98786-5ec4-4198-9588-1c06f91ca760',
 '6469b026-f0e2-4f0c-8e94-66c30090a91f',
 '4b4ea56c-c0ae-4fcd-9d4e-f788af807059',
 '260f7b4d-8051-4994-b048-d13820da4aef',
 '5bcb4952-c488-4fec-b189-5603abc91e20']

In [42]:
print(retriever.invoke("what is the best OLAP practices for creating 3 4 tables ?")[0].page_content)

81
Introduction to Online 
Analytical Processing
5.9 ADVANTAGE S OF OLAP
The SQL functions like Group By, Aggregating functions are quite complex to 
operate in relational databases as compared to multidimensional databases. OLAP 
can pre-compute the queries can save in sub cubes. The hypercubes also make the 
computation task faster and saves time. OLAP has proved to an extremely scalable 
and user – friendly method which is able to perfectly cater to its entire customer 
needs ranging from small to large companies. 
Some listed benefits of using OLAP are as follows:
•	 Data
	
Processing
	
at
	
a
	
faster
	
speed
The speed of query execution has been tremendous since the use of OLAP 
technology and is now counted as one of the primary benefits for it. This prevents 
the customers from spending a lot of time and money on heavy calculations and 
creating complex reports.
•	 Accessibility
The cube enables the various kinds of data like – transactional data from various 
resources, inform

In [76]:
import os
from groq import Groq

# Initialize Groq client with API key
client = Groq(
    api_key="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",
)
from dotenv import load_dotenv
load_dotenv()

# Function to get OLAP best practices based on user query
def get_olap_best_practices(user_query):
    chat_completion = client.chat.completions.create(
        messages=[
            {
                "role": "user",
                "content": f"The user has asked: '{user_query}'. Based on this, provide the best OLAP (Online Analytical Processing) practices. you should answer just related to OLAP and dont include any user query info just give best OLAP practices based on user query "
                           "Consider data modeling, indexing, partitioning, query optimization, and performance tuning for large-scale analytical workloads.",
                           
            }
        ],
        model="llama-3.3-70b-versatile",
    )
    
    # Get response text
    response_text = chat_completion.choices[0].message.content

    # Clean up unnecessary formatting (removing ** and #)
    cleaned_response = response_text.replace("**", "").replace("#", "").replace("```","")

    return cleaned_response.strip() 

# Example user query
user_query = "Give me a database table schema for my student management system"
response = get_olap_best_practices(user_query)
import re

def clean_text(text):
    """Remove problematic escape sequences and extra formatting."""
    text = text.replace("**", "").replace("#", "")  # Remove markdown formatting
    text = text.replace("\\", "")  # Remove unnecessary backslashes
    text = re.sub(r'\s+', ' ', text).strip()  # Normalize spaces
    return text

# Use the function before invoking the retriever
cleaned_response = clean_text(response)
print(retriever.invoke(cleaned_response)[0].page_content)


88
Etl , O l AP and t rends
Features ROLAP MOLAP HOLAP
Storage space 
requirement
Data is stored in 
relational tables. 
Comparatively 
Large storage 
space requirement 
Data is stored in 
multidimensional 
tables. Medium 
storage space 
requirements
It uses both 
ROLAP, 
MOLAP. Small 
storage space 
requirements. No 
duplicate of data
Latency Low latency High latency Medium latency 
Query response 
time
Slow query 
response time 
Fast query 
response time.
Medium query 
response time 
Volume of data Used for large 
volumes of data
Limited volume of 
data
Can be used in 
both scenarios
Retreival of data Complex SQL 
queries are used
Sparse Matrix is 
used
Both
Data View Static view of data Dynamic view of 
data
Both static and 
dynamic view of 
data
2)
 Limitations of OLA
P cube are:
	 •	 OLAP
	
requires
	
a
	
star/snowflake
	
schema:
	 •	 	There
	is
	a
	limited
	number
	of
	dimensions
	(fields)
	a
	single
	OLAP
	
cube.
	
•	 	It
	is
	nearly	impossible
	to
	access
	transactional
	data
	

In [80]:

from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import CohereRerank

In [85]:
compressor = CohereRerank(cohere_api_key="nbDqU1hTVxWmXGbLYI6OnYhp4Cx40MZ5hOmO5oKX")
     

compression_retriever = ContextualCompressionRetriever(
    base_compressor=compressor, base_retriever=retriever
    )

compressed_docs = compression_retriever.get_relevant_documents(user_query)
compressed_docs

[Document(metadata={'relevance_score': 0.12646118}, page_content='79\nIntroduction to Online \nAnalytical Processing\n5.6  Data Warehouse and OLAP: Hypercube and Multi   \nCubes\nThe OLAP cube is a data structure optimized for very quick data analysis. The OLAP \nCube consists of numeric facts called measures which are categorized by dimensions. \nOLAP Cube is also called the hypercube. So, we can say that multidimensional \nDatabases can we see hypercube and multi cube. Multidimensional cubes have \nsmaller multiple cubes and in hypercube it seems there is one cube as logically all \nthe data seems to be as one unit of cube.  Hypercube have multiple same dimensions \nlogically. The differences of Multi cube and Hyper cube are shown in Table 1 below:Table 1: Differences between Multi cube and Hyper cube\nMulti Cube Hyper Cube\nMetadata Each dimension can belong to \nmany cubes\nEach dimension belongs to one \ncube only\nDimension Not necessary all the dimensions \nshould belong to some

In [None]:
compressed_docs = compression_retriever.get_relevant_documents(user_query)
text_content = "\n\n".join(doc.page_content for doc in compressed_docs)
print(text_content)


79
Introduction to Online 
Analytical Processing
5.6  Data Warehouse and OLAP: Hypercube and Multi   
Cubes
The OLAP cube is a data structure optimized for very quick data analysis. The OLAP 
Cube consists of numeric facts called measures which are categorized by dimensions. 
OLAP Cube is also called the hypercube. So, we can say that multidimensional 
Databases can we see hypercube and multi cube. Multidimensional cubes have 
smaller multiple cubes and in hypercube it seems there is one cube as logically all 
the data seems to be as one unit of cube.  Hypercube have multiple same dimensions 
logically. The differences of Multi cube and Hyper cube are shown in Table 1 below:Table 1: Differences between Multi cube and Hyper cube
Multi Cube Hyper Cube
Metadata Each dimension can belong to 
many cubes
Each dimension belongs to one 
cube only
Dimension Not necessary all the dimensions 
should belong to some cube
Every dimension owned by a 
hypercube
Measure 
Computation
Complex, data can b