## Importing required Libraries

In [1]:
import os
from dotenv import load_dotenv
from llama_index.core import SimpleDirectoryReader, VectorStoreIndex, Document
from llama_index.core.node_parser import SentenceSplitter
from llama_index.embeddings.gemini import GeminiEmbedding
from llama_index.llms.gemini import Gemini
from llama_index.core import Settings
import google.generativeai as genai

  from .autonotebook import tqdm as notebook_tqdm


## Loading API keys (for LLM model)

In [2]:
# Read variables from a local .env file (if any) into the process environment,
# then pick up the Gemini API key from there.
load_dotenv()
GEMINI_KEY = os.getenv('GEMINI_KEY')

# os.getenv returns None when the variable is absent; stop here with an
# actionable message instead of failing later inside the Gemini client.
if not GEMINI_KEY:
    raise RuntimeError(
        "GEMINI_KEY is not set. Add it to your .env file or export it in the environment."
    )

## Initializing Embedding and LLM models

In [3]:
# Embedding model: used to turn text chunks and queries into vectors.
embed_model_name = "models/embedding-001"
embed_model = GeminiEmbedding(
    api_key=GEMINI_KEY,
    model_name=embed_model_name,
)

# Chat/completion model: used to write the final answer.
model = Gemini(
    api_key=GEMINI_KEY,
    model='models/gemini-1.5-pro',
)

In [15]:
# Display only the model name. The full repr of the embedding object includes
# `api_key` in plain text, which would be saved into the notebook output.
embed_model.model_name

'models/embedding-001'

## Fetching all Files from Data Directory

In [None]:
# The directory to read is fixed when the reader is constructed.
reader = SimpleDirectoryReader(input_dir='../Data')

# load_data() does not take a path: its first positional parameter is
# `show_progress`. Passing the directory string there only worked because a
# non-empty string is truthy, so request the progress bar explicitly.
docs = reader.load_data(show_progress=True)

Loading files: 100%|██████████| 29/29 [00:25<00:00,  1.13file/s]


## Concatenating all text from different files

In [5]:
# Join the text of every loaded document, in load order, with no separator.
text = "".join(doc.text for doc in docs)

## Wrapping the concatenated text in a single Document instance

In [6]:
# Wrap the combined text in one Document.
# Note: this rebinds `docs` from the list of loaded files to a single Document,
# so the concatenation cell above cannot be re-run after this one without
# reloading the files first.
docs = Document(
    text_resource={'text': text},
)

## Configuring the default LLM and embedding model

In [7]:
# Register the global defaults used by llama_index components.
# Chunking: 1024-unit chunks with an overlap of 20 between neighbours.
sentence_splitter = SentenceSplitter(chunk_size=1024, chunk_overlap=20)

Settings.llm = model
Settings.embed_model = embed_model
Settings.transformations = [sentence_splitter]

## Building a vector index from the text (one embedding per chunk)

In [8]:
# Split the document into chunks, embed each chunk, and build the vector index.
# Use the public Settings properties rather than the private `_embed_model` /
# `_transformations` attributes: the underscored fields are internal storage
# and are not part of the supported API.
index = VectorStoreIndex.from_documents(
    documents=[docs],
    embed_model=Settings.embed_model,
    transformations=Settings.transformations,
    show_progress=True,
)

Parsing nodes:   0%|          | 0/1 [00:00<?, ?it/s]

Parsing nodes: 100%|██████████| 1/1 [00:00<00:00,  1.81it/s]
Generating embeddings: 100%|██████████| 125/125 [01:02<00:00,  1.99it/s]


## Persisting the index generated above to the "storage" directory

In [9]:
# Write the index's storage context to disk. No directory is passed, so the
# library's default persist location is used.
storage_ctx = index.storage_context
storage_ctx.persist()

## Creating a query engine over the index (answers generated by the LLM)

In [10]:
# Build a query engine on top of the index, using the configured LLM to write
# the answer. Read the LLM through the public `Settings.llm` property instead
# of the private `_llm` attribute.
query_engine = index.as_query_engine(llm=Settings.llm)

## Passing Query

In [13]:
# Ask the question; the engine returns a response object whose `.response`
# attribute is read by the display cell below.
question = "can you explain market basket analysis?"
response = query_engine.query(question)

## Displaying Response

In [14]:
# Print the answer one sentence per line.
# Splitting on "." is a simple heuristic and will also split decimals and
# abbreviations; it is kept because it is good enough for display here.
answer_text = response.response or ""  # guard against a missing response text
for sentence in answer_text.split("."):
    sentence = sentence.strip()
    # Skip empty or whitespace-only fragments (e.g. the piece after the final
    # period); the previous `!= "\n"` check let "" through and printed a lone ".".
    if sentence:
        print(f"{sentence}.")

Market basket analysis studies items purchased or grouped within a single or multiple, sequential transactions.
Discovering the relationships between these items, and the strength of those relationships, provides valuable insights.
This information can then be used to make recommendations, cross-sell, up-sell, and offer coupons.
Retailers use this technique to uncover associations between items by identifying combinations that frequently appear together in transactions.
In other words, it helps retailers understand the relationships between the items customers buy.
