In [2]:
import numpy as np

In [3]:
from dotenv import load_dotenv
import os

In [4]:
load_dotenv()
EURI_API_KEY = os.getenv("EURI_API_KEY")

In [5]:
from euriai.langchain import EuriaiChatModel
from langchain_core.messages import SystemMessage, HumanMessage, AIMessage

chat_model = EuriaiChatModel(api_key= EURI_API_KEY, model = "gpt-4.1-nano", max_token = 10000)

In [6]:
message = [
    SystemMessage(content="You are expert in writing story on mix of language of English, Hindi, Hinglish. Generate long story of more than 15000 sentences."),
    HumanMessage(content='Generate story Artificial Intelligence.')
]

response = chat_model.invoke(message)


In [7]:
print(len(response.content))

3566


In [8]:
# Saving the generated data

file_path = "Data\Generated_data.txt"
with open(file_path, 'w', encoding='utf-8') as f:
    f.write(response.content)

In [9]:
# Reading the data
with open(file_path, 'r', encoding='utf-8') as f:
    record = f.read()

In [10]:
len(record)

3566

In [11]:
# Reading first 500 characters
record[:500]

'Certainly! Here\'s a comprehensive, detailed story about Artificial Intelligence, blending English, Hindi, and Hinglish. Due to platform limitations, I can\'t generate a story with more than 15,000 sentences in one go, but I will provide a lengthy, engaging, and richly detailed story that captures the essence and evolution of AI. If you\'d like, I can continue with more parts subsequently.\n\n---\n\n**Title: "AI ki Kahani: Ek Digital Yug ka Safar"**\n\n**Part 1: Shuruaat**\n\nEk chhoti si gaon mein, jahan '

In [12]:
# Cleaning the data
import re
import unicodedata

def clean_data(raw_text:str):
    text = raw_text.lower() # lower case
    text = re.sub(r'http\S+|www\.\S+', '', text) # remove URLs
    text = re.sub(r'\S+@\S+', "",text) # Remove emails
    text = unicodedata.normalize("NFKD", text).encode('ascii', "ignore").decode('utf-8') # Normalize unicode
    text = re.sub(r"[^a-z0-9.,!?;:\s]","", text) # Remove unwanted characters (keep words, numbers, spaces, . , ? !)
    text = re.sub(r"\s+", " ", text).strip() # Remove any extra whitespace

    return text


In [13]:
clean_text = clean_data(record)
print("First 10 character of cleaned text:\n", clean_text[:100])
print('Length of cleaned text is:', len(clean_text))

First 10 character of cleaned text:
 certainly! heres a comprehensive, detailed story about artificial intelligence, blending english, hi
Length of cleaned text is: 3495


In [14]:
# Create Chunks
from langchain_text_splitters import RecursiveCharacterTextSplitter
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=300,      # Max size of each chunk
    chunk_overlap=100,   # Overlap between chunks (helps with context continuity)
    separators=["\n\n", "\n", " ", ""] # Order of splitting
)

text = text_splitter.split_text(clean_text)

In [15]:
# Print Chunks
text

['certainly! heres a comprehensive, detailed story about artificial intelligence, blending english, hindi, and hinglish. due to platform limitations, i cant generate a story with more than 15,000 sentences in one go, but i will provide a lengthy, engaging, and richly detailed story that captures the',
 'in one go, but i will provide a lengthy, engaging, and richly detailed story that captures the essence and evolution of ai. if youd like, i can continue with more parts subsequently. title: ai ki kahani: ek digital yug ka safar part 1: shuruaat ek chhoti si gaon mein, jahan log traditional methods',
 'ek digital yug ka safar part 1: shuruaat ek chhoti si gaon mein, jahan log traditional methods se kaam karte the, wahan ek chhota sa ladka tha jiska naam tha arjun. arjun bahut hi curious tha, usko naye inventions ke baare mein jaanne ka bahut shauk tha. uski dadi usko bachpan se hi kehti thi,',
 'naye inventions ke baare mein jaanne ka bahut shauk tha. uski dadi usko bachpan se hi kehti t

In [16]:
# Length of the chunks

print("Lenghth of Chunks are:", len(text))

Lenghth of Chunks are: 17


In [17]:
# Create Embedding

import requests
import numpy as np

def generate_embeddings(text):
    url = "https://api.euron.one/api/v1/euri/embeddings"
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {EURI_API_KEY}"
    }
    payload = {
        "input": text,
        "model": "text-embedding-3-small"
    }

    response = requests.post(url, headers=headers, json=payload)
    data = response.json()
    
    embedding = np.array(data['data'][0]['embedding'])
    
    return embedding

In [18]:
final_embedding = []
meta = []
for i,chunk in enumerate(text):
    chunk_embedding = generate_embeddings(chunk)
    final_embedding.append(chunk_embedding)
    meta.append(str(i) +":"+ chunk)

In [19]:
final_embedding

[array([ 0.01465355,  0.00718234, -0.00158714, ..., -0.00866608,
         0.01199464,  0.01704328], shape=(1536,)),
 array([ 0.01100504,  0.00120018,  0.0001763 , ..., -0.0029123 ,
         0.02861445,  0.00629247], shape=(1536,)),
 array([ 0.02751531, -0.01066681, -0.0463195 , ..., -0.01044472,
         0.03494322, -0.00215002], shape=(1536,)),
 array([ 0.00837143, -0.01700465, -0.03312873, ..., -0.03067739,
         0.04759875, -0.01288736], shape=(1536,)),
 array([-0.00417439,  0.00715213,  0.00517625, ...,  0.00558255,
         0.05383297,  0.02504636], shape=(1536,)),
 array([0.01864259, 0.01075206, 0.00749478, ..., 0.00777485, 0.03280413,
        0.02350111], shape=(1536,)),
 array([-0.00224661,  0.03362154, -0.0304688 , ..., -0.01098281,
         0.05191663,  0.00623069], shape=(1536,)),
 array([-0.01989829,  0.02452444, -0.01916045, ...,  0.00767706,
         0.04363804,  0.02356407], shape=(1536,)),
 array([-0.00080263, -0.00672622,  0.02045838, ...,  0.01420262,
         0.04

In [20]:
meta

['0:certainly! heres a comprehensive, detailed story about artificial intelligence, blending english, hindi, and hinglish. due to platform limitations, i cant generate a story with more than 15,000 sentences in one go, but i will provide a lengthy, engaging, and richly detailed story that captures the',
 '1:in one go, but i will provide a lengthy, engaging, and richly detailed story that captures the essence and evolution of ai. if youd like, i can continue with more parts subsequently. title: ai ki kahani: ek digital yug ka safar part 1: shuruaat ek chhoti si gaon mein, jahan log traditional methods',
 '2:ek digital yug ka safar part 1: shuruaat ek chhoti si gaon mein, jahan log traditional methods se kaam karte the, wahan ek chhota sa ladka tha jiska naam tha arjun. arjun bahut hi curious tha, usko naye inventions ke baare mein jaanne ka bahut shauk tha. uski dadi usko bachpan se hi kehti thi,',
 '3:naye inventions ke baare mein jaanne ka bahut shauk tha. uski dadi usko bachpan se hi

In [21]:
#stack embedding vertically
final_embedding = np.vstack(final_embedding)

In [22]:
final_embedding

array([[ 0.01465355,  0.00718234, -0.00158714, ..., -0.00866608,
         0.01199464,  0.01704328],
       [ 0.01100504,  0.00120018,  0.0001763 , ..., -0.0029123 ,
         0.02861445,  0.00629247],
       [ 0.02751531, -0.01066681, -0.0463195 , ..., -0.01044472,
         0.03494322, -0.00215002],
       ...,
       [-0.01993785, -0.00925393,  0.01972687, ...,  0.02522413,
         0.03040493,  0.02232899],
       [ 0.04260934, -0.01467254,  0.05209413, ...,  0.02187039,
         0.0142633 ,  0.00102988],
       [ 0.03837973,  0.01316455,  0.00731645, ...,  0.02418986,
         0.03030378,  0.0072405 ]], shape=(17, 1536))

In [23]:
print("Shape of Embedding is:", final_embedding.shape)

Shape of Embedding is: (17, 1536)


In [24]:
# storing EMbedding as float32
final_embedding = np.array(final_embedding, dtype='float32')

In [25]:
# Storing Embedding into FAISS VectorDB

import faiss
emb_dim = final_embedding.shape[1]

faiss.normalize_L2(final_embedding)
index = faiss.IndexFlatIP(emb_dim)
index.add(final_embedding)

In [26]:
index_path = "AI_story.faiss"
meta_path = "AI_story_metadata.jsonl"

faiss.write_index(index, index_path)

In [27]:
import json

with open(meta_path, 'w') as f:
    for item in meta:
        f.write(json.dumps(item) + '\n')

In [28]:
# Retrival Information

Query = 'what Vikram and Priya has developed?'

query_vec = generate_embeddings(Query).astype('float32').reshape(1,-1)
faiss.normalize_L2(query_vec)

In [29]:
index.search(query_vec,5)

(array([[0.40009052, 0.37205186, 0.31380188, 0.3097468 , 0.30818993]],
       dtype=float32),
 array([[9, 8, 3, 0, 2]]))

In [31]:
# Checling the info from meta

meta[9]

'9:par research ki. uski mehnat rang laayi, aur usko ek bade tech company mein internship mili. company ke scientists ne uski ideas ko bahut pasand kiya. arjun ne apni team ke saath milkar ek aipowered language translator banaya, jo hindi, english, aur hinglish mein baat kar sakta tha. yeh invention'

Implementing a similarity search in FAISS for a small dataset of 20 product descriptions. Show top-5 most similar products for a query

In [32]:
from euriai.langchain import EuriaiChatModel

chat_model = EuriaiChatModel(api_key= EURI_API_KEY, model = "gpt-4.1-nano", max_token = 8000)

message = [
    SystemMessage(content="You are product expert who know about different products."),
    HumanMessage(content='Generate 20 products description in detail'),
]

response = chat_model.invoke(message)

In [34]:
print(response.content)

Certainly! Here are detailed descriptions for 20 diverse products:

1. **Wireless Noise-Canceling Headphones**
Experience immersive sound with these premium wireless headphones featuring active noise cancellation. Designed with plush ear cups and adjustable headbands, they provide all-day comfort. The built-in microphone ensures crystal-clear calls, while touch controls allow seamless music management. With up to 30 hours of battery life, enjoy uninterrupted listening wherever you go.

2. **Smart Wi-Fi Robot Vacuum Cleaner**
Keep your home spotless with this intelligent robot vacuum. Equipped with advanced mapping and navigation technology, it efficiently cleans carpets and hard floors. The app control feature allows scheduling and monitoring remotely. Its HEPA filter captures fine dust particles, promoting healthier air quality. Quiet operation and automatic docking make maintenance effortless.

3. **4K Ultra HD Action Camera**
Capture your adventures in stunning 4K resolution with th

In [35]:
len(response.content)

5421

In [36]:
file_path = "Data\Product_description.txt"
with open(file_path, 'w', encoding='utf-8') as f:
    f.write(response.content)

In [37]:
# Reading the data from file
file_path = "Data\Product_description.txt"
with open(file_path, 'r', encoding='utf-8') as f:
    record = f.read()

In [38]:
len(record)

5421

In [40]:
# Clean the data

clean_text = clean_data(record)

print('Length of clean data:', len(clean_text))
print('First 500 character of clean data:\n', clean_text[:500])

Length of clean data: 5310
First 500 character of clean data:
 certainly! here are detailed descriptions for 20 diverse products: 1. wireless noisecanceling headphones experience immersive sound with these premium wireless headphones featuring active noise cancellation. designed with plush ear cups and adjustable headbands, they provide allday comfort. the builtin microphone ensures crystalclear calls, while touch controls allow seamless music management. with up to 30 hours of battery life, enjoy uninterrupted listening wherever you go. 2. smart wifi robot


In [46]:
print(clean_text)

certainly! here are detailed descriptions for 20 diverse products: 1. wireless noisecanceling headphones experience immersive sound with these premium wireless headphones featuring active noise cancellation. designed with plush ear cups and adjustable headbands, they provide allday comfort. the builtin microphone ensures crystalclear calls, while touch controls allow seamless music management. with up to 30 hours of battery life, enjoy uninterrupted listening wherever you go. 2. smart wifi robot vacuum cleaner keep your home spotless with this intelligent robot vacuum. equipped with advanced mapping and navigation technology, it efficiently cleans carpets and hard floors. the app control feature allows scheduling and monitoring remotely. its hepa filter captures fine dust particles, promoting healthier air quality. quiet operation and automatic docking make maintenance effortless. 3. 4k ultra hd action camera capture your adventures in stunning 4k resolution with this durable action ca

In [47]:
# Create Chunk

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=300,      # Max size of each chunk
    chunk_overlap=100,   # Overlap between chunks (helps with context continuity)
    separators=["\n\n", "\n", " ", ""] # Order of splitting
)

text = text_splitter.split_text(clean_text)

In [48]:
text

['certainly! here are detailed descriptions for 20 diverse products: 1. wireless noisecanceling headphones experience immersive sound with these premium wireless headphones featuring active noise cancellation. designed with plush ear cups and adjustable headbands, they provide allday comfort. the',
 'designed with plush ear cups and adjustable headbands, they provide allday comfort. the builtin microphone ensures crystalclear calls, while touch controls allow seamless music management. with up to 30 hours of battery life, enjoy uninterrupted listening wherever you go. 2. smart wifi robot vacuum',
 '30 hours of battery life, enjoy uninterrupted listening wherever you go. 2. smart wifi robot vacuum cleaner keep your home spotless with this intelligent robot vacuum. equipped with advanced mapping and navigation technology, it efficiently cleans carpets and hard floors. the app control feature',
 'and navigation technology, it efficiently cleans carpets and hard floors. the app control fea

In [49]:
# Length of the chunks

print("Lenghth of Chunks are:", len(text))

Lenghth of Chunks are: 27


In [51]:
# Generate Embedding for chunks

final_embedding = []
meta = []
for i,chunk in enumerate(text):
    chunk_embedded = generate_embeddings(chunk)
    final_embedding.append(chunk_embedded)
    meta.append(str(i) +":"+ chunk)

In [55]:
final_embedding

[array([ 0.0157843 ,  0.00453959, -0.05025312, ..., -0.00895727,
        -0.02823209, -0.02199536], shape=(1536,)),
 array([ 0.00442547, -0.01156065, -0.01525517, ..., -0.03151349,
        -0.00264702, -0.02029537], shape=(1536,)),
 array([-0.00368442, -0.00941263, -0.00828155, ..., -0.03838965,
        -0.00160843, -0.00573101], shape=(1536,)),
 array([-0.03996108,  0.00953557, -0.00274634, ..., -0.00595396,
        -0.01346314, -0.01459001], shape=(1536,)),
 array([ 0.02189562,  0.03102398, -0.01712653, ...,  0.00224328,
        -0.00988594,  0.00964376], shape=(1536,)),
 array([ 0.03235667, -0.00159868,  0.00290432, ...,  0.00052479,
         0.00236901,  0.01023622], shape=(1536,)),
 array([-0.00291591, -0.03070696,  0.01052878, ..., -0.0060487 ,
        -0.00574864, -0.03279846], shape=(1536,)),
 array([-0.02501325,  0.0051896 , -0.026748  , ..., -0.00754852,
         0.00750163, -0.01054917], shape=(1536,)),
 array([-0.00020785,  0.04598984, -0.04249461, ...,  0.00366194,
       

In [54]:
meta

['0:certainly! here are detailed descriptions for 20 diverse products: 1. wireless noisecanceling headphones experience immersive sound with these premium wireless headphones featuring active noise cancellation. designed with plush ear cups and adjustable headbands, they provide allday comfort. the',
 '1:designed with plush ear cups and adjustable headbands, they provide allday comfort. the builtin microphone ensures crystalclear calls, while touch controls allow seamless music management. with up to 30 hours of battery life, enjoy uninterrupted listening wherever you go. 2. smart wifi robot vacuum',
 '2:30 hours of battery life, enjoy uninterrupted listening wherever you go. 2. smart wifi robot vacuum cleaner keep your home spotless with this intelligent robot vacuum. equipped with advanced mapping and navigation technology, it efficiently cleans carpets and hard floors. the app control feature',
 '3:and navigation technology, it efficiently cleans carpets and hard floors. the app con

In [56]:
# storing EMbedding as float32
final_embedding = np.array(final_embedding, dtype='float32')

In [57]:
final_embedding.shape

(27, 1536)

In [58]:
# Storing Embedding in Faiss Vector DB

emb_dim = final_embedding.shape[1]

# Normalize for cosine similarity
faiss.normalize_L2(final_embedding)
index = faiss.IndexFlatIP(emb_dim)
index.add(final_embedding)

In [59]:
index_path = "Product_Embedded.faiss"
meta_path  ="Product_Metadata.jsonl"

faiss.write_index(index, index_path)

In [60]:
with open(meta_path, 'w') as f:
    for item in meta:
        f.write(json.dumps(item) + '\n')

In [61]:
Query = "Give 5 Electric Home Appliences."


In [62]:
query_vec = generate_embeddings(Query).astype('float32').reshape(1,-1)
faiss.normalize_L2(query_vec)

In [63]:
# Find Top 5 result
index.search(query_vec,5)

(array([[0.30778304, 0.29268652, 0.29037344, 0.27717963, 0.27513546]],
       dtype=float32),
 array([[24,  2, 15,  7,  3]]))

In [66]:
meta[24]

'24:with smartphones and earbuds. its fastcharging capability ensures quick powerup, and nonslip rubber base keeps it stable on surfaces. the minimalist design complements any desk or bedside table. 15. portable solar power bank stay powered offgrid with this highcapacity solar charger. equipped with'

Inserting the data into Qdrant. Write Query to retrieve the nearest neighbors

In [67]:
from qdrant_client import QdrantClient

qdrant_client = QdrantClient(
    url="https://60a74340-3868-4e5a-a150-ef5e73dbbda6.us-east4-0.gcp.cloud.qdrant.io:6333", 
    api_key="eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJhY2Nlc3MiOiJtIn0.Tskmr3KCFm3M5ujhNhv3cYwh5BVpQgjE50d1mEufp1Q",
)

print(qdrant_client.get_collections())

collections=[]


In [68]:
collection_name = "Product_Information"

In [69]:
from qdrant_client import models
qdrant_client.recreate_collection(
    collection_name=collection_name,
    vectors_config = models.VectorParams(size=1536, distance=models.Distance.COSINE)
)

  qdrant_client.recreate_collection(


True

In [70]:
points = []
for idx, (chunk, emb) in enumerate(zip(text, final_embedding)):
    point = models.PointStruct(
        id = idx,
        vector = emb.astype("float32").tolist(),
        payload = {"text": chunk}
    )
    points.append(point)

In [71]:
points

[PointStruct(id=0, vector=[0.015784304589033127, 0.00453959172591567, -0.05025312304496765, -0.013910721056163311, -0.031414616852998734, -0.07591865956783295, 0.03372451663017273, 0.08859743177890778, 0.0112928356975317, -0.000942406477406621, 0.011741983704268932, -0.010715361684560776, -0.02289365790784359, 0.01924915239214897, 0.015129834413528442, 0.026114685460925102, -0.0358290895819664, 0.0009039081633090973, -0.0038562470581382513, 0.04234813526272774, 0.05104875564575195, 0.0576447993516922, -0.011543074622750282, 0.014539526775479317, 0.0319279283285141, 0.016708264127373695, -0.02017311193048954, 0.04399073123931885, 0.03326253592967987, 0.04008956998586655, -0.02764178439974785, -0.01475768443197012, 0.018440688028931618, -0.03238990902900696, -0.00787290371954441, -0.03770267218351364, 0.010875770822167397, -0.029874686151742935, 0.019197821617126465, -0.05127974599599838, 0.020108947530388832, -0.0005401793750934303, -0.008193722926080227, 0.0004222783027216792, -0.01208

In [72]:
qdrant_client.upsert(
    collection_name = collection_name,
    points = points
)

UpdateResult(operation_id=0, status=<UpdateStatus.COMPLETED: 'completed'>)

In [73]:
query_vec = query_vec.astype("float32").tolist()

In [77]:
qdrant_client.search(collection_name=collection_name, query_vector=query_vec[0], limit=5, with_payload=True)

  qdrant_client.search(collection_name=collection_name, query_vector=query_vec[0], limit=5, with_payload=True)


[ScoredPoint(id=24, version=0, score=0.307783, payload={'text': 'with smartphones and earbuds. its fastcharging capability ensures quick powerup, and nonslip rubber base keeps it stable on surfaces. the minimalist design complements any desk or bedside table. 15. portable solar power bank stay powered offgrid with this highcapacity solar charger. equipped with'}, vector=None, shard_key=None, order_value=None),
 ScoredPoint(id=2, version=0, score=0.29268652, payload={'text': '30 hours of battery life, enjoy uninterrupted listening wherever you go. 2. smart wifi robot vacuum cleaner keep your home spotless with this intelligent robot vacuum. equipped with advanced mapping and navigation technology, it efficiently cleans carpets and hard floors. the app control feature'}, vector=None, shard_key=None, order_value=None),
 ScoredPoint(id=15, version=0, score=0.29037344, payload={'text': 'make it suitable for indoor and outdoor use. 9. electric standing desk enhance your workspace with this a

Comparison between FAISS and Qdrant on the basis of Retrieval time

In [80]:
query_vec = generate_embeddings(Query).astype('float32').reshape(1,-1)
faiss.normalize_L2(query_vec)

In [81]:
import time

start = time.time()
index.search(query_vec, 5)
end = time.time()

print("Search time of FAISS: ", (end - start)*1000, 'ms')

Search time of FAISS:  0.0 ms


In [82]:
start = time.time()
qdrant_client.query_points(
    collection_name=collection_name,
    query=query_vec[0],
    limit=5
)
end = time.time()
print("Search time of Qdrant: ", (end - start)*1000, 'ms')

Search time of Qdrant:  1032.517671585083 ms


Qdrant takes more time than FAISS.