In [2]:
import pandas as pd
import csv

In [4]:
# Load the raw intents file to inspect its layout before any processing.
s = pd.read_csv('intents.csv')

In [5]:
# Preview the first three rows of the raw frame.
s.head(3)

Unnamed: 0,intents
0,"{'tag': 'abstraction', 'patterns': ['Explain d..."
1,"{'tag': 'error', 'patterns': ['What is a synta..."
2,"{'tag': 'documentation', 'patterns': ['Explain..."


In [11]:
import pandas as pd
import ast

# Raw export: the 'intents' column stores one dict literal per row as text.
df = pd.read_csv('intents.csv')

# Parse each cell once with ast.literal_eval (literals only, no code execution),
# then derive the three output columns from the parsed dicts.
parsed_intents = [ast.literal_eval(cell) for cell in df['intents']]

tags = [intent['tag'] for intent in parsed_intents]
# Multiple responses / patterns for one tag are flattened into a single
# ' | '-separated string so that each tag occupies exactly one row.
responses = [' | '.join(intent['responses']) for intent in parsed_intents]
questions = [' | '.join(intent['patterns']) for intent in parsed_intents]

result_df = pd.DataFrame(
    {'Tags': tags, 'Responses': responses, 'Questions': questions}
)

print(result_df.head())

# index=False keeps the positional index out of the file.
result_df.to_csv('processed_intents.csv', index=False)

            Tags                                          Responses  \
0    abstraction  Data abstraction is a technique used in comput...   
1          error  A syntax error is an error in the structure of...   
2  documentation  Program documentation is written information t...   
3        testing  Software testing is the process of evaluating ...   
4  datastructure  A data structure is a way of organizing and st...   

                                           Questions  
0  Explain data abstraction. | What is data abstr...  
1  What is a syntax error | Explain syntax error ...  
2  Explain program documentation. Why is it impor...  
3                          What is software testing?  
4               How do you explain a data structure?  


In [6]:
import re
def clean_text(text):
  """Normalise a string for matching.

  Lower-cases ``text``, removes every character that is not an ASCII
  letter, a digit or whitespace, and collapses each run of whitespace
  into a single space. Leading/trailing whitespace is collapsed but not
  stripped.

  Args:
    text: The string to normalise.

  Returns:
    The normalised string.
  """
  # Order matters: punctuation is dropped first, then whitespace is squeezed.
  substitutions = (
    (r'[^a-zA-Z0-9\s]', ''),
    (r'\s+', ' '),
  )
  cleaned = text.lower()
  for pattern, replacement in substitutions:
    cleaned = re.sub(pattern, replacement, cleaned)
  return cleaned

In [7]:
# Load the flattened table from 'processed_intents.csv'.
# NOTE(review): this rebinds `df`, which another cell also uses for the raw
# 'intents.csv' frame — the meaning of `df` depends on cell execution order.
df = pd.read_csv('processed_intents.csv')

In [8]:
# Preview the first two rows of the loaded table.
df.head(2)

Unnamed: 0,Tags,Responses,Questions
0,abstraction,Data abstraction is a technique used in comput...,Explain data abstraction. | What is data abstr...
1,error,A syntax error is an error in the structure of...,What is a syntax error | Explain syntax error ...


In [9]:
# Count missing values in each column.
df.isnull().sum()

Tags         0
Responses    0
Questions    0
dtype: int64

In [14]:
# Use the %pip magic so the package is installed into the environment of the
# running kernel (a bare `!pip` may resolve to a different interpreter).
# Pinned to the version this notebook was executed with.
%pip install sentence-transformers==4.1.0

Collecting sentence-transformers
  Downloading sentence_transformers-4.1.0-py3-none-any.whl.metadata (13 kB)
Downloading sentence_transformers-4.1.0-py3-none-any.whl (345 kB)
Installing collected packages: sentence-transformers
Successfully installed sentence-transformers-4.1.0


In [10]:
from sentence_transformers import SentenceTransformer
# Instantiate the all-MiniLM-L6-v2 sentence-embedding model; this object is
# used to embed both the stored responses and incoming queries.
model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:  12%|#1        | 10.5M/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [12]:
# Check which columns the current `df` has before encoding.
print(df.columns)

Index(['Tags', 'Responses', 'Questions'], dtype='object')


In [13]:
# Embed the text of the 'Responses' column.
# Presumably yields one vector per row, in row order — TODO confirm.
embeddings = model.encode(df['Responses'].values)

In [14]:
import numpy as np
# Make sure the embeddings are held as a NumPy array.
# NOTE(review): encode() likely returns an ndarray already, which would make
# this a redundant copy — confirm and drop if so.
embeddings = np.array(embeddings)

In [15]:
# Persist the embeddings to 'response_embeddings.npy'.
np.save('response_embeddings.npy', embeddings)

In [16]:
# Reload the embeddings from the saved .npy file.
embeddings = np.load('response_embeddings.npy')

In [17]:
# Use the %pip magic so the package is installed into the environment of the
# running kernel (a bare `!pip` may resolve to a different interpreter).
# Pinned to the version this notebook was executed with.
%pip install faiss-cpu==1.11.0

Collecting faiss-cpu
  Downloading faiss_cpu-1.11.0-cp312-cp312-win_amd64.whl.metadata (5.0 kB)
Downloading faiss_cpu-1.11.0-cp312-cp312-win_amd64.whl (15.0 MB)
   ---------------------------------------- 0.0/15.0 MB ? eta -:--:--
   ---------------------------------------- 0.0/15.0 MB ? eta -:--:--
   ---------------------------------------- 0.0/15.0 MB ? eta -:--:--
   ---------------------------------------- 0.0/15.0 MB ? eta -:--:--
    --------------------------------------- 0.3/15.0 MB ? eta -:--:--
    --------------------------------------- 0.3/15.0 MB ? eta -:--:--
    --------------------------------------- 0.3/15.0 MB ? eta -:--:--
    --------------------------------------- 0.3/15.0 MB ? eta -:--:--
   - -------------------------------------- 0.5/15.0 MB 399.6 kB/s eta 0:00:37
   - -------------------------------------- 0.5/15.0 MB 399.6 kB/s eta 0:00:37
   - -------------------------------------- 0.5/15.0 MB 399.6 kB/s eta 0:00:37
   -- ------------------------------------

In [18]:
import faiss

# Width of one embedding vector (second axis of the embedding matrix).
# NOTE(review): `dimentions` is a misspelling of `dimensions`; the name is left
# as-is because other cells may reference it.
dimentions = embeddings.shape[1]
# Create an empty FAISS IndexFlatL2 index sized to that width.
faiss_index = faiss.IndexFlatL2(dimentions) 

In [19]:
# Add the response embeddings to the index. The search helper in this notebook
# uses the ids returned by the index as row positions into df['Responses'],
# so `embeddings` must be in the same order as `df`.
faiss_index.add(embeddings)

In [20]:
# Save the populated index to 'faiss_index.faiss'.
faiss.write_index(faiss_index, 'faiss_index.faiss')

In [21]:
def get_similar_hadith(query, model, faiss_index, count=5, responses=None):
  """Print the stored responses closest to ``query``.

  The query is embedded with ``model``, the nearest ``count`` vectors are
  looked up in ``faiss_index``, and for each hit a rank header, its distance
  and the matching response text are printed.

  Args:
    query: The question text to search for.
    model: Object exposing ``encode(list_of_str)`` that returns the query
      embedding in the form ``faiss_index.search`` expects.
    faiss_index: Index exposing ``search(embedding, k)`` that returns
      ``(distances, ids)``, each indexed as ``[0][i]``.
    count: Maximum number of neighbours to print.
    responses: Optional sequence (or pandas Series) of response texts whose
      positions match the ids stored in ``faiss_index``. Defaults to the
      notebook-level ``df['Responses']``.

  Returns:
    None. Results are written to stdout.
  """
  if responses is None:
    # Backward-compatible default: fall back to the notebook's global frame.
    responses = df['Responses']

  query_embedding = model.encode([query])
  distances, indices = faiss_index.search(query_embedding, count)

  rank = 0
  for dist, idx in zip(distances[0], indices[0]):
    # FAISS fills missing neighbours with id -1 when the index holds fewer
    # than `count` vectors; indexing with -1 would wrongly print the last row.
    if idx < 0:
      continue
    rank += 1
    print(f'Response {rank}')
    print(f"Distance: {dist}")
    # Positional lookup: .iloc for pandas objects, plain indexing otherwise.
    print(responses.iloc[idx] if hasattr(responses, 'iloc') else responses[idx])

In [22]:
# Example query: print the two closest stored responses.
get_similar_hadith("What is a degree of relation in DBMS?", model, faiss_index, 2)

Response 1
Distance: 0.25925275683403015
The degree of a relation in a database management system (DBMS) refers to the number of attributes it has. A relation with a single attribute is called a unary relation, a relation with two attributes is called a binary relation, and a relation with three or more attributes is called a ternary or higher-order relation. The degree of a relation is an important concept in database design, as it affects the structure and organization of the data.
Response 2
Distance: 0.9372696280479431
A database management system (DBMS) is a software application that is used to create, manage, and manipulate databases. A database is a collection of data that is organized in a specific way, allowing for efficient retrieval and manipulation of the data. A DBMS provides a set of tools and interfaces that allow users to create, modify, and query the database, as well as to control access to the data and maintain the integrity and consistency of the data. DBMSs are wid