In [1]:
from langchain_chroma import Chroma
from sentence_transformers import SentenceTransformer
from chromadb.utils import embedding_functions
from langchain.embeddings import HuggingFaceEmbeddings

# Create the embedding function
embedding_function = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

# Initialize Chroma with the correct embedding function
vector_store = Chroma(
    collection_name="chatbot",
    embedding_function=embedding_function,
    persist_directory="./chroma_langchain_db"
)

# Access the underlying ChromaDB client
chroma_client = vector_store._client

# List all collections
collections = chroma_client.list_collections()

# Print the names of all collections
for collection in collections:
    print(f"Collection name: {collection.name}")
    print(f"Number of documents: {collection.count()}")
    print("---")

  from tqdm.autonotebook import tqdm, trange

  embedding_function = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")


Collection name: chatbot
Number of documents: 36
---


In [2]:
# Delete a specific collection by name
collection_name_to_delete = "chatbot"
chroma_client.delete_collection(collection_name_to_delete)
print(f"Collection '{collection_name_to_delete}' has been deleted.")

Collection 'chatbot' has been deleted.


In [9]:
import json

with open('../data/en/tuyen_sinh_en.json','r') as file:
    data = json.load(file)
data    

[{'id': 'admissions_1',
  'text': "The International University, established in December 2003, is the only multidisciplinary public university in Vietnam that teaches and conducts research entirely in English. The university offers undergraduate and postgraduate programs, focusing on economics, management, and engineering technology. With a model that meets international standards, the university collaborates with prestigious universities from the United States, Europe, and the Asia-Pacific region. The university's goal is to become a leading research university in Vietnam and the region, providing high-quality human resources to meet the demands of integration.",
  'category': 'Admissions',
  'topic': 'University Information',
  'metadata': {'question': 'Information about the university?',
   'keywords': ['university', 'information', 'programs']}},
 {'id': 'admissions_2',
  'text': 'The International University offers 23 training programs granted by the International University itself

In [10]:
from uuid import uuid4

from langchain_core.documents import Document

document_1 = Document(
    page_content="The International University offers 23 training programs granted by the International University itself, and 20 training programs in affiliation with prestigious partners.",
    metadata={"topic": "major"},
    id=1,
)

document_2 = Document(
    page_content="The International University, established in December 2003, is the only multidisciplinary public university in Vietnam that teaches and conducts research entirely in English. The university offers undergraduate and postgraduate programs, focusing on economics, management, and engineering technology. With a model that meets international standards, the university collaborates with prestigious universities from the United States, Europe, and the Asia-Pacific region. The university's goal is to become a leading research university in Vietnam and the region, providing high-quality human resources to meet the demands of integration.",
    metadata={"topic": "admission"},
    id=2,
)

document_3 = Document(
    page_content="The university offers a wide range of majors, including: Marketing, Statistics, Economics, Chemical Engineering, Chemistry (Biochemistry), Food Technology, Biotechnology, Accounting, Finance and Banking, Environmental Engineering, Aerospace Engineering, Financial Engineering and Risk Management (Applied Mathematics), Construction Management, Civil Engineering, Electronics and Telecommunications Engineering, Biomedical Engineering, Control and Automation Engineering, Industrial Systems Engineering, Logistics and Supply Chain Management, Data Science, Information Technology, Computer Science, English Language, and Business Administration.",
    metadata={"topic": "admission"},
    id=3,
)
documents = [
    document_1,
    document_2,
    document_3,
]
uuids = [str(uuid4()) for _ in range(len(documents))]

vector_store.add_documents(documents=documents, ids=uuids)

['f95d6835-e5e5-42dc-abac-6ef13e56763d',
 '9b2f85fe-89d0-4c9c-9515-effe3c4ffb6c',
 '2af02e33-ca2f-4a21-a430-6b8e36de5d42']

In [11]:
results = vector_store.similarity_search_by_vector(
    embedding=embedding_function.embed_query("How many major"), k=1
)
for doc in results:
    print(f"* {doc.page_content} [{doc.metadata}]")

* The university offers a wide range of majors, including: Marketing, Statistics, Economics, Chemical Engineering, Chemistry (Biochemistry), Food Technology, Biotechnology, Accounting, Finance and Banking, Environmental Engineering, Aerospace Engineering, Financial Engineering and Risk Management (Applied Mathematics), Construction Management, Civil Engineering, Electronics and Telecommunications Engineering, Biomedical Engineering, Control and Automation Engineering, Industrial Systems Engineering, Logistics and Supply Chain Management, Data Science, Information Technology, Computer Science, English Language, and Business Administration. [{'source': 'tweet'}]
