In [1]:
# from langchain_community.vectorstores import Chroma
from langchain_chroma import Chroma
from langchain_community.document_loaders import TextLoader
from langchain_community.embeddings import OllamaEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter

In [2]:
# Read the speech text file into LangChain Document objects; the trailing
# expression echoes the loaded list so the raw content can be inspected.
loader = TextLoader(file_path='../speech.txt')
data = loader.load()
data

[Document(metadata={'source': '../speech.txt'}, page_content='Overview: In this project, the student model learns from multiple teacher models, each specializing in different aspects of the task. For example, one teacher could specialize in object detection, another in object classification, and a third in feature extraction.\nNovelty: The key is designing a custom loss function that weights the contributions of each teacher differently based on the task at hand. This adaptive weighting would allow the student model to benefit from diverse perspectives.\nExample: For an object detection task, you could have:\nTeacher 1 (YOLOv5): Focuses on fast bounding box detection.\nTeacher 2 (EfficientNet): Focuses on object classification accuracy.\nTeacher 3 (ResNet-50): Helps the student extract robust features from the image.\n\nAttended no do thoughts me on dissuade scarcely. Own are pretty spring suffer old denote his. By proposal speedily mr striking am. But attention sex questions applauded

In [5]:
# Break the loaded document into chunks (chunk_size=500, chunk_overlap=20)
# that are later embedded and indexed, then preview the first chunk.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=20)
splits = text_splitter.split_documents(data)
splits[0]

Document(metadata={'source': '../speech.txt'}, page_content='Overview: In this project, the student model learns from multiple teacher models, each specializing in different aspects of the task. For example, one teacher could specialize in object detection, another in object classification, and a third in feature extraction.\nNovelty: The key is designing a custom loss function that weights the contributions of each teacher differently based on the task at hand. This adaptive weighting would allow the student model to benefit from diverse perspectives.')

In [6]:
# Preview the second chunk to see where the splitter placed the boundary.
splits[1]

Document(metadata={'source': '../speech.txt'}, page_content='Example: For an object detection task, you could have:\nTeacher 1 (YOLOv5): Focuses on fast bounding box detection.\nTeacher 2 (EfficientNet): Focuses on object classification accuracy.\nTeacher 3 (ResNet-50): Helps the student extract robust features from the image.')

In [9]:
# Build the embedding client and index every chunk in a Chroma vector store.
# No persist_directory is passed in this cell.
# NOTE(review): 'gemma:2b' appears to be a general-purpose chat model rather
# than a dedicated embedding model -- confirm it is the intended choice.
embeddings = OllamaEmbeddings(model='gemma:2b')
vectorDB = Chroma.from_documents(
    embedding=embeddings,
    documents=splits,
)
vectorDB

<langchain_chroma.vectorstores.Chroma at 0x14efe26b890>

In [10]:
# Run a similarity search against the vector store and show the text of the
# first returned chunk.
query = "what is teacher model and student model?"
docs = vectorDB.similarity_search(query=query)
docs[0].page_content

'Example: For an object detection task, you could have:\nTeacher 1 (YOLOv5): Focuses on fast bounding box detection.\nTeacher 2 (EfficientNet): Focuses on object classification accuracy.\nTeacher 3 (ResNet-50): Helps the student extract robust features from the image.'

## Save to local disk

In [11]:
# Persist the chunks to ./chromaDB.
#
# Each chunk gets a stable id built from its source path and its position in
# `splits`. Without explicit ids every run of this cell stores the same chunks
# again under new auto-generated ids, so the on-disk collection (and every
# later search result) accumulates duplicates. With stable ids a re-run
# overwrites the existing entries instead.
#
# NOTE(review): a ./chromaDB directory created by earlier runs without ids
# still holds the old auto-id entries; delete it once before relying on this.
# Ids beyond the current chunk count are also not removed if the document shrinks.
chunk_ids = [
    f"{doc.metadata.get('source', 'doc')}-{position}"
    for position, doc in enumerate(splits)
]
vectorDB = Chroma.from_documents(
    documents=splits,
    embedding=embeddings,
    ids=chunk_ids,
    persist_directory='./chromaDB',
)

## Load from local disk

In [12]:
# Reopen the store saved under ./chromaDB, passing the same embedding object
# for query embedding, and repeat the earlier search against it.
db2 = Chroma(
    embedding_function=embeddings,
    persist_directory='./chromaDB',
)
db2.similarity_search(query)

[Document(metadata={'source': '../speech.txt'}, page_content='Example: For an object detection task, you could have:\nTeacher 1 (YOLOv5): Focuses on fast bounding box detection.\nTeacher 2 (EfficientNet): Focuses on object classification accuracy.\nTeacher 3 (ResNet-50): Helps the student extract robust features from the image.'),
 Document(metadata={'source': '../speech.txt'}, page_content='Overview: In this project, the student model learns from multiple teacher models, each specializing in different aspects of the task. For example, one teacher could specialize in object detection, another in object classification, and a third in feature extraction.\nNovelty: The key is designing a custom loss function that weights the contributions of each teacher differently based on the task at hand. This adaptive weighting would allow the student model to benefit from diverse perspectives.'),
 Document(metadata={'source': '../speech.txt'}, page_content='Am if number no up period regard sudden be

## Convert into a retriever

In [13]:
# Wrap the reloaded store in a retriever, using as_retriever()'s defaults
# (no search arguments are passed).
retriever = db2.as_retriever()

In [14]:
# Query through the retriever and show the text of the first returned document.
retriever.invoke(query)[0].page_content

'Example: For an object detection task, you could have:\nTeacher 1 (YOLOv5): Focuses on fast bounding box detection.\nTeacher 2 (EfficientNet): Focuses on object classification accuracy.\nTeacher 3 (ResNet-50): Helps the student extract robust features from the image.'