### **1. Install Required Libraries**

In [8]:
pip install chromadb sentence-transformers --quiet

### **2. Implement the TextEmbedder Class**

In [6]:
import csv
import json
import uuid
from sentence_transformers import SentenceTransformer
import chromadb

class TextEmbedder:
    """
    Chunks text, embeds the chunks with a SentenceTransformer model, stores
    them in a ChromaDB collection and runs similarity queries against it.
    """

    def __init__(self, model_name='all-MiniLM-L6-v2', collection_name='marketing_scraped_data'):
        """
        Loads the embedding model and opens the ChromaDB collection.

        Args:
            model_name (str): Model name handed to SentenceTransformer.
            collection_name (str): Name of the ChromaDB collection to use.
        """
        # Initialize the embedding model
        self.embedding_model = SentenceTransformer(model_name)
        # Initialize ChromaDB client
        self.client = chromadb.Client()
        # get_or_create_collection instead of create_collection: re-running
        # this cell in the same kernel must not fail because the collection
        # was already created by an earlier run.
        self.collection = self.client.get_or_create_collection(name=collection_name)

    def chunk_text_with_overlap(self, text, chunk_size=500, overlap=50):
        """
        Splits text into overlapping chunks of whitespace-separated words.

        Args:
            text (str): The input text to be chunked.
            chunk_size (int): The maximum number of words in each chunk.
            overlap (int): The number of words shared by consecutive chunks.

        Yields:
            str: The next chunk of text (words joined by single spaces).

        Raises:
            ValueError: If chunk_size is not positive, or overlap is negative
                or not smaller than chunk_size. Because this is a generator,
                the error surfaces when iteration starts.
        """
        if chunk_size <= 0:
            raise ValueError("chunk_size must be a positive integer")
        if not 0 <= overlap < chunk_size:
            # overlap >= chunk_size would make the window stand still or
            # move backwards, i.e. loop forever.
            raise ValueError("overlap must satisfy 0 <= overlap < chunk_size")

        words = text.split()
        step = chunk_size - overlap
        start = 0
        while start < len(words):
            end = start + chunk_size
            yield ' '.join(words[start:end])
            if end >= len(words):
                # This chunk already reached the last word; advancing again
                # would only emit a tail fully contained in this chunk.
                break
            start += step

    def generate_embedding(self, text):
        """
        Generates an embedding for the given text.

        Args:
            text (str): The input text to be embedded.

        Returns:
            list: The embedding vector as a list of floats.
        """
        return self.embedding_model.encode(text).tolist()

    def ingest_data(self, csv_file_path, text_column="Scrapped Text"):
        """
        Reads data from a CSV file, chunks and embeds the text column, and
        stores the chunks in the ChromaDB collection.

        Every other column of the row is attached to each of its chunks as
        metadata. IDs are random UUIDs, so ingesting the same file twice
        stores its chunks twice.

        Args:
            csv_file_path (str): Path to the input CSV file containing the text data.
            text_column (str): Name of the column holding the text to embed.
        """
        # newline="" lets the csv module handle line breaks inside quoted
        # fields itself; utf-8-sig also accepts files that start with a BOM.
        with open(csv_file_path, mode="r", encoding="utf-8-sig", newline="") as file:
            reader = csv.DictReader(file)
            for row in reader:
                scrapped_text = row.get(text_column) or ""
                chunks = list(self.chunk_text_with_overlap(scrapped_text))
                if not chunks:
                    continue

                # DictReader fills missing cells with None and collects
                # surplus cells under a None key; neither is usable as
                # metadata, so both are dropped.
                metadata = {
                    key: value
                    for key, value in row.items()
                    if key is not None and key != text_column and value is not None
                }

                # Encode and store all chunks of the row in one batch.
                embeddings = self.embedding_model.encode(chunks).tolist()
                add_kwargs = {
                    "ids": [str(uuid.uuid4()) for _ in chunks],  # Generate unique IDs
                    "documents": chunks,
                    "embeddings": embeddings,
                }
                if metadata:
                    # Only pass metadata when there is some; an empty dict
                    # is not sent to the collection.
                    add_kwargs["metadatas"] = [dict(metadata) for _ in chunks]
                self.collection.add(**add_kwargs)

    def vector_search(self, query_text, n_results=3):
        """
        Performs a vector search to find documents similar to the query text.

        Args:
            query_text (str): The query text to search for similar documents.
            n_results (int): The number of top similar documents to retrieve.

        Returns:
            dict: The result of the collection query; the caller reads the
                'documents' and 'metadatas' entries, each holding one list
                per query.
        """
        query_embedding = self.generate_embedding(query_text)
        return self.collection.query(
            query_embeddings=[query_embedding],
            n_results=n_results
        )


### **3. Call the TextEmbedder Class**

In [7]:
# Build the embedder (loads the model and opens the collection)
embedder = TextEmbedder()

# Load the scraped CSV into the collection
csv_file_path = "/content/scrapping_for_manual_searched_resources.csv"
embedder.ingest_data(csv_file_path)

# Query the collection for the chunks closest to the question
query_text = "Most used marketing strategies"
search_results = embedder.vector_search(query_text)

# Results are nested per query; only one query was sent, so take index 0
top_documents = search_results['documents'][0]
top_metadatas = search_results['metadatas'][0]

# Show each hit followed by its metadata and a blank line
for document, metadata in zip(top_documents, top_metadatas):
    print(f"Document: {document}", f"Metadata: {metadata}\n", sep="\n")


Document: marketing tactics in your budget and forecast? * Commercial Awareness How do you foster innovation and creativity in your marketing channel and distribution design and execution? Show more Show less Others also viewed * Marketing Mix: Understanding the 4 Ps of Marketing Get Ahead by LinkedIn News 2y * Marketing Strategy Evolution: Past, Present, and Future Hussein Hesham 1y * What Is the Purpose of Marketing? Kevin Rausch 5mo * The 4 Ps of Marketing: What They Are and How to Use Them Jacob Li 1y * The Best Marketing Strategy Frameworks Chris Talintyre 5mo * Marketing Tips – Marketing Mix 7P or 4P Elements Paul Humphreys 4mo * From Purpose to Performance: Decoding the Ten Ps for a Resilient Marketing Strategy Kiran S. 1y * Marketing Strategy: A Beginner's Guide Jason W. 5mo * ELEMENTS OF A MARKETING STRATEGY: COMPETITOR ANALYSIS Nicholas Curle 1y * Simplifying Marketing Success: Five Moves to Amplify Your Strategy!! Jitender Singh Dahiya 8mo Show more Show less Explore topics 