In [1]:
import nltk
import numpy as np
import faiss
from sentence_transformers import SentenceTransformer

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
# Fetch the NLTK tokenizer data that sentence splitting relies on
for resource in ('punkt', 'punkt_tab'):
    nltk.download(resource)

# Load the sentence-embedding model by name (a lightweight SBERT variant).
# A locally stored copy of the model could be loaded by path instead.
model = SentenceTransformer('all-MiniLM-L6-v2')

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\Scientist\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package punkt_tab to
[nltk_data]     C:\Users\Scientist\AppData\Roaming\nltk_data...
[nltk_data]   Unzipping tokenizers\punkt_tab.zip.


True

In [18]:
# Example corpus: four paragraphs of prose about machine learning / AI in
# business, autonomous vehicles, entertainment, and healthcare. This text is
# the only document the notebook indexes; it is split into sentences and
# embedded further down in this cell.
paragraph = """
Machine learning has transformed the way businesses operate and make decisions. By analyzing vast amounts of data, these models uncover hidden patterns and trends that would otherwise be difficult to identify. For instance, e-commerce companies use machine learning to analyze customer behavior, personalize shopping experiences, and optimize supply chain management. In marketing, AI-powered tools help businesses target the right audience with tailored ads, improving conversion rates. Additionally, machine learning is being used to predict market trends, enabling companies to make more informed strategic decisions. This technological shift is driving a new era of business intelligence and efficiency, making organizations more competitive and agile.
In the field of autonomous vehicles, machine learning plays a crucial role in enabling self-driving cars to navigate and make real-time decisions. These vehicles rely on an array of sensors, such as cameras, lidar, and radar, to perceive their environment. Machine learning models process this data to recognize objects, predict their movement, and make decisions to avoid collisions. By continuously learning from vast amounts of driving data, autonomous vehicles can improve their performance over time, making them safer and more reliable. As the technology advances, autonomous vehicles have the potential to reduce traffic accidents, improve traffic flow, and create new transportation solutions.
In the entertainment industry, AI-driven tools are being used to create personalized content recommendations for users. Streaming services like Netflix, Spotify, and YouTube rely on machine learning algorithms to analyze a user’s viewing or listening history and suggest content that aligns with their preferences. These algorithms consider a wide range of factors, including genre, actors, and even the time of day, to refine their recommendations. AI is also used in video production, where it can automate tasks such as video editing and special effects generation. With AI, creators can produce content more efficiently and deliver experiences tailored to individual tastes, revolutionizing how media is consumed.
AI in healthcare has led to advancements in diagnostics, treatment planning, and drug discovery. Machine learning models are now capable of analyzing medical images, such as X-rays and MRIs, to detect diseases like cancer, pneumonia, and heart conditions with greater precision than human doctors. In drug discovery, AI models help researchers identify potential compounds that could lead to new treatments, speeding up the process of bringing new drugs to market. AI is also used to personalize treatment plans, analyzing a patient's medical history and genetic data to recommend the most effective treatments. These breakthroughs are improving patient outcomes and transforming healthcare delivery.
"""

# Split the example text into individual sentences; these are the
# retrieval units that search results are mapped back to.
sentences = nltk.sent_tokenize(paragraph)

# Embed every sentence and cast to float32 before handing the matrix to FAISS.
sentence_embeddings = np.array(model.encode(sentences)).astype('float32')

# Build a flat L2-distance index whose width matches the embedding size,
# then load all sentence vectors into it.
dimension = sentence_embeddings.shape[1]
index = faiss.IndexFlatL2(dimension)
index.add(sentence_embeddings)

def retrieve_similar_sentences(query, k=1):
    """Return up to ``k`` indexed sentences that are closest to ``query``.

    The query is embedded with the module-level ``model`` and looked up in the
    module-level FAISS ``index``; the resulting row ids are mapped back to
    text through the module-level ``sentences`` list.

    Args:
        query: Text to search for.
        k: Maximum number of sentences to return.

    Returns:
        A list of sentences in the order produced by ``index.search``. The
        list is shorter than ``k`` when the index cannot supply ``k`` hits.
    """
    # Embed the query as a single-row float32 matrix, the layout FAISS expects.
    query_embedding = np.asarray(model.encode([query]), dtype='float32')

    # Distances are not needed here; only the row ids of the nearest vectors.
    _, indices = index.search(query_embedding, k)

    # FAISS pads the id array with -1 when fewer than k neighbours exist.
    # Skip those slots: sentences[-1] would otherwise silently return the
    # last sentence as if it were a real match.
    return [sentences[i] for i in indices[0] if i >= 0]


In [None]:
# Run one example lookup against the sentence index
query = "What is self-driving?"
retrieved_sentences = retrieve_similar_sentences(query)

# Show the query followed by a 1-based numbered list of the hits
print("Query:", query)
print("\nRetrieved Sentences:")
for rank, sentence in enumerate(retrieved_sentences, start=1):
    print(f"{rank}. {sentence}")


Query: What is self-driving?

Retrieved Sentences:
1. In the field of autonomous vehicles, machine learning plays a crucial role in enabling self-driving cars to navigate and make real-time decisions.
