In [1]:
!pip install sentence-transformers

^C


In [None]:
# Import required libraries:
#   - SentenceTransformer turns text into embedding vectors
#   - NumPy is used for the vector math on those embeddings
from sentence_transformers import SentenceTransformer
import numpy as np

# Simple test to ensure everything works: this line is only reached
# if both imports above succeeded
print("✓ Imports successful!")

In [54]:
from sentence_transformers import SentenceTransformer

# Identifier of the embedding model handed to SentenceTransformer
MODEL_NAME = 'all-MiniLM-L6-v2'

# Instantiate the model once; this may take a few seconds
model = SentenceTransformer(MODEL_NAME)

print("Model loaded successfully!")


Model loaded successfully!


In [56]:
# Encode one sentence to see what a single embedding looks like
test_sentence = "This is a test sentence to understand embeddings."
embedding = model.encode(test_sentence)

# Report the vector's shape and preview its first five components
for line in (
    f"Embedding shape: {embedding.shape}",
    f"First 5 values: {embedding[:5]}",
):
    print(line)


Embedding shape: (384,)
First 5 values: [ 0.03313288 -0.0281372   0.10922699  0.02421217  0.04646194]


In [58]:
# Three short probe sentences used later to compare similarities
test_sentences = [
    "Python is a programming language",
    "Programming languages are used to write software",
    "Pythons are large snakes"
]

# Encode the whole list with a single call
embeddings = model.encode(test_sentences)

# Summarize what came back: how many vectors, and the shape of one of them
print(
    f"Number of embeddings: {len(embeddings)}",
    f"Shape of each embedding: {embeddings[0].shape}",
    sep="\n",
)


Number of embeddings: 3
Shape of each embedding: (384,)


In [52]:
# The note collection that the search examples run against
notes = [
    "Python lists are mutable sequences used to store collections of items. They can contain mixed types and are defined using square brackets.",
    "Lists in Python can be modified after creation. Common operations include append(), extend(), and insert().",
    "Data structures are fundamental building blocks in programming. They help organize and store data efficiently.",
    "Arrays in NumPy provide efficient storage and operations for numerical data. They are widely used in scientific computing.",
    "Object-oriented programming in Python uses classes and objects. Classes define the structure and behavior of objects.",
    "The pandas library is built on top of NumPy and provides powerful data manipulation tools through DataFrames.",
    "Version control with Git helps track changes in code. Common commands include commit, push, and pull.",
    "Python functions are defined using the def keyword. They can accept parameters and return values.",
]

# Report how many notes were created
note_count = len(notes)
print(f"Created {note_count} notes")

Created 8 notes


In [7]:
# Show every note under a numbered header that includes its character count
divider = '=' * 50
for position, text in enumerate(notes, start=1):
    print(f"\nNote {position} (Length: {len(text)} characters):")
    print(divider)
    print(text)
    


Note 1 (Length: 138 characters):
Python lists are mutable sequences used to store collections of items. They can contain mixed types and are defined using square brackets.

Note 2 (Length: 107 characters):
Lists in Python can be modified after creation. Common operations include append(), extend(), and insert().

Note 3 (Length: 110 characters):
Data structures are fundamental building blocks in programming. They help organize and store data efficiently.

Note 4 (Length: 122 characters):
Arrays in NumPy provide efficient storage and operations for numerical data. They are widely used in scientific computing.

Note 5 (Length: 117 characters):
Object-oriented programming in Python uses classes and objects. Classes define the structure and behavior of objects.

Note 6 (Length: 109 characters):
The pandas library is built on top of NumPy and provides powerful data manipulation tools through DataFrames.

Note 7 (Length: 101 characters):
Version control with Git helps track changes in code.

In [9]:
# Word-count statistics for the notes (a word = a whitespace-separated token)
note_lengths = [len(words) for words in map(str.split, notes)]

average_words = sum(note_lengths) / len(note_lengths)
print(f"Average words per note: {average_words:.1f}")
print(f"Shortest note: {min(note_lengths)} words")
print(f"Longest note: {max(note_lengths)} words")

Average words per note: 16.8
Shortest note: 15 words
Longest note: 22 words


In [35]:
# Encode every note in one call. search_notes pairs row i of this array
# with notes[i], so re-run this cell whenever `notes` changes.
note_embeddings = model.encode(notes)

print(
    f"Created embeddings for {len(notes)} notes",
    f"Each embedding has shape: {note_embeddings[0].shape}",
    sep="\n",
)


Created embeddings for 8 notes
Each embedding has shape: (384,)


In [60]:
import numpy as np

# Calculate cosine similarity between two embeddings
def calculate_similarity(emb1, emb2):
    """Return the cosine similarity between two embedding vectors.

    Computed as dot(emb1, emb2) / (||emb1|| * ||emb2||).

    Args:
        emb1: First vector (assumed to be a 1-D array-like of numbers).
        emb2: Second vector, the same length as ``emb1``.

    Returns:
        The cosine similarity as a scalar. If either vector has zero norm
        the similarity is undefined, and 0.0 is returned instead of NaN.
    """
    norm_product = np.linalg.norm(emb1) * np.linalg.norm(emb2)

    # A zero-norm vector has no direction. Dividing by zero here would
    # return NaN and emit a RuntimeWarning, so report "no similarity".
    if norm_product == 0:
        return 0.0

    return np.dot(emb1, emb2) / norm_product

# Score each unordered pair of test sentences exactly once
index_pairs = [
    (first, second)
    for first in range(len(test_sentences))
    for second in range(first + 1, len(test_sentences))
]
for first, second in index_pairs:
    similarity = calculate_similarity(embeddings[first], embeddings[second])
    print(f"\nSimilarity between:\n'{test_sentences[first]}' and\n'{test_sentences[second]}': {similarity:.3f}")



Similarity between:
'Python is a programming language' and
'Programming languages are used to write software': 0.657

Similarity between:
'Python is a programming language' and
'Pythons are large snakes': 0.497

Similarity between:
'Programming languages are used to write software' and
'Pythons are large snakes': 0.119


In [37]:
# Create the search function
def search_notes(query, top_k=3):
    """Return the notes most similar to ``query``, best match first.

    Relies on three notebook-level names: ``model`` (encodes the query),
    ``notes`` (the note texts) and ``note_embeddings`` (one row per note,
    in the same order as ``notes``).

    Args:
        query: Text to search for; passed to ``model.encode``.
        top_k: Maximum number of matches to return (default 3). ``None``
            returns every note; zero or a negative value returns ``[]``.

    Returns:
        A list of dicts with the keys ``'note'`` (the matching entry of
        ``notes``) and ``'similarity'`` (cosine similarity between that
        note's embedding and the query embedding), ordered from most to
        least similar.

    Raises:
        ValueError: If ``notes`` and ``note_embeddings`` differ in length,
            which indicates the two are out of sync.
    """
    # Nothing requested: return early without encoding the query.
    # (A negative top_k would otherwise slice off the *last* k matches.)
    if top_k is not None and top_k <= 0:
        return []

    # Guard against hidden notebook state: if `notes` was edited without
    # re-running the embedding cell, row i no longer describes notes[i].
    if len(notes) != len(note_embeddings):
        raise ValueError(
            f"notes has {len(notes)} entries but note_embeddings has "
            f"{len(note_embeddings)} rows; re-run model.encode(notes)"
        )
    if len(notes) == 0:
        return []

    # Convert search query to embedding
    query_embedding = model.encode(query)

    # Cosine similarity of the query against every note at once
    dot_products = np.dot(note_embeddings, query_embedding)
    norm_products = (
        np.linalg.norm(note_embeddings, axis=1) * np.linalg.norm(query_embedding)
    )

    # A zero norm would give NaN, and NaN sorts last in argsort, so after
    # the reversal below it would be ranked as the *best* match. Score such
    # rows as 0.0 instead.
    has_direction = norm_products > 0
    similarities = np.where(
        has_direction,
        dot_products / np.where(has_direction, norm_products, 1.0),
        0.0,
    )

    # Indices of the top k matches, highest similarity first
    top_idx = np.argsort(similarities)[::-1][:top_k]

    # Return matching notes with their similarity scores
    return [
        {'note': notes[idx], 'similarity': similarities[idx]}
        for idx in top_idx
    ]


In [39]:
# Function to display search results nicely
def display_results(query, results):
    """Print a search query followed by its numbered matches.

    Args:
        query: The search text, echoed in the header line.
        results: Iterable of dicts with 'note' and 'similarity' keys,
            listed in the order given.
    """
    header = f"\nSearch Query: '{query}'"
    divider = "=" * 50
    print(header)
    print(divider)

    rank = 0
    for match in results:
        rank += 1
        score = match['similarity']
        # Similarity is shown as a percentage with two decimals
        print(f"\n{rank}. Match ({score:.2%} similar):")
        print(match['note'])


In [43]:
# Try some example searches
queries = [
    "How do Python lists work?",
    "Tell me about data structures",
    "What is object oriented programming?"
]

# Run each query through search_notes (default top_k=3) and print its matches.
# `results` is overwritten on every iteration, so afterwards it holds only
# the matches for the last query.
for query in queries:
    results = search_notes(query)
    display_results(query, results)



Search Query: 'How do Python lists work?'

1. Match (79.09% similar):
Python lists are mutable sequences used to store collections of items. They can contain mixed types and are defined using square brackets.

2. Match (72.10% similar):
Lists in Python can be modified after creation. Common operations include append(), extend(), and insert().

3. Match (52.82% similar):
Object-oriented programming in Python uses classes and objects. Classes define the structure and behavior of objects.

Search Query: 'Tell me about data structures'

1. Match (84.95% similar):
Data structures are fundamental building blocks in programming. They help organize and store data efficiently.

2. Match (36.30% similar):
Object-oriented programming in Python uses classes and objects. Classes define the structure and behavior of objects.

3. Match (34.01% similar):
Arrays in NumPy provide efficient storage and operations for numerical data. They are widely used in scientific computing.

Search Query: 'What is o

In [47]:
# Ad-hoc search: a question that touches two topics covered by separate
# notes (Python lists and NumPy arrays)
my_query = "What's the difference between lists and arrays?"
# `results` is reassigned here to the matches for this single query
results = search_notes(my_query)
display_results(my_query, results)



Search Query: 'What's the difference between lists and arrays?'

1. Match (59.03% similar):
Python lists are mutable sequences used to store collections of items. They can contain mixed types and are defined using square brackets.

2. Match (48.82% similar):
Arrays in NumPy provide efficient storage and operations for numerical data. They are widely used in scientific computing.

3. Match (47.15% similar):
Lists in Python can be modified after creation. Common operations include append(), extend(), and insert().
