**Setup and Installation**

In [None]:
!pip install -q langchain
!pip install -q langchain_community
!pip install -q langchain-openai
!pip install -q sentence-transformers
!pip install -q numpy
!pip install -q python-dotenv

**Import required libraries**

In [4]:
import os
import numpy as np
from getpass import getpass
from langchain_openai import OpenAIEmbeddings
from langchain_community.embeddings import HuggingFaceEmbeddings

**Setting up API Keys**

In [None]:
# Securely obtain the OpenAI API key.
# A key already exported as OPENAI_API_KEY is reused so that "Restart Kernel ->
# Run All" does not stop at a prompt; otherwise the key is read with getpass so
# it is not written into the notebook source. Surrounding whitespace (a common
# copy/paste artifact) is stripped before the key is stored.
openai_api_key = (
    os.environ.get('OPENAI_API_KEY') or getpass('Enter your OpenAI API key: ')
).strip()
os.environ['OPENAI_API_KEY'] = openai_api_key

**Working with OpenAI Embeddings**

In [None]:
# Let's start by initializing and using OpenAI's embedding model:

try:
    # Build the OpenAI embeddings object with the key collected earlier
    embeddings_model = OpenAIEmbeddings(
        openai_api_key=openai_api_key,
        model="text-embedding-3-large",
    )
except Exception as e:
    # Report the failure instead of raising so the notebook keeps running
    print(f"Error initializing OpenAI Embeddings: {str(e)}")
else:
    print("OpenAI Embeddings model initialized successfully!")

"""### 2.1 Single Query Embedding

Let's embed a single piece of text and examine its properties:
"""

# Text to embed; later cells reuse `query` with the HuggingFace model
query = "What is machine learning?"

try:
    # One text in, one embedding vector out
    query_embedding = embeddings_model.embed_query(query)

    # Report the vector's length, a short preview and its L2 norm
    print(f"Query text: '{query}'")
    print(f"Embedding dimension: {len(query_embedding)}")
    print(f"First 5 values: {query_embedding[:5]}")
    print(f"Vector magnitude: {np.linalg.norm(query_embedding):.4f}")
except Exception as e:
    print(f"Error generating query embedding: {str(e)}")

# Multiple Document Embeddings: Now let's embed multiple documents:

# Sample documents: four short sentences embedded together below.
# `documents` is reused by later cells (pairwise similarity, HuggingFace demo).
documents = [
    "Machine learning is a subset of artificial intelligence.",
    "Deep learning is a type of machine learning.",
    "Neural networks are used in deep learning.",
    "Python is a popular programming language."
]

try:
    # Generate embeddings for all documents in a single embed_documents call.
    # `doc_embeddings` is reused later by the similarity and save/load cells.
    doc_embeddings = embeddings_model.embed_documents(documents)

    # Summary: how many embeddings came back and the length of the first one
    print(f"Number of documents: {len(doc_embeddings)}")
    print(f"Embedding dimension: {len(doc_embeddings[0])}")

    # Display first document and its embedding shape
    print(f"\nFirst document: '{documents[0]}'")
    print(f"Its embedding shape: {np.array(doc_embeddings[0]).shape}")
except Exception as e:
    # Errors are printed rather than raised; in that case `doc_embeddings`
    # may be left undefined for the cells that follow.
    print(f"Error generating document embeddings: {str(e)}")

# Computing Similarities: Let's implement cosine similarity to compare embeddings:

def cosine_similarity(vec1, vec2):
    """Compute the cosine similarity between two vectors.

    Args:
        vec1: First vector (a 1-D sequence of numbers or a NumPy array).
        vec2: Second vector, with the same length as ``vec1``.

    Returns:
        ``dot(vec1, vec2) / (norm(vec1) * norm(vec2))``. When either vector
        has zero magnitude the cosine is undefined; ``0.0`` is returned in
        that case instead of letting the 0/0 division produce ``nan`` and a
        RuntimeWarning.

    Raises:
        ValueError: If the two vectors have incompatible lengths (raised by
            ``np.dot``).
    """
    a = np.asarray(vec1)
    b = np.asarray(vec2)

    denominator = np.linalg.norm(a) * np.linalg.norm(b)
    if denominator == 0:
        # At least one input is the zero vector: no direction to compare.
        return 0.0

    return np.dot(a, b) / denominator

# Score every unordered pair of documents exactly once
try:
    print("Computing similarities between documents:\n")
    doc_count = len(documents)
    # Pairs (i, j) with i < j, in the same order a nested loop would visit them
    index_pairs = [
        (i, j)
        for i in range(doc_count)
        for j in range(i + 1, doc_count)
    ]
    for i, j in index_pairs:
        sim = cosine_similarity(doc_embeddings[i], doc_embeddings[j])
        print(f"Similarity between:\n'{documents[i]}' and\n'{documents[j]}':\n{sim:.4f}\n")
except Exception as e:
    print(f"Error computing similarities: {str(e)}")

**Using HuggingFace Embeddings**

In [None]:
# Now let's try using a different embedding model from HuggingFace:

try:
    # Build the HuggingFace embeddings object for the chosen model
    hf_model_name = "sentence-transformers/all-mpnet-base-v2"
    hf_embeddings = HuggingFaceEmbeddings(model_name=hf_model_name)
    print("HuggingFace Embeddings model initialized successfully!")

    # Embed the same query and documents that were sent to the OpenAI model
    hf_query_embedding = hf_embeddings.embed_query(query)
    hf_doc_embeddings = hf_embeddings.embed_documents(documents)

    print(f"\nHuggingFace embedding dimension: {len(hf_query_embedding)}")
except Exception as e:
    # Any failure (initialization or embedding) is reported, not raised
    print(f"Error working with HuggingFace embeddings: {str(e)}")

# Comparing Different Models: Let's compare the similarities computed by different models:

def compare_similarities(doc1, doc2, model1, model2):
    """Score one pair of texts with two different embedding models.

    Each model embeds both texts through its ``embed_query`` method, and the
    cosine similarity of the two resulting vectors is computed per model.

    Args:
        doc1: First text.
        doc2: Second text.
        model1: First embedding model (must provide ``embed_query``).
        model2: Second embedding model (must provide ``embed_query``).

    Returns:
        tuple: ``(similarity_from_model1, similarity_from_model2)``, or
        ``(None, None)`` if any step raises; the error is printed, not raised.
    """
    try:
        # Embedding order: model1 on doc1 and doc2, then model2 on doc1 and doc2
        vectors = [
            model.embed_query(text)
            for model in (model1, model2)
            for text in (doc1, doc2)
        ]
        first_a, first_b, second_a, second_b = vectors

        return (
            cosine_similarity(first_a, first_b),
            cosine_similarity(second_a, second_b),
        )
    except Exception as e:
        print(f"Error comparing similarities: {str(e)}")
        return None, None

# Two sentences to be scored by both embedding models
doc1 = "Machine learning is fascinating."
doc2 = "I love studying artificial intelligence."

try:
    similarity_pair = compare_similarities(doc1, doc2, embeddings_model, hf_embeddings)
    openai_sim, hf_sim = similarity_pair

    # compare_similarities yields (None, None) on failure; report only real scores
    if not (openai_sim is None or hf_sim is None):
        print(f"Documents being compared:")
        print(f"Doc1: '{doc1}'")
        print(f"Doc2: '{doc2}'\n")
        print(f"OpenAI similarity: {openai_sim:.4f}")
        print(f"HuggingFace similarity: {hf_sim:.4f}")
except Exception as e:
    print(f"Error in comparison: {str(e)}")

**Best Practices Demo**

In [None]:
# Let's demonstrate some best practices for working with embeddings:

def batch_embed_documents(documents, embeddings_model, batch_size=5, pad_failed=False):
    """Embed documents in fixed-size batches, tolerating per-batch failures.

    Args:
        documents: Sequence of texts to embed (must support ``len()`` and
            slicing).
        embeddings_model: Object exposing ``embed_documents(batch)`` that
            returns the embeddings for the given batch of texts.
        batch_size: Maximum number of documents sent per call. Must be >= 1.
        pad_failed: When False (default), a failed batch contributes nothing,
            so the result can be shorter than ``documents`` and positions no
            longer line up with the input. When True, one ``None`` placeholder
            is added per document of a failed batch so that ``result[k]``
            still corresponds to ``documents[k]``.

    Returns:
        list: The collected embeddings, in input order.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1. (Previously a negative
            value silently produced an empty result.)
    """
    if batch_size < 1:
        raise ValueError(f"batch_size must be a positive integer, got {batch_size!r}")

    all_embeddings = []

    batch_starts = range(0, len(documents), batch_size)
    for batch_number, start in enumerate(batch_starts, start=1):
        batch = documents[start:start + batch_size]
        try:
            # Materialize first so a failure can never leave a partial batch behind
            batch_embeddings = list(embeddings_model.embed_documents(batch))
            all_embeddings.extend(batch_embeddings)
            print(f"Processed batch {batch_number}")
        except Exception as e:
            # Best effort: report the failed batch and carry on with the next one
            print(f"Error processing batch {batch_number}: {str(e)}")
            if pad_failed:
                all_embeddings.extend([None] * len(batch))

    return all_embeddings

# Generate some sample documents: 12 short texts, so the default batch size of
# 5 splits them into batches of 5, 5 and 2.
sample_docs = [f"This is sample document number {i}" for i in range(12)]

try:
    # Process in batches (batch_size is left at its default)
    print("\nProcessing documents in batches:")
    batch_embeddings = batch_embed_documents(sample_docs, embeddings_model)
    # A total below len(sample_docs) means at least one batch failed and was skipped
    print(f"\nTotal number of embeddings generated: {len(batch_embeddings)}")
except Exception as e:
    print(f"Error in batch processing: {str(e)}")

"""## 5. Saving and Loading Embeddings

Let's demonstrate how to save and load embeddings:
"""

import pickle
import tempfile
import os

def save_embeddings(embeddings, filepath):
    """Pickle ``embeddings`` to ``filepath``, overwriting any existing file.

    Args:
        embeddings: Object to serialize (any picklable value).
        filepath: Destination path; opened in binary write mode.

    Returns:
        None. Failures are reported with a printed message, not raised.
    """
    try:
        with open(filepath, mode='wb') as out_file:
            pickle.Pickler(out_file).dump(embeddings)
        print(f"Embeddings saved to {filepath}")
    except Exception as e:
        print(f"Error saving embeddings: {str(e)}")

def load_embeddings(filepath):
    """Read back an object previously pickled to ``filepath``.

    NOTE: unpickling can execute arbitrary code, so only call this on files
    you created yourself or otherwise trust.

    Args:
        filepath: Path of the pickle file; opened in binary read mode.

    Returns:
        The unpickled object, or None if opening or unpickling fails (the
        error is printed, not raised).
    """
    try:
        with open(filepath, mode='rb') as in_file:
            restored = pickle.Unpickler(in_file).load()
        print(f"Embeddings loaded from {filepath}")
        return restored
    except Exception as e:
        print(f"Error loading embeddings: {str(e)}")
        return None

# Reserve a uniquely named temporary file for the round trip. mkstemp creates
# the file itself, which avoids a fixed, predictable name in the shared temp
# directory that would later be fed to pickle.load.
temp_fd, temp_filepath = tempfile.mkstemp(prefix='embeddings_', suffix='.pkl')
os.close(temp_fd)  # save_embeddings reopens the path on its own

try:
    # Save embeddings
    save_embeddings(doc_embeddings, temp_filepath)

    # Load embeddings
    loaded_embeddings = load_embeddings(temp_filepath)

    # load_embeddings returns None on failure; only compare real results
    if loaded_embeddings is not None:
        print(f"\nVerifying loaded embeddings:")
        print(f"Original embeddings shape: {np.array(doc_embeddings).shape}")
        print(f"Loaded embeddings shape: {np.array(loaded_embeddings).shape}")
except Exception as e:
    print(f"Error in save/load demo: {str(e)}")
finally:
    # Clean up on every path, including when saving or loading failed
    try:
        os.remove(temp_filepath)
        print(f"\nTemporary file removed")
    except FileNotFoundError:
        pass
    except OSError as e:
        print(f"Error in save/load demo: {str(e)}")