# Create FAISS Index with Mistral Embeddings
This notebook demonstrates how to generate document embeddings with the Mistral AI `mistral-embed` model (via `langchain-mistralai`) and build a FAISS index over them for semantic search.

In [4]:
# Installs langchain-mistralai, the package that provides the MistralAIEmbeddings class imported below.
# NOTE(review): the version is unpinned and -U upgrades on every run — consider pinning it for reproducibility.
%pip install -qU langchain-mistralai

Note: you may need to restart the kernel to use updated packages.


In [1]:
# Import Required Libraries
import numpy as np
import pandas as pd
import faiss
import os
from dotenv import load_dotenv
from langchain_mistralai import MistralAIEmbeddings

In [2]:
# Load Environment Variables and API Keys
# Read the local .env file (if any) before looking the key up in the environment.
load_dotenv()
MISTRAL_API_KEY = os.environ.get("MISTRAL_API_KEY")

# Fail fast: a missing or empty key would otherwise only surface at the first API call.
if MISTRAL_API_KEY is None or MISTRAL_API_KEY == "":
    raise ValueError("MISTRAL_API_KEY not found in environment. Please set it in your .env file.")

In [4]:
# Load and Preprocess Data
# Columns interpolated into each document's text; the sheet must provide all of them.
REQUIRED_COLUMNS = [
    "Brand",
    "Category",
    "Tactic",
    "Timeperiod",
    "$ Spend (MM)",
    "$ Contribution",
    "ROI",
    "iROI",
]

df = pd.read_excel("data/Sample data.xlsx")

# Validate BEFORE transforming, so the reader gets these messages instead of
# whatever the row-wise apply below does on an empty or malformed frame.
if df.empty:
    raise ValueError("Data source is empty or invalid. Please check the file.")

missing_columns = [col for col in REQUIRED_COLUMNS if col not in df.columns]
if missing_columns:
    raise ValueError(f"Data source is missing required columns: {missing_columns}")


def _row_to_text(row):
    """Render one spreadsheet row as the sentence that will be embedded."""
    return (
        f"{row['Brand']} brand ran {row['Category']} campaign using {row['Tactic']} "
        f"in {row['Timeperiod']}. Spend: {row['$ Spend (MM)']}, "
        f"Contribution: {row['$ Contribution']}, ROI: {row['ROI']}, "
        f"Incremental ROI: {row['iROI']}"
    )


# One text document per row; kept on the frame so text and source row stay aligned.
df["text"] = df.apply(_row_to_text, axis=1)

document_texts = df["text"].tolist()

In [5]:
# Generate Document Embeddings
# NOTE(review): no API key is passed here; presumably MistralAIEmbeddings reads
# MISTRAL_API_KEY from the environment — confirm against the library docs.
embed_model = MistralAIEmbeddings(
    model="mistral-embed"
)
try:
    raw_doc_embeddings = embed_model.embed_documents(document_texts)
except Exception as e:
    # Chain the original exception so its traceback stays attached as the cause.
    raise RuntimeError(f"Error generating embeddings: {e}") from e

# A count mismatch would silently misalign vectors and documents downstream.
if len(raw_doc_embeddings) != len(document_texts):
    raise RuntimeError(
        f"Error generating embeddings: expected {len(document_texts)} vectors, "
        f"got {len(raw_doc_embeddings)}"
    )

  from .autonotebook import tqdm as notebook_tqdm


In [6]:
# Normalize Embeddings
# Scale every embedding to unit L2 length. Produces:
#   doc_embeddings_np - 2-D array, one normalised vector per row
#   doc_embeddings    - the same rows as a list of 1-D arrays
from numpy.linalg import norm  # kept in this cell so `norm` stays available to any later cell
try:
    embedding_matrix = np.asarray(raw_doc_embeddings, dtype=np.float64)
    if embedding_matrix.ndim != 2 or embedding_matrix.shape[0] == 0:
        raise ValueError(
            f"expected a non-empty 2-D array of embeddings, got shape {embedding_matrix.shape}"
        )

    # One vectorised norm per row instead of a Python-level loop.
    row_norms = norm(embedding_matrix, axis=1, keepdims=True)

    # Dividing by a zero norm only emits a NumPy warning and yields NaN/inf,
    # which would then be written into the index unnoticed — reject it here.
    if not np.all(np.isfinite(row_norms)) or np.any(row_norms == 0):
        raise ValueError("found a zero-length or non-finite embedding vector")

    doc_embeddings_np = embedding_matrix / row_norms
    doc_embeddings = list(doc_embeddings_np)
except Exception as e:
    raise RuntimeError(f"Error normalizing embeddings: {e}") from e

In [7]:
# Create and Populate FAISS Index
# Builds a flat (exhaustive) L2 index over the document embeddings.
# Assuming the vectors were unit-normalised upstream, squared L2 distance equals
# 2 - 2*cos, so nearest-by-L2 ranks the same as highest cosine similarity.
try:
    # Checked inside the try so a malformed array is reported like any other failure here.
    if doc_embeddings_np.ndim != 2:
        raise ValueError(f"expected a 2-D embedding matrix, got shape {doc_embeddings_np.shape}")

    actual_dim = doc_embeddings_np.shape[1]
    print(f"doc_embeddings_np shape: {doc_embeddings_np.shape}, embedding dimension: {actual_dim}")

    # Ensure float32 AND C-contiguous layout; this is a no-op when the array already qualifies.
    doc_embeddings_np = np.ascontiguousarray(doc_embeddings_np, dtype=np.float32)

    index = faiss.IndexFlatL2(actual_dim)
    index.add(doc_embeddings_np)
    print(f"FAISS index created with {index.ntotal} vectors of dimension {actual_dim}.")
except Exception as e:
    raise RuntimeError(f"Error creating FAISS index: {e}") from e

doc_embeddings_np shape: (124, 1024), embedding dimension: 1024
FAISS index created with 124 vectors of dimension 1024.


In [8]:
# Save and Load FAISS Index
# Single source of truth for the on-disk location (used for both write and read).
INDEX_PATH = "faiss_mmm_index.index"
try:
    faiss.write_index(index, INDEX_PATH)
    print(f"FAISS index saved to {INDEX_PATH}.")

    # To load the index later:
    loaded_index = faiss.read_index(INDEX_PATH)
    print(f"Loaded FAISS index with {loaded_index.ntotal} vectors.")

    # Round-trip sanity check: the file just read back must hold every vector that was written.
    if loaded_index.ntotal != index.ntotal:
        raise ValueError(
            f"round-trip mismatch: wrote {index.ntotal} vectors, read back {loaded_index.ntotal}"
        )
except Exception as e:
    raise RuntimeError(f"Error saving or loading FAISS index: {e}") from e

FAISS index saved to faiss_mmm_index.index.
Loaded FAISS index with 124 vectors.
