In [2]:
!pip install faiss-cpu



In [53]:
import json
import numpy as np
import faiss
import os
from PIL import Image
import matplotlib.pyplot as plt
import concurrent.futures
import time
import re
from IPython.display import display, Image as IPImage

In [28]:
def create_faiss_index_from_json(json_file_path):
    """
    Create a FlatIP FAISS index binary file from a JSON file containing feature vectors.

    The input is read line by line: every non-blank line must be a standalone
    JSON object whose 'frame_embedding' value is a list, the first element of
    which is the feature vector. Each vector is L2-normalized before being
    added, so the inner-product index scores behave as cosine similarity.

    The binary file is saved in the same directory as the JSON file with the
    name 'faiss_frame_embedding_cosine.bin'.

    Lines that cannot be used (invalid JSON, missing or malformed
    'frame_embedding', or a vector whose length differs from the first valid
    vector) are reported with a printed message and skipped, so one bad record
    does not abort the whole build. Blank lines are ignored silently.

    Args:
        json_file_path (str): Path to the JSON file containing feature vectors.

    Raises:
        ValueError: If the file contains no usable vectors.
    """
    # Determine the directory and output file path
    directory = os.path.dirname(json_file_path)
    bin_file_path = os.path.join(directory, 'faiss_frame_embedding_cosine.bin')

    # Initialize variables
    vectors = []
    dimension = None  # To be determined from the first valid vector

    # Read vectors from the JSON file line by line
    with open(json_file_path, 'r', encoding='utf-8') as file:
        for line_number, line in enumerate(file, start=1):
            line = line.strip()
            if not line:
                # A blank line (e.g. trailing newline) is not a decoding error.
                continue

            try:
                entry = json.loads(line)
            except json.JSONDecodeError as e:
                print(f"Error decoding JSON line: {e}")
                continue

            # Extract the vector; tolerate records without a usable embedding
            # instead of crashing on None[0] / missing key / empty list.
            try:
                embedding = np.array(entry['frame_embedding'][0], dtype='float32')
            except (KeyError, IndexError, TypeError, ValueError) as e:
                print(f"Skipping line {line_number}: no usable 'frame_embedding' ({e!r})")
                continue

            if embedding.ndim != 1 or embedding.size == 0:
                print(f"Skipping line {line_number}: 'frame_embedding'[0] is not a non-empty 1-D vector")
                continue

            # Determine the dimension from the first valid vector and require
            # every later vector to match it, since the index has a fixed width.
            if dimension is None:
                dimension = embedding.shape[0]
            elif embedding.shape[0] != dimension:
                print(
                    f"Skipping line {line_number}: vector has dimension "
                    f"{embedding.shape[0]}, expected {dimension}"
                )
                continue

            # Normalize the vector for cosine similarity (zero vectors are kept as-is)
            norm = np.linalg.norm(embedding)
            if norm != 0:
                embedding /= norm

            vectors.append(embedding)

    # Check that at least one vector was loaded before touching FAISS
    if not vectors:
        raise ValueError("No valid vectors found in JSON file.")

    # Convert list of equally sized vectors to a 2-D float32 NumPy array
    vectors = np.array(vectors, dtype='float32')

    # Create a FlatIP FAISS index (inner product on unit vectors == cosine similarity)
    index = faiss.IndexFlatIP(dimension)

    # Add all vectors to the index at once
    index.add(vectors)

    # Save FAISS index to binary file
    faiss.write_index(index, bin_file_path)
    print(f"FAISS FlatIP index saved to {bin_file_path}")



In [None]:
# Build the cosine-similarity FAISS index for the frame embeddings; the .bin file
# is written next to the JSON file. NOTE(review): the path looks like a Colab
# Google Drive mount — confirm the drive is mounted before running this cell.
create_faiss_index_from_json(
    json_file_path='/content/drive/MyDrive/Frame Embedding/frame_embeddings.json',
)