# Getting the Correct Data

In [31]:
import pandas as pd
import faiss

In [21]:
# Load the inventory spreadsheet into a DataFrame.
df = pd.read_excel("nepa2_inventory.xlsx")

In [2]:
# Preview the first rows of the loaded DataFrame.
df.head()

# Converting Product Names into Embeddings

In [1]:
import numpy as np
from sentence_transformers import SentenceTransformer

# Sentence-embedding model; reused later to encode the user's query.
model = SentenceTransformer('all-MiniLM-L6-v2')

# Take the Display_Name column and keep only non-blank string entries.
correct_products = [
    name
    for name in df['Display_Name'].to_list()
    if isinstance(name, str) and name.strip()
]

# Encode every retained product name in one call.
correct_embeddings = model.encode(correct_products)

# Optional sanity check: show the first five embedding rows.
print(correct_embeddings[:5])

# Normalise the result to a float32 NumPy array.
correct_embeddings = np.array(correct_embeddings, dtype='float32')


# Saving Vector Embeddings

In [142]:

# Dump the embeddings to a text file as a Python-literal list of lists:
# an opening "[", one "  [v1, v2, ...]," line per vector, then a closing "]".
with open('Inventory_Vectors.txt', 'w') as out:
    out.write('[\n')
    out.writelines(
        '  [' + ', '.join(str(value) for value in vector) + '],\n'
        for vector in correct_embeddings
    )
    out.write(']\n')


# Loading the Embeddings into a Vector Database (FAISS)

In [146]:
import ast  


def read_list_from_file(filename):
    """Read a text file and return the Python literal it contains.

    The whole file is read as text and parsed with ``ast.literal_eval``,
    so only literal syntax (lists, numbers, strings, ...) is accepted.

    Args:
        filename: Path of the text file to read.

    Returns:
        The parsed Python object (for the vectors file, a list of lists).

    Raises:
        OSError: If the file cannot be opened.
        ValueError, SyntaxError: If the content is not a valid literal.
    """
    with open(filename, 'r') as handle:
        return ast.literal_eval(handle.read())


# Reload the embeddings from 'Inventory_Vectors.txt'; the result is a plain
# Python object (nested lists), not yet a NumPy array.
correct_embeddings = read_list_from_file('Inventory_Vectors.txt')


In [148]:
# The values parsed from the text file are Python floats, which NumPy would
# store as float64 by default. FAISS works on float32 vectors, so request
# that dtype explicitly instead of relying on an implicit conversion.
correct_embeddings = np.array(correct_embeddings, dtype='float32')

In [149]:
# Create a FAISS index that compares vectors by L2 (Euclidean) distance,
# sized to the width of the embedding matrix, then add every vector to it.
n_dims = correct_embeddings.shape[1]
index = faiss.IndexFlatL2(n_dims)
index.add(correct_embeddings)

# Finding the Best Matching Product Names

In [153]:
# Ask the user for the (possibly misspelled) product name; input() already
# returns a string.
incorrect_product_name = input("Enter Incorrect Product Name")

In [154]:

def correct_product_name(incorrect_product_name, k=10):
    """Return the index positions of the k nearest product embeddings.

    The query string is encoded with the notebook-level ``model`` and
    searched against the notebook-level FAISS ``index``.

    Args:
        incorrect_product_name: The (possibly misspelled) name to look up.
        k: Number of nearest neighbours to request. Defaults to 10, the
            value that was previously hard-coded.

    Returns:
        The id array returned by ``index.search`` for the single query
        (one row of ``k`` ids). Per the FAISS documentation, missing
        neighbours are reported as -1, so callers should skip negative ids.
    """
    # Encode as a one-item batch and cast to float32 for the search.
    query = model.encode([incorrect_product_name])[0].astype('float32')
    # Distances are not needed here; only the neighbour ids are returned.
    _, neighbour_ids = index.search(np.array([query]), k)
    return neighbour_ids


top_10_indices = correct_product_name(incorrect_product_name)

# FAISS reports -1 for neighbours it could not find (for example when the
# index holds fewer vectors than were requested). Skip those ids: indexing
# correct_products with -1 would silently return the last product instead.
Top_10_Matching_Product = [
    correct_products[i] for i in top_10_indices[0] if i >= 0
]

In [155]:
# Display the matched product names for the query entered above.
Top_10_Matching_Product 

['FUME DISPLAY',
 'FUME 5PK SPLIFFBOYZ (Melon Kiwi)',
 'FUME 5PK SPLIFFBOYZ (Tobacco)',
 'FUME 5PK SPLIFFBOYZ (Sour Apple)',
 'FUME 5PK SPLIFFBOYZ (Grape)',
 '.FUME 12CT BLUE RAZZ ENERGY DRINK',
 'FUME 5PK LIMITED (Strawberry Diesel)',
 'FUME 5PK LIMITED (Watermelon Og)',
 'FUME 5PK SUPREME (Banana Kandy Kush)',
 'FUME 5PK LIMITED (Forbidden Fruit)']