# In this approach we use the different product names in the input sentence, which gives better accuracy


In [12]:
import csv
import nltk
from nltk.tokenize import word_tokenize
import spacy

# Fetch the 'punkt' resource used for tokenization. nltk.download reports
# "already up-to-date" and does no transfer when the package is present.
nltk.download('punkt')

# Load the spaCy English model. Only run the (slow, network-bound) model
# installer when the model is not installed yet, instead of on every run.
try:
    nlp = spacy.load("en_core_web_sm")
except OSError:
    # spacy.load raises OSError when the model package cannot be found.
    spacy.cli.download("en_core_web_sm")
    nlp = spacy.load("en_core_web_sm")

def get_item_names_from_csv(csv_file):
    """Return the ``sku_name`` value of every data row in a CSV file.

    Args:
        csv_file: Path to a CSV file whose header row has a ``sku_name``
            column.

    Returns:
        A list with the ``sku_name`` string of each row, in file order.

    Raises:
        KeyError: If the rows have no ``sku_name`` column.
    """
    # newline='' leaves line-ending handling to the csv module, so a quoted
    # field that contains a line break is read back unchanged.
    with open(csv_file, 'r', newline='') as file:
        return [row['sku_name'] for row in csv.DictReader(file)]

def compare_with_item_names(recognized_text, item_names):
    """Return the catalogue items whose every token occurs in the text.

    Both the text and each item name are lower-cased and tokenized with
    ``word_tokenize``; an item matches when all of its tokens appear
    somewhere in the text (order and adjacency are not checked).

    Args:
        recognized_text: The sentence to search in.
        item_names: Iterable of item-name strings to look for.

    Returns:
        The matching entries of ``item_names``, unmodified and in their
        original order.
    """
    # A set gives constant-time membership tests; it is built once for all items.
    text_tokens = set(word_tokenize(recognized_text.lower()))

    recognized_items = []
    for item in item_names:
        item_tokens = word_tokenize(item.lower())
        # An item with no tokens (blank name) would pass an all() check
        # vacuously, so require at least one token before matching.
        if item_tokens and text_tokens.issuperset(item_tokens):
            recognized_items.append(item)
    return recognized_items

def extract_sku_name_and_quantity(input_text):
    """Split a sentence into an SKU name part and a quantity part.

    The text is lower-cased and tokenized. The first token that is numeric
    or one of 'pieces', 'packs', 'bags' marks the start of the quantity;
    everything before it (minus the filler words 'i', 'want', 'to', 'buy')
    is the SKU name, everything from it onwards is the quantity.

    Args:
        input_text: The sentence to split.

    Returns:
        A ``(sku_name, quantity)`` tuple of lower-case strings. When the text
        contains no quantity token, ``quantity`` is an empty string and the
        whole text is used for the SKU name.
    """
    tokens = word_tokenize(input_text.lower())

    # Default to len(tokens) rather than -1: a -1 index would silently cut the
    # last word off the SKU name and report that word as the quantity.
    quantity_words = ('pieces', 'packs', 'bags')
    quantity_index = len(tokens)
    for i, token in enumerate(tokens):
        if token.isnumeric() or token in quantity_words:
            quantity_index = i
            break

    # Remove common words like 'i', 'want', 'to', 'buy' from SKU name
    common_words = ('i', 'want', 'to', 'buy')
    sku_name = ' '.join(
        token for token in tokens[:quantity_index] if token not in common_words
    )
    quantity = ' '.join(tokens[quantity_index:])

    return sku_name.strip(), quantity.strip()

def print_output(item_available, recognized_items, sku_name, quantity):
    """Print the availability line and one report per recognized item.

    Each recognized item is expected to look like ``"<sku name>,<quantity>"``;
    the text before the first comma is printed as the SKU name and the text
    between the first and second comma as the quantity. An item without a
    comma is printed with an empty quantity instead of raising IndexError.

    Args:
        item_available: Truthy when at least one item was recognized.
        recognized_items: Iterable of recognized item strings.
        sku_name: Unused; kept so existing callers keep working.
        quantity: Unused; kept so existing callers keep working.
    """
    if item_available:
        print("Item is available in the store.")
    else:
        print("Item is not available in the store.")

    for recognized_item in recognized_items:
        parts = recognized_item.split(",")
        item_sku_name = parts[0]
        # Guard against entries that carry no ",<quantity>" suffix.
        item_quantity = parts[1] if len(parts) > 1 else ""

        print()
        print("Recognized Item:", recognized_item)
        print("SKU Name:", item_sku_name.strip())
        print("Quantity:", item_quantity.strip())

# Driver for this cell: load the catalogue, match a fixed test sentence
# against it, and print one report per matched catalogue entry.

# Provide the path to your CSV file
csv_file = 'store.csv'

# Get item names from CSV
item_names = get_item_names_from_csv(csv_file)

# Define the input sentence
# (a hard-coded list of "<product>,<quantity>" phrases used as test input)
input_sentence = "Omkram Deepam oil 900ml,1 bottle Dove shampoo,1 bottle Omkram Deepam oil 800ml,1 bottle Omkam Deepam oil 400ml,1 bottle Omkaram Deepam oil 200ml,1 bottle Omkaram Deepam oil 100ml,1 bottle REvive Liquid 400ml, 1 bottle EVA DEO SPRAY 40ml,1 bottle EVA DEO SPRAY 40ml, 1bottle Mortin Instant Liquid Vapouriser 45ml, 1 bottle CampuRE Camphor Bar Bathing Bar 75gm,1 bar Mangalam Bhimseni Jar 250gm,1 jar Mangalam Bhimseni Jar 50gm,1 jar Mangalam Camphor Tablet Jar 20gm, 1 jar Mangalam Camphor Tablet Pouch 100gm,1 pouch Livon Serum For Men and Women 20ml, 1 bottle Eva Perfume Talc FREsh 20gm, 1 pack Eva Perfume Talc Sweet 20gm,1 pack TTK Eva Talc Fantasy 50gm,1 bottle Eva Talc Cool Splash 100gm, 1 pack"

# Compare recognized text with item names
recognized_items = compare_with_item_names(input_sentence, item_names)

# Extract SKU name and quantity
# NOTE(review): this splits the WHOLE sentence at its first quantity token, and
# print_output derives its own per-item values, so these two results do not
# influence what gets printed.
sku_name, quantity = extract_sku_name_and_quantity(input_sentence)

# Check if recognized item is available in the store
# (True as soon as at least one catalogue entry matched)
item_available = bool(recognized_items)

# Print the results
print_output(item_available, recognized_items, sku_name, quantity)


[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\rishi\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_sm')
Item is available in the store.

Recognized Item: Omkaram Deepam oil 800ml,1 bottle
SKU Name: Omkaram Deepam oil 800ml
Quantity: 1 bottle

Recognized Item: Omkaram Deepam oil 400ml,1 bottle
SKU Name: Omkaram Deepam oil 400ml
Quantity: 1 bottle

Recognized Item: Omkaram Deepam oil 200ml,1 bottle
SKU Name: Omkaram Deepam oil 200ml
Quantity: 1 bottle

Recognized Item: Omkaram Deepam oil 100ml,1 bottle
SKU Name: Omkaram Deepam oil 100ml
Quantity: 1 bottle

Recognized Item: REvive Liquid 400ml, 1 bottle
SKU Name: REvive Liquid 400ml
Quantity: 1 bottle

Recognized Item: EVA DEO SPRAY 40ml,1 bottle
SKU Name: EVA DEO SPRAY 40ml
Quantity: 1 bottle

Recognized Item: EVA DEO SPRAY 40ml, 1bottle 
SKU Name: EVA DEO SPRAY 40ml
Quantity: 1bottle

Recognized Item: Mortin Instant Liquid Vapouriser 45ml, 1 bottle
SKU Name: Mortin Instant Liquid Vapouriser 45ml
Quantity: 1 bottle

Recognized 

# The output is saved in JSON format

In [1]:
import csv
import nltk
from nltk.tokenize import word_tokenize
import spacy
import json

# Fetch the 'punkt' resource used for tokenization. nltk.download reports
# "already up-to-date" and does no transfer when the package is present.
nltk.download('punkt')

# Load the spaCy English model. Only run the (slow, network-bound) model
# installer when the model is not installed yet, instead of on every run.
try:
    nlp = spacy.load("en_core_web_sm")
except OSError:
    # spacy.load raises OSError when the model package cannot be found.
    spacy.cli.download("en_core_web_sm")
    nlp = spacy.load("en_core_web_sm")

def get_item_names_from_csv(csv_file):
    """Return the ``sku_name`` value of every data row in a CSV file.

    Args:
        csv_file: Path to a CSV file whose header row has a ``sku_name``
            column.

    Returns:
        A list with the ``sku_name`` string of each row, in file order.

    Raises:
        KeyError: If the rows have no ``sku_name`` column.
    """
    # newline='' leaves line-ending handling to the csv module, so a quoted
    # field that contains a line break is read back unchanged.
    with open(csv_file, 'r', newline='') as file:
        return [row['sku_name'] for row in csv.DictReader(file)]

def compare_with_item_names(recognized_text, item_names):
    """Return the catalogue items whose every token occurs in the text.

    Both the text and each item name are lower-cased and tokenized with
    ``word_tokenize``; an item matches when all of its tokens appear
    somewhere in the text (order and adjacency are not checked).

    Args:
        recognized_text: The sentence to search in.
        item_names: Iterable of item-name strings to look for.

    Returns:
        The matching entries of ``item_names``, unmodified and in their
        original order.
    """
    # A set gives constant-time membership tests; it is built once for all items.
    text_tokens = set(word_tokenize(recognized_text.lower()))

    recognized_items = []
    for item in item_names:
        item_tokens = word_tokenize(item.lower())
        # An item with no tokens (blank name) would pass an all() check
        # vacuously, so require at least one token before matching.
        if item_tokens and text_tokens.issuperset(item_tokens):
            recognized_items.append(item)
    return recognized_items

def extract_sku_name_and_quantity(input_text):
    """Split a sentence into an SKU name part and a quantity part.

    The text is lower-cased and tokenized. The first token that is numeric
    or one of 'pieces', 'packs', 'bags' marks the start of the quantity;
    everything before it (minus the filler words 'i', 'want', 'to', 'buy')
    is the SKU name, everything from it onwards is the quantity.

    Args:
        input_text: The sentence to split.

    Returns:
        A ``(sku_name, quantity)`` tuple of lower-case strings. When the text
        contains no quantity token, ``quantity`` is an empty string and the
        whole text is used for the SKU name.
    """
    tokens = word_tokenize(input_text.lower())

    # Default to len(tokens) rather than -1: a -1 index would silently cut the
    # last word off the SKU name and report that word as the quantity.
    quantity_words = ('pieces', 'packs', 'bags')
    quantity_index = len(tokens)
    for i, token in enumerate(tokens):
        if token.isnumeric() or token in quantity_words:
            quantity_index = i
            break

    # Remove common words like 'i', 'want', 'to', 'buy' from SKU name
    common_words = ('i', 'want', 'to', 'buy')
    sku_name = ' '.join(
        token for token in tokens[:quantity_index] if token not in common_words
    )
    quantity = ' '.join(tokens[quantity_index:])

    return sku_name.strip(), quantity.strip()

import json

def save_output_to_json(output_data, json_file):
    """Write the recognition results to a JSON file.

    Every string in ``output_data["recognized_items"]`` is expanded into a
    dict with the keys "Recognized Item", "SKU Name" (text before the first
    comma) and "Quantity" (text between the first and second comma, or an
    empty string when the item has no comma). All other keys of
    ``output_data`` are written unchanged.

    Args:
        output_data: Mapping that contains a "recognized_items" list of
            strings. It is not modified, so the same mapping can be saved
            more than once.
        json_file: Path of the JSON file to create or overwrite.

    Raises:
        KeyError: If ``output_data`` has no "recognized_items" key.
    """
    output_list = []
    for recognized_item in output_data["recognized_items"]:
        parts = recognized_item.split(",")
        output_list.append({
            "Recognized Item": recognized_item,
            "SKU Name": parts[0].strip(),
            # Guard against entries that carry no ",<quantity>" suffix.
            "Quantity": parts[1].strip() if len(parts) > 1 else "",
        })

    # Work on a shallow copy: replacing the list inside the caller's dict
    # would make a second call fail, because the items would then be dicts.
    payload = dict(output_data)
    payload["recognized_items"] = output_list

    with open(json_file, 'w') as file:
        json.dump(payload, file, indent=4)



# Driver for this cell: load the catalogue, match a fixed test sentence
# against it, and write the results to a JSON file.

# Provide the path to your CSV file
csv_file = 'store.csv'

# Get item names from CSV
item_names = get_item_names_from_csv(csv_file)

# Define the input sentence
# (a hard-coded list of "<product>,<quantity>" phrases used as test input)
input_sentence = "Omkram Deepam oil 900ml,1 bottle Dove shampoo,1 bottle Omkram Deepam oil 800ml,1 bottle Omkam Deepam oil 400ml,1 bottle Omkaram Deepam oil 200ml,1 bottle Omkaram Deepam oil 100ml,1 bottle REvive Liquid 400ml, 1 bottle EVA DEO SPRAY 40ml,1 bottle EVA DEO SPRAY 40ml, 1bottle Mortin Instant Liquid Vapouriser 45ml, 1 bottle CampuRE Camphor Bar Bathing Bar 75gm,1 bar Mangalam Bhimseni Jar 250gm,1 jar Mangalam Bhimseni Jar 50gm,1 jar Mangalam Camphor Tablet Jar 20gm, 1 jar Mangalam Camphor Tablet Pouch 100gm,1 pouch Livon Serum For Men and Women 20ml, 1 bottle Eva Perfume Talc FREsh 20gm, 1 pack Eva Perfume Talc Sweet 20gm,1 pack TTK Eva Talc Fantasy 50gm,1 bottle Eva Talc Cool Splash 100gm, 1 pack"

# Compare recognized text with item names
recognized_items = compare_with_item_names(input_sentence, item_names)

# Extract SKU name and quantity
# NOTE(review): this splits the WHOLE sentence at its first quantity token; the
# per-item SKU name and quantity in the JSON are derived separately inside
# save_output_to_json.
sku_name, quantity = extract_sku_name_and_quantity(input_sentence)

# Check if recognized item is available in the store
# (True as soon as at least one catalogue entry matched)
item_available = bool(recognized_items)

# Create the output data
output_data = {
    "item_available": item_available,
    "recognized_items": recognized_items,
    "sku_name": sku_name,
    "quantity": quantity
}

# Provide the path to the JSON file
json_file = 'output.json'

# Save the output to JSON
save_output_to_json(output_data, json_file)


[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\rishi\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_sm')


# This is another approach: when the item names are spoken in a noisy environment, it gives better accuracy


In [11]:
import csv
import nltk
from nltk.tokenize import word_tokenize
import spacy
import speech_recognition as sr

# Fetch the 'punkt' resource used for tokenization. nltk.download reports
# "already up-to-date" and does no transfer when the package is present.
nltk.download('punkt')

# Load the spaCy English model. Only run the (slow, network-bound) model
# installer when the model is not installed yet, instead of on every run.
try:
    nlp = spacy.load("en_core_web_sm")
except OSError:
    # spacy.load raises OSError when the model package cannot be found.
    spacy.cli.download("en_core_web_sm")
    nlp = spacy.load("en_core_web_sm")

def get_item_names_from_csv(csv_file):
    """Return the ``sku_name`` value of every data row in a CSV file.

    Args:
        csv_file: Path to a CSV file whose header row has a ``sku_name``
            column.

    Returns:
        A list with the ``sku_name`` string of each row, in file order.

    Raises:
        KeyError: If the rows have no ``sku_name`` column.
    """
    # newline='' leaves line-ending handling to the csv module, so a quoted
    # field that contains a line break is read back unchanged.
    with open(csv_file, 'r', newline='') as file:
        return [row['sku_name'] for row in csv.DictReader(file)]

# Rest of your code...

def compare_with_item_names(recognized_text, item_names):
    """Return the catalogue items whose every token occurs in the text.

    Both the text and each item name are lower-cased and tokenized with
    ``word_tokenize``; an item matches when all of its tokens appear
    somewhere in the text (order and adjacency are not checked).

    Args:
        recognized_text: The sentence to search in.
        item_names: Iterable of item-name strings to look for.

    Returns:
        The matching entries of ``item_names``, unmodified and in their
        original order.
    """
    # A set gives constant-time membership tests; it is built once for all items.
    text_tokens = set(word_tokenize(recognized_text.lower()))

    recognized_items = []
    for item in item_names:
        item_tokens = word_tokenize(item.lower())
        # An item with no tokens (blank name) would pass an all() check
        # vacuously, so require at least one token before matching.
        if item_tokens and text_tokens.issuperset(item_tokens):
            recognized_items.append(item)

    return recognized_items

def extract_sku_name_and_quantity(input_text):
    """Split a sentence into an SKU name and a single quantity token.

    The text is lower-cased and tokenized. The first numeric token is the
    quantity; the tokens before it (minus the filler words 'i', 'want',
    'to', 'buy') form the SKU name. Tokens after the quantity are ignored.

    Args:
        input_text: The sentence to split.

    Returns:
        A ``(sku_name, quantity)`` tuple of lower-case strings. When the text
        contains no numeric token, ``quantity`` is an empty string and the
        whole text is used for the SKU name.
    """
    tokens = word_tokenize(input_text.lower())

    # None (rather than -1) marks "no numeric token": indexing with -1 would
    # return the last word as the quantity and fail on an empty token list.
    quantity_index = None
    for i, token in enumerate(tokens):
        if token.isnumeric():
            quantity_index = i
            break

    if quantity_index is None:
        sku_name_tokens = tokens
        quantity = ''
    else:
        sku_name_tokens = tokens[:quantity_index]
        quantity = tokens[quantity_index]

    # Remove common words like 'i', 'want', 'to', 'buy' from SKU name
    common_words = ('i', 'want', 'to', 'buy')
    sku_name = ' '.join(
        token for token in sku_name_tokens if token not in common_words
    )

    return sku_name.strip(), quantity

# ...

# Driver for this cell: load the catalogue, record one utterance from the
# microphone, transcribe it and match it against the catalogue.

# Provide the path to your CSV file
csv_file = 'Item2.csv'

# Get item names from CSV
item_names = get_item_names_from_csv(csv_file)

# Initialize the speech recognizer
recognizer = sr.Recognizer()

# Use the microphone as the audio source
with sr.Microphone() as source:
    print("Speak the item you want to buy:")
    audio = recognizer.listen(source)

# Convert speech to text. recognized_text stays None when recognition fails,
# so the matching steps below are skipped instead of raising NameError.
recognized_text = None
try:
    recognized_text = recognizer.recognize_google(audio)
    print("Recognized Text:", recognized_text)
except sr.UnknownValueError:
    print("Speech recognition could not understand audio")
except sr.RequestError:
    print("Could not request results from the speech recognition service")

# Defaults so the result names exist even when nothing was recognized
recognized_items = []
sku_name = ""
quantity = ""

if recognized_text is not None:
    # Compare recognized text with item names
    recognized_items = compare_with_item_names(recognized_text, item_names)

    # Extract SKU name and quantity
    sku_name, quantity = extract_sku_name_and_quantity(recognized_text)

    # Add recognized item to the list
    recognized_items.append(recognized_text)

    # Print the results
    print("Recognized Items:", recognized_items)
    print("SKU Name:", sku_name)
    print("Quantity:", quantity)

# ...


[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\rishi\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_sm')
Speak the item you want to buy:
Recognized Text: omkaram Deepam oil 900 ml
Recognized Items: ['omkaram Deepam oil 900 ml']
SKU Name: omkaram deepam oil
Quantity: 900


# We will use different recognition APIs, such as Google Speech Recognition (recognize_google) and CMU Sphinx (recognize_sphinx)

In [17]:
import csv
import nltk
from nltk.tokenize import word_tokenize
import spacy
import speech_recognition as sr

# Fetch the 'punkt' resource used for tokenization. nltk.download reports
# "already up-to-date" and does no transfer when the package is present.
nltk.download('punkt')

# Load the spaCy English model. Only run the (slow, network-bound) model
# installer when the model is not installed yet, instead of on every run.
try:
    nlp = spacy.load("en_core_web_sm")
except OSError:
    # spacy.load raises OSError when the model package cannot be found.
    spacy.cli.download("en_core_web_sm")
    nlp = spacy.load("en_core_web_sm")

def get_item_names_from_csv(csv_file):
    """Return the ``sku_name`` value of every data row in a CSV file.

    Args:
        csv_file: Path to a CSV file whose header row has a ``sku_name``
            column.

    Returns:
        A list with the ``sku_name`` string of each row, in file order.

    Raises:
        KeyError: If the rows have no ``sku_name`` column.
    """
    # newline='' leaves line-ending handling to the csv module, so a quoted
    # field that contains a line break is read back unchanged.
    with open(csv_file, 'r', newline='') as file:
        return [row['sku_name'] for row in csv.DictReader(file)]

# Rest of your code...

def compare_with_item_names(recognized_text, item_names):
    """Return the catalogue items whose every token occurs in the text.

    Both the text and each item name are lower-cased and tokenized with
    ``word_tokenize``; an item matches when all of its tokens appear
    somewhere in the text (order and adjacency are not checked).

    Args:
        recognized_text: The sentence to search in.
        item_names: Iterable of item-name strings to look for.

    Returns:
        The matching entries of ``item_names``, unmodified and in their
        original order.
    """
    # A set gives constant-time membership tests; it is built once for all items.
    text_tokens = set(word_tokenize(recognized_text.lower()))

    recognized_items = []
    for item in item_names:
        item_tokens = word_tokenize(item.lower())
        # An item with no tokens (blank name) would pass an all() check
        # vacuously, so require at least one token before matching.
        if item_tokens and text_tokens.issuperset(item_tokens):
            recognized_items.append(item)

    return recognized_items

def extract_sku_name_and_quantity(input_text):
    """Split a sentence into an SKU name and a single quantity token.

    The text is lower-cased and tokenized. The first numeric token is the
    quantity; the tokens before it (minus the filler words 'i', 'want',
    'to', 'buy') form the SKU name. Tokens after the quantity are ignored.

    Args:
        input_text: The sentence to split.

    Returns:
        A ``(sku_name, quantity)`` tuple of lower-case strings. When the text
        contains no numeric token, ``quantity`` is an empty string and the
        whole text is used for the SKU name.
    """
    tokens = word_tokenize(input_text.lower())

    # None (rather than -1) marks "no numeric token": indexing with -1 would
    # return the last word as the quantity and fail on an empty token list.
    quantity_index = None
    for i, token in enumerate(tokens):
        if token.isnumeric():
            quantity_index = i
            break

    if quantity_index is None:
        sku_name_tokens = tokens
        quantity = ''
    else:
        sku_name_tokens = tokens[:quantity_index]
        quantity = tokens[quantity_index]

    # Remove common words like 'i', 'want', 'to', 'buy' from SKU name
    common_words = ('i', 'want', 'to', 'buy')
    sku_name = ' '.join(
        token for token in sku_name_tokens if token not in common_words
    )

    return sku_name.strip(), quantity

# ...

# Driver for this cell: load the catalogue, record one utterance from the
# microphone, transcribe it and match it against the catalogue.

# Provide the path to your CSV file
csv_file = 'Item2.csv'

# Get item names from CSV
item_names = get_item_names_from_csv(csv_file)

# Initialize the speech recognizer
recognizer = sr.Recognizer()

# Use the microphone as the audio source
with sr.Microphone() as source:
    print("Speak the item you want to buy:")
    audio = recognizer.listen(source)

# Convert speech to text. recognized_text stays None when recognition fails,
# so the matching steps below are skipped instead of raising NameError.
recognized_text = None
try:
    recognized_text = recognizer.recognize_google(audio)
    print("Recognized Text:", recognized_text)
except sr.UnknownValueError:
    print("Speech recognition could not understand audio")
except sr.RequestError:
    print("Could not request results from the speech recognition service")

# Defaults so the result names exist even when nothing was recognized
recognized_items = []
sku_name = ""
quantity = ""

if recognized_text is not None:
    # Compare recognized text with item names
    recognized_items = compare_with_item_names(recognized_text, item_names)

    # Extract SKU name and quantity
    sku_name, quantity = extract_sku_name_and_quantity(recognized_text)

    # Add recognized item to the list
    recognized_items.append(recognized_text)

    # Print the results
    print("Recognized Items:", recognized_items)
    print("SKU Name:", sku_name)
    print("Quantity:", quantity)

# ...
import speech_recognition as sr

def recognize_speech(audio, engine):
    """Transcribe recorded audio with the chosen engine and print the result.

    Args:
        audio: Audio data as returned by ``Recognizer.listen``.
        engine: "google" to use ``recognize_google`` or "sphinx" to use
            ``recognize_sphinx``.

    Returns:
        The recognized text, or ``None`` when the engine name is invalid or
        recognition fails (a message is printed in each of those cases).
    """
    # Pick the per-engine pieces first, so an invalid engine name is rejected
    # before any recognizer object is created.
    if engine == "google":
        label = "Google"
        method_name = "recognize_google"
        request_error_message = "Could not request results from the Google speech recognition service"
    elif engine == "sphinx":
        label = "Sphinx"
        method_name = "recognize_sphinx"
        request_error_message = "Sphinx recognizer error"
    else:
        print("Invalid speech recognition engine")
        return None

    recognizer = sr.Recognizer()
    try:
        recognized_text = getattr(recognizer, method_name)(audio)
    except sr.UnknownValueError:
        print("Speech recognition could not understand audio")
        return None
    except sr.RequestError:
        print(request_error_message)
        return None

    print("Recognized Text (%s):" % label, recognized_text)
    # Hand the text back so callers can use it, not just read it on stdout.
    return recognized_text

# Second capture in this cell: record a new utterance and run it through
# both recognition engines.

# Initialize the speech recognizer
recognizer = sr.Recognizer()

# Use the microphone as the audio source
with sr.Microphone() as source:
    print("Speak the item you want to buy:")
    audio = recognizer.listen(source)

# Recognize speech using different engines
# The same recording is passed to both calls, so their printed results can be
# compared directly.
# NOTE(review): the captured output shows "Sphinx recognizer error" for the
# second call; presumably PocketSphinx is not installed on that machine — verify.
recognize_speech(audio, "google")
recognize_speech(audio, "sphinx")


[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\rishi\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_sm')
Speak the item you want to buy:
Recognized Text: omkaram Deepam oil 900 ml
Recognized Items: ['omkaram Deepam oil 900 ml']
SKU Name: omkaram deepam oil
Quantity: 900
Speak the item you want to buy:
Recognized Text (Google): omkaram Deepam oil 900 ml
Sphinx recognizer error
