In [1]:
!pip install fuzzywuzzy

Collecting fuzzywuzzy
  Downloading fuzzywuzzy-0.18.0-py2.py3-none-any.whl (18 kB)
Installing collected packages: fuzzywuzzy
Successfully installed fuzzywuzzy-0.18.0


In [None]:
import json
import pandas as pd
import spacy
from spacy.training import Example
from tabulate import tabulate
from fuzzywuzzy import process  # Import fuzzy matching

# Product catalogue read from the CSV export; the invoice helpers further down
# read its 'Name' and 'Regular price' columns.
product_prices = pd.read_csv("/content/clue_shoes_wc-product-export-8-4-2024-1712571967159.csv")

# Annotated training samples. The conversion below expects a list of records,
# each with a 'text' string and an 'entities' list of dicts carrying
# 'start', 'end' and 'label'.
with open('/content/invoice_5(final).json') as annotations_file:
    data = json.load(annotations_file)

# Start from an empty English pipeline whose only component is a fresh NER.
nlp = spacy.blank("en")
ner = nlp.add_pipe("ner")

# Register the entity types the invoice parser distinguishes.
for label in ("PERSON", "PRODUCT", "QUANTITY"):
    ner.add_label(label)

# Turn every annotated record into a spaCy Example: a tokenised Doc of the
# text plus its (start, end, label) entity spans.
examples = [
    Example.from_dict(
        nlp.make_doc(item['text']),
        {"entities": [(ent['start'], ent['end'], ent['label']) for ent in item['entities']]},
    )
    for item in data
]

# Train only the NER component: any other pipe present in the pipeline is
# switched off for the duration of the block and restored afterwards.
other_pipes = [pipe for pipe in nlp.pipe_names if pipe != "ner"]
with nlp.select_pipes(disable=other_pipes):  # select_pipes replaces the deprecated disable_pipes
    # initialize() is the spaCy v3 replacement for the deprecated
    # begin_training(); handing it the training examples lets the component be
    # set up from the real data. It returns the optimizer used for the updates.
    optimizer = nlp.initialize(lambda: examples)
    for itn in range(20):  # 20 passes over the training set
        # One accumulator per pass, so the summed loss can be inspected to see
        # whether training is converging (previously it was discarded).
        losses = {}
        for example in examples:
            # One example per update step, with 20% dropout.
            nlp.update([example], drop=0.2, sgd=optimizer, losses=losses)
        print(f"Iteration {itn + 1}/20 - losses: {losses}")

# Function to calculate the combined cost of a list of products
def calculate_total_cost(products, quantities, price_table=None):
    """Return the combined cost of ``products`` bought in ``quantities``.

    Products and quantities are paired by position; if the two sequences
    differ in length, the surplus items of the longer one are ignored.

    Args:
        products: Product names, each looked up by exact equality in the
            'Name' column of the price table.
        quantities: One quantity per product; each value must be accepted by
            ``int()`` (for example ``"3"`` or ``3``).
        price_table: Optional DataFrame with 'Name' and 'Regular price'
            columns. Defaults to the module-level ``product_prices``.

    Returns:
        The sum of unit price times quantity over all pairs, or ``0`` when
        there is nothing to price. When several rows share a name, the first
        row's price is used.

    Raises:
        IndexError: A product has no row in the price table. The type is the
            one the bare ``.values[0]`` lookup used to raise; the message now
            names the product that could not be priced.
        ValueError: A quantity cannot be converted by ``int()``.
    """
    if price_table is None:
        price_table = product_prices

    total_cost = 0
    for product, quantity in zip(products, quantities):
        matching_prices = price_table.loc[price_table['Name'] == product, 'Regular price']
        if matching_prices.empty:
            raise IndexError(f"No price found for product {product!r} in the price table")
        price = matching_prices.values[0]
        total_cost += price * int(quantity)
    return total_cost


# Function to generate invoice
def generate_invoice(text):
    """Extract an invoice from free text using the module-level NER pipeline.

    The text is run through ``nlp`` and its entities are collected in document
    order:

    * PERSON   -> customer name (the last PERSON entity wins),
    * PRODUCT  -> resolved to a name from ``product_prices['Name']``,
    * QUANTITY -> kept as the raw entity text.

    Product resolution: a single-word mention that is exactly a catalogue name
    is used as is. Every other mention, including a single-word mention that
    is not in the catalogue, is mapped to the closest catalogue name by fuzzy
    matching. Previously an unknown single-word mention was forwarded
    unchanged and made the price lookup fail.

    Args:
        text: The order text to parse.

    Returns:
        dict with keys ``"customer"`` (str, empty if no PERSON was found),
        ``"products"`` (list of catalogue names), ``"quantities"`` (list of
        entity strings) and ``"total_cost"`` (result of
        ``calculate_total_cost`` for those products and quantities).
    """
    doc = nlp(text)
    catalog_names = product_prices['Name']
    known_names = set(catalog_names)

    customer = ""
    products = []
    quantities = []
    for ent in doc.ents:
        if ent.label_ == "PERSON":
            customer = ent.text
        elif ent.label_ == "PRODUCT":
            is_single_word = len(ent.text.split()) == 1
            if is_single_word and ent.text in known_names:
                # Exact catalogue hit: no need to guess.
                matched_product = ent.text
            else:
                # Multi-word mention, or a single word the catalogue does not
                # contain verbatim: take the best fuzzy match so the name can
                # be priced.
                matched_product = process.extractOne(ent.text, catalog_names)[0]
            products.append(matched_product)
        elif ent.label_ == "QUANTITY":
            quantities.append(ent.text)

    total_cost = calculate_total_cost(products, quantities)
    invoice = {"customer": customer, "products": products, "quantities": quantities, "total_cost": total_cost}
    return invoice

def format_invoice(invoice, price_table=None):
    """Render an invoice dict as plain text with a grid table of line items.

    The unit prices, subtotals and the total shown are recomputed here from
    the price table; the invoice's own ``'total_cost'`` entry is not read.

    Args:
        invoice: dict with ``'products'`` and ``'quantities'`` sequences
            (paired by position; surplus items of the longer one are ignored)
            and an optional ``'customer'`` name.
        price_table: Optional DataFrame with 'Name' and 'Regular price'
            columns. Defaults to the module-level ``product_prices``.

    Returns:
        A string consisting of an "Invoice" header, the customer line and a
        ``tabulate`` grid with one numbered row per product, followed by an
        empty row and a total row.

    Raises:
        IndexError: A product has no row in the price table. The type is the
            one the bare ``.values[0]`` lookup used to raise; the message now
            names the product.
        ValueError: A quantity cannot be converted by ``int()``.
        KeyError: ``invoice`` lacks ``'products'`` or ``'quantities'``.
    """
    if price_table is None:
        price_table = product_prices

    table_data = []
    total_cost = 0
    customer_name = invoice.get('customer', '')
    formatted_invoice = f"Invoice\n\nCustomer: {customer_name}\n\n"
    for i, (product, quantity) in enumerate(zip(invoice['products'], invoice['quantities']), start=1):
        matching_prices = price_table.loc[price_table['Name'] == product, 'Regular price']
        if matching_prices.empty:
            raise IndexError(f"No price found for product {product!r} in the price table")
        price_per_unit = matching_prices.values[0]
        subtotal = int(quantity) * price_per_unit
        total_cost += subtotal
        table_data.append([i, product, quantity, f"Rs. {price_per_unit}", f"Rs. {subtotal}"])

    table_headers = ["Number", "Product Name", "Quantity", "Per Unit Price", "Subtotal"]
    table_data.append(["", "", "", "", ""])  # Blank spacer row before the total
    table_data.append(["", "", "", "", f"Total Cost for Each Product: Rs. {total_cost}"])  # Total over all line items

    formatted_invoice += tabulate(table_data, headers=table_headers, tablefmt="grid")
    return formatted_invoice



In [None]:
# Save the pipeline currently held in `nlp` to the directory
# "invoice_ner_model_2" (a relative path, so it is resolved against the
# notebook's working directory).
nlp.to_disk("invoice_ner_model_2")

In [None]:
 # Load the pipeline back from the "invoice_ner_model_2" directory and rebind
 # the module-level `nlp` to it. generate_invoice() looks `nlp` up when it is
 # called, so it uses the reloaded pipeline from this point on.
nlp = spacy.load("invoice_ner_model_2")

In [None]:
import shutil

# Bundle the saved model directory into "invoice_ner_model_2.zip". The call is
# left as the cell's last expression so the path of the created archive is
# shown as the cell output.
shutil.make_archive(
    base_name="invoice_ner_model_2",
    format="zip",
    root_dir="invoice_ner_model_2",
)

'/content/invoice_ner_model_2.zip'