<a href="https://colab.research.google.com/github/Sidhtang/india-index-/blob/main/hs_code_prediction_using_bmodel.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

def create_hs_code_dataset(num_samples=100000):
    # Simulating a dataset with product descriptions and HS codes
    products = [
        "Cotton T-shirt", "Leather shoes", "Smartphone", "Laptop computer",
        "Wooden chair", "Stainless steel watch", "Electric toothbrush",
        "Plastic water bottle", "Glass vase", "Aluminum cookware set"
    ]

    hs_codes = [
        "6109.10", "6403.99", "8517.12", "8471.30",
        "9401.61", "9102.11", "8509.80",
        "3923.30", "7013.99", "7615.10"
    ]

    # Generate random combinations of products and features
    data = []
    for _ in range(num_samples):
        idx = np.random.randint(0, len(products))
        product = products[idx]
        hs_code = hs_codes[idx]

        # Add some variation to the product descriptions
        variants = ["New", "Used", "Refurbished", "Vintage", "Custom"]
        colors = ["Red", "Blue", "Green", "Yellow", "Black", "White"]
        materials = ["Organic", "Synthetic", "Recycled", "Premium"]

        description = f"{np.random.choice(variants)} {np.random.choice(colors)} {np.random.choice(materials)} {product}"

        data.append({
            "description": description,
            "hs_code": hs_code
        })

    df = pd.DataFrame(data)

    # Split the dataset
    train_df, test_df = train_test_split(df, test_size=0.2, random_state=42)

    return train_df, test_df

# Create the dataset
train_df, test_df = create_hs_code_dataset()

# Save the datasets
train_df.to_csv("hs_code_train.csv", index=False)
test_df.to_csv("hs_code_test.csv", index=False)

print(f"Training set shape: {train_df.shape}")
print(f"Test set shape: {test_df.shape}")
print("\nSample data:")
print(train_df.head())

Training set shape: (80000, 2)
Test set shape: (20000, 2)

Sample data:
                                    description  hs_code
75220              Used Red Recycled Smartphone  8517.12
48955    Used Black Premium Electric toothbrush  8509.80
44966  Refurbished Black Premium Cotton T-shirt  6109.10
13568       Used Black Synthetic Cotton T-shirt  6109.10
92727     Vintage Green Synthetic Leather shoes  6403.99


In [18]:
import numpy as np
import pandas as pd
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Assuming you've saved your trained model and tokenizer
# If not, you'll need to train the model first using the code you provided

def load_trained_model(model_path):
    return load_model(model_path)

def predict_hs_code(text, model, tokenizer, max_len=100):
    # Tokenize the input text
    sequence = tokenizer.texts_to_sequences([text])

    # Pad the sequence
    padded_sequence = pad_sequences(sequence, maxlen=max_len)

    # Make prediction
    prediction = model.predict(padded_sequence)

    # Get the predicted class (HS code)
    predicted_class = np.argmax(prediction, axis=1)[0]

    return predicted_class

# Load the trained model
model_path = '/content/models/lstm_model.h5'  # Replace with your model's path
lstm_model = load_trained_model(model_path)

# Load or create the tokenizer (you need to save and load the tokenizer separately)
# For this example, let's assume we're creating a new tokenizer and fitting it on some data
# In practice, you should save and load the tokenizer used during training
train_df = pd.read_csv("hs_code_train.csv")  # Load your training data
tokenizer = Tokenizer(num_words=5000)
tokenizer.fit_on_texts(train_df['description'])

# Load the label encoder or create a mapping of HS codes
hs_codes = train_df['hs_code'].unique()

# Function to get the actual HS code from the predicted class
def get_hs_code(predicted_class):
    return hs_codes[predicted_class]

# Example usage
while True:
    text = input("Enter a product description (or 'quit' to exit): ")
    if text.lower() == 'quit':
        break

    predicted_class = predict_hs_code(text, lstm_model, tokenizer)
    predicted_hs_code = get_hs_code(predicted_class)

    print(f"Predicted HS Code: {predicted_hs_code}")

print("Thank you for using the HS Code Predictor!")



Enter a product description (or 'quit' to exit): black leather jacket
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 350ms/step
Predicted HS Code: 6109.1
Enter a product description (or 'quit' to exit): Used Blue Recycled Wooden chair
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
Predicted HS Code: 7615.1
Enter a product description (or 'quit' to exit): quit
Thank you for using the HS Code Predictor!
