In [4]:
!pip install pandas openpyxl




In [5]:
# Upload the HSN workbook through Colab's files.upload() helper. The recorded
# output shows it being saved as "Copy of HSN_SAC.xlsx", which is the path the
# loader cell reads later.
from google.colab import files
uploaded = files.upload()


Saving Copy of HSN_SAC.xlsx to Copy of HSN_SAC.xlsx


In [9]:
import pandas as pd
from google.colab import files

# Prerequisite: the workbook must already have been uploaded by running
# `uploaded = files.upload()` in a separate, earlier cell.

def load_hsn_data(file_path):
    """Read the HSN workbook at ``file_path`` and return a cleaned DataFrame.

    Every cell is read as a string. Column headers are stripped of surrounding
    whitespace, rows whose ``HSNCode`` is missing are discarded, and the
    remaining ``HSNCode`` values are stripped of surrounding whitespace.

    Args:
        file_path: Path of the Excel file, passed straight to ``pd.read_excel``.

    Returns:
        The cleaned DataFrame (original row labels are kept).

    Raises:
        KeyError: If no ``HSNCode`` column exists after header clean-up; the
            message lists the columns that were found.
    """
    frame = pd.read_excel(file_path, dtype=str)

    # Header cells may carry stray padding, so normalise them before the lookup.
    frame.columns = frame.columns.str.strip()

    # Echo the headers so a schema mismatch is easy to diagnose.
    print("Columns in the DataFrame:", frame.columns)

    # Fail early, with the available headers in the message, when the key
    # column is absent.
    if "HSNCode" not in frame.columns:
        raise KeyError("Column 'HSNCode' not found in the Excel file. Available columns are: " + ", ".join(frame.columns))

    # Drop rows without a code, then trim padding around the codes that remain.
    return frame.dropna(subset=["HSNCode"]).assign(
        HSNCode=lambda rows: rows["HSNCode"].astype(str).str.strip()
    )

# Load the uploaded workbook into the notebook-level `hsn_df`, which
# run_agent() reads. "Copy of HSN_SAC.xlsx" must already exist at this
# relative path.
hsn_df = load_hsn_data("Copy of HSN_SAC.xlsx")
# Preview the first rows as a quick sanity check of the load.
hsn_df.head()

Columns in the DataFrame: Index(['HSNCode', 'Description'], dtype='object')


Unnamed: 0,HSNCode,Description
0,1,LIVE ANIMALS
1,101,"LIVE HORSES, ASSES, MULES AND HINNIES."
2,1011010,"LIVE HORSES, ASSES, MULES AND HINNIES PURE-BRE..."
3,1011020,"LIVE HORSES, ASSES, MULESANDHINNIES PURE-BRED ..."
4,1011090,"LIVE HORSES, ASSES, MULES AND HINNIES PURE-BRE..."


In [10]:
def is_valid_format(hsn_code):
    """Return True when ``hsn_code`` is a well-formed HSN code string.

    A well-formed code consists solely of the ASCII digits 0-9 and is exactly
    2, 4, 6 or 8 characters long.

    Args:
        hsn_code: Candidate code. Non-string values are rejected (False)
            instead of raising ``AttributeError``.

    Returns:
        bool: True if the format is acceptable, False otherwise.
    """
    if not isinstance(hsn_code, str):
        return False
    # str.isdigit() on its own also accepts non-ASCII digit characters such as
    # superscript "²" or Arabic-Indic numerals; isascii() restricts it to 0-9.
    return (
        hsn_code.isascii()
        and hsn_code.isdigit()
        and len(hsn_code) in (2, 4, 6, 8)
    )

def validate_hsn_code(hsn_code, df):
    """Check one HSN code against the master table and report the outcome.

    Args:
        hsn_code: The code to check, as a string.
        df: DataFrame with ``HSNCode`` and ``Description`` columns.

    Returns:
        dict with keys ``code``, ``status`` and ``description``. ``status`` is
        ``"Invalid Format"`` when ``is_valid_format`` rejects the code,
        ``"Not Found"`` when no row has that ``HSNCode``, and ``"Valid"``
        otherwise. ``description`` is the first matching row's ``Description``
        for a valid code and ``None`` in the other two cases.
    """
    if not is_valid_format(hsn_code):
        status, description = "Invalid Format", None
    else:
        rows = df.loc[df["HSNCode"] == hsn_code]
        if len(rows.index) == 0:
            status, description = "Not Found", None
        else:
            # Only the first hit is reported when a code appears more than once.
            status, description = "Valid", rows.iloc[0]["Description"]

    return {"code": hsn_code, "status": status, "description": description}


In [11]:
def get_parent_codes(hsn_code):
    """List the 2-, 4- and 6-character prefixes that are shorter than the code.

    Args:
        hsn_code: The code whose prefixes are wanted.

    Returns:
        list: Prefixes in increasing length; empty when the code has two
        characters or fewer.
    """
    ancestors = []
    code_length = len(hsn_code)
    for prefix_length in (2, 4, 6):
        # A prefix only counts as a parent if it is strictly shorter than the code.
        if prefix_length < code_length:
            ancestors.append(hsn_code[:prefix_length])
    return ancestors

def check_hierarchy(hsn_code, df):
    """Report which parent prefixes of ``hsn_code`` are absent from ``df``.

    Args:
        hsn_code: The code whose parents (from ``get_parent_codes``) are checked.
        df: DataFrame with an ``HSNCode`` column.

    Returns:
        A list of the missing parent codes, or the string
        ``"All parent codes found"`` when none are missing. Callers tell the
        two cases apart by type.
    """
    absent = []
    for parent in get_parent_codes(hsn_code):
        if parent in df["HSNCode"].values:
            continue
        absent.append(parent)
    # An empty list is falsy, so the fallback string is returned in that case.
    return absent or "All parent codes found"


In [15]:
def run_agent():
    """Prompt for comma-separated HSN codes and print a report for each one.

    Each entered code is validated with ``validate_hsn_code`` against the
    notebook-level ``hsn_df``. For valid codes the description is printed and
    ``check_hierarchy`` is used to report any missing parent codes. Blank
    entries (empty input, trailing or repeated commas) are skipped.

    Returns:
        None. All results are written to standard output.
    """
    raw_entry = input("Enter HSN codes (comma-separated): ")
    # "".split(",") yields [""] and "0101," yields a trailing "", so blanks are
    # dropped here instead of being reported as invalid codes.
    codes = [code.strip() for code in raw_entry.split(",") if code.strip()]
    if not codes:
        print("No HSN codes entered.")
        return

    for code in codes:
        result = validate_hsn_code(code, hsn_df)
        print("--------------------------------------------------")
        if result["status"] == "Valid":
            print(f"✅ HSN Code: {code}")
            print(f"Description: {result['description']}")

            # check_hierarchy returns a list only when parents are missing.
            hierarchy_check = check_hierarchy(code, hsn_df)
            if isinstance(hierarchy_check, list):
                print(f"⚠️ Missing Parent Codes: {', '.join(hierarchy_check)}")
            else:
                print("✔️ All Parent Codes Present")
        elif result["status"] == "Not Found":
            print(f"❌ HSN Code: {code} not found in dataset.")
        else:
            print(f"⚠️ HSN Code: {code} has invalid format. Must be numeric with 2, 4, 6, or 8 digits.")
        print("--------------------------------------------------\n")

# Start the interactive prompt; relies on `hsn_df` loaded in an earlier cell.
run_agent()


Enter HSN codes (comma-separated): 0101
--------------------------------------------------
✅ HSN Code: 0101
Description: LIVE HORSES, ASSES, MULES AND HINNIES.
✔️ All Parent Codes Present
--------------------------------------------------

