# In [None]:  (notebook cell marker, kept as a comment so the listing parses as Python)
# 📁 Directory Structure Assumed:
# HTSAgent/
# ├── main.py
# ├── config.py
# ├── modules/
# │   ├── pdf_loader.py
# │   ├── rag_agent.py
# │   └── tariff_calculator.py
# ├── agents/
# │   └── agent.py
# ├── data/
# │   ├── general_notes.pdf
# │   └── section_i.csv
# └── output/
#     └── landed_cost_duties.csv

# ============================ config.py ============================
# Default General Notes PDF; main.py offers it when the user just presses Enter.
DEFAULT_GENERAL_NOTES_PDF_PATH = "data/general_notes.pdf"
# Default Section I CSV; main.py offers it when the user just presses Enter.
DEFAULT_SECTION_I_CSV_PATH = "data/section_i.csv"
# Location where rag_agent.py saves the FAISS index and reloads it on later runs.
VECTOR_DB_PATH = "data/vector_db"

# ============================ pdf_loader.py ============================
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

def extract_text(pdf_path, *, chunk_size=1000, chunk_overlap=200):
    """Load a PDF and split its contents into overlapping chunks.

    Args:
        pdf_path: Path of the PDF file handed to ``PyPDFLoader``.
        chunk_size: ``chunk_size`` forwarded to
            ``RecursiveCharacterTextSplitter``. Defaults to 1000, the value
            that used to be hard-coded.
        chunk_overlap: ``chunk_overlap`` forwarded to the splitter. Defaults
            to 200, the value that used to be hard-coded.

    Returns:
        The chunked documents returned by the splitter's
        ``split_documents`` call.
    """
    documents = PyPDFLoader(pdf_path).load()
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return splitter.split_documents(documents)

# ============================ rag_agent.py ============================
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.chains import ConversationalRetrievalChain
from langchain.llms import OpenAI
from config import VECTOR_DB_PATH

import os

# Shared embedding object used both when building and when reloading the FAISS
# index in build_qa_bot. NOTE: it is instantiated at import time, with the
# library's default constructor arguments.
embeddings = HuggingFaceEmbeddings()

def build_qa_bot(chunks):
    """Return a conversational retrieval chain backed by a FAISS index.

    If ``VECTOR_DB_PATH`` already exists on disk, the saved index is loaded
    and ``chunks`` is not consulted at all. Otherwise a new index is built
    from ``chunks`` and saved to ``VECTOR_DB_PATH`` for later runs.

    Args:
        chunks: Documents to index when no saved index exists yet.

    Returns:
        The chain created by ``ConversationalRetrievalChain.from_llm``.
    """
    index_on_disk = os.path.exists(VECTOR_DB_PATH)
    if index_on_disk:
        # Reuse the persisted index instead of re-embedding the documents.
        store = FAISS.load_local(VECTOR_DB_PATH, embeddings)
    else:
        store = FAISS.from_documents(chunks, embeddings)
        store.save_local(VECTOR_DB_PATH)

    return ConversationalRetrievalChain.from_llm(
        llm=OpenAI(temperature=0),
        retriever=store.as_retriever(),
    )

# ============================ tariff_calculator.py ============================
import pandas as pd
import re

# Number grammar shared by the duty patterns: "5", "5.", "5.25" or ".25".
# Unlike a loose "digits and dots" run, it can never capture a lone "." or
# "1.2.3", either of which would make float() raise ValueError.
_DUTY_NUMBER = r"(\d+\.?\d*|\.\d+)"
_AD_VALOREM_RE = re.compile(_DUTY_NUMBER + r"\s*%")
_CENTS_PER_KG_RE = re.compile(_DUTY_NUMBER + r"\s*¢/kg")
_DOLLARS_PER_UNIT_RE = re.compile(r"\$" + _DUTY_NUMBER + r"/unit")


def parse_duty_advanced(duty_str, cif_value, unit_weight=None, quantity=None):
    """Convert a duty expression into a fraction of the CIF value.

    Recognised forms, checked in this order (the first hit wins):

    * missing / blank value, or any text containing "free" -> ``0.0``
    * ``"<n>%"``      -> ``n / 100``
    * ``"<n>¢/kg"``   -> ``n * unit_weight / (100 * cif_value)``
    * ``"$<n>/unit"`` -> ``n * quantity / cif_value``

    Anything else yields ``0.0``. Because the first hit wins, a compound
    expression such as ``"4.4¢/kg + 10%"`` is reduced to its percentage part
    only.

    Args:
        duty_str: Raw cell value. Non-string values are converted with
            ``str()`` instead of raising ``AttributeError``.
        cif_value: Value the specific (per-kg / per-unit) duties are
            expressed against. When it is zero those duties cannot be turned
            into a fraction, so ``0.0`` is returned instead of raising
            ``ZeroDivisionError``.
        unit_weight: Weight in kg multiplied with a ``¢/kg`` rate; the
            ``¢/kg`` form is skipped when this is ``None``.
        quantity: Unit count multiplied with a ``$/unit`` rate; the
            ``$/unit`` form is skipped when this is ``None``.

    Returns:
        The duty as a float fraction (``0.05`` for 5%).
    """
    if pd.isna(duty_str):
        return 0.0

    # str() guards against numeric cells that pandas did not read as text.
    text = str(duty_str).strip().lower()
    if not text or "free" in text:
        return 0.0

    ad_valorem = _AD_VALOREM_RE.search(text)
    if ad_valorem:
        return float(ad_valorem.group(1)) / 100

    # Both remaining forms divide by the CIF value.
    if not cif_value:
        return 0.0

    per_kg = _CENTS_PER_KG_RE.search(text)
    if per_kg and unit_weight is not None:
        cents_per_kg = float(per_kg.group(1))
        return (cents_per_kg * unit_weight) / (100 * cif_value)

    per_unit = _DOLLARS_PER_UNIT_RE.search(text)
    if per_unit and quantity is not None:
        dollars_per_unit = float(per_unit.group(1))
        return (dollars_per_unit * quantity) / cif_value

    return 0.0

def calculate_duty(hts_csv_path, product_cost, freight, insurance, unit_weight, quantity,
                   output_path="output/landed_cost_duties.csv"):
    """Compute duty amounts for every row of a tariff CSV and save the result.

    The CIF value is ``product_cost + freight + insurance``. For each of the
    three rate columns the raw text is parsed with ``parse_duty_advanced``
    into ``"<col> Parsed (%)"`` and multiplied by the CIF value into
    ``"<col> Duty Amount"``. Only rows where at least one duty amount is
    positive are written out.

    NOTE: despite its name, the ``Parsed (%)`` column holds whatever
    ``parse_duty_advanced`` returns; the column name is kept unchanged so
    existing consumers of the CSV keep working.

    Args:
        hts_csv_path: CSV containing the columns "General Rate of Duty",
            "Special Rate of Duty" and "Column 2 Rate of Duty".
        product_cost: Product cost component of the CIF value.
        freight: Freight component of the CIF value.
        insurance: Insurance component of the CIF value.
        unit_weight: Forwarded to ``parse_duty_advanced`` as ``unit_weight``.
        quantity: Forwarded to ``parse_duty_advanced`` as ``quantity``.
        output_path: Destination CSV. Defaults to the previously hard-coded
            ``"output/landed_cost_duties.csv"``.

    Returns:
        ``output_path``, unchanged.

    Raises:
        KeyError: If one of the three rate columns is missing from the CSV.
    """
    # Local import keeps this block self-contained regardless of file-level imports.
    from pathlib import Path

    df = pd.read_csv(hts_csv_path)
    cif_value = product_cost + freight + insurance

    df["CIF Value"] = cif_value
    df["Product Cost"] = product_cost
    df["Freight"] = freight
    df["Insurance"] = insurance

    amount_cols = []
    for col in ("General Rate of Duty", "Special Rate of Duty", "Column 2 Rate of Duty"):
        parsed_col = f"{col} Parsed (%)"
        amount_col = f"{col} Duty Amount"
        df[parsed_col] = df[col].apply(
            lambda raw: parse_duty_advanced(raw, cif_value, unit_weight, quantity)
        )
        df[amount_col] = df[parsed_col] * cif_value
        amount_cols.append(amount_col)

    # Keep rows where any of the three duty amounts is positive.
    df_filtered = df[(df[amount_cols] > 0).any(axis=1)]

    # to_csv does not create missing directories, so make sure the target exists.
    Path(output_path).parent.mkdir(parents=True, exist_ok=True)
    df_filtered.to_csv(output_path, index=False)
    return output_path

# ============================ agent.py ============================
def get_agent():
    """Create the agent via ``initialize_agent`` with an empty tool list.

    The langchain imports are done inside the function, so they only run
    when an agent is actually requested.

    Returns:
        Whatever ``initialize_agent`` returns for the
        ``"zero-shot-react-description"`` agent type, with verbose output on.
    """
    from langchain.agents import Tool, initialize_agent
    from langchain.llms import OpenAI

    # Nothing is registered yet; add Tool() instances here (e.g. RAG, Calculator).
    registered_tools = []
    llm = OpenAI(temperature=0)

    agent = initialize_agent(
        tools=registered_tools,
        llm=llm,
        agent="zero-shot-react-description",
        verbose=True,
    )
    return agent

# ============================ main.py ============================
from modules.pdf_loader import extract_text
from modules.rag_agent import build_qa_bot
from modules.tariff_calculator import calculate_duty
from config import DEFAULT_GENERAL_NOTES_PDF_PATH, DEFAULT_SECTION_I_CSV_PATH

# Interactive entry point: the statements below run top to bottom as soon as
# this section is executed (there is no __main__ guard).
print("📦 Loading TariffBot...")

# An empty answer (just pressing Enter) falls back to the configured default path.
pdf_path = input(f"Enter General Notes PDF path [default: {DEFAULT_GENERAL_NOTES_PDF_PATH}]: ") or DEFAULT_GENERAL_NOTES_PDF_PATH
csv_path = input(f"Enter Section I CSV path [default: {DEFAULT_SECTION_I_CSV_PATH}]: ") or DEFAULT_SECTION_I_CSV_PATH

print("📄 Extracting and chunking General Notes...")
chunks = extract_text(pdf_path)
# NOTE(review): qa_chain is built here but not used again in this script.
qa_chain = build_qa_bot(chunks)

print("🔢 Running Tariff Duty Calculator...")
# float()/int() raise ValueError on non-numeric input; there is no retry loop.
cost = float(input("Enter product cost (FOB): "))
freight = float(input("Enter freight cost: "))
insurance = float(input("Enter insurance cost: "))
weight = float(input("Enter total weight (kg): "))
qty = int(input("Enter total quantity (units): "))

# weight and qty are passed positionally as calculate_duty's unit_weight and
# quantity parameters.
result_path = calculate_duty(csv_path, cost, freight, insurance, weight, qty)
print(f"✅ Duty calculations saved to: {result_path}")

print("🧠 TariffBot is ready!")
