In [None]:
import pandas as pd
import numpy as np
import traceback
import logging
import re
from sklearn.linear_model import LinearRegression
import matplotlib.pyplot as plt
from transformers import pipeline

# Send INFO-and-above records to chatbot.log with a timestamp and level prefix
logging.basicConfig(
    filename='chatbot.log',
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
)

# Pre-trained text-to-text model used to turn a prompt into generated code
nlp_model = pipeline("text2text-generation", model="google/flan-t5-base")

# Shared module state, filled in once a dataset is available
df = None            # dataset the queries run against; None until loaded
column_mapping = {}  # column mappings provided by the user

# Optimized predefined query handler
def predefined_query_handler(query, columns=None):
    """
    Translate a simple aggregate question into a pandas expression.

    The query is searched (case-insensitively) for "<operation> <column>",
    where the operation is one of "average", "total" or "highest".

    Args:
        query: The user's question as plain text.
        columns: Optional iterable of column labels to match against.
            Defaults to the columns of the module-level ``df``.

    Returns:
        A code string such as ``"df['sales'].mean()"`` for the first
        column/operation pair found, or None when nothing matches.
    """
    if columns is None:
        columns = df.columns
    query = query.lower()
    # {col!r} emits a valid Python literal even for labels that contain
    # quotes or are not strings at all.
    operations = {
        "average": "df[{col!r}].mean()",
        "total": "df[{col!r}].sum()",
        "highest": "df[{col!r}].max()",
    }
    for col in columns:
        # Escape the label so regex metacharacters in it are matched
        # literally; str() tolerates non-string labels.
        col_pattern = re.escape(str(col).lower())
        for op, template in operations.items():
            if re.search(f"{op} {col_pattern}", query):
                return template.format(col=col)
    return None

def parse_query(query, dataset_context):
    """
    Ask the language model to produce Python code for a query.

    A prompt is assembled from the dataset column description and the
    user's query, passed to ``nlp_model``, and the text of the first
    generated sequence is returned.
    """
    prompt = (
        f"Dataset columns: {dataset_context}\n"
        f"Query: {query}\n"
        "Generate the required Python code:"
    )
    generations = nlp_model(prompt, max_length=256, num_return_sequences=1)
    first_generation = generations[0]
    return first_generation["generated_text"]

def execute_generated_code(code):
    """
    Execute dynamically generated Python code and report the outcome.

    The code runs through ``exec`` in a single namespace seeded with a
    shallow copy of this module's globals, so module-level names such as
    ``df``, ``np``, ``pd``, ``plt`` and ``LinearRegression`` are visible
    to it. Names the code rebinds stay in that copy; in-place mutation of
    shared objects (for example ``df``) is still visible to the caller.

    SECURITY: ``exec`` gives the code full interpreter access. This is
    NOT a sandbox; model-generated code must be treated as untrusted.

    Args:
        code: Python source text to run.

    Returns:
        ``(result, None)`` on success, where ``result`` is whatever the
        code bound to the name ``result`` (or a default success message),
        or ``(None, error_text)`` when the code raised an exception.
    """
    # Use one dict as both globals and locals: with two separate dicts,
    # functions and lambdas defined by the code cannot see the names the
    # code itself assigned at top level.
    namespace = dict(globals())
    # Discard any module-level `result` so only a value set by this run
    # is reported.
    namespace.pop("result", None)
    try:
        exec(code, namespace)
        result = namespace.get("result", "Operation completed successfully.")
        logging.info(f"Executed code: {code}")
        return result, None
    except Exception:
        # Format the traceback once and reuse it for the log and the caller.
        error_trace = traceback.format_exc()
        logging.error(f"Execution error: {error_trace}")
        return None, f"Execution Error:\n{error_trace}"

def process_query(query):
    """
    Process the query, generate code, and execute it.

    The query is first matched against the predefined aggregate patterns;
    when none match, code is generated by the language model instead.

    Args:
        query: The user's question as plain text.

    Returns:
        The value produced by the executed code, the execution error text
        if the code failed, or a notice when no dataset is loaded.
    """
    # df and column_mapping are only read here, so no `global` is needed.
    if df is None:
        return "No dataset uploaded. Please upload your dataset first."

    # Check predefined queries first
    predefined_code = predefined_query_handler(query)
    if predefined_code:
        logging.info(f"Predefined query matched: {query}")
        code_to_run = predefined_code
    else:
        # Prepare the dataset context
        dataset_context = ", ".join(
            f"{key} -> {value}" for key, value in column_mapping.items()
        )
        code_to_run = parse_query(query, dataset_context)

        print(f"\nGenerated Code:\n{code_to_run}\n")  # Debugging/verification purposes
        logging.info(f"Generated code for query '{query}': {code_to_run}")

    # Execute the code; report failures on both paths so a failed
    # predefined aggregate returns its error text rather than None.
    result, error = execute_generated_code(code_to_run)
    if error:
        return error
    return result

def chatbot():
    """
    Run the interactive chatbot loop on a small sample dataset.

    Replaces the module-level ``df`` and ``column_mapping`` with sample
    data, then reads queries from standard input until the user types
    "exit" or "quit" (any case, surrounding whitespace ignored), input
    ends, or the prompt is interrupted.
    """
    global df, column_mapping

    df = pd.DataFrame({
        "age": [25, 30, 45, 50],
        "sales": [200, 300, 400, 500],
        "profit": [20, 30, 40, 50]
    })
    column_mapping = {"Age": "age", "Sales": "sales", "Profit": "profit"}
    print("Sample dataset loaded. Column mappings preloaded.")

    print("\nChatbot is ready. Type your queries!")
    while True:
        try:
            user_input = input("\nYou: ").strip()
        except (EOFError, KeyboardInterrupt):
            # stdin was closed or the prompt was interrupted: leave cleanly.
            print("\nChatbot: Goodbye!")
            break
        if not user_input:
            # Nothing to answer; prompt again instead of querying the model.
            continue
        if user_input.lower() in ("exit", "quit"):
            print("Chatbot: Goodbye!")
            break
        response = process_query(user_input)
        print(f"Chatbot: {response}")

# Start the chat loop only when this code is the entry point. The dunder
# names need double underscores; `_name_` is undefined and raises NameError.
if __name__ == "__main__":
    chatbot()

In [1]:
import pandas as pd
import numpy as np
import traceback
import logging
import re
from sklearn.linear_model import LinearRegression
import matplotlib.pyplot as plt
from transformers import pipeline

In [3]:
# Send INFO-and-above records to chatbot.log with a timestamp and level prefix
logging.basicConfig(
    filename='chatbot.log',
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
)

# Pre-trained text-to-text model used to turn a prompt into generated code
nlp_model = pipeline("text2text-generation", model="google/flan-t5-base")

# Working dataset, read from a local Excel workbook.
# NOTE(review): absolute, machine-specific path; consider making it configurable.
df = pd.read_excel(
    r"E:\BE Project\Backend\project_root\data\Online Retail.xlsx"
)

config.json:   0%|          | 0.00/1.40k [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


model.safetensors:   0%|          | 0.00/990M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/2.54k [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.42M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/2.20k [00:00<?, ?B/s]

In [7]:
# Dataset column label -> human-readable phrase used in the model prompt
column_mapping = {
    "InvoiceNo": "invoice number",
    "StockCode": "product code",
    "Description": "description",
    "Quantity": "quantity",
    "InvoiceDate": "invoice date",
    "UnitPrice": "price",
    "CustomerID": "customer id",
    "Country": "country",
}

In [None]:
def predefined_query_handler(query, columns=None):
    """
    Translate a simple aggregate question into a pandas expression.

    The query is searched (case-insensitively) for "<operation> <column>",
    where the operation is one of "average", "total" or "highest".

    Args:
        query: The user's question as plain text.
        columns: Optional iterable of column labels to match against.
            Defaults to the columns of the module-level ``df``.

    Returns:
        A code string such as ``"df['Quantity'].sum()"`` for the first
        column/operation pair found, or None when nothing matches.
    """
    if columns is None:
        columns = df.columns
    query = query.lower()
    # {col!r} emits a valid Python literal even for labels that contain
    # quotes or are not strings at all.
    operations = {
        "average": "df[{col!r}].mean()",
        "total": "df[{col!r}].sum()",
        "highest": "df[{col!r}].max()",
    }
    for col in columns:
        # Escape the label so regex metacharacters in it are matched
        # literally; str() tolerates non-string labels.
        col_pattern = re.escape(str(col).lower())
        for op, template in operations.items():
            if re.search(f"{op} {col_pattern}", query):
                return template.format(col=col)
    return None

In [None]:
def parse_query(query, dataset_context):
    """
    Ask the language model to produce Python code for a query.

    A prompt is assembled from the dataset column description and the
    user's query, passed to ``nlp_model``, and the text of the first
    generated sequence is returned.
    """
    prompt = (
        f"Dataset columns: {dataset_context}\n"
        f"Query: {query}\n"
        "Generate the required Python code:"
    )
    generations = nlp_model(prompt, max_length=256, num_return_sequences=1)
    first_generation = generations[0]
    return first_generation["generated_text"]

In [None]:
def execute_generated_code(code):
    """
    Execute dynamically generated Python code and report the outcome.

    The code runs through ``exec`` in a single namespace seeded with a
    shallow copy of the current globals, so names defined at the top
    level (``df``, ``np``, ``pd``, ``plt``, ``LinearRegression``) are
    visible to it. Names the code rebinds stay in that copy; in-place
    mutation of shared objects (for example ``df``) is still visible to
    the caller.

    SECURITY: ``exec`` gives the code full interpreter access. This is
    NOT a sandbox; model-generated code must be treated as untrusted.

    Args:
        code: Python source text to run.

    Returns:
        ``(result, None)`` on success, where ``result`` is whatever the
        code bound to the name ``result`` (or a default success message),
        or ``(None, error_text)`` when the code raised an exception.
    """
    # Use one dict as both globals and locals: with two separate dicts,
    # functions and lambdas defined by the code cannot see the names the
    # code itself assigned at top level.
    namespace = dict(globals())
    # Discard any leftover top-level `result` (easy to have in a notebook)
    # so only a value set by this run is reported.
    namespace.pop("result", None)
    try:
        exec(code, namespace)
        result = namespace.get("result", "Operation completed successfully.")
        logging.info(f"Executed code: {code}")
        return result, None
    except Exception:
        # Format the traceback once and reuse it for the log and the caller.
        error_trace = traceback.format_exc()
        logging.error(f"Execution error: {error_trace}")
        return None, f"Execution Error:\n{error_trace}"

In [None]:
def process_query(query):
    """
    Process the query, generate code, and execute it.

    The query is first matched against the predefined aggregate patterns;
    when none match, code is generated by the language model instead.

    Args:
        query: The user's question as plain text.

    Returns:
        The value produced by the executed code, the execution error text
        if the code failed, or a notice when no dataset is loaded.
    """
    # df and column_mapping are only read here, so no `global` is needed.
    if df is None:
        return "No dataset uploaded. Please upload your dataset first."

    # Check predefined queries first
    predefined_code = predefined_query_handler(query)
    if predefined_code:
        logging.info(f"Predefined query matched: {query}")
        code_to_run = predefined_code
    else:
        # Prepare the dataset context
        dataset_context = ", ".join(
            f"{key} -> {value}" for key, value in column_mapping.items()
        )
        code_to_run = parse_query(query, dataset_context)

        print(f"\nGenerated Code:\n{code_to_run}\n")
        logging.info(f"Generated code for query '{query}': {code_to_run}")

    # Execute the code; report failures on both paths so a failed
    # predefined aggregate (e.g. averaging a text column) returns its
    # error text rather than None.
    result, error = execute_generated_code(code_to_run)
    if error:
        return error
    return result

In [None]:
def chatbot():
    """
    Run the interactive chatbot loop.

    Reads queries from standard input and prints the answer from
    ``process_query`` until the user types "exit" or "quit" (any case,
    surrounding whitespace ignored), input ends, or the prompt is
    interrupted. Nothing is loaded here; ``process_query`` reports if no
    dataset is available.
    """
    print("Sample dataset loaded. Column mappings preloaded.")

    print("\nChatbot is ready. Type your queries!")
    while True:
        try:
            user_input = input("\nYou: ").strip()
        except (EOFError, KeyboardInterrupt):
            # stdin was closed or the cell was interrupted: leave cleanly.
            print("\nChatbot: Goodbye!")
            break
        if not user_input:
            # Nothing to answer; prompt again instead of querying the model.
            continue
        if user_input.lower() in ("exit", "quit"):
            print("Chatbot: Goodbye!")
            break
        response = process_query(user_input)
        print(f"Chatbot: {response}")