In [None]:
!pip install transformers accelerate


In [None]:
from transformers import pipeline
import pandas as pd

# Read the ERP software sales records from the CSV export on Google Drive.
df = pd.read_csv("/content/drive/MyDrive/erp_sales.csv")

# Build the FLAN-T5 (large) text2text-generation pipeline.
llm = pipeline("text2text-generation", model="google/flan-t5-large")

# Introduce the assistant and ask who is using it.
print("🎉 Welcome! I'm 'DataBuddy' 🤖, your friendly assistant for ERP software sales insights!")
user_name = input("👋 What's your name? ")

# Greet the user by name, then explain how to phrase questions.
print(f"Hello, {user_name}! 👋 I'm here to help you analyze your ERP software sales data.")
print("\n💡 Tip: To get the best results, try phrasing your questions clearly and using keywords related to the data (e.g., 'most sold software', 'total sales in 2024', 'sales by region').")
print("Here are some examples of how to ask questions:")

# Sample questions, shown one per line in this order.
example_questions = (
    "- What is the most sold software?",
    "- What were the total sales?",
    "- Show monthly sales",
    "- Sales by license type.",
    "- Compare sales in North America and Europe.",
    "- Which software generated the highest revenue?",
    "- Average quantity sold per software",
    "- Sales figures for sales rep",
    "- Sales between 2024-01-01 and 2024-01-31.",
    "- What was the top selling software on 2024-03-15?",
)
print(*example_questions, sep="\n")

def _prompt_for_date(message):
    """Ask the user for a date and return it as a day-precision ``pd.Timestamp``.

    Returns ``None`` (after printing a hint) when the entry is empty or cannot
    be parsed as a date, so the caller can abandon the request instead of
    crashing on a bad comparison later.
    """
    entered = input(message).strip()
    try:
        parsed = pd.to_datetime(entered)
    except (ValueError, TypeError, OverflowError):
        parsed = pd.NaT
    # An empty entry parses to NaT rather than raising, so check for it too.
    if pd.isna(parsed):
        print(f"⚠️ '{entered}' is not a valid date. Please use the YYYY-MM-DD format.")
        return None
    return parsed.normalize()


def _top_label(totals):
    """Return the index label of the largest value, or ``None`` if *totals* is empty."""
    # Series.idxmax() raises ValueError on an empty Series.
    return None if totals.empty else totals.idxmax()


def ask_question(dataframe):
    """Read one question from the user, classify it with the LLM and print the answer.

    The question is sent to the module-level ``llm`` pipeline together with
    ``dataframe.head()`` and the list of supported operations. The generated
    text is lower-cased and matched, in order, against the operation names; the
    first operation whose name occurs in it is executed and its result printed.
    If nothing matches, the user is asked to rephrase.

    Args:
        dataframe: Sales records. The branches read the columns ``software``,
            ``quantity``, ``price``, ``date``, ``license``, ``region``,
            ``customer_type`` and ``sales_rep``. The frame is not modified.

    Returns:
        None. All results are written to standard output.
    """
    prompt = input("💬 Ask me a question about the ERP software sales: ")
    action = llm(f"Given the ERP software sales data: {dataframe.head()}. The possible operations are: 'most sold', 'total sales', 'monthly sales', 'sales by license type', 'sales by region', 'sales by customer type', 'software with the highest revenue', 'average sales per software', 'sales by sales rep', 'sales for a specific period', 'most requested software on a specific date'. Please identify the most likely operation based on the user's question: {prompt}.", max_length=50)[0]["generated_text"].strip().lower()

    if "most sold" in action:
        result = _top_label(dataframe.groupby("software")["quantity"].sum())
        if result is None:
            print("📦 There are no sales records to analyze.")
        else:
            print(f"📦 The most sold software is: {result}")
    elif "total sales" in action:
        total_sales = (dataframe["quantity"] * dataframe["price"]).sum()
        print(f"💰 Total ERP software sales: €{total_sales:.2f}")
    elif "monthly sales" in action:
        # NOTE: grouping is by calendar month number (1-12), so the same month
        # of different years is combined into one bucket.
        months = pd.to_datetime(dataframe["date"]).dt.month.rename("month")
        monthly_sales = dataframe["quantity"].groupby(months).sum()
        print(f"📅 Monthly sales:\n{monthly_sales}")
    elif "sales by license type" in action:
        sales_by_license = dataframe.groupby("license")["quantity"].sum()
        print(f"🔑 Sales by license type:\n{sales_by_license}")
    elif "sales by region" in action:
        sales_by_region = dataframe.groupby("region")["quantity"].sum()
        print(f"🌍 Sales by region:\n{sales_by_region}")
    elif "sales by customer type" in action:
        sales_by_customer = dataframe.groupby("customer_type")["quantity"].sum()
        print(f"👥 Sales by customer type:\n{sales_by_customer}")
    elif "software with the highest revenue" in action:
        # Compute revenue as a standalone Series instead of adding a column
        # to the caller's frame.
        revenue = dataframe["quantity"] * dataframe["price"]
        highest_revenue_software = _top_label(revenue.groupby(dataframe["software"]).sum())
        if highest_revenue_software is None:
            print("💸 There are no sales records to analyze.")
        else:
            print(f"💸 The software with the highest revenue is: {highest_revenue_software}")
    elif "average sales per software" in action:
        avg_sales_per_software = dataframe.groupby("software")["quantity"].mean()
        print(f"📊 Average sales per software:\n{avg_sales_per_software}")
    elif "sales by sales rep" in action:
        sales_by_rep = dataframe.groupby("sales_rep")["quantity"].sum()
        print(f"👨‍💻 Sales by sales representative:\n{sales_by_rep}")
    elif "sales for a specific period" in action:
        start_date = _prompt_for_date("📅 Enter the start date (YYYY-MM-DD): ")
        if start_date is None:
            return
        end_date = _prompt_for_date("📅 Enter the end date (YYYY-MM-DD): ")
        if end_date is None:
            return
        if start_date > end_date:
            print("⚠️ The start date must not be after the end date.")
            return
        # Compare whole days so that records carrying a time of day on the end
        # date are still inside the (inclusive) range.
        days = pd.to_datetime(dataframe["date"]).dt.normalize()
        in_period = days.between(start_date, end_date)
        sales_period = dataframe.loc[in_period].groupby("software")["quantity"].sum()
        print(f"🗓 Sales in the specified period:\n{sales_period}")
    elif "most requested software on a specific date" in action:
        specific_date = _prompt_for_date("📅 Enter the date (YYYY-MM-DD): ")
        if specific_date is None:
            return
        days = pd.to_datetime(dataframe["date"]).dt.normalize()
        on_that_day = dataframe.loc[days == specific_date].groupby("software")["quantity"].sum()
        most_requested_on_date = _top_label(on_that_day)
        if most_requested_on_date is None:
            print(f"📆 No sales were recorded on {specific_date:%Y-%m-%d}.")
        else:
            print(f"📆 The most requested software on {specific_date:%Y-%m-%d} is: {most_requested_on_date}")
    else:
        print("🤖 Hmm... I didn't quite understand the operation you're asking for. Could you please rephrase?")

# Main interaction loop: show the menu repeatedly until the user chooses to exit.
while True:
    print("\n✨ What would you like to do?")
    print("1. Ask a question about the sales data")
    print("2. Exit")

    # Strip surrounding whitespace so entries such as "1 " or " 2" are
    # recognised instead of being reported as invalid.
    choice = input("👉 Enter your choice (1 or 2): ").strip()

    if choice == "1":
        # Pass a copy so nothing done during the analysis can modify the
        # loaded data.
        ask_question(df.copy())
    elif choice == "2":
        print("\n👋 Goodbye! It was nice helping you.")
        break
    else:
        print("❌ Invalid choice. Please enter 1 or 2.")
