In [None]:
#Install only once 
!pip install pandas matplotlib seaborn



In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns


In [None]:
# Load the retail transactions; adjust the path if 'online_retail.csv' is stored elsewhere
df = pd.read_csv('online_retail.csv')


In [None]:
# Display the frame; the rendered output is truncated to the first and last rows
df

Unnamed: 0,InvoiceNo,StockCode,Description,Quantity,InvoiceDate,UnitPrice,CustomerID,Country
0,536365,85123A,WHITE HANGING HEART T-LIGHT HOLDER,6,2010-12-01 08:26:00,2.55,17850.0,United Kingdom
1,536365,71053,WHITE METAL LANTERN,6,2010-12-01 08:26:00,3.39,17850.0,United Kingdom
2,536365,84406B,CREAM CUPID HEARTS COAT HANGER,8,2010-12-01 08:26:00,2.75,17850.0,United Kingdom
3,536365,84029G,KNITTED UNION FLAG HOT WATER BOTTLE,6,2010-12-01 08:26:00,3.39,17850.0,United Kingdom
4,536365,84029E,RED WOOLLY HOTTIE WHITE HEART.,6,2010-12-01 08:26:00,3.39,17850.0,United Kingdom
...,...,...,...,...,...,...,...,...
541904,581587,22613,PACK OF 20 SPACEBOY NAPKINS,12,2011-12-09 12:50:00,0.85,12680.0,France
541905,581587,22899,CHILDREN'S APRON DOLLY GIRL,6,2011-12-09 12:50:00,2.10,12680.0,France
541906,581587,23254,CHILDRENS CUTLERY DOLLY GIRL,4,2011-12-09 12:50:00,4.15,12680.0,France
541907,581587,23255,CHILDRENS CUTLERY CIRCUS PARADE,4,2011-12-09 12:50:00,4.15,12680.0,France


In [None]:
# Preview the first rows (five are shown below) to sanity-check the loaded columns
df.head()

Unnamed: 0,InvoiceNo,StockCode,Description,Quantity,InvoiceDate,UnitPrice,CustomerID,Country
0,536365,85123A,WHITE HANGING HEART T-LIGHT HOLDER,6,2010-12-01 08:26:00,2.55,17850.0,United Kingdom
1,536365,71053,WHITE METAL LANTERN,6,2010-12-01 08:26:00,3.39,17850.0,United Kingdom
2,536365,84406B,CREAM CUPID HEARTS COAT HANGER,8,2010-12-01 08:26:00,2.75,17850.0,United Kingdom
3,536365,84029G,KNITTED UNION FLAG HOT WATER BOTTLE,6,2010-12-01 08:26:00,3.39,17850.0,United Kingdom
4,536365,84029E,RED WOOLLY HOTTIE WHITE HEART.,6,2010-12-01 08:26:00,3.39,17850.0,United Kingdom


In [None]:
# Get summary statistics of the numeric columns.
# Worth noting in the output: Quantity and UnitPrice both have negative minimums,
# and CustomerID has a lower count than the other columns (missing values).
df.describe()

Unnamed: 0,Quantity,UnitPrice,CustomerID
count,541909.0,541909.0,406829.0
mean,9.55225,4.611114,15287.69057
std,218.081158,96.759853,1713.600303
min,-80995.0,-11062.06,12346.0
25%,1.0,1.25,13953.0
50%,3.0,2.08,15152.0
75%,10.0,4.13,16791.0
max,80995.0,38970.0,18287.0


In [None]:
# List the column labels available for the analysis below
df.columns

Index(['InvoiceNo', 'StockCode', 'Description', 'Quantity', 'InvoiceDate',
       'UnitPrice', 'CustomerID', 'Country'],
      dtype='object')

In [None]:
# Check the data types of the columns.
# In the output, InvoiceDate is read as object (not a datetime type) and
# CustomerID as float64, so dates must be parsed before any date arithmetic.
df.dtypes

InvoiceNo       object
StockCode       object
Description     object
Quantity         int64
InvoiceDate     object
UnitPrice      float64
CustomerID     float64
Country         object
dtype: object

In [None]:
# Load the online retail store dataset.
# This re-reads the same file loaded in an earlier cell, rebinding `df` to a
# freshly parsed frame before the analysis functions are defined and used.
df = pd.read_csv('online_retail.csv')

def analyze_dataset(df):
    """Compute summary insights from the online retail transactions frame.

    The input frame is left untouched: parsed dates, month periods and line
    totals are held in local Series instead of being written back to ``df``
    as new or overwritten columns.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the columns 'InvoiceNo', 'StockCode', 'Quantity',
        'InvoiceDate', 'UnitPrice', 'CustomerID' and 'Country'.
        'InvoiceDate' may hold strings or datetimes; it is parsed with
        ``pd.to_datetime``.

    Returns
    -------
    dict
        Maps insight name to a pandas Series:

        - 'average_quantity': mean line-item Quantity for each InvoiceNo.
        - 'monthly_sales': summed Quantity for each calendar month (Period 'M').
        - 'top_customers': the 5 CustomerIDs with the largest summed
          Quantity * UnitPrice.
        - 'frequent_countries': the 5 most frequent Country values with counts.
        - 'best_selling_products': the 5 StockCodes with the largest summed
          Quantity.
        - 'order_frequency': number of distinct order dates per CustomerID.
    """
    insights = {}

    # Parse the dates once and reuse the result for both date-based insights.
    invoice_dates = pd.to_datetime(df['InvoiceDate'])

    # Calculate average quantity per invoice
    insights['average_quantity'] = df.groupby('InvoiceNo')['Quantity'].mean()

    # Calculate monthly sales trend; grouping by an index-aligned Series avoids
    # adding a helper column to the caller's frame.
    month_year = invoice_dates.dt.to_period('M').rename('MonthYear')
    insights['monthly_sales'] = df.groupby(month_year)['Quantity'].sum()

    # Calculate top customers by total spending
    line_total = (df['Quantity'] * df['UnitPrice']).rename('TotalSpending')
    insights['top_customers'] = (
        line_total.groupby(df['CustomerID']).sum().nlargest(5)
    )

    # Determine most frequent countries
    insights['frequent_countries'] = df['Country'].value_counts().nlargest(5)

    # Calculate best-selling products
    insights['best_selling_products'] = (
        df.groupby('StockCode')['Quantity'].sum().nlargest(5)
    )

    # Perform order frequency analysis: distinct calendar days with an order
    order_dates = invoice_dates.dt.date.rename('OrderDate')
    insights['order_frequency'] = order_dates.groupby(df['CustomerID']).nunique()

    return insights

def answer_query(query, insights):
    """Build the chatbot's text response for a free-form user query.

    The query is normalised before matching (lower-cased, underscores and
    hyphens treated as spaces, repeated whitespace collapsed), so
    "average_quantity", "Average Quantity" and "best selling products" are
    all recognised. The first matching topic, in the order listed below, wins.

    Parameters
    ----------
    query : str
        Text typed by the user. Recognised topics: "average quantity",
        "monthly sales", "top customers", "frequent countries",
        "best-selling products", "order frequency".
    insights : dict
        Mapping with the keys 'average_quantity', 'monthly_sales',
        'top_customers', 'frequent_countries', 'best_selling_products' and
        'order_frequency'; each value must support ``.items()`` (and
        ``.mean()`` for 'average_quantity').

    Returns
    -------
    str
        The response text, always terminated by two newlines. Unrecognised
        queries get an apology message.
    """
    # Normalise so keyword matching ignores case and _ / - / spacing variants.
    normalized = ' '.join(
        query.lower().replace('_', ' ').replace('-', ' ').split()
    )

    if 'average quantity' in normalized:
        avg_quantity = insights['average_quantity'].mean()
        lines = ["The average quantity per invoice is {:.2f}".format(avg_quantity)]
    elif 'monthly sales' in normalized:
        lines = ["Monthly sales trend:"]
        lines.extend(
            "{}: {}".format(month, sales)
            for month, sales in insights['monthly_sales'].items()
        )
    elif 'top customers' in normalized:
        lines = ["Top customers by total spending:"]
        lines.extend(
            "Customer ID: {}, Total Spending: {:.2f}".format(customer, spending)
            for customer, spending in insights['top_customers'].items()
        )
    elif 'frequent countries' in normalized:
        lines = ["Most frequent countries:"]
        lines.extend(
            "{}: {} occurrences".format(country, count)
            for country, count in insights['frequent_countries'].items()
        )
    elif 'best selling products' in normalized:
        lines = ["Best-selling products:"]
        lines.extend(
            "Product: {}, Quantity: {}".format(product, quantity)
            for product, quantity in insights['best_selling_products'].items()
        )
    elif 'order frequency' in normalized:
        lines = ["Order frequency per customer:"]
        lines.extend(
            "Customer ID: {}, Order Frequency: {}".format(customer, frequency)
            for customer, frequency in insights['order_frequency'].items()
        )
    else:
        lines = ["I'm sorry, I couldn't understand your query."]

    # Two trailing newlines separate consecutive responses when printed.
    return "\n".join(lines) + "\n\n"



In [None]:
# Pre-compute every insight once so each query is answered from the cached results.
dataset_insights = analyze_dataset(df)

print("Welcome to the online_retail store Data Insights Chatbot!")
print("Type 'exit' or 'quit' to stop.")
while True:
    try:
        user_query = input("Enter your query: ")
    except (EOFError, KeyboardInterrupt):
        # Input stream closed or the kernel was interrupted: end the session
        # cleanly instead of leaving a traceback in the cell output.
        break
    # Give the user an explicit way out; without it the cell never finishes.
    if user_query.strip().lower() in ('exit', 'quit'):
        break
    response = answer_query(user_query, dataset_insights)
    print(response)


Welcome to the online_retail store Data Insights Chatbot!
Enter your query: average_quantity
The average quantity per invoice is 12.85


Enter your query: monthly sales
Monthly sales trend:
2010-12: 342228
2011-01: 308966
2011-02: 277989
2011-03: 351872
2011-04: 289098
2011-05: 380391
2011-06: 341623
2011-07: 391116
2011-08: 406199
2011-09: 549817
2011-10: 570532
2011-11: 740286
2011-12: 226333


Enter your query: top customers
Top customers by total spending:
Customer ID: 14646.0, Total Spending: 279489.02
Customer ID: 18102.0, Total Spending: 256438.49
Customer ID: 17450.0, Total Spending: 187482.17
Customer ID: 14911.0, Total Spending: 132572.62
Customer ID: 12415.0, Total Spending: 123725.45


Enter your query: frequent countries
Most frequent countries:
United Kingdom: 495478 occurrences
Germany: 9495 occurrences
France: 8557 occurrences
EIRE: 8196 occurrences
Spain: 2533 occurrences


Enter your query: best-selling products
Best-selling products:
Product: 22197, Quantity: 56450
P