In [12]:
import pandas as pd
import matplotlib.pyplot as plt

In [13]:
# Load the invoice line items from 'data.csv', decoding the file as ISO-8859-1.
# NOTE(review): the rendered preview shows an 'Unnamed: 0' column, which suggests
# the CSV was written with its index included; passing index_col=0 would drop
# it — confirm nothing downstream relies on that column before changing this.
df = pd.read_csv('data.csv', encoding = 'ISO-8859-1')
# Show the first rows as this cell's output.
df.head()

Unnamed: 0,InvoiceNo,StockCode,Description,Quantity,InvoiceDate,UnitPrice,CustomerID,Country
0,536365,85123A,WHITE HANGING HEART T-LIGHT HOLDER,6,12/1/2010 8:26,2.55,17850.0,United Kingdom
1,536365,71053,WHITE METAL LANTERN,6,12/1/2010 8:26,3.39,17850.0,United Kingdom
2,536365,84406B,CREAM CUPID HEARTS COAT HANGER,8,12/1/2010 8:26,2.75,17850.0,United Kingdom
3,536365,84029G,KNITTED UNION FLAG HOT WATER BOTTLE,6,12/1/2010 8:26,3.39,17850.0,United Kingdom
4,536365,84029E,RED WOOLLY HOTTIE WHITE HEART.,6,12/1/2010 8:26,3.39,17850.0,United Kingdom


In [14]:
def plot_top_products(dataframe: pd.DataFrame, top_n: int = 10) -> None:
    """Draw a bar chart of the products with the largest total quantity.

    Rows are grouped by ``Description`` and their ``Quantity`` values are
    summed. No rows are filtered out first, so rows with a negative quantity
    lower a product's total.

    Args:
        dataframe: Table with at least ``Description`` and ``Quantity`` columns.
        top_n: How many products to show. Defaults to 10.
    """
    # Pick 'Quantity' *before* aggregating: summing the whole frame with
    # numeric_only=False also "sums" (concatenates) every string column per
    # group, only for that work to be thrown away.
    totals = dataframe.groupby('Description')['Quantity'].sum()
    # Largest totals first, then keep the leading top_n rows by position.
    top = totals.sort_values(ascending=False).head(top_n)

    ax = top.plot(kind='bar')
    ax.set_xlabel('Product')
    ax.set_ylabel('Quantity Sold')
    ax.set_title('Top {} Selling Products'.format(top_n))
    plt.show()

In [15]:
def plot_top_returned_products(dataframe: pd.DataFrame, top_n: int = 10) -> None:
    """Draw a bar chart of the products with the largest returned quantity.

    Only rows whose ``Quantity`` is negative are considered (these are treated
    as returns). Quantities are summed per ``Description``; the most negative
    totals are the most-returned products.

    Args:
        dataframe: Table with at least ``Description`` and ``Quantity`` columns.
        top_n: How many products to show. Defaults to 10.
    """
    # Keep only the return rows (negative quantity).
    returns = dataframe[dataframe['Quantity'] < 0]
    # Aggregate just the 'Quantity' column; summing the whole frame would also
    # concatenate every string column per group for no benefit.
    totals = returns.groupby('Description')['Quantity'].sum()
    # Totals are negative, so ascending order puts the biggest returns first.
    top = totals.sort_values(ascending=True).head(top_n)

    ax = top.plot(kind='bar')
    # Values are negative; flip the y axis so the bars still point upwards.
    ax.invert_yaxis()
    # kind='bar' is a vertical chart: products sit on x, quantities on y.
    ax.set_xlabel('Product')
    ax.set_ylabel('Quantity Returned')
    ax.set_title('Top {} Returned Products'.format(top_n))
    plt.show()

In [16]:
def plot_top_customers(dataframe: pd.DataFrame, top_n: int = 10) -> None:
    """Draw a bar chart of the customers with the largest total quantity.

    Rows are grouped by ``CustomerID`` and their ``Quantity`` values are
    summed. No rows are filtered out first, so rows with a negative quantity
    lower a customer's total.

    Args:
        dataframe: Table with at least ``CustomerID`` and ``Quantity`` columns.
        top_n: How many customers to show. Defaults to 10.
    """
    # Aggregate just the 'Quantity' column; summing the whole frame with
    # numeric_only=False would also concatenate every string column per group.
    totals = dataframe.groupby('CustomerID')['Quantity'].sum()
    # head() is positional, which matters here because the CustomerID index is
    # numeric and an integer slice on it could be read as a label range.
    top = totals.sort_values(ascending=False).head(top_n)

    ax = top.plot(kind='bar')
    ax.set_xlabel('Customer ID')
    ax.set_ylabel('Quantity Purchased')
    ax.set_title('Top {} Customers by Quantity Purchased'.format(top_n))
    plt.show()

In [17]:
def plot_top_returned_customers(dataframe: pd.DataFrame, top_n: int = 10) -> None:
    """Draw a bar chart of the customers with the largest returned quantity.

    Only rows whose ``Quantity`` is negative are considered (these are treated
    as returns). Quantities are summed per ``CustomerID``; the most negative
    totals belong to the customers who returned the most.

    Args:
        dataframe: Table with at least ``CustomerID`` and ``Quantity`` columns.
        top_n: How many customers to show. Defaults to 10.
    """
    # Restrict to return rows first. Without this filter the ranking would be
    # by lowest *net* quantity (purchases minus returns), not by returns.
    returns = dataframe[dataframe['Quantity'] < 0]
    # Aggregate just the 'Quantity' column rather than every column.
    totals = returns.groupby('CustomerID')['Quantity'].sum()
    # Totals are negative, so ascending order puts the biggest returners first.
    # head() is used instead of [:n]: CustomerID is a float index, and an
    # integer slice on a float index is interpreted as a label range.
    top = totals.sort_values(ascending=True).head(top_n)

    ax = top.plot(kind='bar')
    # Values are negative; flip the y axis so the bars still point upwards.
    ax.invert_yaxis()
    # kind='bar' is a vertical chart: customers sit on x, quantities on y.
    ax.set_xlabel('Customers')
    ax.set_ylabel('Quantity Returned')
    ax.set_title('Top {} Returned Customers'.format(top_n))
    plt.show()

In [18]:
def plot_top_products_by_country(dataframe: pd.DataFrame, top_n: int = 10) -> None:
    """Draw one bar chart per country of its best-selling products.

    Rows with a non-positive ``Quantity`` are dropped (to ignore returns).
    For every ``Country`` the remaining quantities are summed per
    ``Description`` and the largest totals are plotted in their own figure.

    Args:
        dataframe: Table with at least ``Country``, ``Description`` and
            ``Quantity`` columns.
        top_n: How many products to show per country. Defaults to 10.
    """
    # Only keep sales rows; negative quantities are returns.
    sales = dataframe[dataframe['Quantity'] > 0]

    # A single groupby pass replaces re-filtering the whole frame once per
    # country. sort=False keeps countries in order of first appearance, and
    # rows with a missing country are skipped instead of producing an empty
    # selection.
    for country, df_country in sales.groupby('Country', sort=False):
        # Aggregate just the 'Quantity' column rather than every column.
        totals = df_country.groupby('Description')['Quantity'].sum()
        if totals.empty:
            # Nothing to draw (e.g. every description is missing).
            continue
        top = totals.sort_values(ascending=False).head(top_n)

        fig, ax = plt.subplots()
        top.plot(kind='bar', ax=ax)
        ax.set_xlabel('Product')
        ax.set_ylabel('Quantity Sold')
        ax.set_title('Top {} Sold Products in {}'.format(top_n, country))
        plt.show()
        # Release the figure so a long country list does not pile up open figures.
        plt.close(fig)