# 📓 Sales Data Analyzer

In [3]:
import math
from statistics import stdev
from traceback import format_exc

import numpy as np

In [4]:
# sales_data[year,month,product]
years = np.array([2022, 2023, 2024])
months = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12])
products = np.array(['mobile', 'laptop', 'tablet', 'accessory'])
# Create reproducible random data
np.random.seed(0)
sales_data = np.random.randint(100, 1000, (3, 12, 4))

In [5]:
def get_monthly_sale(year=2024, month=1):
    year = year - 2022
    month = month - 1
    monthly_sale = sales_data[year, month, :]
    print(f"month = {months[month]} , year = {years[year]}")

    print(f"{products[0]} = {monthly_sale[0]}")
    print(f"{products[1]} = {monthly_sale[1]}")
    print(f"{products[2]} = {monthly_sale[2]}")
    print(f"{products[3]} = {monthly_sale[3]}")

In [6]:
def get_yearly_product_sale(year=2024, product=0):
    year = year - 2022
    yearly_product_sale = np.sum(sales_data[year, :, product])
    # merge this 2 print statements into one
    print(f"year = {years[year]} , product = {products[product]} , yearly sale = {yearly_product_sale}")

In [7]:
def get_yearly_total_sale(year=2024):
    year = year - 2022
    yearly_total_sale = np.min(sales_data[year, :, :])
    print(yearly_total_sale)


get_yearly_total_sale()

123


### make questions according to sales data and cover all numpy concepts
    - What is the total sales for each product across all years?
    - What is the average sales for each product across all years?
    - Which product had the highest sales in 2023?
    - Which year had the highest total sales?
    - Which product had the lowest sales in 2021?
    - Which month had the highest sales for laptops in 2024?
    - Which product showed the most consistent sales across all years?
    - What is the percentage increase in sales for each product from 2022 to 2024?
    - What is the correlation between sales of different products across all years?
    - How many months did each product exceed a sales threshold of 500 units in 2023?
    - What is the moving average of sales for each product over the 3 years?
    - Which product had the highest sales growth rate from 2022 to 2024?
    - What is the standard deviation of sales for each product across all years?
    - Which month had the lowest total sales across all products in 2022?
    - What is the median sales for each product across all years?
    - Which product had the highest sales in Q1 of 2023?
    - Which year had the lowest total sales?
    - What is the range of sales for each product across all years?
    - Which product had the highest sales in December across all years?
    - What is the skewness of sales distribution for each product across all years?

In [8]:
# What is the total sales for each product across all years?
sales_data_2d = sales_data.reshape(-1, 4)
total_sale_each_product = np.sum(sales_data_2d[:, :], axis=0)
print(
    f"mobile={total_sale_each_product[0]},laptop={total_sale_each_product[1]},tablet={total_sale_each_product[2]},accessory={total_sale_each_product[3]}")

mobile=20334,laptop=24088,tablet=18519,accessory=18497


In [9]:
# What is the average sales for each product across all years?
total_sale_avg_each_product = [math.floor(i * 100) / 100 for i in np.mean(sales_data_2d[:, :], axis=0)]
print(
    f"mobile={total_sale_avg_each_product[0]},laptop={total_sale_avg_each_product[1]},tablet={total_sale_avg_each_product[2]},accessory={total_sale_avg_each_product[3]}")

mobile=564.83,laptop=669.11,tablet=514.41,accessory=513.8


In [10]:
# Which product had the highest sales in 2023?
highest_sales_2023_index = np.argmax(np.sum(sales_data[1, :, :], axis=0))
print(f"product={products[highest_sales_2023_index]}, sales={np.max(np.sum(sales_data[1, :, :], axis=0))}")

product=laptop, sales=7463


In [11]:
# Which year had the highest total sales?
highest_total_sales_year = np.argmax(np.sum(sales_data[:, :, :], axis=(0, 1)))
print(f"year={years[highest_total_sales_year]}, sales={np.max(np.sum(sales_data[:, :, :], axis=(0, 1)))}")

year=2023, sales=24088


In [12]:
# Which product had the lowest sales in 2022?
lowest_sales_2022 = np.argmin(np.sum(sales_data[0, :, :], axis=0))
print(f"product={products[lowest_sales_2022]}, sales={np.min(np.sum(sales_data[0, :, :], axis=0))}")

product=accessory, sales=5960


In [13]:
# Which month had the highest sales for laptops in 2024?
highest_sales_laptops_2024 = np.argmax(sales_data[2, :, 1])
print(f"month={months[highest_sales_laptops_2024]}, sales={np.max(sales_data[2, :, 1])}")

month=6, sales=981


In [14]:
# Which product showed the most consistent sales across all years?
most_consistent_sales = np.argmin(np.std(sales_data[:, :, :], axis=(0, 1)))
print(f"product={products[most_consistent_sales]}")

product=mobile


In [44]:
# What is the percentage increase in sales for each product from 2022 to 2024?
for i in range(4):
    total_2022 = np.sum(sales_data[0, :, i])
    total_2024 = np.sum(sales_data[2, :, i])
    diff = total_2024 - total_2022
    pr = (diff / total_2022) * 100
    print(f"product = {products[i]}, percentage increase = {pr:.2f}%")
    i+=1

product = mobile, percentage increase = -31.32%
product = laptop, percentage increase = -1.04%
product = tablet, percentage increase = -14.18%
product = accessory, percentage increase = 18.37%


In [52]:
# What is the correlation between sales of different products across all years?
for i in range(4):
    for j in range(i+1,4):
        year_i=sales_data[:,:,i].flatten()
        year_j=sales_data[:,:,j].flatten()
        correlation_product=np.corrcoef(year_i,year_j)
        print(f"product1 = {products[i]} , product2 = {products[j]} , correlation = {correlation_product[0,1]:.2f}")

product1 = mobile , product2 = laptop , correlation = -0.20
product1 = mobile , product2 = tablet , correlation = 0.23
product1 = mobile , product2 = accessory , correlation = -0.25
product1 = laptop , product2 = tablet , correlation = -0.09
product1 = laptop , product2 = accessory , correlation = -0.05
product1 = tablet , product2 = accessory , correlation = -0.20


In [17]:
# How many months did each product exceed a sales threshold of 500 units in 2023?
count = 0
for i in range(4):
    for sale in np.nditer(sales_data[1, :, i]):
        if sale > 500:
            count += 1

    print(f"product={products[i]}, count={count}")
    count = 0

product=mobile, count=6
product=laptop, count=8
product=tablet, count=6
product=accessory, count=5


In [72]:
# What is the moving average of sales for each product over the 3 years?
for i in range(4):
    moving_avg = np.convolve(sales_data[:, :, i].flatten(), np.ones(3) / 3, mode='valid')
    moving_avg = [math.floor(j * 100) / 100 for j in moving_avg]
    print(f"product={products[i]}, moving_avg={moving_avg}")

product=mobile, moving_avg=[1828.0, 1948.0, 1713.0, 2190.0, 1986.0, 2231.0, 2500.0, 2348.0, 2258.0, 1926.0, 2117.0, 1606.0, 1642.0, 1805.0, 2111.0, 1987.0, 1627.0, 1398.0, 1182.0, 831.0, 1075.0, 1280.0, 1320.0, 898.0, 1293.0, 1467.0, 1772.0, 1309.0, 1820.0, 1926.0, 2182.0, 1389.0, 933.0, 1015.0]
product=laptop, moving_avg=[2345.0, 2385.0, 2018.0, 1846.0, 2096.0, 1772.0, 1930.0, 1931.0, 2656.0, 2235.0, 1929.0, 1283.0, 1877.0, 1461.0, 1798.0, 1304.0, 1719.0, 1288.0, 1679.0, 2032.0, 2608.0, 2603.0, 2575.0, 2602.0, 2201.0, 1914.0, 1349.0, 1857.0, 2137.0, 2410.0, 1899.0, 1737.0, 1943.0, 2312.0]
product=tablet, moving_avg=[1913.0, 1354.0, 961.0, 771.0, 1378.0, 1841.0, 2601.0, 2023.0, 1905.0, 1323.0, 1767.0, 1783.0, 1701.0, 1286.0, 1367.0, 1801.0, 2561.0, 2096.0, 1909.0, 1419.0, 1212.0, 829.0, 611.0, 754.0, 1560.0, 1549.0, 1519.0, 1356.0, 1574.0, 1578.0, 1443.0, 1186.0, 1719.0, 1312.0]
product=accessory, moving_avg=[1605.0, 1885.0, 2231.0, 1651.0, 1716.0, 1126.0, 1383.0, 1023.0, 1055.0, 1321.

In [69]:
# Which product had the highest sales growth rate from 2022 to 2024?
for i in range(4):
    growth_rate = (np.sum(sales_data[2, :, i]) - np.sum(sales_data[0, :, i])) / np.sum(sales_data[0, :, i]) * 100
    print(f"product={products[i]}, growth_rate={growth_rate:.2f}%")

product=mobile, growth_rate=-31.32%
product=laptop, growth_rate=-1.04%
product=tablet, growth_rate=-14.18%
product=accessory, growth_rate=18.37%


In [20]:
# What is the standard deviation of sales for each product across all years?
stddev_each_product = [math.floor(i * 100) / 100 for i in np.std(sales_data[:, :, :], axis=(0, 1))]
print(
    f"mobile={stddev_each_product[0]},laptop={stddev_each_product[1]},tablet={stddev_each_product[2]},accessory={stddev_each_product[3]}")

mobile=248.11,laptop=251.87,tablet=268.92,accessory=251.54


In [21]:
# Which month had the lowest total sales across all products in 2022?
lowest_total_sale_2022 = np.argmin(np.sum(sales_data[1, :, :], axis=1))
print(f"month={months[lowest_total_sale_2022]}, sales={np.min(np.sum(sales_data[1, :, :], axis=1))}")

month=4, sales=1511


In [58]:
# What is the median sales for each product across all years?
median_sales=np.median(sales_data[:,:,:],axis=(0,1))
print(f"mobile={median_sales[0]},laptop={median_sales[1]},tablet={median_sales[2]},accessory={median_sales[3]}")

mobile=572.5,laptop=690.0,tablet=450.5,accessory=483.0


In [65]:
# Which product had the highest sales in Q1 of 2023?
highest_sales_Q1_2023=(np.sum(sales_data[1,:3,:],axis=0))
print(f"product={products[np.argmax(highest_sales_Q1_2023)]}, sales={np.max(highest_sales_Q1_2023)}")

product=laptop, sales=1877


In [24]:
# Which year had the lowest total sales?
lowest_total_sales_year = np.argmin(np.sum(sales_data[:, :, :], axis=(1, 2)))
print(f"year={years[lowest_total_sales_year]}, sales={np.min(np.sum(sales_data[:, :, :], axis=(1, 2)))}")

year=2023, sales=25276


In [25]:
# What is the range of sales for each product across all years?
for i in range(4):
    print(f"product={products[i]}, range={np.max(sales_data[:, :, i]) - np.min(sales_data[:, :, i])}")

product=mobile, range=836
product=laptop, range=839
product=tablet, range=841
product=accessory, range=842


In [26]:
# Which product had the highest sales in December across all years?
highest_sales_december = np.argmax(np.sum(sales_data[:, 11, :], axis=0))
print(f"product={products[highest_sales_december]}, sales={np.max(np.sum(sales_data[:, 11, :], axis=0))}")

product=laptop, sales=2101


In [68]:
# What is the skewness of sales distribution for each product across all years?
for i in range(4):
    flattened_data = sales_data[:, :, i].flatten()
    mean = np.mean(flattened_data)
    median = np.median(flattened_data)
    std_dev = np.std(flattened_data)
    print(f"product={products[i]}, mean={mean:.2f}, median={median:.2f}, std_dev={std_dev:.2f}", end=", ")
    if mean>median:
        print("positively skewed")
    elif mean<median:
        print("negatively skewed")
    else:
        print("no skewness")

product=mobile, mean=564.83, median=572.50, std_dev=248.11, negatively skewed
product=laptop, mean=669.11, median=690.00, std_dev=251.88, negatively skewed
product=tablet, mean=514.42, median=450.50, std_dev=268.92, positively skewed
product=accessory, mean=513.81, median=483.00, std_dev=251.54, positively skewed
