# Practice with NumPy

In [71]:
import math
import numpy as np

In [51]:
# Practice sales dataset: one row per product.
# Columns: product id, then sales for 2021, 2022, 2023 and 2024.
# NOTE(review): the 2021 values 30, 1400 and 1000 break the otherwise steady
# +50-per-year pattern — presumably deliberate outliers for the spread
# questions further down; confirm they are not typos.
data = np.array(
    [
        (1, 100, 150, 200, 250),
        (2, 200, 250, 300, 350),
        (3, 30, 380, 410, 450),
        (4, 1400, 450, 500, 550),
        (5, 1000, 550, 600, 650),
    ]
)

    # Questions
    - What is the total sales for each year?

In [52]:
# Drop the id column (column 0), then add up each remaining column:
# axis=0 collapses the rows, leaving one total per year.
yearly_sales = data[:, 1:]
total_sales_year = yearly_sales.sum(axis=0)
print("total sales for each year = \n", total_sales_year)

total sales for each year = 
 [2730 1780 2010 2250]


    - What is the average sales for each year?

In [53]:
# Mean of every sales column (id column excluded); axis=0 averages over the
# products, giving one value per year.
yearly_sales = data[:, 1:]
avg_sales_year = yearly_sales.mean(axis=0)
print("avg sales for each year = \n", avg_sales_year)

avg sales for each year = 
 [546. 356. 402. 450.]


    - What is the total sales for each product across all years?


In [54]:
# axis=1 collapses the year columns, so each product (row) gets a single
# total over all years. Column 0 is the id and is left out of the sum.
sales_only = data[:, 1:]
total_product_sales = sales_only.sum(axis=1)
print("total product sales = \n", total_product_sales)

total product sales = 
 [ 700 1100 1270 2900 2800]


    - What is the average sales for each product across all years?

In [55]:
# Per-product average over the year columns (axis=1 works along each row).
# Column 0 is the id and is left out of the mean.
sales_only = data[:, 1:]
avg_product_sales = sales_only.mean(axis=1)
print("avg product sales = \n", avg_product_sales)

avg product sales = 
 [175.  275.  317.5 725.  700. ]


    - Which product had the highest sales in 2023?

In [56]:
# Column 3 holds the 2023 sales (columns: id, 2021, 2022, 2023, 2024).
sales_2023 = data[:, 3]
max_sale_2023 = sales_2023.max()
print("max sale in 2023 = ", max_sale_2023)

# ---------------------------------------------------------------------------
# Finding the product id with np.argmax
#
# Definition
#   - np.argmax() returns the index of the maximum value in a NumPy array
#     (the first one, if the maximum occurs more than once).
#   - "arg" = argument (index), "max" = maximum -> index of the maximum.
#
# Syntax
#   numpy.argmax(a, axis=None)
#   - a    -> input array
#   - axis -> dimension along which to find the max index
#       - None (default) -> the array is flattened
#       - axis=0         -> column-wise max index
#       - axis=1         -> row-wise max index
#
# Example
#   import numpy as np
#
#   arr = np.array([10, 20, 5, 40, 25])
#   print(np.argmax(arr))       # 3
#   print(arr[np.argmax(arr)])  # 40
#
#   Output: 3 -> because the max value 40 is at index 3.
#
# 2D example
#   arr = np.array([[5, 8, 12],
#                   [20, 1, 7]])
#
#   print(np.argmax(arr))          # 3 (flattened index)
#   print(np.argmax(arr, axis=0))  # [1 0 0] -> column-wise
#   print(np.argmax(arr, axis=1))  # [2 0]   -> row-wise
# ---------------------------------------------------------------------------

# argmax gives the row position of the best 2023 figure; column 0 of that
# row is the product id.
best_row_2023 = sales_2023.argmax()
product_id_2023 = data[best_row_2023, 0]
print("product id with highest sales in 2023 = ", product_id_2023)

max sale in 2023 =  600
product id with highest sales in 2023 =  5


    - Which year had the highest total sales?

In [57]:
# Which year had the highest total sales?
#
# Year labels for the sales columns, in the column order documented where
# `data` is defined (id, 2021, 2022, 2023, 2024).
sales_years = np.array([2021, 2022, 2023, 2024])

# Sum every year's column once and reuse it for both the maximum and its position.
yearly_totals = np.sum(data[:, 1:], axis=0)

max_total_sales = np.max(yearly_totals)
print("max total sale = ", max_total_sales)

# argmax is relative to the sales-only slice; +1 converts it back to a column
# index of `data` (column 0 is the product id).
max_total_sales_index = np.argmax(yearly_totals) + 1
print("max total sale index= ", max_total_sales_index)

# The question asks for the year, not the column index, so translate it.
max_total_sales_year = sales_years[max_total_sales_index - 1]
print("year with highest total sales = ", max_total_sales_year)

max total sale =  2730
max total sale index=  1


    - Which product had the lowest sales in 2021?

In [59]:
# Which product had the lowest sales in 2021?
#
# Column 1 holds the 2021 sales (column 0 is the product id). The slice is
# 1-D, so no axis argument is needed.
sales_2021 = data[:, 1]

min_sale_2021 = np.min(sales_2021)
print("min sale in 2021 = ", min_sale_2021)

# argmin returns the 0-based row position of the minimum, not the product id.
min_sale_product_index_2021 = np.argmin(sales_2021)
print("min sale product index in 2021 = ", min_sale_product_index_2021)

# The question asks for the product, so read its id from column 0 of that row
# (mirrors the id lookup used for the 2023 highest-sales question).
min_sale_product_id_2021 = data[min_sale_product_index_2021, 0]
print("product id with lowest sales in 2021 = ", min_sale_product_id_2021)

min sale in 2021 =  30
min sale product index in 2021 =  2


    Question that uses np.std() on this dataset
    - What is the standard deviation of sales for each year?

In [80]:
# ---------------------------------------------------------------------------
# Standard deviation (std) — in plain words
#
# (1) Meaning
#     - Standard deviation tells us how far data points are spread from the
#       mean, in the same units as the data.
#     - It is simply the square root of the variance.
#
# (2) Key ideas
#     - Large std -> the values are very spread out.
#     - Small std -> the values sit close to the mean.
#     - Std is never negative (it is 0 when every value is the same).
#
# (3) Units
#     - Same as the data (if the data is in cm, the std is also in cm).
#     - That is why std is easier to interpret than variance.
#
# (4) Why use it?
#     - Easy to interpret in real life (sales, marks, height, etc.).
#     - Tells us how much values typically deviate from the average.
#     - Used in business, data science, quality control, finance, etc.
#
# (5) Relation to variance
#     - Variance = squared spread
#     - Std = sqrt(variance) (spread in the original units)
#
# Example (sales = [100, 200, 300])
#     - Mean = 200
#     - Std  = 81.65
#     - Meaning -> sales typically vary by about ±81.65 around 200,
#                  i.e. roughly between 118 and 282.
# ---------------------------------------------------------------------------

# One standard deviation per year: id column dropped, axis=0 works down the rows.
std = data[:, 1:].std(axis=0)

# Cut each value to two decimals by truncating (floor), not rounding.
truncate_value = [math.floor(value * 100) / 100 for value in std]
print("standard deviation = ", truncate_value)

# avg_sales_year comes from the earlier average-sales-per-year cell; it is
# printed here so the spread can be read next to the mean.
print("average sales = ", avg_sales_year)

standard deviation =  [551.41, 141.92, 141.47, 141.42]
average sales =  [546. 356. 402. 450.]


    Question that uses np.var() on this dataset
    - What is the variance of sales for each year?

In [81]:
# ---------------------------------------------------------------------------
# Variance (var) — easy notes
#
# (1) What does it mean?
#     - Variance tells us how far the numbers are spread out from the average.
#     - It uses the squared differences from the mean.
#
# (2) Important points
#     - Variance is never negative (>= 0).
#     - Large variance -> the values are very spread out.
#     - Small variance -> the values are close to each other.
#     - Its unit is the square of the data's unit (e.g. sales²).
#
# (3) Why do we need it?
#     - Squaring stops positive and negative deviations from cancelling out.
#     - It makes outliers (extreme values) stand out clearly.
#     - It is the basis for:
#         - standard deviation (just sqrt(variance))
#         - correlation, ANOVA, ML error calculations.
#
# (4) Problem
#     - Hard to explain to non-technical people because of the squared units.
#
# In short:
#     - Variance           = mathematical spread (squared)
#     - Standard deviation = human-friendly spread (same units as the data)
# ---------------------------------------------------------------------------

# One variance per year: id column dropped, axis=0 works down the rows.
yearly_sales = data[:, 1:]
var = yearly_sales.var(axis=0)
print("Variance of sale ", var)

Variance of sale  [304064.  20144.  20016.  20000.]


    Question that uses np.cumsum() on this dataset
    - What is the cumulative sales for each product across the years?


In [82]:
# ---------------------------------------------------------------------------
# Cumulative sum (cumsum) — simple explanation
#
# (1) Meaning
#     - A cumulative sum is the running total of a sequence of numbers.
#     - Each entry is the sum of all previous values plus the current one.
#
# (2) Example
#     - For sales data [100, 200, 300]:
#         - Cumulative sum = [100, 300, 600]
#         - Explanation:
#             - First value:  100
#             - Second value: 100 + 200 = 300
#             - Third value:  100 + 200 + 300 = 600
#
# (3) Why use it?
#     - To see how totals grow over time.
#     - Useful in finance, sales tracking and data analysis.
#
# (4) In NumPy
#     - np.cumsum() calculates cumulative sums directly.
# ---------------------------------------------------------------------------

# axis=1 accumulates along each row, so every product gets its own running
# total across the years. Column 0 is the id and is left out.
sales_only = data[:, 1:]
cumulative_sales = sales_only.cumsum(axis=1)
print("cumulative sales for each product across the years = \n", cumulative_sales)

cumulative sales for each product across the years = 
 [[ 100  250  450  700]
 [ 200  450  750 1100]
 [  30  410  820 1270]
 [1400 1850 2350 2900]
 [1000 1550 2150 2800]]
