In [None]:
# 2.1 What is NumPy & Why It Is Required
# NumPy (Numerical Python) is a high-performance numerical computing library in Python designed for:

# Fast mathematical operations

# Large data handling

# Scientific/analytics computations

# Instead of Python lists (slow, memory-heavy), NumPy provides:

# ndarray → a fast, fixed-type, multidimensional array

# vectorized operations → fast calculations without loops

# Broadcasting → automatic expansion of arrays for operations

# Integration with Pandas, Matplotlib, Scikit-learn

# Why NumPy Is Required in Data Analytics

# Works efficiently with large datasets

# Performs sum, mean, correlation in milliseconds

# Used in ML algorithms, image processing, simulations

# Provides foundation for Pandas and many analytics frameworks


# Why NumPy is Essential for Data Analytics

# Typically many times faster than Python lists for numeric work (often 10–100x, depending on the operation and array size)

# Efficient vectorized operations (no loops)

# Supports linear algebra, statistics, random functions

# Used in Data Science, AI, ML, Image Processing, Finance, IoT

# Difference: Python List vs NumPy Array


# | Python List             | NumPy Array           |
# | ----------------------- | --------------------- |
# | Stores mixed data types | Stores single dtype   |
# | Slow for computation    | Fast (C-optimized)    |
# | No broadcasting         | Supports broadcasting |
# | No vector math          | Vectorized operations |


In [None]:
# Problem: Calculate the time taken to square 1 million numbers.
# Concept: NumPy is much faster.

In [23]:
import numpy as np
import time

# Python list: square every element with a list comprehension.
lst = list(range(1_000_000))
start = time.perf_counter()  # perf_counter() is the high-resolution clock intended for timing short intervals
lst_sq = [x*x for x in lst]
print("List time:", time.perf_counter() - start)

# NumPy array: square every element with a single vectorized operation.
# The dtype is pinned to int64 because 999_999**2 does not fit in 32 bits; on
# platforms where NumPy's default integer is int32 the squares would silently
# wrap around instead of matching the Python-list result.
arr = np.arange(1_000_000, dtype=np.int64)
start = time.perf_counter()
arr_sq = arr ** 2
print("NumPy time:", time.perf_counter() - start)

List time: 0.041045427322387695
NumPy time: 0.0024809837341308594


In [1]:
import numpy as np
import time

list_data = list(range(1_000_000))
# int64 is explicit: the total (499_999_500_000) exceeds the int32 range, and
# np.sum accumulates in the platform default integer, which is 32-bit on some systems.
numpy_data = np.arange(1_000_000, dtype=np.int64)

# Time the built-in sum over a Python list; keep the result so both totals can be compared.
start = time.perf_counter()
list_total = sum(list_data)
print("List Time:", time.perf_counter() - start)

# Time the vectorized NumPy reduction over the same values.
start = time.perf_counter()
numpy_total = np.sum(numpy_data)
print("NumPy Time:", time.perf_counter() - start)

List Time: 0.003551959991455078
NumPy Time: 0.0006670951843261719


In [None]:
# Using array(): build a 1-D ndarray directly from a Python list
import numpy as np
a = np.array([10, 20, 30])

In [None]:
# arange(): evenly stepped values; `stop` itself is never included
np.arange(start=1, stop=11, step=2)  # 1, 3, 5, 7, 9

In [None]:
# linspace(): a fixed count of evenly spaced values over an interval
np.linspace(0, 1, num=5)  # 5 evenly spaced numbers from 0 to 1

In [None]:
# zeros(), ones(), eye()
# A notebook cell renders only its last expression, so each array is kept in a
# name and printed explicitly; otherwise the first two results are discarded.
zeros_2x3 = np.zeros((2, 3))    # 2 rows x 3 columns of 0.0
ones_3x3 = np.ones((3, 3))      # 3 rows x 3 columns of 1.0
identity_4 = np.eye(4)          # Identity matrix

print(zeros_2x3)
print(ones_3x3)
print(identity_4)

In [None]:
# Random Generation
# Each sample is stored and printed: a cell shows only its last expression,
# so bare calls on earlier lines would produce no visible output.
uniform_sample = np.random.rand(2, 3)          # uniform values in [0, 1), shape (2, 3)
random_int = np.random.randint(10, 50)         # one random integer, 10 <= n < 50
normal_sample = np.random.normal(50, 10, 5)    # 5 draws with mean=50, std=10

print(uniform_sample)
print(random_int)
print(normal_sample)

In [None]:
# Simulated customer footfall: one random count per day for a week.

# Seven integers drawn from 50 (inclusive) up to 200 (exclusive).
customer_count = np.random.randint(low=50, high=200, size=7)
print("Footfall for 7 days:", customer_count)

In [None]:
# 1D Slicing
sales = np.array([100, 200, 150, 300, 250])

# Both slices are printed; as bare expressions only the last one would be shown.
middle_three = sales[1:4]   # indices 1, 2, 3 (the stop index is excluded)
last_three = sales[-3:]     # a negative start counts back from the end
print(middle_three)
print(last_three)

In [None]:
# 2D Slicing: index as [rows, columns]
a = np.array([
    [10, 20, 30],
    [40, 50, 60],
    [70, 80, 90],
])

# Rows 0 and 1, every column from index 1 onward.
a[:2, 1:]

In [None]:
# Boolean Indexing: select the elements where a condition holds
sales = np.array([100, 250, 400, 50])
above_200 = sales > 200          # element-wise comparison gives a True/False mask
high_sales = sales[above_200]    # keep only the entries whose mask value is True

In [None]:
# Fancy Indexing: row and column index lists are paired position by position
rows = [0, 2]
cols = [1, 2]
a[rows, cols]   # picks (0,1) and (2,2)

In [None]:
# Filter Sales > 500
daily_sales = np.array([450, 520, 610, 490, 700])
exceeds_500 = daily_sales > 500                     # boolean mask, one flag per day
high_performance_days = daily_sales[exceeds_500]    # the sales figures (not day numbers) above 500

In [None]:
# Array Operations
# 1. Mathematical Operations (applied element by element)
a = np.array([10, 20, 30])
b = np.array([1, 2, 3])

# Each result is printed: as bare expressions only the final one would be shown.
print(a + b)   # [11 22 33]
print(a * b)   # [10 40 90]
print(a / b)   # [10. 10. 10.] -- true division yields floats

In [None]:
# Statistical Functions
sales = np.array([120, 140, 160])

# Every statistic is printed with a label; as bare expressions only the last
# one (min) would have been displayed and the rest silently dropped.
print("Sum:", sales.sum())
print("Mean:", sales.mean())
print("Std:", sales.std())
print("Max:", sales.max())
print("Min:", sales.min())

In [None]:
# Apply 10% discount
price = np.array([999, 1299, 799])
discount_factor = 0.90                  # customers pay 90% of the listed price
new_price = discount_factor * price     # the scalar is applied to every element

In [35]:
# Reshaping & Manipulation
# 1. reshape(): lay out the 12 values 0..11 as 3 rows x 4 columns
arr = np.reshape(np.arange(12), (3, 4))
arr

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

In [28]:
# ravel() / flatten(): both give the elements of `arr` as a 1-D array
# NOTE: a cell renders only its last expression, so the output recorded for
# this cell is the flatten() result; the ravel() result is computed and discarded.
arr.ravel()
arr.flatten()


array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11])

In [29]:
# transpose(): swap rows and columns (the `.T` attribute is shorthand for this)
arr.transpose()

array([[ 0,  4,  8],
       [ 1,  5,  9],
       [ 2,  6, 10],
       [ 3,  7, 11]])

In [33]:
# Stacking: combine arrays along an axis

a = np.array([1, 2, 3])
b = np.array([4, 5, 6])
result1 = np.hstack((a, b))   # joined end to end -> one 1-D array of 6 values
result2 = np.vstack((a, b))   # stacked as rows   -> 2 x 3 array
result1   # not rendered: a cell displays only its last expression
result2

array([[1, 2, 3],
       [4, 5, 6]])

In [37]:
#Splitting
import numpy as np

arr = np.array([10, 20, 30, 40])
# Cut the array into 2 equal sections; the pieces come back as a list of arrays.
result = np.split(arr, indices_or_sections=2)

print(result)

[array([10, 20]), array([30, 40])]


In [None]:
# Linear Algebra Basics
# 1. dot() & Matrix Multiplication
a = np.array([[1, 2], [3, 4]])
b = np.array([[5, 6], [7, 8]])

# For two 2-D arrays dot() performs matrix multiplication; the `@` operator is
# the equivalent, more readable spelling. Both results are kept so they can be compared.
dot_product = np.dot(a, b)
matmul_product = a @ b
dot_product

In [None]:
# Broadcasting Rules: a scalar is applied to every element of an array
prices = np.array([100, 200, 300])
gst = 0.18

gst_amount = prices * gst            # tax owed on each price
final_price = prices + gst_amount    # price including tax

In [None]:
# From Python List: hand an existing list to np.array()
import numpy as np
python_list = [10, 20, 30]
arr = np.array(python_list)
print(arr)

In [5]:
#2D Array
import numpy as np
# A list of equal-length rows becomes a 2-D array (here 2 rows x 3 columns).
first_row = [1, 2, 3]
second_row = [4, 5, 6]
matrix = np.array([first_row, second_row])
matrix

array([[1, 2, 3],
       [4, 5, 6]])

In [None]:
# Using arange(): values from start up to (not including) stop
np.arange(start=1, stop=10, step=2)   # 1, 3, 5, 7, 9

In [None]:
# Zeros, Ones
# Both arrays are printed; as bare expressions only the ones() result would be displayed.
zero_grid = np.zeros((2, 3))   # 2 x 3 array filled with 0.0
one_grid = np.ones((3, 3))     # 3 x 3 array filled with 1.0
print(zero_grid)
print(one_grid)

In [2]:
# Random Data (Useful in Analytics)
# `size=(5)` is just the integer 5 (parentheses alone do not make a tuple), so it is written plainly.
np.random.randint(10, 100, size=5)

array([26, 22, 61, 77, 24], dtype=int32)

In [None]:
# Exercise: create a 1D array of sales for 7 days

# A 2D array of sales: 3 stores × 7 days

# A random dataset of 100 customer ages

In [7]:
# Day labels and sales figures in one array. An ndarray holds a single dtype,
# so the integers are stored as strings here (see the quoted numbers in the
# printed output); keep numeric data in its own array if arithmetic is needed.
df = np.array([
    ['d1', 'd2', 'd3'],
    [200, 300, 450],
])
print(df)

# 100 random customer ages, each from 18 (inclusive) up to 80 (exclusive).
age = np.random.randint(18, 80, size=100)
age

[['d1' 'd2' 'd3']
 ['200' '300' '450']]


array([23, 37, 28, 67, 74, 32, 47, 68, 65, 53, 18, 31, 76, 63, 20, 40, 61,
       35, 55, 71, 57, 69, 72, 54, 74, 22, 48, 30, 70, 71, 57, 75, 41, 37,
       21, 54, 65, 33, 33, 56, 65, 34, 56, 66, 18, 71, 41, 37, 59, 79, 28,
       34, 43, 63, 39, 51, 58, 41, 54, 38, 54, 31, 67, 30, 73, 74, 51, 68,
       65, 39, 63, 38, 48, 77, 54, 67, 58, 30, 34, 36, 68, 36, 38, 24, 78,
       21, 35, 57, 33, 45, 20, 44, 40, 70, 42, 23, 46, 24, 42, 62],
      dtype=int32)

In [None]:
# Array Operations (Add, Multiply, Divide, Broadcasting)
# Vectorized Operations

# NumPy applies an operation to every element of an array at once, so no explicit Python loop is needed.

# Element-wise Mathematics

In [None]:
sales = np.array([100, 200, 300])
tax = np.array([10, 20, 30])
total = np.add(sales, tax)             # element-wise addition, same as sales + tax
discount = np.multiply(sales, 0.90)    # broadcasting: the scalar 0.90 is applied to each element

In [None]:
sales = np.array([
    [100, 150, 200],
    [80, 120, 160],
])

# The scalar is broadcast over both rows: every figure is multiplied by 1.10.
new_sales = 1.10 * sales

In [10]:
price = np.array([120, 150, 90, 300])
gst = 0.18

gst_amount = price * gst            # tax component of each price
final_price = price + gst_amount    # price including GST
final_price

array([141.6, 177. , 106.2, 354. ])

In [None]:
# Mathematical Functions (Mean, Max, Min, Sum)

In [None]:
# | Function      | Description        |
# | ------------- | ------------------ |
# | `np.sum()`    | Total sum          |
# | `np.mean()`   | Average            |
# | `np.median()` | Middle value       |
# | `np.max()`    | Maximum            |
# | `np.min()`    | Minimum            |
# | `np.std()`    | Standard Deviation |
# | `np.var()`    | Variance           |


In [None]:
sales = np.array([120, 340, 300, 500, 250])

# Label / reduction pairs, printed in order as "<label> <value>".
summary = (
    ("Total Sales:", np.sum),
    ("Average Sales:", np.mean),
    ("Max Sale:", np.max),
    ("Min Sale:", np.min),
    ("Std Dev:", np.std),
)
for label, stat in summary:
    print(label, stat(sales))

In [None]:
# Sales Analytics Using NumPy
# Analyze weekly sales data of 3 stores using NumPy.

In [21]:
import numpy as np
# Weekly sales: one row per store, one column per day (7 days).
sales = np.array([
    [200, 220, 250, 210, 180, 190, 300],  # Store A
    [150, 180, 200, 170, 160, 175, 225],  # Store B
    [300, 320, 290, 310, 330, 340, 350]   # Store C
])

# axis=1 reduces across the columns (days) -> one value per store.
# axis=0 reduces across the rows (stores)  -> one value per day.
total_sales = np.sum(sales, axis=1)   # Total sales per store
best_day = np.argmax(sales, axis=1)   # 0-based index of the best-selling day for each store
# Average weekly sales per store. This previously used axis=0, which averages
# across stores for each day and does not match the per-store label or the
# three-value result recorded for this variable.
avg_sales = np.mean(sales, axis=1)
day_total = np.sum(sales, axis=0)     # Day-wise total sales (all stores combined)
grown_sales = sales * 1.10            # Growth calculation: scalar broadcast over every entry

In [14]:
# Per-store totals: one value for each row of `sales`
total_sales

array([1550, 1260, 2240])

In [18]:
# 0-based column index of the highest-selling day for each store
best_day

array([6, 6, 6])

In [16]:
# NOTE(review): the output recorded for this cell has one value per store, i.e. an
# average taken along axis=1 -- confirm it matches the cell that defines avg_sales.
avg_sales

array([221.42857143, 180.        , 320.        ])

In [22]:
# Combined sales of all stores for each of the 7 days
day_total

array([650, 720, 740, 690, 670, 705, 875])

In [20]:
# Every sales figure multiplied by 1.10 (10% growth)
grown_sales

array([[220. , 242. , 275. , 231. , 198. , 209. , 330. ],
       [165. , 198. , 220. , 187. , 176. , 192.5, 247.5],
       [330. , 352. , 319. , 341. , 363. , 374. , 385. ]])