In [2]:
import numpy as np 


In [4]:
# =============================================================================
# NUMPY ARRAY CREATION AND ATTRIBUTES
# =============================================================================

# -----------------------------------------------------------------------------
# FUNCTION: np.array()
# -----------------------------------------------------------------------------
# ENGLISH:
# - What it does: Creates a NumPy array (ndarray) from a Python list, tuple, 
#   or other array-like objects. This is the most common way to create arrays.
# - Why use it: NumPy arrays are faster and more memory-efficient than Python 
#   lists for numerical operations. They support vectorized operations and 
#   broadcasting.
# - Parameters: 
#   * object: Array-like input (list, tuple, etc.)
#   * dtype: (optional) Data type of array elements (int, float, etc.)
# - Returns: ndarray object
# - Common use cases:
#   * Converting Python lists to NumPy arrays for ML/data processing
#   * Creating feature vectors, training data, or weight matrices
#   * Performing mathematical operations on collections of numbers
# - Example:
#   arr1 = np.array([1, 2, 3])           # 1D array
#   arr2 = np.array([[1, 2], [3, 4]])    # 2D array (matrix)
#   arr3 = np.array([1.5, 2.5], dtype=int)  # Specify data type (floats are truncated -> [1, 2])
#
# HINGLISH:
# - Yeh kya karta hai: Python list, tuple ya kisi bhi array-jaisi cheez se 
#   NumPy array (ndarray) banata hai. Array banana ka yeh sabse common tarika hai.
# - Kab use karein: NumPy arrays Python lists se zyada fast aur memory-efficient
#   hote hain numerical operations ke liye. Yeh vectorized operations aur 
#   broadcasting support karte hain.
# - Parameters:
#   * object: Array jaisa input (list, tuple, etc.)
#   * dtype: (optional) Array elements ka data type (int, float, etc.)
# - Returns: ndarray object
# - Common use cases:
#   * Python lists ko NumPy arrays mein convert karna ML/data processing ke liye
#   * Feature vectors, training data, ya weight matrices banana
#   * Numbers ke collections par mathematical operations karna
# - Example:
#   arr1 = np.array([1, 2, 3])           # 1D array
#   arr2 = np.array([[1, 2], [3, 4]])    # 2D array (matrix)
#   arr3 = np.array([1.5, 2.5], dtype=int)  # Data type specify karna (floats truncate hokar [1, 2] ban jate hain)
# -----------------------------------------------------------------------------

# 1D integer array holding the values 1..8, built from a plain Python list.
arr = np.array(list(range(1, 9)))

# -----------------------------------------------------------------------------
# BUILT-IN FUNCTION: type()
# -----------------------------------------------------------------------------
# ENGLISH:
# - What it does: Returns the class/type of the Python object
# - Why use it: To verify that you've created a NumPy array and not a regular
#   Python list. Useful for debugging and type checking.
# - Returns: <class 'numpy.ndarray'> for NumPy arrays
# - Common use cases:
#   * Debugging: Checking if conversion from list to array worked
#   * Type validation before performing NumPy-specific operations
#   * Understanding what kind of object you're working with
# - Example:
#   lst = [1, 2, 3]
#   type(lst)  # Returns: <class 'list'>
#   arr = np.array(lst)
#   type(arr)  # Returns: <class 'numpy.ndarray'>
#
# HINGLISH:
# - Yeh kya karta hai: Python object ka class/type return karta hai
# - Kab use karein: Yeh verify karne ke liye ki aapne NumPy array banaya hai
#   ya regular Python list. Debugging aur type checking ke liye useful hai.
# - Returns: NumPy arrays ke liye <class 'numpy.ndarray'>
# - Common use cases:
#   * Debugging: Check karna ki list se array conversion hua ya nahi
#   * NumPy-specific operations karne se pehle type validation
#   * Samajhna ki aap kis tarah ke object ke saath kaam kar rahe hain
# - Example:
#   lst = [1, 2, 3]
#   type(lst)  # Returns: <class 'list'>
#   arr = np.array(lst)
#   type(arr)  # Returns: <class 'numpy.ndarray'>
# -----------------------------------------------------------------------------

# -----------------------------------------------------------------------------
# ATTRIBUTE: .dtype
# -----------------------------------------------------------------------------
# ENGLISH:
# - What it does: Returns the data type of elements stored in the array
# - Why use it: Knowing data types is crucial for memory management and 
#   numerical precision in ML. Different dtypes use different memory amounts.
# - Returns: Data type object (e.g., int64, float64, int32, etc.)
# - Common use cases:
#   * Memory optimization: int8 uses less memory than int64
#   * Ensuring correct precision for calculations (float32 vs float64)
#   * Preventing type-related bugs in ML models
#   * Checking data types before feeding to neural networks
# - Example:
#   arr_int = np.array([1, 2, 3])        # dtype: int64 on most platforms (int32 on Windows with NumPy < 2.0)
#   arr_float = np.array([1.0, 2.0])     # dtype: float64
#   arr_specific = np.array([1, 2], dtype=np.float32)  # dtype: float32
#   # int8: -128 to 127 (1 byte), int64: much larger range (8 bytes)
#
# HINGLISH:
# - Yeh kya karta hai: Array mein stored elements ka data type return karta hai
# - Kab use karein: Data types jaanna ML mein memory management aur numerical 
#   precision ke liye bahut zaroori hai. Different dtypes different memory use 
#   karte hain.
# - Returns: Data type object (jaise int64, float64, int32, etc.)
# - Common use cases:
#   * Memory optimization: int8, int64 se kam memory use karta hai
#   * Calculations ke liye sahi precision ensure karna (float32 vs float64)
#   * ML models mein type-related bugs se bachna
#   * Neural networks ko data dene se pehle data types check karna
# - Example:
#   arr_int = np.array([1, 2, 3])        # dtype: zyadatar platforms par int64 (Windows + NumPy < 2.0 par int32)
#   arr_float = np.array([1.0, 2.0])     # dtype: float64
#   arr_specific = np.array([1, 2], dtype=np.float32)  # dtype: float32
#   # int8: -128 se 127 (1 byte), int64: bahut bada range (8 bytes)
# -----------------------------------------------------------------------------

# -----------------------------------------------------------------------------
# ATTRIBUTE: .shape
# -----------------------------------------------------------------------------
# ENGLISH:
# - What it does: Returns a tuple representing the dimensions of the array
# - Why use it: Essential for understanding array structure, especially in ML
#   where you need to know dimensions for matrix operations, reshaping, and
#   ensuring compatibility between layers in neural networks.
# - Returns: Tuple of integers (e.g., (8,) for 1D, (3, 4) for 2D)
# - Common use cases:
#   * Verifying data dimensions before training ML models
#   * Debugging shape mismatches in matrix multiplication
#   * Reshaping data for neural network input layers
#   * Understanding feature dimensions in datasets
# - Example:
#   arr_1d = np.array([1, 2, 3, 4])           # shape: (4,)
#   arr_2d = np.array([[1, 2], [3, 4]])       # shape: (2, 2)
#   arr_3d = np.array([[[1, 2]], [[3, 4]]])   # shape: (2, 1, 2)
#   # In ML: X_train.shape might be (1000, 784) meaning 1000 samples, 
#   # 784 features
#
# HINGLISH:
# - Yeh kya karta hai: Array ke dimensions ko represent karne wala tuple return
#   karta hai
# - Kab use karein: Array structure samajhne ke liye bahut zaroori hai, 
#   khaaskar ML mein jahan aapko matrix operations, reshaping, aur neural 
#   networks mein layers ke beech compatibility ke liye dimensions jaanna 
#   padta hai.
# - Returns: Integers ka tuple (jaise (8,) for 1D, (3, 4) for 2D)
# - Common use cases:
#   * ML models train karne se pehle data dimensions verify karna
#   * Matrix multiplication mein shape mismatches debug karna
#   * Neural network input layers ke liye data reshape karna
#   * Datasets mein feature dimensions samajhna
# - Example:
#   arr_1d = np.array([1, 2, 3, 4])           # shape: (4,)
#   arr_2d = np.array([[1, 2], [3, 4]])       # shape: (2, 2)
#   arr_3d = np.array([[[1, 2]], [[3, 4]]])   # shape: (2, 1, 2)
#   # ML mein: X_train.shape (1000, 784) ho sakta hai matlab 1000 samples,
#   # 784 features
# -----------------------------------------------------------------------------

# One line of output: values, Python type, element dtype, shape (space-separated).
print(f"{arr} {type(arr)} {arr.dtype} {arr.shape}")
# Output: [1 2 3 4 5 6 7 8] <class 'numpy.ndarray'> int32 (8,)
# Explanation: Prints array values, confirms it's ndarray type, shows the
# default integer dtype (int32 in this run; int64 on Linux/macOS and on Windows
# with NumPy >= 2.0), and (8,) shape meaning 1D array with 8 elements

[1 2 3 4 5 6 7 8] <class 'numpy.ndarray'> int32 (8,)


In [6]:
# =============================================================================
# 2D NUMPY ARRAY (MATRIX) CREATION
# =============================================================================

# -----------------------------------------------------------------------------
# CONCEPT: 2D Array / Matrix Creation
# -----------------------------------------------------------------------------
# ENGLISH:
# - What it does: Creates a 2-dimensional NumPy array (matrix) using nested 
#   lists. Each inner list becomes a row in the matrix.
# - Why use it: 2D arrays are fundamental in ML for representing datasets,
#   images, weight matrices in neural networks, and performing linear algebra
#   operations like matrix multiplication, dot products, etc.
# - Structure: [[row1], [row2], [row3], ...]
#   * Each inner list must have the same length (rectangular shape required)
#   * Rows are the first dimension, columns are the second dimension
# - Common use cases:
#   * Storing tabular data (like CSV data with rows and columns)
#   * Representing images (though images are often 3D with color channels)
#   * Creating weight matrices for neural network layers
#   * Linear algebra operations (matrix multiplication, eigenvalues, etc.)
#   * Storing feature matrices in ML (rows=samples, columns=features)
# - Example:
#   # Dataset with 3 samples and 4 features each
#   X = np.array([[1.2, 2.3, 3.4, 4.5],    # Sample 1
#                 [5.6, 6.7, 7.8, 8.9],    # Sample 2
#                 [9.0, 1.1, 2.2, 3.3]])   # Sample 3
#   # Shape: (3, 4) means 3 rows, 4 columns
#   
#   # Grayscale image (5x5 pixels)
#   img = np.array([[0, 50, 100, 150, 200],
#                   [25, 75, 125, 175, 225],
#                   [50, 100, 150, 200, 250],
#                   [75, 125, 175, 225, 255],
#                   [100, 150, 200, 250, 255]])
#
# HINGLISH:
# - Yeh kya karta hai: Nested lists use karke 2-dimensional NumPy array 
#   (matrix) banata hai. Har inner list matrix mein ek row ban jati hai.
# - Kab use karein: 2D arrays ML mein bahut fundamental hain - datasets 
#   represent karne ke liye, images ke liye, neural networks mein weight 
#   matrices ke liye, aur linear algebra operations jaise matrix 
#   multiplication, dot products, etc. ke liye.
# - Structure: [[row1], [row2], [row3], ...]
#   * Har inner list ki length same honi chahiye (rectangular shape zaroori)
#   * Rows pehla dimension hain, columns doosra dimension hain
# - Common use cases:
#   * Tabular data store karna (jaise CSV data with rows aur columns)
#   * Images represent karna (lekin images aksar 3D hote hain color channels ke saath)
#   * Neural network layers ke liye weight matrices banana
#   * Linear algebra operations (matrix multiplication, eigenvalues, etc.)
#   * ML mein feature matrices store karna (rows=samples, columns=features)
# - Example:
#   # Dataset with 3 samples aur har ek mein 4 features
#   X = np.array([[1.2, 2.3, 3.4, 4.5],    # Sample 1
#                 [5.6, 6.7, 7.8, 8.9],    # Sample 2
#                 [9.0, 1.1, 2.2, 3.3]])   # Sample 3
#   # Shape: (3, 4) matlab 3 rows, 4 columns
#   
#   # Grayscale image (5x5 pixels)
#   img = np.array([[0, 50, 100, 150, 200],
#                   [25, 75, 125, 175, 225],
#                   [50, 100, 150, 200, 250],
#                   [75, 125, 175, 225, 255],
#                   [100, 150, 200, 250, 255]])
# -----------------------------------------------------------------------------

# 3x3 matrix assembled from three equal-length inner lists (one list per row).
arr2D = np.array(
    [
        [1, 2, 3],  # row index 0
        [4, 5, 6],  # row index 1
        [7, 8, 9],  # row index 2
    ]
)

# -----------------------------------------------------------------------------
# ATTRIBUTE: .shape for 2D Arrays
# -----------------------------------------------------------------------------
# ENGLISH:
# - What it does: For 2D arrays, returns a tuple (rows, columns) representing
#   the matrix dimensions
# - Why use it: Critical for understanding data structure in ML. The shape
#   tells you how many samples and features you have, or the dimensions of
#   weight matrices that need to match for matrix operations.
# - Returns: Tuple (rows, columns) e.g., (3, 3) for a 3x3 matrix
# - Shape interpretation in ML:
#   * (m, n) where m = number of samples/rows, n = number of features/columns
#   * For images: (height, width) or (height, width, channels)
#   * For weight matrices: (input_features, output_features)
# - Common use cases:
#   * Verifying dataset dimensions: "Do I have 1000 samples with 20 features?"
#   * Checking matrix multiplication compatibility: A(m,n) @ B(n,p) = C(m,p)
#   * Debugging reshape operations
#   * Ensuring data matches model input requirements
# - Example:
#   X_train = np.array([[1, 2, 3], [4, 5, 6]])  # shape: (2, 3)
#   # 2 samples (rows), 3 features (columns)
#   
#   weights = np.array([[0.1, 0.2],
#                       [0.3, 0.4],
#                       [0.5, 0.6]])  # shape: (3, 2)
#   # Can multiply: X_train @ weights because (2,3) @ (3,2) = (2,2) ✓
#   
#   img_gray = np.array([[0, 255], [128, 64]])  # shape: (2, 2)
#   # 2x2 pixel grayscale image
#
# HINGLISH:
# - Yeh kya karta hai: 2D arrays ke liye, tuple (rows, columns) return karta
#   hai jo matrix dimensions represent karta hai
# - Kab use karein: ML mein data structure samajhne ke liye bahut zaroori hai.
#   Shape aapko batata hai ki kitne samples aur features hain, ya weight 
#   matrices ke dimensions kya hain jo matrix operations ke liye match hone 
#   chahiye.
# - Returns: Tuple (rows, columns) jaise (3, 3) for a 3x3 matrix
# - ML mein shape interpretation:
#   * (m, n) jahan m = samples/rows ki sankhya, n = features/columns ki sankhya
#   * Images ke liye: (height, width) ya (height, width, channels)
#   * Weight matrices ke liye: (input_features, output_features)
# - Common use cases:
#   * Dataset dimensions verify karna: "Kya mere paas 1000 samples hain with 20 features?"
#   * Matrix multiplication compatibility check karna: A(m,n) @ B(n,p) = C(m,p)
#   * Reshape operations debug karna
#   * Data ko model input requirements se match karna
# - Example:
#   X_train = np.array([[1, 2, 3], [4, 5, 6]])  # shape: (2, 3)
#   # 2 samples (rows), 3 features (columns)
#   
#   weights = np.array([[0.1, 0.2],
#                       [0.3, 0.4],
#                       [0.5, 0.6]])  # shape: (3, 2)
#   # Multiply kar sakte hain: X_train @ weights kyunki (2,3) @ (3,2) = (2,2) ✓
#   
#   img_gray = np.array([[0, 255], [128, 64]])  # shape: (2, 2)
#   # 2x2 pixel grayscale image
# -----------------------------------------------------------------------------

# Matrix contents followed by its (rows, columns) shape, separated by a space.
print(f"{arr2D} {arr2D.shape}")
# Output: 
# [[1 2 3]
#  [4 5 6]
#  [7 8 9]] (3, 3)
# 
# Explanation: 
# - Prints the 2D array showing all 9 elements arranged in 3 rows and 3 columns
# - Shape (3, 3) confirms it's a 3x3 square matrix
# - First 3 means 3 rows, second 3 means 3 columns
# - Total elements = 3 × 3 = 9

# =============================================================================
# ADDITIONAL IMPORTANT CONCEPTS FOR 2D ARRAYS
# =============================================================================
# ENGLISH:
# Row vs Column Access:
# - arr2D[0] gives first row: [1, 2, 3]
# - arr2D[:, 0] gives first column: [1, 4, 7]
# - arr2D[1, 2] gives element at row 1, column 2: 6
#
# Matrix Operations:
# - arr2D.T or arr2D.transpose() for transpose (flip rows/columns)
# - arr2D @ arr2D for matrix multiplication
# - arr2D * arr2D for element-wise multiplication
#
# HINGLISH:
# Row vs Column Access:
# - arr2D[0] pehli row deta hai: [1, 2, 3]
# - arr2D[:, 0] pehla column deta hai: [1, 4, 7]
# - arr2D[1, 2] row 1, column 2 par element deta hai: 6
#
# Matrix Operations:
# - arr2D.T ya arr2D.transpose() transpose ke liye (rows/columns flip)
# - arr2D @ arr2D matrix multiplication ke liye
# - arr2D * arr2D element-wise multiplication ke liye
# =============================================================================

[[1 2 3]
 [4 5 6]
 [7 8 9]] (3, 3)


In [8]:
# =============================================================================
# NUMPY ARRAY PROPERTIES - COMPREHENSIVE OVERVIEW
# =============================================================================

# -----------------------------------------------------------------------------
# CREATING A NON-SQUARE 2D ARRAY FOR PROPERTY EXPLORATION
# -----------------------------------------------------------------------------
# ENGLISH:
# - What it does: Creates a 2D array (matrix) with different number of rows
#   and columns (3 rows × 4 columns = non-square matrix)
# - Why use it: Non-square matrices are very common in ML - your dataset
#   usually has different number of samples than features (e.g., 1000 samples
#   with 784 features for MNIST digit images)
# - Structure: This is a 3×4 matrix (3 rows, 4 columns)
# - Common use cases:
#   * Storing datasets where rows=samples, columns=features
#   * Weight matrices connecting layers of different sizes in neural networks
#   * Transformation matrices that change dimensionality
# - Example:
#   # Customer data: 3 customers, 4 features (age, income, score, tenure)
#   customers = np.array([[25, 50000, 720, 2],
#                         [35, 75000, 680, 5],
#                         [45, 90000, 750, 10]])
#   # Shape: (3, 4) - 3 customers, 4 features each
#
# HINGLISH:
# - Yeh kya karta hai: Ek 2D array (matrix) banata hai jismein rows aur 
#   columns ki sankhya alag hai (3 rows × 4 columns = non-square matrix)
# - Kab use karein: Non-square matrices ML mein bahut common hain - aapka
#   dataset mein usually samples aur features ki sankhya alag hoti hai
#   (jaise 1000 samples with 784 features for MNIST digit images)
# - Structure: Yeh ek 3×4 matrix hai (3 rows, 4 columns)
# - Common use cases:
#   * Datasets store karna jahan rows=samples, columns=features
#   * Neural networks mein different sizes ki layers ko connect karne ke liye
#     weight matrices
#   * Transformation matrices jo dimensionality change karti hain
# - Example:
#   # Customer data: 3 customers, 4 features (age, income, score, tenure)
#   customers = np.array([[25, 50000, 720, 2],
#                         [35, 75000, 680, 5],
#                         [45, 90000, 750, 10]])
#   # Shape: (3, 4) - 3 customers, har ek mein 4 features
# -----------------------------------------------------------------------------

# Non-square 3x4 matrix (3 rows, 4 columns) used for the property demos below.
test_arra = np.array(
    [
        [1, 2, 3, 4],     # row index 0 -> 4 values
        [5, 6, 7, 8],     # row index 1 -> 4 values
        [9, 10, 11, 12],  # row index 2 -> 4 values
    ]
)

# =============================================================================
# PROPERTY 1: .shape
# =============================================================================
# ENGLISH:
# - What it does: Returns the dimensions of the array as a tuple
# - Why use it: Most important property for understanding array structure.
#   Essential for debugging shape mismatches in ML operations.
# - Returns: Tuple of integers representing size of each dimension
#   * For this array: (3, 4) means 3 rows, 4 columns
# - How to interpret:
#   * 1D array: (n,) - single dimension with n elements
#   * 2D array: (m, n) - m rows, n columns
#   * 3D array: (l, m, n) - l matrices, each with m rows and n columns
#   * nD array: (d1, d2, d3, ..., dn) - n dimensions
# - Common use cases:
#   * Checking if data matches model input requirements
#   * Verifying matrix multiplication compatibility: (m,n) @ (n,p) → (m,p)
#   * Understanding data layout before reshaping
#   * Debugging "shape mismatch" errors in neural networks
# - Example:
#   arr_1d = np.array([1, 2, 3])                    # shape: (3,)
#   arr_2d = np.array([[1, 2], [3, 4], [5, 6]])    # shape: (3, 2)
#   arr_3d = np.array([[[1, 2]], [[3, 4]]])        # shape: (2, 1, 2)
#   
#   # ML example: MNIST dataset
#   X_train = np.zeros((60000, 784))  # 60000 images, 784 features (28×28)
#   X_train.shape  # Returns: (60000, 784)
#
# HINGLISH:
# - Yeh kya karta hai: Array ke dimensions ko tuple ke roop mein return karta hai
# - Kab use karein: Array structure samajhne ke liye sabse important property.
#   ML operations mein shape mismatches debug karne ke liye essential hai.
# - Returns: Integers ka tuple jo har dimension ka size represent karta hai
#   * Is array ke liye: (3, 4) matlab 3 rows, 4 columns
# - Kaise interpret karein:
#   * 1D array: (n,) - ek hi dimension with n elements
#   * 2D array: (m, n) - m rows, n columns
#   * 3D array: (l, m, n) - l matrices, har ek mein m rows aur n columns
#   * nD array: (d1, d2, d3, ..., dn) - n dimensions
# - Common use cases:
#   * Check karna ki data model input requirements se match kar raha hai
#   * Matrix multiplication compatibility verify karna: (m,n) @ (n,p) → (m,p)
#   * Reshape karne se pehle data layout samajhna
#   * Neural networks mein "shape mismatch" errors debug karna
# - Example:
#   arr_1d = np.array([1, 2, 3])                    # shape: (3,)
#   arr_2d = np.array([[1, 2], [3, 4], [5, 6]])    # shape: (3, 2)
#   arr_3d = np.array([[[1, 2]], [[3, 4]]])        # shape: (2, 1, 2)
#   
#   # ML example: MNIST dataset
#   X_train = np.zeros((60000, 784))  # 60000 images, 784 features (28×28)
#   X_train.shape  # Returns: (60000, 784)
# -----------------------------------------------------------------------------

# Dimensions tuple of the matrix: (rows, columns).
print(f"{test_arra.shape}")
# Output: (3, 4)
# Meaning: 3 rows (first dimension), 4 columns (second dimension)

# =============================================================================
# PROPERTY 2: .size
# =============================================================================
# ENGLISH:
# - What it does: Returns the total number of elements in the array
# - Why use it: Useful for calculating memory usage, understanding total data
#   points, and verifying array operations. Size = product of all dimensions.
# - Returns: Single integer (total element count)
# - Formula: For shape (3, 4), size = 3 × 4 = 12
# - Difference from .shape:
#   * .shape gives dimensions: (3, 4)
#   * .size gives total elements: 12
# - Common use cases:
#   * Calculating memory consumption: size × dtype.itemsize = bytes
#   * Verifying flatten/ravel operations didn't lose data
#   * Checking if two arrays have same number of elements
#   * Understanding data volume for processing time estimates
# - Example:
#   arr = np.array([[1, 2, 3], [4, 5, 6]])
#   arr.shape  # (2, 3)
#   arr.size   # 6 (because 2 × 3 = 6)
#   
#   # Memory calculation
#   arr.size * arr.dtype.itemsize  # 6 × 8 = 48 bytes (for int64)
#   
#   # 3D array
#   arr_3d = np.zeros((2, 3, 4))
#   arr_3d.shape  # (2, 3, 4)
#   arr_3d.size   # 24 (because 2 × 3 × 4 = 24)
#
# HINGLISH:
# - Yeh kya karta hai: Array mein total elements ki sankhya return karta hai
# - Kab use karein: Memory usage calculate karne ke liye, total data points
#   samajhne ke liye, aur array operations verify karne ke liye useful hai.
#   Size = sabhi dimensions ka product.
# - Returns: Single integer (total element count)
# - Formula: Shape (3, 4) ke liye, size = 3 × 4 = 12
# - .shape se difference:
#   * .shape dimensions deta hai: (3, 4)
#   * .size total elements deta hai: 12
# - Common use cases:
#   * Memory consumption calculate karna: size × dtype.itemsize = bytes
#   * Flatten/ravel operations verify karna ki data loss nahi hua
#   * Check karna ki do arrays mein same number of elements hain
#   * Processing time estimate ke liye data volume samajhna
# - Example:
#   arr = np.array([[1, 2, 3], [4, 5, 6]])
#   arr.shape  # (2, 3)
#   arr.size   # 6 (kyunki 2 × 3 = 6)
#   
#   # Memory calculation
#   arr.size * arr.dtype.itemsize  # 6 × 8 = 48 bytes (int64 ke liye)
#   
#   # 3D array
#   arr_3d = np.zeros((2, 3, 4))
#   arr_3d.shape  # (2, 3, 4)
#   arr_3d.size   # 24 (kyunki 2 × 3 × 4 = 24)
# -----------------------------------------------------------------------------

# Total element count (product of all dimension lengths).
print(f"{test_arra.size}")
# Output: 12
# Meaning: Total 12 elements in the array (3 rows × 4 columns = 12)

# =============================================================================
# PROPERTY 3: .dtype
# =============================================================================
# ENGLISH:
# - What it does: Returns the data type of array elements
# - Why use it: Critical for memory optimization and numerical precision in ML.
#   Different dtypes consume different memory and have different precision.
# - Returns: Data type object (int8, int16, int32, int64, float32, float64, etc.)
# - Common data types:
#   * int8: -128 to 127 (1 byte) - good for small integers, saves memory
#   * int16: -32768 to 32767 (2 bytes)
#   * int32: ~-2 billion to 2 billion (4 bytes)
#   * int64: very large range (8 bytes) - default for integers on most
#     platforms (Windows with NumPy < 2.0 defaults to int32)
#   * float32: ~7 decimal digits precision (4 bytes) - common in deep learning
#   * float64: ~15 decimal digits precision (8 bytes) - default for floats
#   * bool: True/False (1 byte)
# - Memory impact:
#   * Array with 1 million int8 elements = 1 MB
#   * Same array with int64 elements = 8 MB (8× larger!)
# - Common use cases:
#   * Reducing model memory footprint by using float32 instead of float64
#   * Ensuring correct data type before feeding to neural networks
#   * Converting data types to prevent overflow/underflow
#   * Matching data types for operations (NumPy upcasts mixed int/float
#     automatically, but in-place ops like int_arr += 0.5 raise a casting error)
# - Example:
#   arr_int = np.array([1, 2, 3])           # dtype: platform default integer (int64; int32 on Windows with NumPy < 2.0)
#   arr_float = np.array([1.0, 2.0, 3.0])  # dtype: float64 (default)
#   arr_int8 = np.array([1, 2, 3], dtype=np.int8)      # dtype: int8
#   arr_float32 = np.array([1.0, 2.0], dtype=np.float32)  # dtype: float32
#   
#   # Type conversion
#   arr_int.astype(np.float32)  # Convert int64 to float32
#
# HINGLISH:
# - Yeh kya karta hai: Array elements ka data type return karta hai
# - Kab use karein: ML mein memory optimization aur numerical precision ke 
#   liye bahut important hai. Different dtypes alag memory consume karte hain
#   aur alag precision dete hain.
# - Returns: Data type object (int8, int16, int32, int64, float32, float64, etc.)
# - Common data types:
#   * int8: -128 se 127 (1 byte) - small integers ke liye, memory bachata hai
#   * int16: -32768 se 32767 (2 bytes)
#   * int32: ~-2 billion se 2 billion (4 bytes)
#   * int64: bahut bada range (8 bytes) - zyadatar platforms par integers ke
#     liye default (Windows + NumPy < 2.0 par default int32 hota hai)
#   * float32: ~7 decimal digits precision (4 bytes) - deep learning mein common
#   * float64: ~15 decimal digits precision (8 bytes) - floats ke liye default
#   * bool: True/False (1 byte)
# - Memory impact:
#   * 1 million int8 elements wala array = 1 MB
#   * Same array with int64 elements = 8 MB (8× zyada bada!)
# - Common use cases:
#   * float32 use karke float64 ki jagah model memory footprint kam karna
#   * Neural networks ko data dene se pehle sahi data type ensure karna
#   * Overflow/underflow se bachne ke liye data types convert karna
#   * Operations ke liye data types match karna (NumPy int/float mix ko khud
#     upcast karta hai, lekin int_arr += 0.5 jaise in-place ops casting error dete hain)
# - Example:
#   arr_int = np.array([1, 2, 3])           # dtype: platform ka default integer (int64; Windows + NumPy < 2.0 par int32)
#   arr_float = np.array([1.0, 2.0, 3.0])  # dtype: float64 (default)
#   arr_int8 = np.array([1, 2, 3], dtype=np.int8)      # dtype: int8
#   arr_float32 = np.array([1.0, 2.0], dtype=np.float32)  # dtype: float32
#   
#   # Type conversion
#   arr_int.astype(np.float32)  # int64 ko float32 mein convert karna
# -----------------------------------------------------------------------------

# Element data type of the matrix (the platform's default integer dtype).
print(f"{test_arra.dtype}")
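# Output: int32 (in this run; int64 on Linux/macOS and on Windows with
#         NumPy >= 2.0)
# Meaning: Each element is a 32-bit integer here (4 bytes per element)
# Total memory for this array: 12 elements × 4 bytes = 48 bytes
# (with int64 it would be 12 × 8 = 96 bytes; test_arra.nbytes reports the actual value)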

# =============================================================================
# PROPERTY 4: .ndim
# =============================================================================
# ENGLISH:
# - What it does: Returns the number of dimensions (axes) of the array
# - Why use it: Helps understand array complexity. Essential for knowing if
#   you're working with vectors (1D), matrices (2D), tensors (3D+), etc.
# - Returns: Integer representing number of dimensions
#   * 0D: scalar (single value) - rare, usually just numbers
#   * 1D: vector (list of values) - feature vector, time series
#   * 2D: matrix (table of values) - datasets, images (grayscale)
#   * 3D: tensor (cube of values) - RGB images, video frames, batches
#   * 4D+: higher-order tensors - video batches, medical scans
# - Relationship with .shape:
#   * .ndim = length of .shape tuple
#   * shape (3, 4) → ndim = 2
#   * shape (10, 28, 28, 3) → ndim = 4
# - Common use cases:
#   * Checking if data needs reshaping before processing
#   * Validating input dimensions for neural network layers
#   * Determining if data is batched (extra dimension for batch)
#   * Understanding data structure complexity
# - Example:
#   arr_0d = np.array(42)                    # ndim: 0 (scalar)
#   arr_1d = np.array([1, 2, 3])            # ndim: 1 (vector)
#   arr_2d = np.array([[1, 2], [3, 4]])     # ndim: 2 (matrix)
#   arr_3d = np.zeros((10, 28, 28))         # ndim: 3 (tensor)
#   arr_4d = np.zeros((32, 28, 28, 3))      # ndim: 4 (batch of RGB images)
#   
#   # ML example: Image data
#   single_image = np.zeros((28, 28, 3))     # ndim: 3 (height, width, channels)
#   batch_images = np.zeros((100, 28, 28, 3)) # ndim: 4 (batch, height, width, channels)
#
# HINGLISH:
# - Yeh kya karta hai: Array ke dimensions (axes) ki sankhya return karta hai
# - Kab use karein: Array complexity samajhne mein madad karta hai. Yeh
#   jaanne ke liye essential hai ki aap vectors (1D), matrices (2D), ya 
#   tensors (3D+) ke saath kaam kar rahe hain.
# - Returns: Integer jo dimensions ki sankhya represent karta hai
#   * 0D: scalar (single value) - rare, usually sirf numbers
#   * 1D: vector (values ki list) - feature vector, time series
#   * 2D: matrix (values ki table) - datasets, images (grayscale)
#   * 3D: tensor (values ka cube) - RGB images, video frames, batches
#   * 4D+: higher-order tensors - video batches, medical scans
# - .shape ke saath relationship:
#   * .ndim = .shape tuple ki length
#   * shape (3, 4) → ndim = 2
#   * shape (10, 28, 28, 3) → ndim = 4
# - Common use cases:
#   * Check karna ki processing se pehle data ko reshape karna hai
#   * Neural network layers ke liye input dimensions validate karna
#   * Determine karna ki data batched hai (batch ke liye extra dimension)
#   * Data structure complexity samajhna
# - Example:
#   arr_0d = np.array(42)                    # ndim: 0 (scalar)
#   arr_1d = np.array([1, 2, 3])            # ndim: 1 (vector)
#   arr_2d = np.array([[1, 2], [3, 4]])     # ndim: 2 (matrix)
#   arr_3d = np.zeros((10, 28, 28))         # ndim: 3 (tensor)
#   arr_4d = np.zeros((32, 28, 28, 3))      # ndim: 4 (RGB images ka batch)
#   
#   # ML example: Image data
#   single_image = np.zeros((28, 28, 3))     # ndim: 3 (height, width, channels)
#   batch_images = np.zeros((100, 28, 28, 3)) # ndim: 4 (batch, height, width, channels)
# -----------------------------------------------------------------------------

# Number of axes; equals len(test_arra.shape).
print(f"{test_arra.ndim}")
# Output: 2
# Meaning: This is a 2-dimensional array (matrix with rows and columns)

# =============================================================================
# SUMMARY OF ALL PROPERTIES FOR test_arra
# =============================================================================
# Array: [[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]]
# 
# .shape  → (3, 4)   : 3 rows, 4 columns
# .size   → 12       : Total 12 elements (3 × 4)
# .dtype  → int32    : Default integer dtype in this run (4 bytes per element);
#                      int64 (8 bytes) on Linux/macOS or with NumPy >= 2.0
# .ndim   → 2        : 2-dimensional array (matrix)
# 
# Memory calculation: 12 elements × itemsize = 48 bytes for int32 (96 bytes for int64)
# =============================================================================

# =============================================================================
# BONUS: HOW THESE PROPERTIES RELATE IN ML
# =============================================================================
# ENGLISH:
# In Machine Learning, these properties help you understand your data:
# - .ndim tells you the type: 1D=features, 2D=dataset, 3D=images, 4D=batches
# - .shape tells you exact dimensions: (samples, features) or (height, width)
# - .size tells you total data points: affects memory and processing time
# - .dtype tells you precision and memory: float32 vs float64 can halve memory
#
# Example workflow:
# 1. Load data → check .shape to verify dimensions
# 2. Check .dtype → convert to float32 if needed for memory
# 3. Verify .ndim → add/remove dimensions for model compatibility
# 4. Monitor .size → estimate processing time and memory needs
#
# HINGLISH:
# Machine Learning mein, yeh properties aapko data samajhne mein madad karti hain:
# - .ndim type batata hai: 1D=features, 2D=dataset, 3D=images, 4D=batches
# - .shape exact dimensions batata hai: (samples, features) ya (height, width)
# - .size total data points batata hai: memory aur processing time ko affect karta hai
# - .dtype precision aur memory batata hai: float32 vs float64 memory half kar sakta hai
#
# Example workflow:
# 1. Data load karo → dimensions verify karne ke liye .shape check karo
# 2. .dtype check karo → memory ke liye zaroorat ho to float32 mein convert karo
# 3. .ndim verify karo → model compatibility ke liye dimensions add/remove karo
# 4. .size monitor karo → processing time aur memory needs estimate karo
# =============================================================================

(3, 4)
12
int32
2


In [10]:
# =============================================================================
# NUMPY ARRAY RESHAPING OPERATIONS
# =============================================================================

# -----------------------------------------------------------------------------
# CREATING BASE ARRAY FOR RESHAPING DEMONSTRATIONS
# -----------------------------------------------------------------------------
# ENGLISH:
# - What it does: Creates a 2D array with 2 rows and 3 columns (2×3 matrix)
# - Why use it: This array will be used to demonstrate various reshaping
#   operations which are fundamental in ML for transforming data between
#   different formats (e.g., image to vector, batch to single sample)
# - Structure: 2 rows, 3 columns, total 6 elements
# - Note: Total elements (6) must be preserved during reshaping operations
#
# HINGLISH:
# - Yeh kya karta hai: 2 rows aur 3 columns ke saath ek 2D array banata hai
#   (2×3 matrix)
# - Kab use karein: Is array ko various reshaping operations demonstrate
#   karne ke liye use karenge jo ML mein data ko different formats mein
#   transform karne ke liye fundamental hain (jaise image to vector, batch
#   to single sample)
# - Structure: 2 rows, 3 columns, total 6 elements
# - Note: Reshaping operations ke dauraan total elements (6) preserve hone chahiye
# -----------------------------------------------------------------------------

# Base matrix shared by every reshaping demo below: 2 rows x 3 columns.
arr_test = np.array(
    [
        [1, 2, 3],  # Row 0
        [4, 5, 6],  # Row 1
    ]
)

# print() separates its two arguments with a space, so the shape tuple
# appears right after the last row of the matrix.
print(arr_test, arr_test.shape)
# Output: [[1 2 3]
#          [4 5 6]] (2, 3)
# 2 rows × 3 columns = 6 elements; every reshape below must keep that count.

# =============================================================================
# OPERATION 1: .reshape()
# =============================================================================
# ENGLISH:
# - What it does: Changes the shape/dimensions of an array WITHOUT changing
#   the data or total number of elements. Returns a view of the same data
#   when possible (otherwise a copy).
# - Why use it: Critical for preparing data for neural networks. Different
#   layers expect different input shapes. You often need to convert between
#   formats (matrix → vector, vector → matrix, add batch dimension, etc.)
# - Syntax: array.reshape(new_shape) or array.reshape((dim1, dim2, ...))
# - CRITICAL RULE: new_shape must have same total elements as original
#   * Original: (2, 3) = 6 elements
#   * Valid reshapes: (3, 2), (6,), (1, 6), (6, 1), (1, 2, 3), etc.
#   * Invalid: (2, 2) = 4 elements ❌ (doesn't match 6)
# - Special value -1: NumPy auto-calculates that dimension
#   * arr.reshape(-1) → flattens to 1D
#   * arr.reshape(3, -1) → 3 rows, auto-calculate columns → (3, 2)
#   * arr.reshape(-1, 1) → column vector (6, 1)
# - Common use cases:
#   * Flattening images: (28, 28) → (784,) for fully connected layers
#   * Adding batch dimension: (784,) → (1, 784) for single sample prediction
#   * Preparing data for CNNs: (60000, 784) → (60000, 28, 28, 1)
#   * Converting between row/column vectors: (n,) → (n, 1) or (1, n)
#   * Reshaping model outputs: (batch, classes) → desired format
# - reshape() vs resize():
#   * reshape() returns a new array object (a view when possible); the original keeps its shape
#   * resize() modifies array in-place (changes original)
# - Example:
#   # Image preprocessing
#   img_flat = np.array([...])  # shape: (784,) - flattened
#   img_2d = img_flat.reshape(28, 28)  # shape: (28, 28) - for visualization
#   
#   # Adding batch dimension
#   single_sample = np.array([1, 2, 3, 4])  # shape: (4,)
#   batched = single_sample.reshape(1, -1)   # shape: (1, 4) - batch of 1
#   
#   # Auto-calculate dimension
#   arr = np.arange(12)  # shape: (12,)
#   arr.reshape(3, -1)   # shape: (3, 4) - auto-calculates 4 columns
#   arr.reshape(-1, 2)   # shape: (6, 2) - auto-calculates 6 rows
#
# HINGLISH:
# - Yeh kya karta hai: Array ka shape/dimensions change karta hai BINA data
#   ya total elements ki sankhya change kiye. Jab possible ho same data ka view
#   return karta hai (warna copy).
# - Kab use karein: Neural networks ke liye data prepare karne mein critical
#   hai. Different layers alag input shapes expect karti hain. Aapko aksar
#   formats ke beech convert karna padta hai (matrix → vector, vector → matrix,
#   batch dimension add karna, etc.)
# - Syntax: array.reshape(new_shape) ya array.reshape((dim1, dim2, ...))
# - CRITICAL RULE: new_shape mein same total elements hone chahiye jitne original mein
#   * Original: (2, 3) = 6 elements
#   * Valid reshapes: (3, 2), (6,), (1, 6), (6, 1), (1, 2, 3), etc.
#   * Invalid: (2, 2) = 4 elements ❌ (6 se match nahi karta)
# - Special value -1: NumPy us dimension ko auto-calculate karta hai
#   * arr.reshape(-1) → 1D mein flatten karta hai
#   * arr.reshape(3, -1) → 3 rows, auto-calculate columns → (3, 2)
#   * arr.reshape(-1, 1) → column vector (6, 1)
# - Common use cases:
#   * Images flatten karna: (28, 28) → (784,) fully connected layers ke liye
#   * Batch dimension add karna: (784,) → (1, 784) single sample prediction ke liye
#   * CNNs ke liye data prepare karna: (60000, 784) → (60000, 28, 28, 1)
#   * Row/column vectors ke beech convert karna: (n,) → (n, 1) ya (1, n)
#   * Model outputs reshape karna: (batch, classes) → desired format
# - reshape() vs resize():
#   * reshape() naya array object return karta hai (jab possible ho view); original ka shape same rehta hai
#   * resize() array ko in-place modify karta hai (original change hota hai)
# - Example:
#   # Image preprocessing
#   img_flat = np.array([...])  # shape: (784,) - flattened
#   img_2d = img_flat.reshape(28, 28)  # shape: (28, 28) - visualization ke liye
#   
#   # Batch dimension add karna
#   single_sample = np.array([1, 2, 3, 4])  # shape: (4,)
#   batched = single_sample.reshape(1, -1)   # shape: (1, 4) - 1 ka batch
#   
#   # Auto-calculate dimension
#   arr = np.arange(12)  # shape: (12,)
#   arr.reshape(3, -1)   # shape: (3, 4) - 4 columns auto-calculate
#   arr.reshape(-1, 2)   # shape: (6, 2) - 6 rows auto-calculate
# -----------------------------------------------------------------------------

# Regroup the same six values as 3 rows x 2 columns. Passing the dimensions
# as separate arguments is equivalent to passing the tuple (3, 2).
reshaped = arr_test.reshape(3, 2)
# Before (2, 3):       After (3, 2):
# [[1, 2, 3],     →    [[1, 2],
#  [4, 5, 6]]           [3, 4],
#                       [5, 6]]
# Values are consumed in row-major (C-style) order: 1, 2, 3, 4, 5, 6.

print(reshaped, reshaped.shape)
# Output: [[1 2]
#          [3 4]
#          [5 6]] (3, 2)
# Still 6 elements in total: 3 rows × 2 columns.

# =============================================================================
# UNDERSTANDING RESHAPE ELEMENT ORDERING
# =============================================================================
# ENGLISH:
# How reshape reads and rearranges elements:
# 1. Elements are read in "row-major" (C-style) order: [1,2,3,4,5,6]
# 2. They are then written into new shape in same order
# 3. Original (2,3): [[1,2,3], [4,5,6]] → flattened: [1,2,3,4,5,6]
# 4. New (3,2): Take 2 elements per row → [[1,2], [3,4], [5,6]]
#
# HINGLISH:
# Reshape elements ko kaise read aur rearrange karta hai:
# 1. Elements "row-major" (C-style) order mein read hote hain: [1,2,3,4,5,6]
# 2. Phir same order mein naye shape mein likhe jaate hain
# 3. Original (2,3): [[1,2,3], [4,5,6]] → flattened: [1,2,3,4,5,6]
# 4. New (3,2): Har row mein 2 elements lo → [[1,2], [3,4], [5,6]]
# -----------------------------------------------------------------------------

# =============================================================================
# OPERATION 2: .flatten()
# =============================================================================
# ENGLISH:
# - What it does: Converts a multi-dimensional array into a 1D array (vector).
#   Always returns a COPY of the data (not a view).
# - Why use it: Essential for converting structured data (like images) into
#   vectors for machine learning algorithms that expect 1D input (e.g., 
#   logistic regression, SVMs, fully connected neural network layers)
# - Syntax: array.flatten(order='C')
# - Parameters:
#   * order='C': Row-major (C-style, default) - read rows left-to-right
#   * order='F': Column-major (Fortran-style) - read columns top-to-bottom
#   * order='A': Use 'F' if array is Fortran-contiguous, else 'C'
# - Returns: Always 1D array with shape (n,) where n = total elements
# - flatten() vs ravel():
#   * flatten() → ALWAYS returns a COPY (new memory allocation)
#   * ravel() → returns a VIEW when possible (more memory efficient)
#   * Use flatten() when you need independent copy
#   * Use ravel() when you want to save memory
# - Common use cases:
#   * Preprocessing images for traditional ML: (28, 28) → (784,)
#   * Converting feature matrices to vectors
#   * Preparing data for algorithms expecting 1D input
#   * Flattening CNN feature maps before fully connected layers
#   * Creating feature vectors from multi-dimensional data
# - Example:
#   # Image flattening for ML
#   image = np.random.rand(28, 28)  # shape: (28, 28) - grayscale image
#   features = image.flatten()       # shape: (784,) - feature vector
#   
#   # RGB image flattening
#   rgb_img = np.random.rand(64, 64, 3)  # shape: (64, 64, 3)
#   flat_img = rgb_img.flatten()          # shape: (12288,) = 64×64×3
#   
#   # Different ordering
#   arr = np.array([[1, 2], [3, 4]])
#   arr.flatten(order='C')  # [1, 2, 3, 4] - row-major
#   arr.flatten(order='F')  # [1, 3, 2, 4] - column-major
#   
#   # MNIST example
#   mnist_image = np.zeros((28, 28))  # Single MNIST digit
#   flat_digit = mnist_image.flatten()  # (784,) for neural network input
#
# HINGLISH:
# - Yeh kya karta hai: Multi-dimensional array ko 1D array (vector) mein
#   convert karta hai. Hamesha data ki COPY return karta hai (view nahi).
# - Kab use karein: Structured data (jaise images) ko vectors mein convert
#   karne ke liye essential hai, ML algorithms ke liye jo 1D input expect
#   karte hain (jaise logistic regression, SVMs, fully connected neural
#   network layers)
# - Syntax: array.flatten(order='C')
# - Parameters:
#   * order='C': Row-major (C-style, default) - rows ko left-to-right padhna
#   * order='F': Column-major (Fortran-style) - columns ko top-to-bottom padhna
#   * order='A': Array Fortran-contiguous hai to 'F' use karo, warna 'C'
# - Returns: Hamesha 1D array with shape (n,) jahan n = total elements
# - flatten() vs ravel():
#   * flatten() → HAMESHA COPY return karta hai (new memory allocation)
#   * ravel() → jab possible ho VIEW return karta hai (zyada memory efficient)
#   * flatten() use karo jab aapko independent copy chahiye
#   * ravel() use karo jab memory save karni ho
# - Common use cases:
#   * Traditional ML ke liye images preprocess karna: (28, 28) → (784,)
#   * Feature matrices ko vectors mein convert karna
#   * 1D input expect karne wale algorithms ke liye data prepare karna
#   * Fully connected layers se pehle CNN feature maps flatten karna
#   * Multi-dimensional data se feature vectors banana
# - Example:
#   # ML ke liye image flattening
#   image = np.random.rand(28, 28)  # shape: (28, 28) - grayscale image
#   features = image.flatten()       # shape: (784,) - feature vector
#   
#   # RGB image flattening
#   rgb_img = np.random.rand(64, 64, 3)  # shape: (64, 64, 3)
#   flat_img = rgb_img.flatten()          # shape: (12288,) = 64×64×3
#   
#   # Different ordering
#   arr = np.array([[1, 2], [3, 4]])
#   arr.flatten(order='C')  # [1, 2, 3, 4] - row-major
#   arr.flatten(order='F')  # [1, 3, 2, 4] - column-major
#   
#   # MNIST example
#   mnist_image = np.zeros((28, 28))  # Single MNIST digit
#   flat_digit = mnist_image.flatten()  # (784,) neural network input ke liye
# -----------------------------------------------------------------------------

# Collapse the 2x3 matrix into a 1D vector. order="C" is flatten()'s default;
# it is spelled out here to make the row-by-row traversal explicit.
flattened = arr_test.flatten(order="C")
# [[1,2,3], [4,5,6]] becomes [1,2,3,4,5,6].
# flatten() hands back a copy, so editing `flattened` leaves arr_test untouched.

print(flattened, flattened.shape)
# Output: [1 2 3 4 5 6] (6,)
# A 1D array holding all 6 elements

# =============================================================================
# COMPARING THE ORIGINAL ARRAY WITH reshape() AND flatten()
# =============================================================================
# ENGLISH:
# Original arr_test:      (2, 3) shape
# [[1, 2, 3],
#  [4, 5, 6]]
#
# After reshape(3, 2):    (3, 2) shape - Same elements, different arrangement
# [[1, 2],
#  [3, 4],
#  [5, 6]]
#
# After flatten():        (6,) shape - All elements in single row
# [1, 2, 3, 4, 5, 6]
#
# Key differences:
# - reshape(): Rearranges the same elements into any compatible shape (here it stays 2D)
# - flatten(): Always produces 1D array (loses dimensional structure)
# - Both preserve total element count (6 elements throughout)
#
# HINGLISH:
# Original arr_test:      (2, 3) shape
# [[1, 2, 3],
#  [4, 5, 6]]
#
# reshape(3, 2) ke baad:  (3, 2) shape - Same elements, alag arrangement
# [[1, 2],
#  [3, 4],
#  [5, 6]]
#
# flatten() ke baad:      (6,) shape - Saare elements ek hi row mein
# [1, 2, 3, 4, 5, 6]
#
# Key differences:
# - reshape(): Same elements ko kisi bhi compatible shape mein arrange karta hai (yahan 2D hi rehta hai)
# - flatten(): Hamesha 1D array produce karta hai (dimensional structure kho jata hai)
# - Dono total element count preserve karte hain (throughout 6 elements)
# -----------------------------------------------------------------------------

# =============================================================================
# ADDITIONAL RESHAPING TECHNIQUES
# =============================================================================
# ENGLISH:
# Other useful reshaping methods:
# 
# 1. ravel() - Similar to flatten() but returns a view when possible (copies only if needed)
#    flattened_view = arr_test.ravel()  # More memory efficient
#
# 2. reshape with -1 (auto-calculate dimension)
#    auto_reshaped = arr_test.reshape(-1)     # Same as flatten: (6,)
#    col_vector = arr_test.reshape(-1, 1)     # Column vector: (6, 1)
#    row_vector = arr_test.reshape(1, -1)     # Row vector: (1, 6)
#
# 3. transpose() or .T - Swap rows and columns
#    transposed = arr_test.T  # (2,3) → (3,2) but different arrangement
#
# 4. squeeze() - Remove single-dimensional entries
#    arr_with_extra_dim = np.array([[[1, 2, 3]]])  # shape: (1, 1, 3)
#    squeezed = arr_with_extra_dim.squeeze()        # shape: (3,)
#
# 5. expand_dims() - Add new axis/dimension
#    arr_1d = np.array([1, 2, 3])  # shape: (3,)
#    arr_2d = np.expand_dims(arr_1d, axis=0)  # shape: (1, 3) - row vector
#    arr_2d = np.expand_dims(arr_1d, axis=1)  # shape: (3, 1) - column vector
#
# HINGLISH:
# Reshaping ke aur useful methods:
# 
# 1. ravel() - flatten() jaisa hai par jab possible ho view return karta hai (zaroorat ho tabhi copy)
#    flattened_view = arr_test.ravel()  # Zyada memory efficient
#
# 2. -1 ke saath reshape (dimension auto-calculate)
#    auto_reshaped = arr_test.reshape(-1)     # flatten jaisa: (6,)
#    col_vector = arr_test.reshape(-1, 1)     # Column vector: (6, 1)
#    row_vector = arr_test.reshape(1, -1)     # Row vector: (1, 6)
#
# 3. transpose() ya .T - Rows aur columns swap karna
#    transposed = arr_test.T  # (2,3) → (3,2) lekin alag arrangement
#
# 4. squeeze() - Single-dimensional entries remove karna
#    arr_with_extra_dim = np.array([[[1, 2, 3]]])  # shape: (1, 1, 3)
#    squeezed = arr_with_extra_dim.squeeze()        # shape: (3,)
#
# 5. expand_dims() - Naya axis/dimension add karna
#    arr_1d = np.array([1, 2, 3])  # shape: (3,)
#    arr_2d = np.expand_dims(arr_1d, axis=0)  # shape: (1, 3) - row vector
#    arr_2d = np.expand_dims(arr_1d, axis=1)  # shape: (3, 1) - column vector
# =============================================================================

# =============================================================================
# PRACTICAL ML EXAMPLE: MNIST DIGIT PREPROCESSING
# =============================================================================
# ENGLISH:
# Real-world scenario: Preparing MNIST handwritten digits for neural network
#
# # Load image (28x28 pixels)
# digit_image = np.random.rand(28, 28)  # Simulating grayscale image
# print(f"Original shape: {digit_image.shape}")  # (28, 28)
#
# # For CNN (Convolutional Neural Network) - needs (height, width, channels)
# cnn_input = digit_image.reshape(28, 28, 1)  # Add channel dimension
# print(f"CNN input shape: {cnn_input.shape}")  # (28, 28, 1)
#
# # For Fully Connected Network - needs flattened vector
# fc_input = digit_image.flatten()
# print(f"FC input shape: {fc_input.shape}")  # (784,)
#
# # For batch processing - add batch dimension
# batch_input = digit_image.reshape(1, 28, 28, 1)  # Single image in batch
# print(f"Batch input shape: {batch_input.shape}")  # (1, 28, 28, 1)
#
# HINGLISH:
# Real-world scenario: Neural network ke liye MNIST handwritten digits prepare karna
#
# # Image load karo (28x28 pixels)
# digit_image = np.random.rand(28, 28)  # Grayscale image simulate kar rahe hain
# print(f"Original shape: {digit_image.shape}")  # (28, 28)
#
# # CNN (Convolutional Neural Network) ke liye - (height, width, channels) chahiye
# cnn_input = digit_image.reshape(28, 28, 1)  # Channel dimension add karo
# print(f"CNN input shape: {cnn_input.shape}")  # (28, 28, 1)
#
# # Fully Connected Network ke liye - flattened vector chahiye
# fc_input = digit_image.flatten()
# print(f"FC input shape: {fc_input.shape}")  # (784,)
#
# # Batch processing ke liye - batch dimension add karo
# batch_input = digit_image.reshape(1, 28, 28, 1)  # Batch mein single image
# print(f"Batch input shape: {batch_input.shape}")  # (1, 28, 28, 1)
# =============================================================================

[[1 2 3]
 [4 5 6]] (2, 3)
[[1 2]
 [3 4]
 [5 6]] (3, 2)
[1 2 3 4 5 6] (6,)


In [12]:
# =============================================================================
# NUMPY ARRAY INDEXING AND SLICING TECHNIQUES
# =============================================================================

# =============================================================================
# BASIC 1D ARRAY INDEXING
# =============================================================================
# ENGLISH:
# - What it does: Creates a 1D array and accesses individual elements using
#   their position (index)
# - Why use it: Basic element access is fundamental for data manipulation,
#   feature extraction, and working with individual data points in ML
# - Indexing rules:
#   * Indexing starts at 0 (first element is arr[0])
#   * Negative indexing: arr[-1] is last element, arr[-2] is second-to-last
#   * Out-of-bounds index raises IndexError
# - Common use cases:
#   * Accessing specific features from feature vectors
#   * Getting predictions for specific samples
#   * Extracting labels or targets from datasets
#   * Debugging by checking individual values
# - Example:
#   arr = np.array([10, 20, 30, 40, 50])
#   arr[0]   # 10 - first element
#   arr[2]   # 30 - third element
#   arr[-1]  # 50 - last element
#   arr[-2]  # 40 - second-to-last element
#   arr[4]   # 50 - fifth element (last)
#   # arr[5] would raise IndexError (out of bounds)
#
# HINGLISH:
# - Yeh kya karta hai: 1D array banata hai aur elements ko unki position
#   (index) use karke access karta hai
# - Kab use karein: ML mein data manipulation, feature extraction, aur
#   individual data points ke saath kaam karne ke liye basic element access
#   fundamental hai
# - Indexing rules:
#   * Indexing 0 se shuru hoti hai (pehla element arr[0] hai)
#   * Negative indexing: arr[-1] last element hai, arr[-2] second-to-last hai
#   * Out-of-bounds index IndexError raise karta hai
# - Common use cases:
#   * Feature vectors se specific features access karna
#   * Specific samples ke liye predictions lena
#   * Datasets se labels ya targets extract karna
#   * Individual values check karke debugging karna
# - Example:
#   arr = np.array([10, 20, 30, 40, 50])
#   arr[0]   # 10 - pehla element
#   arr[2]   # 30 - teesra element
#   arr[-1]  # 50 - last element
#   arr[-2]  # 40 - second-to-last element
#   arr[4]   # 50 - paanchva element (last)
#   # arr[5] IndexError raise karega (out of bounds)
# -----------------------------------------------------------------------------

# 1D sample array; it is indexed again by the fancy-indexing demo further down.
arr = np.array([1, 2, 3, 4, 5])

print(arr[0])
# Output: 1
# Index 0 is the first element because NumPy indexing is zero-based.

# =============================================================================
# 2D ARRAY INDEXING (MATRIX INDEXING)
# =============================================================================
# ENGLISH:
# - What it does: Accesses elements in multi-dimensional arrays using
#   [row_index][column_index] or [row_index, column_index] notation
# - Why use it: Essential for working with datasets, images, and matrices in
#   ML where you need to access specific rows, columns, or individual cells
# - Two indexing notations:
#   * arr[row][col] - chained indexing (less efficient, two operations)
#   * arr[row, col] - tuple indexing (preferred, single operation, faster)
# - Indexing rules:
#   * First index = row (vertical position)
#   * Second index = column (horizontal position)
#   * Both use 0-based indexing
#   * Negative indices work for both dimensions
# - Common use cases:
#   * Accessing specific pixels in images: image[y, x]
#   * Getting specific feature values: dataset[sample_idx, feature_idx]
#   * Extracting matrix elements for calculations
#   * Accessing weights in neural network weight matrices
#   * Selecting specific data points from tabular data
# - Example:
#   matrix = np.array([[10, 20, 30],
#                      [40, 50, 60],
#                      [70, 80, 90]])
#   matrix[0, 0]    # 10 - top-left corner
#   matrix[0, 2]    # 30 - first row, third column
#   matrix[2, 1]    # 80 - third row, second column
#   matrix[-1, -1]  # 90 - bottom-right corner
#   matrix[1]       # [40, 50, 60] - entire second row
#   matrix[:, 0]    # [10, 40, 70] - entire first column
#
# HINGLISH:
# - Yeh kya karta hai: Multi-dimensional arrays mein elements ko
#   [row_index][column_index] ya [row_index, column_index] notation use
#   karke access karta hai
# - Kab use karein: ML mein datasets, images, aur matrices ke saath kaam
#   karte waqt specific rows, columns, ya individual cells access karne
#   ke liye essential hai
# - Do indexing notations:
#   * arr[row][col] - chained indexing (kam efficient, do operations)
#   * arr[row, col] - tuple indexing (preferred, single operation, faster)
# - Indexing rules:
#   * Pehla index = row (vertical position)
#   * Doosra index = column (horizontal position)
#   * Dono 0-based indexing use karte hain
#   * Negative indices dono dimensions ke liye kaam karte hain
# - Common use cases:
#   * Images mein specific pixels access karna: image[y, x]
#   * Specific feature values lena: dataset[sample_idx, feature_idx]
#   * Calculations ke liye matrix elements extract karna
#   * Neural network weight matrices mein weights access karna
#   * Tabular data se specific data points select karna
# - Example:
#   matrix = np.array([[10, 20, 30],
#                      [40, 50, 60],
#                      [70, 80, 90]])
#   matrix[0, 0]    # 10 - top-left corner
#   matrix[0, 2]    # 30 - pehli row, teesra column
#   matrix[2, 1]    # 80 - teesri row, doosra column
#   matrix[-1, -1]  # 90 - bottom-right corner
#   matrix[1]       # [40, 50, 60] - poori doosri row
#   matrix[:, 0]    # [10, 40, 70] - poora pehla column
# -----------------------------------------------------------------------------

# 2x4 matrix used to demonstrate element access by (row, column).
index_arr = np.array([[1, 2, 3, 4],    # Row 0: index 0
                      [5, 6, 7, 8]])   # Row 1: index 1

# Tuple indexing fetches the element in a single indexing operation.
# The chained form index_arr[1][2] gives the same value, but it first builds
# a temporary array for row 1 and then indexes into that.
print(index_arr[1, 2])
# Output: 7
# Row 1, column 2:
# [[1, 2, 3, 4],     Row 0
#  [5, 6, 7, 8]]     Row 1 ← We want this row
#         ↑
#      Column 2 - Element is 7

# =============================================================================
# FANCY INDEXING (ADVANCED INDEXING WITH ARRAYS)
# =============================================================================
# ENGLISH:
# - What it does: Uses an array or list of indices to select multiple elements
#   at once from another array. Returns a new array with selected elements.
# - Why use it: Extremely powerful for selecting non-contiguous or specific
#   elements based on conditions, indices, or patterns. Essential for data
#   sampling, feature selection, and batch processing in ML.
# - Syntax: array[index_array] where index_array is list/array of indices
# - Key features:
#   * Can select elements in any order
#   * Can repeat indices to duplicate elements
#   * Can use with multi-dimensional arrays
#   * Always returns a COPY (not a view)
# - Common use cases:
#   * Selecting specific samples from dataset: X_train[selected_indices]
#   * Random sampling: X[np.random.choice(len(X), size=100)]
#   * Feature selection: features[:, important_feature_indices]
#   * Creating mini-batches for training
#   * Reordering data based on custom sequence
#   * Extracting elements at specific positions
# - Example:
#   data = np.array([10, 20, 30, 40, 50, 60, 70, 80, 90])
#   
#   # Select elements at indices 0, 3, 7
#   indices = [0, 3, 7]
#   selected = data[indices]  # [10, 40, 80]
#   
#   # Select in different order
#   indices = [7, 0, 3]
#   reordered = data[indices]  # [80, 10, 40]
#   
#   # Duplicate elements
#   indices = [0, 0, 1, 1]
#   duplicated = data[indices]  # [10, 10, 20, 20]
#   
#   # Random sampling (mini-batch creation)
#   batch_size = 4  # must not exceed len(data) (9) when replace=False
#   random_indices = np.random.choice(len(data), size=batch_size, replace=False)
#   mini_batch = data[random_indices]
#   
#   # 2D fancy indexing
#   matrix = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
#   row_indices = [0, 2]    # Select rows 0 and 2
#   selected_rows = matrix[row_indices]  # [[1, 2, 3], [7, 8, 9]]
#
# HINGLISH:
# - Yeh kya karta hai: Indices ki array ya list use karke doosre array se
#   ek saath multiple elements select karta hai. Selected elements ka naya
#   array return karta hai.
# - Kab use karein: Conditions, indices, ya patterns ke basis par
#   non-contiguous ya specific elements select karne ke liye bahut powerful
#   hai. ML mein data sampling, feature selection, aur batch processing ke
#   liye essential hai.
# - Syntax: array[index_array] jahan index_array indices ki list/array hai
# - Key features:
#   * Kisi bhi order mein elements select kar sakte hain
#   * Elements duplicate karne ke liye indices repeat kar sakte hain
#   * Multi-dimensional arrays ke saath use kar sakte hain
#   * Hamesha COPY return karta hai (view nahi)
# - Common use cases:
#   * Dataset se specific samples select karna: X_train[selected_indices]
#   * Random sampling: X[np.random.choice(len(X), size=100)]
#   * Feature selection: features[:, important_feature_indices]
#   * Training ke liye mini-batches banana
#   * Custom sequence ke basis par data reorder karna
#   * Specific positions par elements extract karna
# - Example:
#   data = np.array([10, 20, 30, 40, 50, 60, 70, 80, 90])
#   
#   # Indices 0, 3, 7 par elements select karo
#   indices = [0, 3, 7]
#   selected = data[indices]  # [10, 40, 80]
#   
#   # Alag order mein select karo
#   indices = [7, 0, 3]
#   reordered = data[indices]  # [80, 10, 40]
#   
#   # Elements duplicate karo
#   indices = [0, 0, 1, 1]
#   duplicated = data[indices]  # [10, 10, 20, 20]
#   
#   # Random sampling (mini-batch creation)
#   batch_size = 4  # replace=False ke saath len(data) (9) se zyada nahi ho sakta
#   random_indices = np.random.choice(len(data), size=batch_size, replace=False)
#   mini_batch = data[random_indices]
#   
#   # 2D fancy indexing
#   matrix = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
#   row_indices = [0, 2]    # Rows 0 aur 2 select karo
#   selected_rows = matrix[row_indices]  # [[1, 2, 3], [7, 8, 9]]
# -----------------------------------------------------------------------------

# Positions to pull out of `arr` in a single indexing step.
idx = [0, 2, 4]  # Indices to select: first, third, and fifth elements

# Indexing with a list of integers (fancy indexing) returns a new array
# holding the elements at those positions, in the order listed.
print(arr[idx])
# Output: [1 3 5]
# Selects elements at positions 0, 2, 4 from [1, 2, 3, 4, 5]
# arr[0]=1, arr[2]=3, arr[4]=5

# =============================================================================
# BOOLEAN INDEXING (CONDITIONAL SELECTION)
# =============================================================================
# ENGLISH:
# - What it does: Uses boolean conditions to filter and select elements from
#   an array. Creates a boolean mask (True/False array) and returns elements
#   where mask is True.
# - Why use it: Most powerful feature for data filtering in ML. Essential for
#   data cleaning, outlier removal, feature engineering, and conditional
#   operations. Works like SQL WHERE clause or pandas query.
# - How it works:
#   1. Condition creates boolean array: arr > 2 → [False, False, True, True, True]
#   2. Boolean array acts as mask: True = include, False = exclude
#   3. Returns new array with only True elements
# - Key features:
#   * Can use any comparison: >, <, >=, <=, ==, !=
#   * Can combine conditions: & (and), | (or), ~ (not)
#   * Always returns a COPY (not a view)
#   * Result length depends on condition (can be 0 to original length)
# - Common use cases:
#   * Data filtering: Remove outliers, select specific ranges
#   * Feature engineering: Create binary features based on conditions
#   * Data cleaning: Remove invalid values (NaN, negative values, etc.)
#   * Conditional statistics: Mean of positive values, count of outliers
#   * Threshold-based selection: Select predictions above confidence threshold
#   * Class separation: Select samples of specific class
# - Example:
#   ages = np.array([18, 25, 32, 45, 67, 23, 51])
#   
#   # Filter adults over 30
#   adults = ages[ages > 30]  # [32, 45, 67, 51]
#   
#   # Filter specific range (20-40 years old)
#   young_adults = ages[(ages >= 20) & (ages <= 40)]  # [25, 32, 23]
#   
#   # Exclude seniors (not over 65)
#   non_seniors = ages[~(ages > 65)]  # [18, 25, 32, 45, 23, 51]
#   
#   # Multiple conditions with OR
#   extremes = ages[(ages < 25) | (ages > 60)]  # [18, 67, 23]
#   
#   # ML example: Filter predictions
#   predictions = np.array([0.2, 0.8, 0.6, 0.3, 0.9])
#   high_confidence = predictions[predictions > 0.5]  # [0.8, 0.6, 0.9]
#   
#   # Data cleaning: Remove negative values
#   data_with_errors = np.array([5, -1, 10, -3, 15, 20])
#   clean_data = data_with_errors[data_with_errors >= 0]  # [5, 10, 15, 20]
#
# HINGLISH:
# - Yeh kya karta hai: Boolean conditions use karke array se elements filter
#   aur select karta hai. Boolean mask (True/False array) banata hai aur
#   elements return karta hai jahan mask True hai.
# - Kab use karein: ML mein data filtering ke liye sabse powerful feature hai.
#   Data cleaning, outlier removal, feature engineering, aur conditional
#   operations ke liye essential hai. SQL WHERE clause ya pandas query jaisa
#   kaam karta hai.
# - Kaise kaam karta hai:
#   1. Condition boolean array banata hai: arr > 2 → [False, False, True, True, True]
#   2. Boolean array mask ki tarah kaam karta hai: True = include, False = exclude
#   3. Sirf True elements ka naya array return karta hai
# - Key features:
#   * Koi bhi comparison use kar sakte hain: >, <, >=, <=, ==, !=
#   * Conditions combine kar sakte hain: & (and), | (or), ~ (not)
#   * Hamesha COPY return karta hai (view nahi)
#   * Result ki length condition par depend karti hai (0 se original length tak)
# - Common use cases:
#   * Data filtering: Outliers remove karna, specific ranges select karna
#   * Feature engineering: Conditions ke basis par binary features banana
#   * Data cleaning: Invalid values remove karna (NaN, negative values, etc.)
#   * Conditional statistics: Positive values ka mean, outliers ki count
#   * Threshold-based selection: Confidence threshold se upar predictions select karna
#   * Class separation: Specific class ke samples select karna
# - Example:
#   ages = np.array([18, 25, 32, 45, 67, 23, 51])
#   
#   # 30 se zyada umar wale adults filter karo
#   adults = ages[ages > 30]  # [32, 45, 67, 51]
#   
#   # Specific range filter karo (20-40 saal ke log)
#   young_adults = ages[(ages >= 20) & (ages <= 40)]  # [25, 32, 23]
#   
#   # Seniors exclude karo (65 se zyada nahi)
#   non_seniors = ages[~(ages > 65)]  # [18, 25, 32, 45, 23, 51]
#   
#   # OR ke saath multiple conditions
#   extremes = ages[(ages < 25) | (ages > 60)]  # [18, 67, 23]
#   
#   # ML example: Predictions filter karo
#   predictions = np.array([0.2, 0.8, 0.6, 0.3, 0.9])
#   high_confidence = predictions[predictions > 0.5]  # [0.8, 0.6, 0.9]
#   
#   # Data cleaning: Negative values remove karo
#   data_with_errors = np.array([5, -1, 10, -3, 15, 20])
#   clean_data = data_with_errors[data_with_errors >= 0]  # [5, 10, 15, 20]
# -----------------------------------------------------------------------------

# --- Example 1: keep values strictly greater than 2 --------------------------
above_two_mask = arr > 2  # element-wise comparison -> boolean mask
print(arr[above_two_mask])
# Printed: [3 4 5]
#   mask     : [False, False, True, True, True]
#   selected : values sitting at the True positions -> [3, 4, 5]

# --- Example 2: keep even values (remainder after dividing by 2 is 0) --------
even_mask = arr % 2 == 0
print(arr[even_mask])
# Printed: [2 4]
#   arr % 2  : [1, 0, 1, 0, 1]
#   mask     : [False, True, False, True, False]
#   selected : [2, 4]

# --- Example 3: keep odd values (remainder after dividing by 2 is not 0) -----
odd_mask = arr % 2 != 0
print(arr[odd_mask])
# Printed: [1 3 5]
#   arr % 2  : [1, 0, 1, 0, 1]
#   mask     : [True, False, True, False, True]
#   selected : [1, 3, 5]

# =============================================================================
# ADVANCED BOOLEAN INDEXING PATTERNS
# =============================================================================
# ENGLISH:
# Combining multiple conditions (use parentheses! & and | bind tighter than >, <, ==):
# 
# # AND condition: Both must be True
# result = arr[(arr > 2) & (arr < 5)]  # [3, 4]
# 
# # OR condition: At least one must be True
# result = arr[(arr < 2) | (arr > 4)]  # [1, 5]
# 
# # NOT condition: Inverse of condition
# result = arr[~(arr > 3)]  # [1, 2, 3] - elements NOT greater than 3
# 
# # Complex nested conditions
# result = arr[((arr > 1) & (arr < 4)) | (arr == 5)]  # [2, 3, 5]
# 
# # Using np.where for conditional replacement
# modified = np.where(arr > 3, 100, arr)  # Replace >3 with 100, keep rest
# # Result: [1, 2, 3, 100, 100]
# 
# HINGLISH:
# Multiple conditions combine karna (parentheses use karo! & aur | ki precedence >, <, == se zyada hoti hai):
# 
# # AND condition: Dono True hone chahiye
# result = arr[(arr > 2) & (arr < 5)]  # [3, 4]
# 
# # OR condition: Kam se kam ek True hona chahiye
# result = arr[(arr < 2) | (arr > 4)]  # [1, 5]
# 
# # NOT condition: Condition ka inverse
# result = arr[~(arr > 3)]  # [1, 2, 3] - elements jo 3 se bade NAHI hain
# 
# # Complex nested conditions
# result = arr[((arr > 1) & (arr < 4)) | (arr == 5)]  # [2, 3, 5]
# 
# # Conditional replacement ke liye np.where
# modified = np.where(arr > 3, 100, arr)  # >3 ko 100 se replace karo, baaki rakho
# # Result: [1, 2, 3, 100, 100]
# =============================================================================

# =============================================================================
# PRACTICAL ML EXAMPLE: DATA FILTERING
# =============================================================================
# ENGLISH:
# Real-world scenario: Cleaning and filtering a dataset
#
# # Simulated dataset: [age, income, credit_score]
# customers = np.array([[25, 45000, 720],
#                       [35, 75000, 680],
#                       [45, -5000, 750],  # Error: negative income
#                       [52, 90000, 850],
#                       [28, 55000, 0]])   # Error: invalid credit score
# 
# # Filter 1: Remove invalid income (negatives)
# valid_income = customers[customers[:, 1] >= 0]
# 
# # Filter 2: Select only valid credit scores (300-850)
# valid_credit = customers[(customers[:, 2] >= 300) & (customers[:, 2] <= 850)]
# 
# # Filter 3: High-value customers (income > 60k AND credit > 700)
# high_value = customers[(customers[:, 1] > 60000) & (customers[:, 2] > 700)]
# 
# # Filter 4: Young professionals (age 25-35 AND income > 50k)
# young_prof = customers[(customers[:, 0] >= 25) & 
#                        (customers[:, 0] <= 35) & 
#                        (customers[:, 1] > 50000)]
#
# HINGLISH:
# Real-world scenario: Dataset ko clean aur filter karna
#
# # Simulated dataset: [age, income, credit_score]
# customers = np.array([[25, 45000, 720],
#                       [35, 75000, 680],
#                       [45, -5000, 750],  # Error: negative income
#                       [52, 90000, 850],
#                       [28, 55000, 0]])   # Error: invalid credit score
# 
# # Filter 1: Invalid income remove karo (negatives)
# valid_income = customers[customers[:, 1] >= 0]
# 
# # Filter 2: Sirf valid credit scores select karo (300-850)
# valid_credit = customers[(customers[:, 2] >= 300) & (customers[:, 2] <= 850)]
# 
# # Filter 3: High-value customers (income > 60k AUR credit > 700)
# high_value = customers[(customers[:, 1] > 60000) & (customers[:, 2] > 700)]
# 
# # Filter 4: Young professionals (age 25-35 AUR income > 50k)
# young_prof = customers[(customers[:, 0] >= 25) & 
#                        (customers[:, 0] <= 35) & 
#                        (customers[:, 1] > 50000)]
# =============================================================================

1
7
[1 3 5]
[3 4 5]
[2 4]
[1 3 5]


In [14]:
# =============================================================================
# NUMPY ARRAY SLICING - EXTRACTING SUBARRAYS
# =============================================================================

# =============================================================================
# CREATING BASE ARRAY FOR SLICING DEMONSTRATIONS
# =============================================================================
# ENGLISH:
# - What it does: Creates a 1D array with 8 sequential elements for
#   demonstrating various slicing operations
# - Why use it: Slicing is fundamental for extracting portions of data,
#   creating training/validation splits, selecting feature ranges, and
#   working with subsequences in time series or sequences
# - Note: Unlike indexing (which returns single elements), slicing returns
#   a VIEW of the original array (not a copy) for efficiency
#
# HINGLISH:
# - Yeh kya karta hai: 8 sequential elements ke saath ek 1D array banata hai
#   various slicing operations demonstrate karne ke liye
# - Kab use karein: Data ke portions extract karne, training/validation
#   splits banana, feature ranges select karne, aur time series ya sequences
#   mein subsequences ke saath kaam karne ke liye slicing fundamental hai
# - Note: Indexing ke unlike (jo single elements return karta hai), slicing
#   efficiency ke liye original array ka VIEW return karta hai (copy nahi)
# -----------------------------------------------------------------------------

arr = np.arange(1, 9)  # consecutive integers 1..8 (the stop value 9 is excluded)
# Position reference used by the slicing examples:
#   forward index :  0   1   2   3   4   5   6   7
#   value         :  1   2   3   4   5   6   7   8
#   reverse index : -8  -7  -6  -5  -4  -3  -2  -1

# =============================================================================
# SLICING SYNTAX: arr[start:stop:step]
# =============================================================================
# ENGLISH:
# General syntax: array[start:stop:step]
# - start: Index where slice begins (inclusive) - default is 0
# - stop: Index where slice ends (exclusive, not included) - default is len(array)
# - step: Increment between indices (default is 1)
# 
# Key rules:
# 1. start is INCLUSIVE - element at start index IS included
# 2. stop is EXCLUSIVE - element at stop index is NOT included
# 3. Omitting values uses defaults: [:] means entire array
# 4. Negative indices count from end: -1 is last element
# 5. Slicing returns a VIEW (not copy) - modifying slice affects original
# 6. Out-of-bounds indices don't raise errors (unlike indexing)
#
# HINGLISH:
# General syntax: array[start:stop:step]
# - start: Index jahan slice shuru hota hai (inclusive) - default 0 hai
# - stop: Index jahan slice khatam hota hai (exclusive, include nahi hota) - default len(array) hai
# - step: Indices ke beech increment (default 1 hai)
# 
# Key rules:
# 1. start INCLUSIVE hai - start index par element include hota hai
# 2. stop EXCLUSIVE hai - stop index par element include NAHI hota
# 3. Values omit karne par defaults use hote hain: [:] matlab poora array
# 4. Negative indices end se count karte hain: -1 last element hai
# 5. Slicing VIEW return karta hai (copy nahi) - slice modify karne se original affect hota hai
# 6. Out-of-bounds indices error nahi raise karte (indexing ke unlike)
# -----------------------------------------------------------------------------

# =============================================================================
# SLICE 1: arr[start:stop] - Range Selection
# =============================================================================
# ENGLISH:
# - What it does: Extracts elements from start index (inclusive) to stop
#   index (exclusive). The stop element itself is NOT included.
# - Syntax: arr[start:stop]
# - Why use it: Most common slicing pattern for selecting a range of elements.
#   Essential for creating subsets, batches, or extracting specific ranges.
# - Formula: Returns elements at indices [start, start+1, ..., stop-1]
# - Length of result: stop - start elements
# - Common use cases:
#   * Creating train/test splits: X_train = X[:800], X_test = X[800:]
#   * Extracting time windows: recent_data = timeseries[-100:]
#   * Selecting feature ranges: features = data[:, 10:50]
#   * Creating mini-batches: batch = data[i:i+batch_size]
#   * Removing first/last elements: middle = arr[1:-1]
# - Example:
#   data = np.array([10, 20, 30, 40, 50, 60, 70, 80, 90])
#   data[2:6]    # [30, 40, 50, 60] - indices 2,3,4,5 (6 excluded)
#   data[0:3]    # [10, 20, 30] - first 3 elements
#   data[5:9]    # [60, 70, 80, 90] - last 4 elements
#   data[-4:-1]  # [60, 70, 80] - 4th-last to 2nd-last (last excluded)
#   
#   # ML example: Train-test split (80-20)
#   dataset = np.arange(1000)
#   split_idx = int(0.8 * len(dataset))  # 800
#   train = dataset[:split_idx]    # First 800 samples
#   test = dataset[split_idx:]     # Last 200 samples
#
# HINGLISH:
# - Yeh kya karta hai: Start index (inclusive) se stop index (exclusive) tak
#   elements extract karta hai. Stop element khud include NAHI hota.
# - Syntax: arr[start:stop]
# - Kab use karein: Elements ki range select karne ka sabse common slicing
#   pattern. Subsets, batches banana, ya specific ranges extract karne ke
#   liye essential hai.
# - Formula: Indices [start, start+1, ..., stop-1] par elements return karta hai
# - Result ki length: stop - start elements
# - Common use cases:
#   * Train/test splits banana: X_train = X[:800], X_test = X[800:]
#   * Time windows extract karna: recent_data = timeseries[-100:]
#   * Feature ranges select karna: features = data[:, 10:50]
#   * Mini-batches banana: batch = data[i:i+batch_size]
#   * Pehle/aakhri elements remove karna: middle = arr[1:-1]
# - Example:
#   data = np.array([10, 20, 30, 40, 50, 60, 70, 80, 90])
#   data[2:6]    # [30, 40, 50, 60] - indices 2,3,4,5 (6 excluded)
#   data[0:3]    # [10, 20, 30] - pehle 3 elements
#   data[5:9]    # [60, 70, 80, 90] - aakhri 4 elements
#   data[-4:-1]  # [60, 70, 80] - 4th-last se 2nd-last (last excluded)
#   
#   # ML example: Train-test split (80-20)
#   dataset = np.arange(1000)
#   split_idx = int(0.8 * len(dataset))  # 800
#   train = dataset[:split_idx]    # Pehle 800 samples
#   test = dataset[split_idx:]     # Aakhri 200 samples
# -----------------------------------------------------------------------------

first_five = arr[0:5]  # start=0 is included, stop=5 is excluded
print(first_five)
# Printed: [1 2 3 4 5]
# Index : 0  1  2  3  4 | 5  6  7
# Value : 1  2  3  4  5 | 6  7  8
#         ^ start=0       ^ stop=5 (value 6 is the first element left out)
# 5 elements come back: indices 0, 1, 2, 3, 4

# =============================================================================
# SLICE 2: arr[start:] - From Start to End
# =============================================================================
# ENGLISH:
# - What it does: Extracts all elements from start index to the end of array.
#   Omitting the stop value means "go until the end".
# - Syntax: arr[start:]
# - Why use it: Convenient shorthand for getting all remaining elements after
#   a certain point. Very common in data splitting and sequence processing.
# - Equivalent to: arr[start:len(arr)]
# - Common use cases:
#   * Getting test set after train split: X_test = X[train_size:]
#   * Removing first n elements: without_header = data[5:]
#   * Processing from middle onwards: second_half = arr[len(arr)//2:]
#   * Skipping warmup period: actual_data = timeseries[warmup_steps:]
#   * Getting tail of sequence: recent = data[-100:]
# - Example:
#   prices = np.array([100, 102, 98, 105, 110, 108, 115])
#   prices[3:]    # [105, 110, 108, 115] - from index 3 to end
#   prices[-3:]   # [110, 108, 115] - last 3 elements (from 3rd-last to end)
#   prices[0:]    # [100, 102, 98, 105, 110, 108, 115] - entire array
#   
#   # ML example: Create validation set
#   X_train_full = np.arange(1000)
#   val_size = 200
#   X_train = X_train_full[:-val_size]  # First 800
#   X_val = X_train_full[-val_size:]    # Last 200 (from -200 to end)
#
# HINGLISH:
# - Yeh kya karta hai: Start index se array ke end tak saare elements extract
#   karta hai. Stop value omit karne ka matlab hai "end tak jao".
# - Syntax: arr[start:]
# - Kab use karein: Kisi point ke baad ke saare remaining elements lene ka
#   convenient shorthand. Data splitting aur sequence processing mein bahut
#   common hai.
# - Equivalent to: arr[start:len(arr)]
# - Common use cases:
#   * Train split ke baad test set lena: X_test = X[train_size:]
#   * Pehle n elements remove karna: without_header = data[5:]
#   * Middle se aage process karna: second_half = arr[len(arr)//2:]
#   * Warmup period skip karna: actual_data = timeseries[warmup_steps:]
#   * Sequence ki tail lena: recent = data[-100:]
# - Example:
#   prices = np.array([100, 102, 98, 105, 110, 108, 115])
#   prices[3:]    # [105, 110, 108, 115] - index 3 se end tak
#   prices[-3:]   # [110, 108, 115] - aakhri 3 elements (3rd-last se end tak)
#   prices[0:]    # [100, 102, 98, 105, 110, 108, 115] - poora array
#   
#   # ML example: Validation set banana
#   X_train_full = np.arange(1000)
#   val_size = 200
#   X_train = X_train_full[:-val_size]  # Pehle 800
#   X_val = X_train_full[-val_size:]    # Aakhri 200 (-200 se end tak)
# -----------------------------------------------------------------------------

from_second = arr[1:]  # stop omitted -> the slice runs through the last element
print(from_second)
# Printed: [2 3 4 5 6 7 8]
# Index : 0 | 1  2  3  4  5  6  7
# Value : 1 | 2  3  4  5  6  7  8
#             ^ start=1         ^ end of array
# 7 elements come back: indices 1, 2, 3, 4, 5, 6, 7

# =============================================================================
# SLICE 3: arr[:stop] - From Beginning to Stop
# =============================================================================
# ENGLISH:
# - What it does: Extracts elements from the beginning (index 0) up to but
#   not including the stop index. Omitting start means "start from beginning".
# - Syntax: arr[:stop]
# - Why use it: Convenient for getting first n elements. Very common for
#   creating training sets, limiting data size, or getting prefixes.
# - Equivalent to: arr[0:stop]
# - Common use cases:
#   * Getting first n samples: X_train = X[:train_size]
#   * Limiting dataset size: limited_data = large_data[:10000]
#   * Getting top-k results: top_10 = sorted_results[:10]
#   * Creating training set: train = data[:int(0.8*len(data))]
#   * Removing last elements: without_tail = arr[:-5]
# - Example:
#   scores = np.array([95, 87, 92, 78, 88, 94, 85, 90])
#   scores[:3]    # [95, 87, 92] - first 3 elements
#   scores[:5]    # [95, 87, 92, 78, 88] - first 5 elements
#   scores[:-2]   # [95, 87, 92, 78, 88, 94] - all except last 2
#   
#   # ML example: Quick prototyping with small subset
#   full_dataset = np.arange(100000)
#   quick_test = full_dataset[:1000]  # Just first 1000 for testing
#   
#   # Getting top predictions
#   probs = np.array([0.1, 0.9, 0.3, 0.8, 0.6])
#   sorted_indices = np.argsort(probs)[::-1]  # Sort descending
#   top_3_indices = sorted_indices[:3]  # Get top 3
#
# HINGLISH:
# - Yeh kya karta hai: Beginning (index 0) se lekar stop index tak (excluding)
#   elements extract karta hai. Start omit karne ka matlab "beginning se shuru".
# - Syntax: arr[:stop]
# - Kab use karein: Pehle n elements lene ka convenient tarika. Training sets
#   banane, data size limit karne, ya prefixes lene ke liye bahut common.
# - Equivalent to: arr[0:stop]
# - Common use cases:
#   * Pehle n samples lena: X_train = X[:train_size]
#   * Dataset size limit karna: limited_data = large_data[:10000]
#   * Top-k results lena: top_10 = sorted_results[:10]
#   * Training set banana: train = data[:int(0.8*len(data))]
#   * Aakhri elements remove karna: without_tail = arr[:-5]
# - Example:
#   scores = np.array([95, 87, 92, 78, 88, 94, 85, 90])
#   scores[:3]    # [95, 87, 92] - pehle 3 elements
#   scores[:5]    # [95, 87, 92, 78, 88] - pehle 5 elements
#   scores[:-2]   # [95, 87, 92, 78, 88, 94] - aakhri 2 ko chhod ke sab
#   
#   # ML example: Small subset ke saath quick prototyping
#   full_dataset = np.arange(100000)
#   quick_test = full_dataset[:1000]  # Testing ke liye sirf pehle 1000
#   
#   # Top predictions lena
#   probs = np.array([0.1, 0.9, 0.3, 0.8, 0.6])
#   sorted_indices = np.argsort(probs)[::-1]  # Descending sort
#   top_3_indices = sorted_indices[:3]  # Top 3 lo
# -----------------------------------------------------------------------------

leading_five = arr[:5]  # start omitted -> defaults to index 0; stop=5 is excluded
print(leading_five)
# Printed: [1 2 3 4 5]
# Index : 0  1  2  3  4 | 5  6  7
# Value : 1  2  3  4  5 | 6  7  8
#         ^ start=0       ^ stop=5 (value 6 is the first element left out)
# 5 elements come back: indices 0, 1, 2, 3, 4
# Identical to what arr[0:5] prints

# =============================================================================
# SLICE 4: arr[::step] - Every Nth Element
# =============================================================================
# ENGLISH:
# - What it does: Extracts elements at regular intervals (every step-th element)
#   from the entire array. Omitting start and stop means "entire array".
# - Syntax: arr[::step] or arr[start:stop:step]
# - Why use it: Essential for downsampling data, selecting every nth sample,
#   creating alternating patterns, or reversing arrays (step=-1).
# - Step mechanics:
#   * step > 0: Move forward through array
#   * step < 0: Move backward through array (reversal)
#   * step = 2: Every other element (0, 2, 4, 6, ...)
#   * step = 3: Every third element (0, 3, 6, 9, ...)
# - Common use cases:
#   * Downsampling time series: downsampled = signal[::10]
#   * Getting odd/even indices: even = arr[::2], odd = arr[1::2]
#   * Reversing arrays: reversed_arr = arr[::-1]
#   * Selecting alternating features: selected = features[:, ::2]
#   * Creating training batches with stride: batch = data[start::stride]
#   * Decimation in signal processing: reduced = data[::decimation_factor]
# - Example:
#   sequence = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
#   sequence[::2]    # [0, 2, 4, 6, 8] - every 2nd element (even indices)
#   sequence[1::2]   # [1, 3, 5, 7, 9] - every 2nd starting from 1 (odd indices)
#   sequence[::3]    # [0, 3, 6, 9] - every 3rd element
#   sequence[::-1]   # [9, 8, 7, 6, 5, 4, 3, 2, 1, 0] - reversed
#   sequence[::-2]   # [9, 7, 5, 3, 1] - every 2nd element, reversed
#   
#   # ML example: Downsampling high-frequency sensor data
#   sensor_data = np.sin(np.linspace(0, 10, 1000))  # 1000 samples
#   downsampled = sensor_data[::10]  # Keep every 10th sample (100 samples)
#   
#   # Creating train/val splits with alternating samples
#   indices = np.arange(1000)
#   train_indices = indices[::2]  # Even indices for training
#   val_indices = indices[1::2]   # Odd indices for validation
#
# HINGLISH:
# - Yeh kya karta hai: Poore array se regular intervals par (har step-th
#   element) elements extract karta hai. Start aur stop omit karne ka matlab
#   "poora array".
# - Syntax: arr[::step] ya arr[start:stop:step]
# - Kab use karein: Data downsample karne, har nth sample select karne,
#   alternating patterns banana, ya arrays reverse karne (step=-1) ke liye
#   essential hai.
# - Step mechanics:
#   * step > 0: Array mein aage move karo
#   * step < 0: Array mein peeche move karo (reversal)
#   * step = 2: Har doosra element (0, 2, 4, 6, ...)
#   * step = 3: Har teesra element (0, 3, 6, 9, ...)
# - Common use cases:
#   * Time series downsample karna: downsampled = signal[::10]
#   * Odd/even indices lena: even = arr[::2], odd = arr[1::2]
#   * Arrays reverse karna: reversed_arr = arr[::-1]
#   * Alternating features select karna: selected = features[:, ::2]
#   * Stride ke saath training batches banana: batch = data[start::stride]
#   * Signal processing mein decimation: reduced = data[::decimation_factor]
# - Example:
#   sequence = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
#   sequence[::2]    # [0, 2, 4, 6, 8] - har 2nd element (even indices)
#   sequence[1::2]   # [1, 3, 5, 7, 9] - 1 se shuru karke har 2nd (odd indices)
#   sequence[::3]    # [0, 3, 6, 9] - har 3rd element
#   sequence[::-1]   # [9, 8, 7, 6, 5, 4, 3, 2, 1, 0] - reversed
#   sequence[::-2]   # [9, 7, 5, 3, 1] - har 2nd element, reversed
#   
#   # ML example: High-frequency sensor data downsample karna
#   sensor_data = np.sin(np.linspace(0, 10, 1000))  # 1000 samples
#   downsampled = sensor_data[::10]  # Har 10th sample rakho (100 samples)
#   
#   # Alternating samples ke saath train/val splits banana
#   indices = np.arange(1000)
#   train_indices = indices[::2]  # Training ke liye even indices
#   val_indices = indices[1::2]   # Validation ke liye odd indices
# -----------------------------------------------------------------------------

every_second = arr[::2]  # start and stop omitted (whole array), step=2
print(every_second)
# Printed: [1 3 5 7]
# Index : 0  1  2  3  4  5  6  7
# Value : 1  2  3  4  5  6  7  8
#         ^     ^     ^     ^
# Begins at index 0 and advances by 2 each time
# 4 elements come back: indices 0, 2, 4, 6

# =============================================================================
# ADVANCED SLICING PATTERNS
# =============================================================================
# ENGLISH:
# Combining start, stop, and step:
# 
# # Get every 2nd element between indices 1 and 7
# arr[1:7:2]  # [2, 4, 6] - indices 1,3,5
# 
# # Get every 3rd element from start to index 8
# arr[:8:3]  # [1, 4, 7] - indices 0,3,6
# 
# # Get every 2nd element from index 2 to end
# arr[2::2]  # [3, 5, 7] - indices 2,4,6
# 
# # Reverse entire array
# arr[::-1]  # [8, 7, 6, 5, 4, 3, 2, 1]
# 
# # Reverse portion of array (indices 6 down to 3; stop index 2 is excluded)
# arr[6:2:-1]  # [7, 6, 5, 4] - reverse from index 6 to 3
# 
# # Get last 4 elements in reverse
# arr[-1:-5:-1]  # [8, 7, 6, 5]
# 
# # Slice the entire array (still a VIEW in NumPy, unlike Python lists)
# arr_view = arr[:]  # Shares memory with arr; use arr.copy() for a real copy
# 
# HINGLISH:
# Start, stop, aur step combine karna:
# 
# # Indices 1 aur 7 ke beech har 2nd element lo
# arr[1:7:2]  # [2, 4, 6] - indices 1,3,5
# 
# # Start se index 8 tak har 3rd element lo
# arr[:8:3]  # [1, 4, 7] - indices 0,3,6
# 
# # Index 2 se end tak har 2nd element lo
# arr[2::2]  # [3, 5, 7] - indices 2,4,6
# 
# # Poora array reverse karo
# arr[::-1]  # [8, 7, 6, 5, 4, 3, 2, 1]
# 
# # Array ke portion ko reverse karo (index 6 se 3 tak; stop index 2 exclude hota hai)
# arr[6:2:-1]  # [7, 6, 5, 4] - index 6 se 3 tak reverse
# 
# # Aakhri 4 elements reverse mein lo
# arr[-1:-5:-1]  # [8, 7, 6, 5]
# 
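# # Poore array ka slice lo (NumPy mein yeh bhi VIEW hai, Python lists ki tarah copy nahi)
# arr_view = arr[:]  # arr ke saath memory share karta hai; real copy ke liye arr.copy() use karo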
# =============================================================================

# =============================================================================
# 2D ARRAY SLICING
# =============================================================================
# ENGLISH:
# Slicing works on each dimension independently:
# 
# matrix = np.array([[1, 2, 3, 4],
#                    [5, 6, 7, 8],
#                    [9, 10, 11, 12]])
# 
# # Get first 2 rows, all columns
# matrix[:2, :]  # [[1,2,3,4], [5,6,7,8]]
# 
# # Get all rows, first 3 columns
# matrix[:, :3]  # [[1,2,3], [5,6,7], [9,10,11]]
# 
# # Get rows 1-2, columns 1-2 (stop index 3 is excluded)
# matrix[1:3, 1:3]  # [[6,7], [10,11]]
# 
# # Get every other row, every other column
# matrix[::2, ::2]  # [[1,3], [9,11]]
# 
# # Reverse rows, keep columns
# matrix[::-1, :]  # [[9,10,11,12], [5,6,7,8], [1,2,3,4]]
# 
# HINGLISH:
# Har dimension par independently slicing kaam karti hai:
# 
# matrix = np.array([[1, 2, 3, 4],
#                    [5, 6, 7, 8],
#                    [9, 10, 11, 12]])
# 
# # Pehli 2 rows lo, saare columns
# matrix[:2, :]  # [[1,2,3,4], [5,6,7,8]]
# 
# # Saari rows lo, pehle 3 columns
# matrix[:, :3]  # [[1,2,3], [5,6,7], [9,10,11]]
# 
# # Rows 1-2 lo, columns 1-2 (stop index 3 exclude hota hai)
# matrix[1:3, 1:3]  # [[6,7], [10,11]]
# 
# # Har doosri row, har doosra column
# matrix[::2, ::2]  # [[1,3], [9,11]]
# 
# # Rows reverse karo, columns same rakho
# matrix[::-1, :]  # [[9,10,11,12], [5,6,7,8], [1,2,3,4]]
# =============================================================================

# =============================================================================
# VIEW vs COPY - CRITICAL CONCEPT
# =============================================================================
# ENGLISH:
# Slicing creates VIEWS (not copies) - modifying slice affects original!
# 
# original = np.array([1, 2, 3, 4, 5])
# slice_view = original[1:4]  # [2, 3, 4] - this is a VIEW
# slice_view[0] = 999  # Modify the slice
# print(original)  # [1, 999, 3, 4, 5] - ORIGINAL CHANGED!
# 
# # To create independent copy, use .copy()
# original = np.array([1, 2, 3, 4, 5])
# slice_copy = original[1:4].copy()  # [2, 3, 4] - this is a COPY
# slice_copy[0] = 999  # Modify the copy
# print(original)  # [1, 2, 3, 4, 5] - original UNCHANGED
# 
# When to use copy():
# - When you need to modify slice without affecting original
# - When storing slices for later use
# - When passing slices to functions that might modify them
# 
# HINGLISH:
# Slicing VIEWS banata hai (copies nahi) - slice modify karne se original affect hota hai!
# 
# original = np.array([1, 2, 3, 4, 5])
# slice_view = original[1:4]  # [2, 3, 4] - yeh ek VIEW hai
# slice_view[0] = 999  # Slice ko modify karo
# print(original)  # [1, 999, 3, 4, 5] - ORIGINAL BADAL GAYA!
# 
# # Independent copy banane ke liye .copy() use karo
# original = np.array([1, 2, 3, 4, 5])
# slice_copy = original[1:4].copy()  # [2, 3, 4] - yeh ek COPY hai
# slice_copy[0] = 999  # Copy ko modify karo
# print(original)  # [1, 2, 3, 4, 5] - original UNCHANGED raha
# 
# copy() kab use karein:
# - Jab aapko slice modify karna ho bina original ko affect kiye
# - Jab slices ko baad mein use karne ke liye store karna ho
# - Jab functions ko slices pass karna ho jo unhe modify kar sakte hain
# =============================================================================

# =============================================================================
# PRACTICAL ML EXAMPLE: TRAIN-VAL-TEST SPLIT
# =============================================================================
# ENGLISH:
# Real-world scenario: Splitting dataset for ML (60% train, 20% val, 20% test)
#
# dataset = np.arange(1000)  # Simulated dataset with 1000 samples
# 
# # Calculate split indices
# train_end = int(0.6 * len(dataset))  # 600
# val_end = int(0.8 * len(dataset))    # 800
# 
# # Create splits using slicing
# train_data = dataset[:train_end]      # [0:600] - first 600 samples
# val_data = dataset[train_end:val_end] # [600:800] - next 200 samples
# test_data = dataset[val_end:]         # [800:] - last 200 samples
# 
# print(f"Train: {len(train_data)} samples")  # 600
# print(f"Val: {len(val_data)} samples")      # 200
# print(f"Test: {len(test_data)} samples")    # 200
#
# HINGLISH:
# Real-world scenario: ML ke liye dataset split karna (60% train, 20% val, 20% test)
#
# dataset = np.arange(1000)  # 1000 samples ke saath simulated dataset
# 
# # Split indices calculate karo
# train_end = int(0.6 * len(dataset))  # 600
# val_end = int(0.8 * len(dataset))    # 800
# 
# # Slicing use karke splits banao
# train_data = dataset[:train_end]      # [0:600] - pehle 600 samples
# val_data = dataset[train_end:val_end] # [600:800] - agle 200 samples
# test_data = dataset[val_end:]         # [800:] - aakhri 200 samples
# 
# print(f"Train: {len(train_data)} samples")  # 600
# print(f"Val: {len(val_data)} samples")      # 200
# print(f"Test: {len(test_data)} samples")    # 200
# =============================================================================

[1 2 3 4 5]
[2 3 4 5 6 7 8]
[1 2 3 4 5]
[1 3 5 7]


In [16]:
# =============================================================================
# MULTI-DIMENSIONAL ARRAYS AND AXIS-BASED OPERATIONS
# =============================================================================

# =============================================================================
# CREATING A 2D ARRAY (MATRIX) FOR AGGREGATION OPERATIONS
# =============================================================================
# ENGLISH:
# - What it does: Creates a 2D array (matrix) with 3 rows and 4 columns to
#   demonstrate axis-based operations and aggregations
# - Why use it: Understanding axis operations is CRUCIAL in ML for computing
#   statistics across different dimensions (e.g., mean per feature, sum per
#   sample, batch normalization, etc.)
# - Structure: 3×4 matrix = 3 rows (samples/observations), 4 columns (features)
# - Common use cases:
#   * Representing datasets: rows=samples, columns=features
#   * Batch operations: computing statistics across batch dimension
#   * Feature engineering: aggregating values across different axes
#   * Image processing: operations across height, width, or channels
#
# HINGLISH:
# - Yeh kya karta hai: Aggregation operations demonstrate karne ke liye 3 rows
#   aur 4 columns ke saath ek 2D array (matrix) banata hai
# - Kab use karein: ML mein axis operations samajhna BAHUT ZAROORI hai kyunki
#   different dimensions across statistics compute karna padta hai (jaise har
#   feature ka mean, har sample ka sum, batch normalization, etc.)
# - Structure: 3×4 matrix = 3 rows (samples/observations), 4 columns (features)
# - Common use cases:
#   * Datasets represent karna: rows=samples, columns=features
#   * Batch operations: batch dimension across statistics compute karna
#   * Feature engineering: alag axes across values aggregate karna
#   * Image processing: height, width, ya channels across operations
# -----------------------------------------------------------------------------

# Demo matrix for the axis examples: 3 samples (rows) × 4 features (columns).
arr2D = np.array(
    [
        [1, 22, 33, 44],     # row 0 → sample 0
        [22, 33, 44, 55],    # row 1 → sample 1
        [66, 77, 88, 99],    # row 2 → sample 2
    ]
)
# Reading each row left to right gives column 0 … column 3 (feature 0 … feature 3).

print(arr2D)
# Output:
# [[ 1 22 33 44]
#  [22 33 44 55]
#  [66 77 88 99]]
# 
# Matrix visualization:
#        Column 0  Column 1  Column 2  Column 3
# Row 0:    1        22        33        44
# Row 1:   22        33        44        55
# Row 2:   66        77        88        99

# =============================================================================
# OPERATION 1: np.sum() - TOTAL SUM (NO AXIS SPECIFIED)
# =============================================================================
# ENGLISH:
# - What it does: Computes the sum of ALL elements in the array, collapsing
#   all dimensions into a single scalar value
# - Syntax: np.sum(array) or array.sum()
# - Why use it: Useful for getting total of all values, computing loss
#   functions, or checking data magnitude
# - When axis is NOT specified:
#   * Treats entire array as flat/1D
#   * Returns single number (scalar)
#   * Sums every single element regardless of position
# - Formula: Returns a + b + c + ... for all elements
# - Common use cases:
#   * Computing total loss across all samples and features
#   * Getting overall dataset statistics
#   * Calculating total pixel intensity in images
#   * Verifying data preprocessing (e.g., ensuring probabilities sum to N)
#   * Computing total weights in neural networks
# - Example:
#   matrix = np.array([[1, 2, 3],
#                      [4, 5, 6]])
#   total = np.sum(matrix)  # 1+2+3+4+5+6 = 21
#   
#   # ML example: Total loss
#   batch_losses = np.array([[0.5, 0.3],
#                            [0.4, 0.2],
#                            [0.6, 0.1]])
#   total_loss = np.sum(batch_losses)  # Sum of all individual losses
#   
#   # Image example: Total pixel intensity
#   image = np.random.randint(0, 256, (100, 100))  # 100x100 grayscale (high is exclusive → values 0-255)
#   total_intensity = np.sum(image)  # Sum of all pixel values
#
# HINGLISH:
# - Yeh kya karta hai: Array ke SAARE elements ka sum compute karta hai, saare
#   dimensions ko ek single scalar value mein collapse karke
# - Syntax: np.sum(array) ya array.sum()
# - Kab use karein: Saari values ka total lene, loss functions compute karne,
#   ya data magnitude check karne ke liye useful hai
# - Jab axis specify NAHI kiya:
#   * Poore array ko flat/1D ki tarah treat karta hai
#   * Single number (scalar) return karta hai
#   * Position se koi fark nahi, har element ko sum karta hai
# - Formula: Saare elements ke liye a + b + c + ... return karta hai
# - Common use cases:
#   * Saare samples aur features across total loss compute karna
#   * Overall dataset statistics lena
#   * Images mein total pixel intensity calculate karna
#   * Data preprocessing verify karna (jaise probabilities ka sum N hai)
#   * Neural networks mein total weights compute karna
# - Example:
#   matrix = np.array([[1, 2, 3],
#                      [4, 5, 6]])
#   total = np.sum(matrix)  # 1+2+3+4+5+6 = 21
#   
#   # ML example: Total loss
#   batch_losses = np.array([[0.5, 0.3],
#                            [0.4, 0.2],
#                            [0.6, 0.1]])
#   total_loss = np.sum(batch_losses)  # Saare individual losses ka sum
#   
#   # Image example: Total pixel intensity
#   image = np.random.randint(0, 256, (100, 100))  # 100x100 grayscale (high exclusive hai → values 0-255)
#   total_intensity = np.sum(image)  # Saare pixel values ka sum
# -----------------------------------------------------------------------------

# No axis given → every element is folded into one scalar (method form of np.sum(arr2D)).
print(arr2D.sum())
# Output: 584
# Calculation: 1+22+33+44+22+33+44+55+66+77+88+99 = 584  (row totals: 100 + 154 + 330)
# Sums every single element in the matrix, regardless of row or column

# =============================================================================
# UNDERSTANDING AXES IN NUMPY - CRITICAL CONCEPT
# =============================================================================
# ENGLISH:
# AXIS CONCEPT - THE MOST IMPORTANT CONCEPT IN NUMPY:
# 
# In a 2D array:
# - axis=0 means "along rows" (vertically, top to bottom)
# - axis=1 means "along columns" (horizontally, left to right)
# 
# THINK OF IT AS: "Which dimension gets COLLAPSED?"
# - axis=0: Rows collapse → Result has same number of columns
# - axis=1: Columns collapse → Result has same number of rows
# 
# Visual representation for arr2D with shape (3, 4):
# 
#        axis=1 →
#        (along columns, horizontal)
#        Column 0  Column 1  Column 2  Column 3
#    ↓     [1        22        33        44]      Row 0
# axis=0   [22       33        44        55]      Row 1
# (along   [66       77        88        99]      Row 2
#  rows,
# vertical)
# 
# When you do np.sum(arr2D, axis=0):
# - You're summing DOWN the rows (collapsing row dimension)
# - For each column, add all row values
# - Result shape: (4,) - one sum per column
# - Column 0: 1+22+66=89, Column 1: 22+33+77=132, etc.
# 
# When you do np.sum(arr2D, axis=1):
# - You're summing ACROSS the columns (collapsing column dimension)
# - For each row, add all column values
# - Result shape: (3,) - one sum per row
# - Row 0: 1+22+33+44=100, Row 1: 22+33+44+55=154, etc.
# 
# MEMORY TRICK:
# axis=0 → "0 is vertical like standing | " → operates on ROWS (vertically)
# axis=1 → "1 is horizontal like lying —" → operates on COLUMNS (horizontally)
# 
# For 3D arrays (e.g. a batch of grayscale images with shape (batch, height, width)):
# - axis=0: Along depth/batch (first dimension)
# - axis=1: Along height/rows (second dimension)  
# - axis=2: Along width/columns (third dimension)
#
# HINGLISH:
# AXIS CONCEPT - NUMPY MEIN SABSE IMPORTANT CONCEPT:
# 
# 2D array mein:
# - axis=0 matlab "rows ke along" (vertically, upar se neeche)
# - axis=1 matlab "columns ke along" (horizontally, left se right)
# 
# AISE SOCHO: "Kaun sa dimension COLLAPSE hota hai?"
# - axis=0: Rows collapse → Result mein same number of columns
# - axis=1: Columns collapse → Result mein same number of rows
# 
# arr2D ke liye visual representation with shape (3, 4):
# 
#        axis=1 →
#        (columns ke along, horizontal)
#        Column 0  Column 1  Column 2  Column 3
#    ↓     [1        22        33        44]      Row 0
# axis=0   [22       33        44        55]      Row 1
# (rows    [66       77        88        99]      Row 2
#  ke along,
# vertical)
# 
# Jab aap np.sum(arr2D, axis=0) karte hain:
# - Aap rows ko NEECHE sum kar rahe hain (row dimension collapse)
# - Har column ke liye, saari row values add karo
# - Result shape: (4,) - har column ka ek sum
# - Column 0: 1+22+66=89, Column 1: 22+33+77=132, etc.
# 
# Jab aap np.sum(arr2D, axis=1) karte hain:
# - Aap columns ko ACROSS sum kar rahe hain (column dimension collapse)
# - Har row ke liye, saari column values add karo
# - Result shape: (3,) - har row ka ek sum
# - Row 0: 1+22+33+44=100, Row 1: 22+33+44+55=154, etc.
# 
# YAAD RAKHNE KA TARIKA:
# axis=0 → "0 khada hai jaise | " → ROWS par operate (vertically)
# axis=1 → "1 leta hua hai jaise —" → COLUMNS par operate (horizontally)
# 
# 3D arrays ke liye (jaise grayscale images ka batch, shape (batch, height, width)):
# - axis=0: Depth/batch ke along (pehla dimension)
# - axis=1: Height/rows ke along (doosra dimension)  
# - axis=2: Width/columns ke along (teesra dimension)
# -----------------------------------------------------------------------------

# =============================================================================
# OPERATION 2: np.sum() WITH axis=0 - SUM ALONG ROWS (COLUMN-WISE SUM)
# =============================================================================
# ENGLISH:
# - What it does: Sums values DOWN each column, collapsing the row dimension.
#   Results in one sum per column.
# - Syntax: np.sum(array, axis=0) or array.sum(axis=0)
# - Why use it: Essential for computing per-feature statistics in ML datasets
#   where rows are samples and columns are features
# - axis=0 behavior:
#   * Operates along axis 0 (rows) - goes DOWN vertically
#   * Collapses row dimension: (3, 4) → (4,)
#   * Returns array with one value per column
#   * Each result is sum of all rows for that column
# - Common use cases:
#   * Computing feature sums across all samples
#   * Batch statistics: sum across batch dimension
#   * Image processing: sum across height (all rows)
#   * Column-wise aggregation in datasets
#   * Computing totals per feature for normalization
# - Example:
#   data = np.array([[10, 20, 30],
#                    [40, 50, 60],
#                    [70, 80, 90]])
#   column_sums = np.sum(data, axis=0)
#   # [120, 150, 180] = [10+40+70, 20+50+80, 30+60+90]
#   
#   # ML example: Feature sums across samples
#   X = np.array([[1.5, 2.3, 3.1],   # Sample 1
#                 [2.1, 1.9, 2.8],   # Sample 2
#                 [1.8, 2.5, 3.2]])  # Sample 3
#   feature_sums = np.sum(X, axis=0)
#   # [5.4, 6.7, 9.1] - sum for each of 3 features
#   feature_means = feature_sums / X.shape[0]  # Mean per feature
#   
#   # Batch processing: Sum across batch
#   batch_predictions = np.array([[0.1, 0.9],
#                                 [0.3, 0.7],
#                                 [0.2, 0.8]])  # 3 samples, 2 classes
#   class_totals = np.sum(batch_predictions, axis=0)
#   # [0.6, 2.4] - total probability for each class across batch
#
# HINGLISH:
# - Yeh kya karta hai: Har column ko NEECHE sum karta hai, row dimension
#   collapse karke. Har column ke liye ek sum milta hai.
# - Syntax: np.sum(array, axis=0) ya array.sum(axis=0)
# - Kab use karein: ML datasets mein per-feature statistics compute karne ke
#   liye essential hai jahan rows samples hain aur columns features hain
# - axis=0 behavior:
#   * Axis 0 (rows) ke along operate karta hai - vertically NEECHE jaata hai
#   * Row dimension collapse: (3, 4) → (4,)
#   * Har column ke liye ek value ka array return karta hai
#   * Har result us column ke saare rows ka sum hai
# - Common use cases:
#   * Saare samples across feature sums compute karna
#   * Batch statistics: batch dimension across sum
#   * Image processing: height across sum (saari rows)
#   * Datasets mein column-wise aggregation
#   * Normalization ke liye har feature ka total compute karna
# - Example:
#   data = np.array([[10, 20, 30],
#                    [40, 50, 60],
#                    [70, 80, 90]])
#   column_sums = np.sum(data, axis=0)
#   # [120, 150, 180] = [10+40+70, 20+50+80, 30+60+90]
#   
#   # ML example: Samples across feature sums
#   X = np.array([[1.5, 2.3, 3.1],   # Sample 1
#                 [2.1, 1.9, 2.8],   # Sample 2
#                 [1.8, 2.5, 3.2]])  # Sample 3
#   feature_sums = np.sum(X, axis=0)
#   # [5.4, 6.7, 9.1] - 3 features mein se har ek ka sum
#   feature_means = feature_sums / X.shape[0]  # Har feature ka mean
#   
#   # Batch processing: Batch across sum
#   batch_predictions = np.array([[0.1, 0.9],
#                                 [0.3, 0.7],
#                                 [0.2, 0.8]])  # 3 samples, 2 classes
#   class_totals = np.sum(batch_predictions, axis=0)
#   # [0.6, 2.4] - batch across har class ka total probability
# -----------------------------------------------------------------------------

# Collapse axis 0 (the row dimension): one total per column.
# Method form of np.sum(arr2D, axis=0).
sum_of_columns = arr2D.sum(axis=0)
print(sum_of_columns)
# Output: [ 89 132 165 198]
# 
# Calculation (summing DOWN each column):
# Column 0: 1 + 22 + 66 = 89
# Column 1: 22 + 33 + 77 = 132
# Column 2: 33 + 44 + 88 = 165
# Column 3: 44 + 55 + 99 = 198
# 
# Visual:
#    Col0  Col1  Col2  Col3
#     ↓     ↓     ↓     ↓
#   [ 1    22    33    44]
#   [22    33    44    55]
#   [66    77    88    99]
#    ↓     ↓     ↓     ↓
#   [89   132   165   198]
# 
# Result shape: (4,) - one sum for each of 4 columns
# Original shape: (3, 4) → axis=0 collapsed → (4,)

# =============================================================================
# OPERATION 3: np.sum() WITH axis=1 - SUM ALONG COLUMNS (ROW-WISE SUM)
# =============================================================================
# ENGLISH:
# - What it does: Sums values ACROSS each row, collapsing the column dimension.
#   Results in one sum per row.
# - Syntax: np.sum(array, axis=1) or array.sum(axis=1)
# - Why use it: Essential for computing per-sample statistics in ML datasets
#   where rows are samples and columns are features
# - axis=1 behavior:
#   * Operates along axis 1 (columns) - goes ACROSS horizontally
#   * Collapses column dimension: (3, 4) → (3,)
#   * Returns array with one value per row
#   * Each result is sum of all columns for that row
# - Common use cases:
#   * Computing total/sum per sample across all features
#   * Row-wise aggregation in datasets
#   * Calculating total score per student (rows=students, cols=subjects)
#   * Sum of predictions per sample across classes
#   * Image processing: sum across width (all columns)
# - Example:
#   data = np.array([[10, 20, 30],
#                    [40, 50, 60],
#                    [70, 80, 90]])
#   row_sums = np.sum(data, axis=1)
#   # [60, 150, 240] = [10+20+30, 40+50+60, 70+80+90]
#   
#   # ML example: Total feature value per sample
#   X = np.array([[1.5, 2.3, 3.1],   # Sample 1
#                 [2.1, 1.9, 2.8],   # Sample 2
#                 [1.8, 2.5, 3.2]])  # Sample 3
#   sample_totals = np.sum(X, axis=1)
#   # [6.9, 6.8, 7.5] - total for each sample across all features
#   
#   # Probability normalization
#   logits = np.array([[2.0, 1.0, 0.5],
#                      [1.5, 2.5, 1.0]])  # 2 samples, 3 classes
#   exp_logits = np.exp(logits)
#   row_sums = np.sum(exp_logits, axis=1, keepdims=True)
#   probabilities = exp_logits / row_sums  # Softmax normalization
#   
#   # Student scores: Total per student
#   scores = np.array([[85, 90, 78],   # Student 1: Math, Science, English
#                      [92, 88, 95],   # Student 2
#                      [78, 85, 82]])  # Student 3
#   total_scores = np.sum(scores, axis=1)
#   # [253, 275, 245] - each student's total score
#
# HINGLISH:
# - Yeh kya karta hai: Har row ko ACROSS sum karta hai, column dimension
#   collapse karke. Har row ke liye ek sum milta hai.
# - Syntax: np.sum(array, axis=1) ya array.sum(axis=1)
# - Kab use karein: ML datasets mein per-sample statistics compute karne ke
#   liye essential hai jahan rows samples hain aur columns features hain
# - axis=1 behavior:
#   * Axis 1 (columns) ke along operate karta hai - horizontally ACROSS jaata hai
#   * Column dimension collapse: (3, 4) → (3,)
#   * Har row ke liye ek value ka array return karta hai
#   * Har result us row ke saare columns ka sum hai
# - Common use cases:
#   * Saare features across har sample ka total/sum compute karna
#   * Datasets mein row-wise aggregation
#   * Har student ka total score calculate karna (rows=students, cols=subjects)
#   * Classes across har sample ke predictions ka sum
#   * Image processing: width across sum (saare columns)
# - Example:
#   data = np.array([[10, 20, 30],
#                    [40, 50, 60],
#                    [70, 80, 90]])
#   row_sums = np.sum(data, axis=1)
#   # [60, 150, 240] = [10+20+30, 40+50+60, 70+80+90]
#   
#   # ML example: Har sample ka total feature value
#   X = np.array([[1.5, 2.3, 3.1],   # Sample 1
#                 [2.1, 1.9, 2.8],   # Sample 2
#                 [1.8, 2.5, 3.2]])  # Sample 3
#   sample_totals = np.sum(X, axis=1)
#   # [6.9, 6.8, 7.5] - saare features across har sample ka total
#   
#   # Probability normalization
#   logits = np.array([[2.0, 1.0, 0.5],
#                      [1.5, 2.5, 1.0]])  # 2 samples, 3 classes
#   exp_logits = np.exp(logits)
#   row_sums = np.sum(exp_logits, axis=1, keepdims=True)
#   probabilities = exp_logits / row_sums  # Softmax normalization
#   
#   # Student scores: Har student ka total
#   scores = np.array([[85, 90, 78],   # Student 1: Math, Science, English
#                      [92, 88, 95],   # Student 2
#                      [78, 85, 82]])  # Student 3
#   total_scores = np.sum(scores, axis=1)
#   # [253, 275, 245] - har student ka total score
# -----------------------------------------------------------------------------

# Collapse axis 1 (the column dimension): one total per row.
# Method form of np.sum(arr2D, axis=1).
sum_of_rows = arr2D.sum(axis=1)
print(sum_of_rows)
# Output: [100 154 330]
# 
# Calculation (summing ACROSS each row):
# Row 0: 1 + 22 + 33 + 44 = 100
# Row 1: 22 + 33 + 44 + 55 = 154
# Row 2: 66 + 77 + 88 + 99 = 330
# 
# Visual:
#   Row 0: [ 1  22  33  44] → 100
#           ←───────────→
#   Row 1: [22  33  44  55] → 154
#           ←───────────→
#   Row 2: [66  77  88  99] → 330
#           ←───────────→
# 
# Result shape: (3,) - one sum for each of 3 rows
# Original shape: (3, 4) → axis=1 collapsed → (3,)

# =============================================================================
# OTHER COMMON AGGREGATION FUNCTIONS WITH AXIS
# =============================================================================
# ENGLISH:
# All these functions work the same way with axis parameter:
# 
# 1. np.mean(array, axis=None/0/1) - Average
#    mean_cols = np.mean(arr2D, axis=0)  # Mean of each column
#    mean_rows = np.mean(arr2D, axis=1)  # Mean of each row
# 
# 2. np.max(array, axis=None/0/1) - Maximum value
#    max_cols = np.max(arr2D, axis=0)  # Max in each column
#    max_rows = np.max(arr2D, axis=1)  # Max in each row
# 
# 3. np.min(array, axis=None/0/1) - Minimum value
#    min_cols = np.min(arr2D, axis=0)  # Min in each column
#    min_rows = np.min(arr2D, axis=1)  # Min in each row
# 
# 4. np.std(array, axis=None/0/1) - Standard deviation
#    std_cols = np.std(arr2D, axis=0)  # Std dev of each column (feature)
#    std_rows = np.std(arr2D, axis=1)  # Std dev of each row (sample)
# 
# 5. np.var(array, axis=None/0/1) - Variance
#    var_cols = np.var(arr2D, axis=0)  # Variance of each column
# 
# 6. np.median(array, axis=None/0/1) - Median value
#    median_cols = np.median(arr2D, axis=0)  # Median of each column
# 
# 7. np.argmax(array, axis=None/0/1) - Index of maximum
#    argmax_cols = np.argmax(arr2D, axis=0)  # Row index of max in each col
#    argmax_rows = np.argmax(arr2D, axis=1)  # Col index of max in each row
# 
# 8. np.argmin(array, axis=None/0/1) - Index of minimum
#    argmin_rows = np.argmin(arr2D, axis=1)  # Col index of min in each row
# 
# 9. np.prod(array, axis=None/0/1) - Product of elements
#    prod_rows = np.prod(arr2D, axis=1)  # Product across each row
# 
# 10. np.cumsum(array, axis=None/0/1) - Cumulative sum
#     cumsum_cols = np.cumsum(arr2D, axis=0)  # Running sum down columns
#
# HINGLISH:
# Yeh saare functions axis parameter ke saath same tarah kaam karte hain:
# 
# 1. np.mean(array, axis=None/0/1) - Average
#    mean_cols = np.mean(arr2D, axis=0)  # Har column ka mean
#    mean_rows = np.mean(arr2D, axis=1)  # Har row ka mean
# 
# 2. np.max(array, axis=None/0/1) - Maximum value
#    max_cols = np.max(arr2D, axis=0)  # Har column mein max
#    max_rows = np.max(arr2D, axis=1)  # Har row mein max
# 
# 3. np.min(array, axis=None/0/1) - Minimum value
#    min_cols = np.min(arr2D, axis=0)  # Har column mein min
#    min_rows = np.min(arr2D, axis=1)  # Har row mein min
# 
# 4. np.std(array, axis=None/0/1) - Standard deviation
#    std_cols = np.std(arr2D, axis=0)  # Har column (feature) ka std dev
#    std_rows = np.std(arr2D, axis=1)  # Har row (sample) ka std dev
# 
# 5. np.var(array, axis=None/0/1) - Variance
#    var_cols = np.var(arr2D, axis=0)  # Har column ka variance
# 
# 6. np.median(array, axis=None/0/1) - Median value
#    median_cols = np.median(arr2D, axis=0)  # Har column ka median
# 
# 7. np.argmax(array, axis=None/0/1) - Maximum ka index
#    argmax_cols = np.argmax(arr2D, axis=0)  # Har col mein max ka row index
#    argmax_rows = np.argmax(arr2D, axis=1)  # Har row mein max ka col index
# 
# 8. np.argmin(array, axis=None/0/1) - Minimum ka index
#    argmin_rows = np.argmin(arr2D, axis=1)  # Har row mein min ka col index
# 
# 9. np.prod(array, axis=None/0/1) - Elements ka product
#    prod_rows = np.prod(arr2D, axis=1)  # Har row across product
# 
# 10. np.cumsum(array, axis=None/0/1) - Cumulative sum
#     cumsum_cols = np.cumsum(arr2D, axis=0)  # Columns neeche running sum
# =============================================================================

# =============================================================================
# PRACTICAL ML EXAMPLE: FEATURE NORMALIZATION
# =============================================================================
# ENGLISH:
# Real-world scenario: Normalizing features (Z-score normalization)
#
# # Dataset: 4 samples, 3 features
# X = np.array([[1.0, 2.0, 3.0],
#               [4.0, 5.0, 6.0],
#               [7.0, 8.0, 9.0],
#               [10.0, 11.0, 12.0]])
# 
# # Compute mean and std for each feature (axis=0)
# feature_means = np.mean(X, axis=0)  # [5.5, 6.5, 7.5]
# feature_stds = np.std(X, axis=0)    # [3.354, 3.354, 3.354]  (population std, ddof=0)
# 
# # Normalize: (X - mean) / std for each feature
# X_normalized = (X - feature_means) / feature_stds
# 
# # Verify normalization worked
# print(np.mean(X_normalized, axis=0))  # ~[0, 0, 0] - mean centered
# print(np.std(X_normalized, axis=0))   # ~[1, 1, 1] - unit variance
#
# HINGLISH:
# Real-world scenario: Features normalize karna (Z-score normalization)
#
# # Dataset: 4 samples, 3 features
# X = np.array([[1.0, 2.0, 3.0],
#               [4.0, 5.0, 6.0],
#               [7.0, 8.0, 9.0],
#               [10.0, 11.0, 12.0]])
# 
# # Har feature ke liye mean aur std compute karo (axis=0)
# feature_means = np.mean(X, axis=0)  # [5.5, 6.5, 7.5]
# feature_stds = np.std(X, axis=0)    # [3.354, 3.354, 3.354]  (population std, ddof=0)
# 
# # Normalize karo: har feature ke liye (X - mean) / std
# X_normalized = (X - feature_means) / feature_stds
# 
# # Verify karo ki normalization kaam kiya
# print(np.mean(X_normalized, axis=0))  # ~[0, 0, 0] - mean centered
# print(np.std(X_normalized, axis=0))   # ~[1, 1, 1] - unit variance
# =============================================================================

[[ 1 22 33 44]
 [22 33 44 55]
 [66 77 88 99]]
584
[ 89 132 165 198]
[100 154 330]


In [21]:
# =============================================================================
# 3D ARRAYS (TENSORS) - MULTI-DIMENSIONAL DATA STRUCTURES
# =============================================================================

# =============================================================================
# CREATING A 3D ARRAY (TENSOR)
# =============================================================================
# ENGLISH:
# - What it does: Creates a 3-dimensional array (tensor) with shape (2, 3, 3).
#   This is essentially a stack of 2D matrices.
# - Why use it: 3D arrays are FUNDAMENTAL in deep learning for representing:
#   * RGB images: (height, width, channels) - e.g., (224, 224, 3)
#   * Video frames: (frames, height, width) or (frames, height, width, channels)
#   * Batches of data: (batch_size, samples, features)
#   * Convolutional layer outputs: (batch, height, width, filters)
#   * Time series batches: (batch, timesteps, features)
# - Structure breakdown:
#   * First dimension (axis 0): "depth" or "layers" - 2 matrices stacked
#   * Second dimension (axis 1): rows within each matrix - 3 rows
#   * Third dimension (axis 2): columns within each matrix - 3 columns
# - Shape (2, 3, 3) means:
#   * 2 "layers" or "matrices" or "channels"
#   * Each matrix has 3 rows
#   * Each matrix has 3 columns
#   * Total elements: 2 × 3 × 3 = 18 elements
# - Think of it as:
#   * A box containing 2 sheets of paper (layers/matrices)
#   * Each sheet has a 3×3 grid of numbers
#   * You need 3 indices to locate any number: [which_layer, which_row, which_column]
# - Common use cases:
#   * Color images: arr[height, width, RGB_channel]
#   * Batch of grayscale images: arr[image_num, height, width]
#   * Video data: arr[frame, height, width]
#   * CNN feature maps: arr[batch, feature_map_h, feature_map_w]
#   * Sequence batches: arr[batch, sequence_length, feature_dim]
# - Example:
#   # RGB image (100×100 pixels, 3 color channels)
#   image = np.zeros((100, 100, 3))  # shape: (100, 100, 3)
#   # Accessing pixel at row 50, col 30, red channel
#   red_value = image[50, 30, 0]
#   
#   # Batch of 32 grayscale images (28×28 each)
#   batch = np.zeros((32, 28, 28))  # shape: (32, 28, 28)
#   # Accessing 5th image in batch
#   fifth_image = batch[4, :, :]  # shape: (28, 28)
#   
#   # Time series: 64 samples, each with 100 timesteps, 10 features
#   timeseries = np.zeros((64, 100, 10))  # shape: (64, 100, 10)
#   # Accessing first sample's data at timestep 50
#   features_at_t50 = timeseries[0, 50, :]  # shape: (10,)
#
# HINGLISH:
# - Yeh kya karta hai: Shape (2, 3, 3) ke saath ek 3-dimensional array (tensor)
#   banata hai. Yeh basically 2D matrices ka stack hai.
# - Kab use karein: 3D arrays deep learning mein represent karne ke liye
#   FUNDAMENTAL hain:
#   * RGB images: (height, width, channels) - jaise (224, 224, 3)
#   * Video frames: (frames, height, width) ya (frames, height, width, channels)
#   * Data ke batches: (batch_size, samples, features)
#   * Convolutional layer outputs: (batch, height, width, filters)
#   * Time series batches: (batch, timesteps, features)
# - Structure breakdown:
#   * Pehla dimension (axis 0): "depth" ya "layers" - 2 matrices stacked
#   * Doosra dimension (axis 1): har matrix ke andar rows - 3 rows
#   * Teesra dimension (axis 2): har matrix ke andar columns - 3 columns
# - Shape (2, 3, 3) ka matlab:
#   * 2 "layers" ya "matrices" ya "channels"
#   * Har matrix mein 3 rows hain
#   * Har matrix mein 3 columns hain
#   * Total elements: 2 × 3 × 3 = 18 elements
# - Aise socho:
#   * Ek box jismein 2 sheets of paper hain (layers/matrices)
#   * Har sheet par 3×3 grid of numbers hai
#   * Kisi bhi number ko locate karne ke liye 3 indices chahiye: [kaun_si_layer, kaun_si_row, kaun_sa_column]
# - Common use cases:
#   * Color images: arr[height, width, RGB_channel]
#   * Grayscale images ka batch: arr[image_num, height, width]
#   * Video data: arr[frame, height, width]
#   * CNN feature maps: arr[batch, feature_map_h, feature_map_w]
#   * Sequence batches: arr[batch, sequence_length, feature_dim]
# - Example:
#   # RGB image (100×100 pixels, 3 color channels)
#   image = np.zeros((100, 100, 3))  # shape: (100, 100, 3)
#   # Row 50, col 30, red channel par pixel access karna
#   red_value = image[50, 30, 0]
#   
#   # 32 grayscale images ka batch (har ek 28×28)
#   batch = np.zeros((32, 28, 28))  # shape: (32, 28, 28)
#   # Batch mein 5th image access karna
#   fifth_image = batch[4, :, :]  # shape: (28, 28)
#   
#   # Time series: 64 samples, har ek mein 100 timesteps, 10 features
#   timeseries = np.zeros((64, 100, 10))  # shape: (64, 100, 10)
#   # Pehle sample ka data timestep 50 par access karna
#   features_at_t50 = timeseries[0, 50, :]  # shape: (10,)
# -----------------------------------------------------------------------------

# Two 3×3 matrices stacked along axis 0, giving a tensor of shape (2, 3, 3).
arr3D = np.array(
    [
        [   # layer 0 → arr3D[0]
            [1, 2, 3],       # row 0
            [4, 5, 6],       # row 1
            [7, 8, 9],       # row 2
        ],
        [   # layer 1 → arr3D[1]
            [11, 22, 33],    # row 0
            [44, 55, 66],    # row 1
            [77, 88, 99],    # row 2
        ],
    ]
)

# =============================================================================
# VISUALIZING 3D ARRAY STRUCTURE
# =============================================================================
# ENGLISH:
# Visual representation of arr3D with shape (2, 3, 3):
#
# Think of it as 2 layers stacked on top of each other:
#
# Layer 0 (arr3D[0]):          Layer 1 (arr3D[1]):
# ┌─────────────┐              ┌─────────────┐
# │ 1   2   3  │              │ 11  22  33 │
# │ 4   5   6  │              │ 44  55  66 │
# │ 7   8   9  │              │ 77  88  99 │
# └─────────────┘              └─────────────┘
#
# 3D coordinates system:
# - arr3D[layer, row, column]
# - arr3D[axis_0, axis_1, axis_2]
# - arr3D[depth, height, width]
#
# Examples of accessing elements:
# arr3D[0, 0, 0] = 1   (Layer 0, Row 0, Col 0)
# arr3D[0, 1, 1] = 5   (Layer 0, Row 1, Col 1)
# arr3D[1, 0, 0] = 11  (Layer 1, Row 0, Col 0)
# arr3D[1, 1, 1] = 55  (Layer 1, Row 1, Col 1)
# arr3D[1, 2, 2] = 99  (Layer 1, Row 2, Col 2)
#
# HINGLISH:
# Shape (2, 3, 3) ke saath arr3D ka visual representation:
#
# Ise 2 layers ki tarah socho jo ek doosre ke upar stacked hain:
#
# Layer 0 (arr3D[0]):          Layer 1 (arr3D[1]):
# ┌─────────────┐              ┌─────────────┐
# │ 1   2   3  │              │ 11  22  33 │
# │ 4   5   6  │              │ 44  55  66 │
# │ 7   8   9  │              │ 77  88  99 │
# └─────────────┘              └─────────────┘
#
# 3D coordinates system:
# - arr3D[layer, row, column]
# - arr3D[axis_0, axis_1, axis_2]
# - arr3D[depth, height, width]
#
# Elements access karne ke examples:
# arr3D[0, 0, 0] = 1   (Layer 0, Row 0, Col 0)
# arr3D[0, 1, 1] = 5   (Layer 0, Row 1, Col 1)
# arr3D[1, 0, 0] = 11  (Layer 1, Row 0, Col 0)
# arr3D[1, 1, 1] = 55  (Layer 1, Row 1, Col 1)
# arr3D[1, 2, 2] = 99  (Layer 1, Row 2, Col 2)
# -----------------------------------------------------------------------------

# Show the tensor followed by its shape tuple, separated by a single space.
print(f"{arr3D} {arr3D.shape}")
# Output:
# [[[ 1  2  3]
#   [ 4  5  6]
#   [ 7  8  9]]
#
#  [[11 22 33]
#   [44 55 66]
#   [77 88 99]]] (2, 3, 3)
#
# Shape (2, 3, 3) breakdown:
# - First number (2): Number of 3×3 matrices (depth/layers)
# - Second number (3): Number of rows in each matrix
# - Third number (3): Number of columns in each matrix
# - Total elements: 2 × 3 × 3 = 18

# =============================================================================
# 3D ARRAY INDEXING - ACCESSING INDIVIDUAL ELEMENTS
# =============================================================================
# ENGLISH:
# - What it does: Accesses a single element in a 3D array using three indices:
#   [layer_index, row_index, column_index]
# - Syntax: arr3D[i, j, k] where:
#   * i = which layer/matrix (axis 0) - ranges from 0 to 1 in this case
#   * j = which row within that matrix (axis 1) - ranges from 0 to 2
#   * k = which column within that row (axis 2) - ranges from 0 to 2
# - Why use it: Essential for accessing specific values in multi-dimensional
#   data like individual pixel values in images, specific features in batches,
#   or particular timesteps in sequences
# - Indexing order matters:
#   * Always: [slowest-changing, ..., fastest-changing]
#   * For images: [batch, height, width, channel] or [height, width, channel]
#   * For video: [frame, height, width] or [frame, height, width, channel]
#   * For batches: [sample, feature_1, feature_2, ...]
# - Common use cases:
#   * Getting pixel value: image[y, x, channel]
#   * Accessing specific frame: video[frame_num, y, x]
#   * Getting element from batch: batch[sample_idx, row, col]
#   * Extracting feature value: data[sample, timestep, feature]
# - Example:
#   # RGB image (256×256, 3 channels)
#   img = np.random.rand(256, 256, 3)
#   red_pixel = img[100, 150, 0]    # Red value at pixel (100, 150)
#   green_pixel = img[100, 150, 1]  # Green value at same pixel
#   blue_pixel = img[100, 150, 2]   # Blue value at same pixel
#   
#   # Batch of images (32 images, 28×28 each)
#   batch = np.random.rand(32, 28, 28)
#   pixel_value = batch[5, 10, 15]  # Pixel at (10,15) in 6th image
#   
#   # Time series (10 samples, 100 timesteps, 5 features)
#   ts_data = np.random.rand(10, 100, 5)
#   feature_val = ts_data[0, 50, 2]  # 3rd feature at timestep 50, sample 0
#
# HINGLISH:
# - Yeh kya karta hai: Teen indices use karke 3D array mein ek single element
#   access karta hai: [layer_index, row_index, column_index]
# - Syntax: arr3D[i, j, k] jahan:
#   * i = kaun si layer/matrix (axis 0) - is case mein 0 se 1 tak
#   * j = us matrix mein kaun si row (axis 1) - 0 se 2 tak
#   * k = us row mein kaun sa column (axis 2) - 0 se 2 tak
# - Kab use karein: Multi-dimensional data mein specific values access karne
#   ke liye essential hai jaise images mein individual pixel values, batches
#   mein specific features, ya sequences mein particular timesteps
# - Indexing order important hai:
#   * Hamesha: [slowest-changing, ..., fastest-changing]
#   * Images ke liye: [batch, height, width, channel] ya [height, width, channel]
#   * Video ke liye: [frame, height, width] ya [frame, height, width, channel]
#   * Batches ke liye: [sample, feature_1, feature_2, ...]
# - Common use cases:
#   * Pixel value lena: image[y, x, channel]
#   * Specific frame access karna: video[frame_num, y, x]
#   * Batch se element lena: batch[sample_idx, row, col]
#   * Feature value extract karna: data[sample, timestep, feature]
# - Example:
#   # RGB image (256×256, 3 channels)
#   img = np.random.rand(256, 256, 3)
#   red_pixel = img[100, 150, 0]    # Pixel (100, 150) par red value
#   green_pixel = img[100, 150, 1]  # Same pixel par green value
#   blue_pixel = img[100, 150, 2]   # Same pixel par blue value
#   
#   # Images ka batch (32 images, har ek 28×28)
#   batch = np.random.rand(32, 28, 28)
#   pixel_value = batch[5, 10, 15]  # 6th image mein (10,15) par pixel
#   
#   # Time series (10 samples, 100 timesteps, 5 features)
#   ts_data = np.random.rand(10, 100, 5)
#   feature_val = ts_data[0, 50, 2]  # Sample 0 mein timestep 50 par 3rd feature
# -----------------------------------------------------------------------------

# Read one scalar: layer 1, row 1, column 1 (zero-based, so the second of each).
print(arr3D[1, 1, 1])
# Output: 55
#
# Breakdown of indexing arr3D[1, 1, 1]:
# - Index 0 (layer/matrix): 1 → Select the SECOND matrix (Matrix 1)
# - Index 1 (row): 1 → Select the SECOND row within Matrix 1
# - Index 2 (column): 1 → Select the SECOND column within that row
#
# Visual trace:
# Step 1: arr3D[1] → Select Matrix 1
#   [[11, 22, 33],
#    [44, 55, 66],  ← We want this row
#    [77, 88, 99]]
#
# Step 2: arr3D[1, 1] → Select Row 1 of Matrix 1
#   [44, 55, 66]
#        ↑
#     We want this column
#
# Step 3: arr3D[1, 1, 1] → Select Column 1
#   55 ← Final result

# =============================================================================
# MORE 3D INDEXING EXAMPLES
# =============================================================================
# ENGLISH:
# Accessing different elements to understand indexing:
#
# # Get first element (top-left of first matrix)
# print(arr3D[0, 0, 0])  # Output: 1
#
# # Get last element (bottom-right of second matrix)
# print(arr3D[1, 2, 2])  # Output: 99
#
# # Get entire first matrix (all rows and columns of layer 0)
# print(arr3D[0])        # Output: [[1,2,3], [4,5,6], [7,8,9]]
# # Or equivalently:
# print(arr3D[0, :, :])  # Same output
#
# # Get entire second matrix
# print(arr3D[1])        # Output: [[11,22,33], [44,55,66], [77,88,99]]
#
# # Get first row from first matrix
# print(arr3D[0, 0])     # Output: [1, 2, 3]
# # Or equivalently:
# print(arr3D[0, 0, :])  # Same output
#
# # Get middle row from second matrix
# print(arr3D[1, 1])     # Output: [44, 55, 66]
#
# # Get first column from all matrices (all layers, all rows, column 0)
# print(arr3D[:, :, 0])  # Output: [[1,4,7], [11,44,77]]
#
# # Get middle element from all matrices (all layers, row 1, col 1)
# print(arr3D[:, 1, 1])  # Output: [5, 55]
#
# # Using negative indices
# print(arr3D[-1, -1, -1])  # Output: 99 (last layer, last row, last column)
#
# HINGLISH:
# Indexing samajhne ke liye alag elements access karna:
#
# # Pehla element lo (pehli matrix ka top-left)
# print(arr3D[0, 0, 0])  # Output: 1
#
# # Aakhri element lo (doosri matrix ka bottom-right)
# print(arr3D[1, 2, 2])  # Output: 99
#
# # Poori pehli matrix lo (layer 0 ki saari rows aur columns)
# print(arr3D[0])        # Output: [[1,2,3], [4,5,6], [7,8,9]]
# # Ya equivalently:
# print(arr3D[0, :, :])  # Same output
#
# # Poori doosri matrix lo
# print(arr3D[1])        # Output: [[11,22,33], [44,55,66], [77,88,99]]
#
# # Pehli matrix se pehli row lo
# print(arr3D[0, 0])     # Output: [1, 2, 3]
# # Ya equivalently:
# print(arr3D[0, 0, :])  # Same output
#
# # Doosri matrix se middle row lo
# print(arr3D[1, 1])     # Output: [44, 55, 66]
#
# # Saari matrices se pehla column lo (saari layers, saari rows, column 0)
# print(arr3D[:, :, 0])  # Output: [[1,4,7], [11,44,77]]
#
# # Saari matrices se middle element lo (saari layers, row 1, col 1)
# print(arr3D[:, 1, 1])  # Output: [5, 55]
#
# # Negative indices use karna
# print(arr3D[-1, -1, -1])  # Output: 99 (last layer, last row, last column)
# =============================================================================

# =============================================================================
# 3D ARRAY AXIS OPERATIONS
# =============================================================================
# ENGLISH:
# Understanding axis operations in 3D arrays (CRITICAL for ML):
#
# For arr3D with shape (2, 3, 3):
# - axis=0: Operates along depth/layers (collapses layers)
# - axis=1: Operates along rows (collapses rows)
# - axis=2: Operates along columns (collapses columns)
#
# Examples:
#
# # Sum along axis 0 (collapse layers) - shape (2,3,3) → (3,3)
# sum_axis0 = np.sum(arr3D, axis=0)
# # Adds corresponding elements from both matrices
# # [[1+11, 2+22, 3+33],
# #  [4+44, 5+55, 6+66],
# #  [7+77, 8+88, 9+99]]
# # = [[12, 24, 36], [48, 60, 72], [84, 96, 108]]
#
# # Sum along axis 1 (collapse rows) - shape (2,3,3) → (2,3)
# sum_axis1 = np.sum(arr3D, axis=1)
# # For each matrix, sums down the rows (column-wise sums)
# # Matrix 0: [1+4+7, 2+5+8, 3+6+9] = [12, 15, 18]
# # Matrix 1: [11+44+77, 22+55+88, 33+66+99] = [132, 165, 198]
# # Result: [[12, 15, 18], [132, 165, 198]]
#
# # Sum along axis 2 (collapse columns) - shape (2,3,3) → (2,3)
# sum_axis2 = np.sum(arr3D, axis=2)
# # For each matrix, sums across the columns (row-wise sums)
# # Matrix 0: [1+2+3, 4+5+6, 7+8+9] = [6, 15, 24]
# # Matrix 1: [11+22+33, 44+55+66, 77+88+99] = [66, 165, 264]
# # Result: [[6, 15, 24], [66, 165, 264]]
#
# # Sum along multiple axes
# sum_axis_01 = np.sum(arr3D, axis=(0, 1))  # Collapse layers and rows → (3,)
# sum_all = np.sum(arr3D)  # Collapse all axes → scalar
#
# HINGLISH:
# 3D arrays mein axis operations samajhna (ML ke liye CRITICAL):
#
# Shape (2, 3, 3) wale arr3D ke liye:
# - axis=0: Depth/layers ke along operate (layers collapse)
# - axis=1: Rows ke along operate (rows collapse)
# - axis=2: Columns ke along operate (columns collapse)
#
# Examples:
#
# # axis 0 ke along sum (layers collapse) - shape (2,3,3) → (3,3)
# sum_axis0 = np.sum(arr3D, axis=0)
# # Dono matrices se corresponding elements add karta hai
# # [[1+11, 2+22, 3+33],
# #  [4+44, 5+55, 6+66],
# #  [7+77, 8+88, 9+99]]
# # = [[12, 24, 36], [48, 60, 72], [84, 96, 108]]
#
# # axis 1 ke along sum (rows collapse) - shape (2,3,3) → (2,3)
# sum_axis1 = np.sum(arr3D, axis=1)
# # Har matrix ke liye, rows ko neeche sum karta hai (column-wise sums)
# # Matrix 0: [1+4+7, 2+5+8, 3+6+9] = [12, 15, 18]
# # Matrix 1: [11+44+77, 22+55+88, 33+66+99] = [132, 165, 198]
# # Result: [[12, 15, 18], [132, 165, 198]]
#
# # axis 2 ke along sum (columns collapse) - shape (2,3,3) → (2,3)
# sum_axis2 = np.sum(arr3D, axis=2)
# # Har matrix ke liye, columns ko across sum karta hai (row-wise sums)
# # Matrix 0: [1+2+3, 4+5+6, 7+8+9] = [6, 15, 24]
# # Matrix 1: [11+22+33, 44+55+66, 77+88+99] = [66, 165, 264]
# # Result: [[6, 15, 24], [66, 165, 264]]
#
# # Multiple axes ke along sum
# sum_axis_01 = np.sum(arr3D, axis=(0, 1))  # Layers aur rows collapse → (3,)
# sum_all = np.sum(arr3D)  # Saare axes collapse → scalar
# =============================================================================

# =============================================================================
# PRACTICAL ML EXAMPLE: RGB IMAGE OPERATIONS
# =============================================================================
# ENGLISH:
# Real-world scenario: Processing an RGB image
#
# # Simulate RGB image: 4×4 pixels, 3 color channels (R, G, B)
# rgb_image = np.array([
#     # Pixel row 0
#     [[255, 0, 0],    [255, 0, 0],    [0, 255, 0],    [0, 255, 0]],
#     # Pixel row 1
#     [[255, 0, 0],    [255, 0, 0],    [0, 255, 0],    [0, 255, 0]],
#     # Pixel row 2
#     [[0, 0, 255],    [0, 0, 255],    [255, 255, 0],  [255, 255, 0]],
#     # Pixel row 3
#     [[0, 0, 255],    [0, 0, 255],    [255, 255, 0],  [255, 255, 0]]
# ])  # Shape: (4, 4, 3) - height=4, width=4, channels=3
#
# # Get red channel only (extract plane)
# red_channel = rgb_image[:, :, 0]  # Shape: (4, 4)
#
# # Get green channel
# green_channel = rgb_image[:, :, 1]  # Shape: (4, 4)
#
# # Get blue channel
# blue_channel = rgb_image[:, :, 2]  # Shape: (4, 4)
#
# # Convert to grayscale (average of R, G, B)
# grayscale = np.mean(rgb_image, axis=2)  # Shape: (4, 4)
#
# # Get pixel at position (2, 3) - all channels
# pixel_rgb = rgb_image[2, 3, :]  # Shape: (3,) - [R, G, B] values
#
# # Get average intensity per channel across entire image
# avg_red = np.mean(rgb_image[:, :, 0])
# avg_green = np.mean(rgb_image[:, :, 1])
# avg_blue = np.mean(rgb_image[:, :, 2])
# # Or more efficiently:
# channel_averages = np.mean(rgb_image, axis=(0, 1))  # Shape: (3,)
#
# HINGLISH:
# Real-world scenario: RGB image process karna
#
# # RGB image simulate karo: 4×4 pixels, 3 color channels (R, G, B)
# rgb_image = np.array([
#     # Pixel row 0
#     [[255, 0, 0],    [255, 0, 0],    [0, 255, 0],    [0, 255, 0]],
#     # Pixel row 1
#     [[255, 0, 0],    [255, 0, 0],    [0, 255, 0],    [0, 255, 0]],
#     # Pixel row 2
#     [[0, 0, 255],    [0, 0, 255],    [255, 255, 0],  [255, 255, 0]],
#     # Pixel row 3
#     [[0, 0, 255],    [0, 0, 255],    [255, 255, 0],  [255, 255, 0]]
# ])  # Shape: (4, 4, 3) - height=4, width=4, channels=3
#
# # Sirf red channel lo (plane extract karo)
# red_channel = rgb_image[:, :, 0]  # Shape: (4, 4)
#
# # Green channel lo
# green_channel = rgb_image[:, :, 1]  # Shape: (4, 4)
#
# # Blue channel lo
# blue_channel = rgb_image[:, :, 2]  # Shape: (4, 4)
#
# # Grayscale mein convert karo (R, G, B ka average)
# grayscale = np.mean(rgb_image, axis=2)  # Shape: (4, 4)
#
# # Position (2, 3) par pixel lo - saare channels
# pixel_rgb = rgb_image[2, 3, :]  # Shape: (3,) - [R, G, B] values
#
# # Poori image mein har channel ka average intensity lo
# avg_red = np.mean(rgb_image[:, :, 0])
# avg_green = np.mean(rgb_image[:, :, 1])
# avg_blue = np.mean(rgb_image[:, :, 2])
# # Ya zyada efficiently:
# channel_averages = np.mean(rgb_image, axis=(0, 1))  # Shape: (3,)
# =============================================================================

# =============================================================================
# 3D SLICING EXAMPLES
# =============================================================================
# ENGLISH:
# Slicing works independently on each axis:
#
# # Get both matrices, first 2 rows, all columns
# slice1 = arr3D[:, :2, :]
# # Shape: (2, 2, 3)
#
# # Get first matrix, all rows, last 2 columns
# slice2 = arr3D[0, :, -2:]
# # Shape: (3, 2)
#
# # Get second matrix, middle row only
# slice3 = arr3D[1, 1, :]
# # Shape: (3,) - [44, 55, 66]
#
# # Get diagonal elements from both matrices
# diag0 = arr3D[0, [0,1,2], [0,1,2]]  # [1, 5, 9]
# diag1 = arr3D[1, [0,1,2], [0,1,2]]  # [11, 55, 99]
#
# HINGLISH:
# Har axis par independently slicing kaam karti hai:
#
# # Dono matrices lo, pehli 2 rows, saare columns
# slice1 = arr3D[:, :2, :]
# # Shape: (2, 2, 3)
#
# # Pehli matrix lo, saari rows, aakhri 2 columns
# slice2 = arr3D[0, :, -2:]
# # Shape: (3, 2)
#
# # Doosri matrix lo, sirf middle row
# slice3 = arr3D[1, 1, :]
# # Shape: (3,) - [44, 55, 66]
#
# # Dono matrices se diagonal elements lo
# diag0 = arr3D[0, [0,1,2], [0,1,2]]  # [1, 5, 9]
# diag1 = arr3D[1, [0,1,2], [0,1,2]]  # [11, 55, 99]
# =============================================================================

# **Quick Reference: 3D Array Structure**

# | Dimension | Name | Common Interpretations | Index Range (this example) |
# |-----------|------|------------------------|---------------------------|
# | axis=0 | Depth/Layer/Batch | Matrices, Images, Samples | 0-1 (2 layers) |
# | axis=1 | Height/Row | Vertical position | 0-2 (3 rows) |
# | axis=2 | Width/Column | Horizontal position | 0-2 (3 columns) |

# **Common 3D (and 4D) Shapes in ML:**
# - **RGB Image**: `(height, width, 3)` - e.g., `(224, 224, 3)`
# - **Batch of Grayscale**: `(batch, height, width)` - e.g., `(32, 28, 28)`
# - **Batch of RGB** (4D): `(batch, height, width, 3)` - e.g., `(32, 224, 224, 3)`
# - **Time Series Batch**: `(batch, timesteps, features)` - e.g., `(64, 100, 10)`

# **Indexing Pattern:**
# ```
# arr3D[layer, row, column]
#       ↑      ↑     ↑
#    axis=0  axis=1 axis=2
#    depth   height  width
# ```

[[[ 1  2  3]
  [ 4  5  6]
  [ 7  8  9]]

 [[11 22 33]
  [44 55 66]
  [77 88 99]]] (2, 3, 3)
55


In [23]:
# =============================================================================
# NUMPY VECTORIZATION - EFFICIENT ELEMENT-WISE OPERATIONS
# =============================================================================

# =============================================================================
# WHAT IS VECTORIZATION?
# =============================================================================
# ENGLISH:
# Vectorization is NumPy's ability to perform operations on entire arrays
# WITHOUT explicit Python loops. This is:
# - MUCH faster (10-100x) than Python loops because operations run in C
# - More readable and concise
# - Able to use SIMD instructions on modern CPUs (data-level parallelism, not
#   automatic multi-threading)
# - The CORE reason NumPy exists and why it's essential for ML
#
# Instead of:
#   result = []
#   for i in range(len(arr)):
#       result.append(arr[i] ** 2)  # SLOW - Python loop
#
# You write:
#   result = arr ** 2  # FAST - vectorized operation
#
# HINGLISH:
# Vectorization NumPy ki ability hai poore arrays par operations perform karne
# ki BINA explicit Python loops ke. Yeh:
# - Python loops se BAHUT zyada fast hai (10-100x) kyunki operations C mein
#   run hote hain
# - Zyada readable aur concise hai
# - Modern CPUs par SIMD instructions use kar sakta hai (data-level parallelism,
#   automatic multi-threading nahi)
# - NumPy exist karne ki aur ML ke liye essential hone ki CORE reason hai
#
# Iske bajaye:
#   result = []
#   for i in range(len(arr)):
#       result.append(arr[i] ** 2)  # SLOW - Python loop
#
# Aap likhte hain:
#   result = arr ** 2  # FAST - vectorized operation
# -----------------------------------------------------------------------------

# =============================================================================
# CREATING ARRAYS FOR VECTORIZATION DEMONSTRATIONS
# =============================================================================
# ENGLISH:
# - What it does: Creates sample arrays to demonstrate vectorized operations
# - arr: 1D array with 5 elements
# - arr2: Another 1D array with 5 elements (same shape as arr)
# - arr3: 2D array with shape (2, 5) - demonstrates broadcasting
#
# HINGLISH:
# - Yeh kya karta hai: Vectorized operations demonstrate karne ke liye sample
#   arrays banata hai
# - arr: 5 elements wala 1D array
# - arr2: Doosra 1D array with 5 elements (arr jaisa hi shape)
# - arr3: Shape (2, 5) wala 2D array - broadcasting demonstrate karta hai
# -----------------------------------------------------------------------------

# Demo operands: two same-shape 1D arrays plus a 2D array whose last axis
# has the same length as the 1D arrays.
arr = np.array((1, 2, 3, 4, 5))
arr2 = np.array((5, 6, 7, 8, 9))
arr3 = np.array((
    (1, 2, 3, 4, 5),  # Row 0
    (2, 3, 4, 5, 6),  # Row 1
))

# =============================================================================
# OPERATION 1: ELEMENT-WISE EXPONENTIATION (arr ** 2)
# =============================================================================
# ENGLISH:
# - What it does: Squares each element in the array independently. The **
#   operator performs element-wise exponentiation.
# - Syntax: array ** power
# - Why use it: Common in ML for:
#   * Computing squared errors: (predictions - actual) ** 2
#   * Calculating variance and standard deviation
#   * Feature engineering: creating polynomial features
#   * L2 regularization: sum of squared weights
#   * Euclidean distance: sqrt(sum((x - y) ** 2))
# - How it works:
#   * Takes each element independently
#   * Raises it to the specified power
#   * Returns new array with same shape
#   * MUCH faster than Python loop
# - Performance comparison:
#   # Slow Python way (avoid this):
#   result = [x ** 2 for x in arr]  # List comprehension - slower
#   
#   # Fast NumPy way (use this):
#   result = arr ** 2  # Vectorized - 10-100x faster
# - Common use cases:
#   * Mean Squared Error (MSE): np.mean((y_pred - y_true) ** 2)
#   * L2 norm (magnitude): np.sqrt(np.sum(vector ** 2))
#   * Variance: np.mean((data - mean) ** 2)
#   * Polynomial features: X_squared = X ** 2
#   * Distance calculations in clustering/KNN
# - Example:
#   # Mean Squared Error calculation
#   predictions = np.array([2.5, 3.0, 4.2, 5.1])
#   actual = np.array([2.0, 3.5, 4.0, 5.0])
#   squared_errors = (predictions - actual) ** 2
#   # [0.25, 0.25, 0.04, 0.01]
#   mse = np.mean(squared_errors)  # 0.1375
#   
#   # L2 norm (Euclidean length of vector)
#   vector = np.array([3, 4])
#   magnitude = np.sqrt(np.sum(vector ** 2))  # 5.0
#   
#   # Creating polynomial features
#   X = np.array([1, 2, 3, 4, 5])
#   X_poly = np.column_stack([X, X**2, X**3])
#   # [[1, 1, 1], [2, 4, 8], [3, 9, 27], [4, 16, 64], [5, 25, 125]]
#
# HINGLISH:
# - Yeh kya karta hai: Array ke har element ko independently square karta hai.
#   ** operator element-wise exponentiation perform karta hai.
# - Syntax: array ** power
# - Kab use karein: ML mein common hai:
#   * Squared errors compute karna: (predictions - actual) ** 2
#   * Variance aur standard deviation calculate karna
#   * Feature engineering: polynomial features banana
#   * L2 regularization: squared weights ka sum
#   * Euclidean distance: sqrt(sum((x - y) ** 2))
# - Kaise kaam karta hai:
#   * Har element ko independently leta hai
#   * Use specified power tak raise karta hai
#   * Same shape wala naya array return karta hai
#   * Python loop se BAHUT zyada fast hai
# - Performance comparison:
#   # Slow Python tarika (isse avoid karo):
#   result = [x ** 2 for x in arr]  # List comprehension - slower
#   
#   # Fast NumPy tarika (isse use karo):
#   result = arr ** 2  # Vectorized - 10-100x faster
# - Common use cases:
#   * Mean Squared Error (MSE): np.mean((y_pred - y_true) ** 2)
#   * L2 norm (magnitude): np.sqrt(np.sum(vector ** 2))
#   * Variance: np.mean((data - mean) ** 2)
#   * Polynomial features: X_squared = X ** 2
#   * Clustering/KNN mein distance calculations
# - Example:
#   # Mean Squared Error calculation
#   predictions = np.array([2.5, 3.0, 4.2, 5.1])
#   actual = np.array([2.0, 3.5, 4.0, 5.0])
#   squared_errors = (predictions - actual) ** 2
#   # [0.25, 0.25, 0.04, 0.01]
#   mse = np.mean(squared_errors)  # 0.1375
#   
#   # L2 norm (vector ki Euclidean length)
#   vector = np.array([3, 4])
#   magnitude = np.sqrt(np.sum(vector ** 2))  # 5.0
#   
#   # Polynomial features banana
#   X = np.array([1, 2, 3, 4, 5])
#   X_poly = np.column_stack([X, X**2, X**3])
#   # [[1, 1, 1], [2, 4, 8], [3, 9, 27], [4, 16, 64], [5, 25, 125]]
# -----------------------------------------------------------------------------

# Square every element of arr with a single vectorized expression (no explicit loop).
print(arr ** 2)
# Output: [ 1  4  9 16 25]
#
# Element-wise calculation:
# [1, 2, 3, 4, 5] ** 2
#  ↓  ↓  ↓  ↓  ↓
# [1² 2² 3² 4² 5²]
#  ↓  ↓  ↓  ↓  ↓
# [1, 4, 9, 16, 25]
#
# Each element is squared independently
# No loops needed - operation is vectorized

# =============================================================================
# OPERATION 2: SCALAR ADDITION (arr + scalar)
# =============================================================================
# ENGLISH:
# - What it does: Adds a scalar (single number) to every element in the array.
#   This is called "broadcasting" - the scalar is automatically extended to
#   match the array shape.
# - Syntax: array + scalar (or scalar + array - commutative)
# - Why use it: Essential for:
#   * Data normalization: shifting data by a constant
#   * Bias addition in neural networks: output = weights @ input + bias
#   * Feature scaling: adjusting ranges
#   * Temperature conversion: celsius + 273.15 = kelvin
#   * Offset corrections in data preprocessing
# - Broadcasting behavior:
#   * Scalar is treated as array of same shape with repeated value
#   * [1, 2, 3] + 2 becomes [1, 2, 3] + [2, 2, 2]
#   * Then element-wise addition is performed
# - All arithmetic operators work this way:
#   * Addition: arr + 5
#   * Subtraction: arr - 3
#   * Multiplication: arr * 2
#   * Division: arr / 4
#   * Power: arr ** 0.5
#   * Modulo: arr % 2
# - Common use cases:
#   * Data centering: data - mean
#   * Adding bias in neural networks
#   * Temperature conversions
#   * Shifting probability distributions
#   * Adjusting pixel intensities in images
# - Example:
#   # Centering data (zero-mean normalization)
#   data = np.array([10, 20, 30, 40, 50])
#   mean = np.mean(data)  # 30
#   centered = data - mean  # [-20, -10, 0, 10, 20]
#   
#   # Temperature conversion
#   celsius = np.array([0, 10, 20, 30, 100])
#   fahrenheit = celsius * 9/5 + 32  # [32, 50, 68, 86, 212]
#   
#   # Neural network bias addition
#   weighted_sum = np.array([0.5, -0.3, 0.8])
#   bias = 0.1
#   output = weighted_sum + bias  # [0.6, -0.2, 0.9]
#   
#   # Image brightness adjustment
#   image = np.random.rand(256, 256) * 255  # Grayscale image
#   brighter = image + 50  # Increase brightness by 50
#   brighter = np.clip(brighter, 0, 255)  # Keep in valid range
#
# HINGLISH:
# - Yeh kya karta hai: Array ke har element mein ek scalar (single number)
#   add karta hai. Ise "broadcasting" kehte hain - scalar automatically array
#   shape match karne ke liye extend ho jata hai.
# - Syntax: array + scalar (ya scalar + array - commutative hai)
# - Kab use karein: Zaroori hai:
#   * Data normalization: constant se data shift karna
#   * Neural networks mein bias addition: output = weights @ input + bias
#   * Feature scaling: ranges adjust karna
#   * Temperature conversion: celsius + 273.15 = kelvin
#   * Data preprocessing mein offset corrections
# - Broadcasting behavior:
#   * Scalar ko same shape ke array ki tarah treat kiya jata hai with repeated value
#   * [1, 2, 3] + 2 ban jata hai [1, 2, 3] + [2, 2, 2]
#   * Phir element-wise addition perform hota hai
# - Saare arithmetic operators aise hi kaam karte hain:
#   * Addition: arr + 5
#   * Subtraction: arr - 3
#   * Multiplication: arr * 2
#   * Division: arr / 4
#   * Power: arr ** 0.5
#   * Modulo: arr % 2
# - Common use cases:
#   * Data centering: data - mean
#   * Neural networks mein bias add karna
#   * Temperature conversions
#   * Probability distributions shift karna
#   * Images mein pixel intensities adjust karna
# - Example:
#   # Data center karna (zero-mean normalization)
#   data = np.array([10, 20, 30, 40, 50])
#   mean = np.mean(data)  # 30
#   centered = data - mean  # [-20, -10, 0, 10, 20]
#   
#   # Temperature conversion
#   celsius = np.array([0, 10, 20, 30, 100])
#   fahrenheit = celsius * 9/5 + 32  # [32, 50, 68, 86, 212]
#   
#   # Neural network mein bias addition
#   weighted_sum = np.array([0.5, -0.3, 0.8])
#   bias = 0.1
#   output = weighted_sum + bias  # [0.6, -0.2, 0.9]
#   
#   # Image brightness adjustment
#   image = np.random.rand(256, 256) * 255  # Grayscale image
#   brighter = image + 50  # Brightness 50 se increase karo
#   brighter = np.clip(brighter, 0, 255)  # Valid range mein rakho
# -----------------------------------------------------------------------------

# Add the scalar 2 to every element of arr; the scalar is broadcast across the array.
print(arr + 2)
# Output: [3 4 5 6 7]
#
# Broadcasting visualization:
# [1, 2, 3, 4, 5] + 2
#  ↓  ↓  ↓  ↓  ↓     ↓ (scalar broadcasts to match array)
# [1, 2, 3, 4, 5] + [2, 2, 2, 2, 2]
#  ↓  ↓  ↓  ↓  ↓     ↓  ↓  ↓  ↓  ↓
# [3, 4, 5, 6, 7]
#
# The scalar 2 is added to each element

# =============================================================================
# OPERATION 3: ARRAY + ARRAY (SAME SHAPE)
# =============================================================================
# ENGLISH:
# - What it does: Performs element-wise addition of two arrays with the SAME
#   shape. Corresponding elements are added together.
# - Syntax: array1 + array2 (arrays must have compatible shapes)
# - Why use it: Fundamental operation in ML for:
#   * Combining predictions from multiple models (ensemble learning)
#   * Adding gradients in backpropagation
#   * Vector addition in linear algebra
#   * Merging feature vectors
#   * Computing residuals: actual - prediction
# - How it works:
#   * Arrays must have same shape or be broadcastable
#   * Elements at same positions are added
#   * Returns new array with same shape
#   * Much faster than Python loops
# - All element-wise operators work similarly:
#   * Addition: arr1 + arr2
#   * Subtraction: arr1 - arr2
#   * Multiplication: arr1 * arr2 (NOT matrix multiplication!)
#   * Division: arr1 / arr2
#   * Power: arr1 ** arr2
#   * Comparison: arr1 > arr2, arr1 == arr2
# - Common use cases:
#   * Ensemble predictions: (model1_pred + model2_pred) / 2
#   * Gradient accumulation in training
#   * Computing errors: predictions - targets
#   * Feature combinations: feature1 + feature2
#   * Vector arithmetic in embeddings
# - Example:
#   # Ensemble learning - averaging two models
#   model1_pred = np.array([0.8, 0.2, 0.6, 0.9])
#   model2_pred = np.array([0.7, 0.3, 0.5, 0.8])
#   ensemble_pred = (model1_pred + model2_pred) / 2
#   # [0.75, 0.25, 0.55, 0.85]
#   
#   # Computing prediction errors
#   predictions = np.array([2.5, 3.2, 4.1, 5.0])
#   actual = np.array([2.0, 3.5, 4.0, 5.2])
#   errors = predictions - actual  # [0.5, -0.3, 0.1, -0.2]
#   
#   # Gradient accumulation
#   grad_batch1 = np.array([0.1, 0.2, 0.3])
#   grad_batch2 = np.array([0.15, 0.18, 0.25])
#   total_grad = grad_batch1 + grad_batch2  # [0.25, 0.38, 0.55]
#   
#   # Vector addition
#   vec1 = np.array([1, 2, 3])
#   vec2 = np.array([4, 5, 6])
#   result = vec1 + vec2  # [5, 7, 9]
#
# HINGLISH:
# - Yeh kya karta hai: SAME shape wale do arrays ka element-wise addition
#   perform karta hai. Corresponding elements ko ek saath add kiya jata hai.
# - Syntax: array1 + array2 (arrays ka shape compatible hona chahiye)
# - Kab use karein: ML mein fundamental operation hai:
#   * Multiple models se predictions combine karna (ensemble learning)
#   * Backpropagation mein gradients add karna
#   * Linear algebra mein vector addition
#   * Feature vectors merge karna
#   * Residuals compute karna: actual - prediction
# - Kaise kaam karta hai:
#   * Arrays ka same shape hona chahiye ya broadcastable hona chahiye
#   * Same positions par elements add hote hain
#   * Same shape wala naya array return hota hai
#   * Python loops se bahut zyada fast hai
# - Saare element-wise operators aise hi kaam karte hain:
#   * Addition: arr1 + arr2
#   * Subtraction: arr1 - arr2
#   * Multiplication: arr1 * arr2 (matrix multiplication NAHI!)
#   * Division: arr1 / arr2
#   * Power: arr1 ** arr2
#   * Comparison: arr1 > arr2, arr1 == arr2
# - Common use cases:
#   * Ensemble predictions: (model1_pred + model2_pred) / 2
#   * Training mein gradient accumulation
#   * Errors compute karna: predictions - targets
#   * Feature combinations: feature1 + feature2
#   * Embeddings mein vector arithmetic
# - Example:
#   # Ensemble learning - do models average karna
#   model1_pred = np.array([0.8, 0.2, 0.6, 0.9])
#   model2_pred = np.array([0.7, 0.3, 0.5, 0.8])
#   ensemble_pred = (model1_pred + model2_pred) / 2
#   # [0.75, 0.25, 0.55, 0.85]
#   
#   # Prediction errors compute karna
#   predictions = np.array([2.5, 3.2, 4.1, 5.0])
#   actual = np.array([2.0, 3.5, 4.0, 5.2])
#   errors = predictions - actual  # [0.5, -0.3, 0.1, -0.2]
#   
#   # Gradient accumulation
#   grad_batch1 = np.array([0.1, 0.2, 0.3])
#   grad_batch2 = np.array([0.15, 0.18, 0.25])
#   total_grad = grad_batch1 + grad_batch2  # [0.25, 0.38, 0.55]
#   
#   # Vector addition
#   vec1 = np.array([1, 2, 3])
#   vec2 = np.array([4, 5, 6])
#   result = vec1 + vec2  # [5, 7, 9]
# -----------------------------------------------------------------------------

# Element-wise sum of the two same-shape 1D arrays arr and arr2 (5 elements each).
print(arr + arr2)
# Output: [ 6  8 10 12 14]
#
# Element-wise addition:
# [1, 2, 3, 4,  5]  (arr)
# +
# [5, 6, 7, 8,  9]  (arr2)
# ↓  ↓  ↓  ↓   ↓
# [6, 8, 10, 12, 14]
#
# Each pair of corresponding elements is added:
# 1+5=6, 2+6=8, 3+7=10, 4+8=12, 5+9=14

# =============================================================================
# OPERATION 4: BROADCASTING - ARRAY + ARRAY (DIFFERENT SHAPES)
# =============================================================================
# ENGLISH:
# - What it does: Adds arrays of DIFFERENT but compatible shapes by
#   automatically "broadcasting" the smaller array to match the larger one.
# - Syntax: array1 + array2 (where shapes are compatible for broadcasting)
# - Why use it: EXTREMELY powerful feature that makes code concise and fast:
#   * Adding bias to each sample in a batch
#   * Normalizing each feature independently
#   * Image operations across channels
#   * Batch processing without explicit loops
# - Broadcasting rules (NumPy automatically applies):
#   1. If arrays have a different number of dimensions, prepend 1s to the
#      shape of the array with fewer dimensions
#   2. Arrays are compatible if for each dimension:
#      - They are equal, OR
#      - One of them is 1
#   3. Every size-1 dimension is "stretched" (virtually repeated, no data is
#      copied) to match the other array's size in that dimension
# - How it works for arr (shape 5,) + arr3 (shape 2,5):
#   Step 1: arr shape (5,) becomes (1, 5) - prepend dimension
#   Step 2: arr (1, 5) broadcasts to (2, 5) - repeat along axis 0
#   Step 3: Element-wise addition with arr3 (2, 5)
# - Broadcasting examples with different shapes:
#   (3, 1) + (3, 4) → (3, 4)  ✓ Compatible
#   (3,) + (5, 3) → (5, 3)    ✓ Compatible
#   (3, 1) + (1, 4) → (3, 4)  ✓ Compatible
#   (3, 2) + (3, 4) → Error   ✗ Incompatible
# - Common use cases:
#   * Adding bias to batch: batch_output + bias_vector
#   * Feature normalization: (X - mean) / std (mean & std per feature)
#   * Image processing: img + color_adjustment (per channel)
#   * Matrix operations with row/column vectors
# - Example:
#   # Adding bias to each sample in batch
#   batch = np.array([[1, 2, 3],
#                     [4, 5, 6],
#                     [7, 8, 9]])  # shape: (3, 3) - 3 samples, 3 features
#   bias = np.array([0.1, 0.2, 0.3])  # shape: (3,) - one bias per feature
#   output = batch + bias  # Broadcasting: (3,3) + (3,) → (3,3)
#   # [[1.1, 2.2, 3.3],
#   #  [4.1, 5.2, 6.3],
#   #  [7.1, 8.2, 9.3]]
#   
#   # Feature normalization (mean & std per feature)
#   X = np.array([[1, 10, 100],
#                 [2, 20, 200],
#                 [3, 30, 300]])  # shape: (3, 3)
#   mean = np.array([2, 20, 200])  # shape: (3,) - mean per feature
#   std = np.array([1, 10, 100])   # shape: (3,) - std per feature
#   X_normalized = (X - mean) / std  # Broadcasting both operations
#   
#   # Image RGB adjustment
#   img = np.random.rand(256, 256, 3)  # shape: (256, 256, 3)
#   rgb_adjust = np.array([1.1, 1.0, 0.9])  # shape: (3,) - R,G,B adjustments
#   adjusted = img * rgb_adjust  # Broadcasting: (256,256,3) * (3,)
#
# HINGLISH:
# - Yeh kya karta hai: DIFFERENT lekin compatible shapes wale arrays ko
#   automatically chhote array ko bade array ke shape mein "broadcast" karke
#   add karta hai.
# - Syntax: array1 + array2 (jahan shapes broadcasting ke liye compatible hain)
# - Kab use karein: BAHUT powerful feature jo code ko concise aur fast banata hai:
#   * Batch mein har sample mein bias add karna
#   * Har feature ko independently normalize karna
#   * Channels across image operations
#   * Explicit loops ke bina batch processing
# - Broadcasting rules (NumPy automatically apply karta hai):
#   1. Agar arrays ke dimensions ki sankhya alag hai, to kam dimensions wale
#      array ke shape ke aage 1s prepend karo
#   2. Arrays compatible hain agar har dimension ke liye:
#      - Wo equal hain, YA
#      - Unme se ek 1 hai
#   3. Har size-1 wala dimension doosre array ke size se match karne ke liye
#      "stretch" hota hai (virtually repeat hota hai, data copy nahi hota)
# - arr (shape 5,) + arr3 (shape 2,5) ke liye kaise kaam karta hai:
#   Step 1: arr ka shape (5,) ban jata hai (1, 5) - dimension prepend
#   Step 2: arr (1, 5) broadcast hota hai (2, 5) mein - axis 0 ke along repeat
#   Step 3: arr3 (2, 5) ke saath element-wise addition
# - Alag shapes ke saath broadcasting examples:
#   (3, 1) + (3, 4) → (3, 4)  ✓ Compatible
#   (3,) + (5, 3) → (5, 3)    ✓ Compatible
#   (3, 1) + (1, 4) → (3, 4)  ✓ Compatible
#   (3, 2) + (3, 4) → Error   ✗ Incompatible
# - Common use cases:
#   * Batch mein bias add karna: batch_output + bias_vector
#   * Feature normalization: (X - mean) / std (har feature ke liye mean & std)
#   * Image processing: img + color_adjustment (har channel ke liye)
#   * Row/column vectors ke saath matrix operations
# - Example:
#   # Batch mein har sample mein bias add karna
#   batch = np.array([[1, 2, 3],
#                     [4, 5, 6],
#                     [7, 8, 9]])  # shape: (3, 3) - 3 samples, 3 features
#   bias = np.array([0.1, 0.2, 0.3])  # shape: (3,) - har feature ke liye ek bias
#   output = batch + bias  # Broadcasting: (3,3) + (3,) → (3,3)
#   # [[1.1, 2.2, 3.3],
#   #  [4.1, 5.2, 6.3],
#   #  [7.1, 8.2, 9.3]]
#   
#   # Feature normalization (har feature ke liye mean & std)
#   X = np.array([[1, 10, 100],
#                 [2, 20, 200],
#                 [3, 30, 300]])  # shape: (3, 3)
#   mean = np.array([2, 20, 200])  # shape: (3,) - har feature ka mean
#   std = np.array([1, 10, 100])   # shape: (3,) - har feature ka std
#   X_normalized = (X - mean) / std  # Dono operations broadcast hote hain
#   
#   # Image RGB adjustment
#   img = np.random.rand(256, 256, 3)  # shape: (256, 256, 3)
#   rgb_adjust = np.array([1.1, 1.0, 0.9])  # shape: (3,) - R,G,B adjustments
#   adjusted = img * rgb_adjust  # Broadcasting: (256,256,3) * (3,)
# -----------------------------------------------------------------------------

# Broadcasting in action: the 1-D `arr` is added to every row of the 2-D `arr3`.
print(np.add(arr, arr3))
# Printed result:
# [[ 2  4  6  8 10]
#  [ 3  5  7  9 11]]
#
# Broadcasting visualization:
# arr shape:  (5,)   → treated as (1, 5) → broadcast to (2, 5)
# arr3 shape: (2, 5) → stays as (2, 5)
#
# Step-by-step:
# Original arr:  [1, 2, 3, 4, 5]  (shape: 5,)
# 
# After broadcasting arr to (2, 5):
# [[1, 2, 3, 4, 5],   ← arr broadcasted (repeated)
#  [1, 2, 3, 4, 5]]   ← same row repeated
# 
# arr3:
# [[1, 2, 3, 4, 5],   ← Row 0
#  [2, 3, 4, 5, 6]]   ← Row 1
# 
# Element-wise addition:
# [[1+1, 2+2, 3+3, 4+4, 5+5],
#  [1+2, 2+3, 3+4, 4+5, 5+6]]
# 
# Result:
# [[2, 4, 6, 8, 10],
#  [3, 5, 7, 9, 11]]

# =============================================================================
# ALL VECTORIZED ARITHMETIC OPERATIONS
# =============================================================================
# ENGLISH:
# NumPy supports all arithmetic operations in vectorized form:
#
# # Addition
# arr + 5          # Add scalar to all elements
# arr1 + arr2      # Element-wise addition
#
# # Subtraction
# arr - 3          # Subtract scalar from all elements
# arr1 - arr2      # Element-wise subtraction
#
# # Multiplication (element-wise, NOT matrix multiplication)
# arr * 2          # Multiply all elements by scalar
# arr1 * arr2      # Element-wise multiplication
#
# # Division
# arr / 4          # Divide all elements by scalar
# arr1 / arr2      # Element-wise division
#
# # Floor division
# arr // 3         # Integer division
#
# # Modulo
# arr % 2          # Remainder after division
#
# # Exponentiation
# arr ** 2         # Square all elements
# arr ** 0.5       # Square root (same as np.sqrt(arr))
# arr1 ** arr2     # Element-wise power
#
# # Comparison operations (return boolean arrays)
# arr > 3          # [False, False, False, True, True]
# arr == 3         # [False, False, True, False, False]
# arr1 < arr2      # Element-wise comparison
#
# # Logical operations
# (arr > 2) & (arr < 5)   # AND operation
# (arr < 2) | (arr > 4)   # OR operation
# ~(arr > 3)              # NOT operation
#
# HINGLISH:
# NumPy saare arithmetic operations ko vectorized form mein support karta hai:
#
# # Addition
# arr + 5          # Saare elements mein scalar add karo
# arr1 + arr2      # Element-wise addition
#
# # Subtraction
# arr - 3          # Saare elements se scalar subtract karo
# arr1 - arr2      # Element-wise subtraction
#
# # Multiplication (element-wise, matrix multiplication NAHI)
# arr * 2          # Saare elements ko scalar se multiply karo
# arr1 * arr2      # Element-wise multiplication
#
# # Division
# arr / 4          # Saare elements ko scalar se divide karo
# arr1 / arr2      # Element-wise division
#
# # Floor division
# arr // 3         # Integer division
#
# # Modulo
# arr % 2          # Division ke baad remainder
#
# # Exponentiation
# arr ** 2         # Saare elements ko square karo
# arr ** 0.5       # Square root (np.sqrt(arr) jaisa hi)
# arr1 ** arr2     # Element-wise power
#
# # Comparison operations (boolean arrays return karte hain)
# arr > 3          # [False, False, False, True, True]
# arr == 3         # [False, False, True, False, False]
# arr1 < arr2      # Element-wise comparison
#
# # Logical operations
# (arr > 2) & (arr < 5)   # AND operation
# (arr < 2) | (arr > 4)   # OR operation
# ~(arr > 3)              # NOT operation
# =============================================================================

# =============================================================================
# PERFORMANCE COMPARISON: LOOP vs VECTORIZATION
# =============================================================================
# ENGLISH:
# Demonstrating why vectorization is crucial for ML:
#
# import time
# 
# # Create large array
# large_arr = np.arange(1000000)
# 
# # Method 1: Python loop (SLOW)
# start = time.time()
# result_loop = []
# for x in large_arr:
#     result_loop.append(x ** 2)
# loop_time = time.time() - start
# print(f"Loop time: {loop_time:.4f} seconds")
# 
# # Method 2: List comprehension (STILL SLOW)
# start = time.time()
# result_comp = [x ** 2 for x in large_arr]
# comp_time = time.time() - start
# print(f"Comprehension time: {comp_time:.4f} seconds")
# 
# # Method 3: NumPy vectorization (FAST!)
# start = time.time()
# result_vec = large_arr ** 2
# vec_time = time.time() - start
# print(f"Vectorization time: {vec_time:.4f} seconds")
# 
# # Typical results:
# # Loop time: 0.2500 seconds
# # Comprehension time: 0.1800 seconds
# # Vectorization time: 0.0020 seconds  ← 100x faster!
#
# HINGLISH:
# Demonstrate kar rahe hain ki vectorization ML ke liye kyun crucial hai:
#
# import time
# 
# # Bada array banao
# large_arr = np.arange(1000000)
# 
# # Method 1: Python loop (SLOW)
# start = time.time()
# result_loop = []
# for x in large_arr:
#     result_loop.append(x ** 2)
# loop_time = time.time() - start
# print(f"Loop time: {loop_time:.4f} seconds")
# 
# # Method 2: List comprehension (ABHI BHI SLOW)
# start = time.time()
# result_comp = [x ** 2 for x in large_arr]
# comp_time = time.time() - start
# print(f"Comprehension time: {comp_time:.4f} seconds")
# 
# # Method 3: NumPy vectorization (FAST!)
# start = time.time()
# result_vec = large_arr ** 2
# vec_time = time.time() - start
# print(f"Vectorization time: {vec_time:.4f} seconds")
# 
# # Typical results:
# # Loop time: 0.2500 seconds
# # Comprehension time: 0.1800 seconds
# # Vectorization time: 0.0020 seconds  ← 100x faster!
# =============================================================================

# =============================================================================
# PRACTICAL ML EXAMPLE: GRADIENT DESCENT STEP
# =============================================================================
# ENGLISH:
# Real-world scenario: One step of gradient descent (vectorized)
#
# # Hyperparameters
# learning_rate = 0.01
# 
# # Model parameters (weights)
# weights = np.array([0.5, -0.3, 0.8, 0.2])
# 
# # Computed gradients from backpropagation
# gradients = np.array([0.1, -0.05, 0.15, 0.02])
# 
# # Gradient descent update (fully vectorized - no loops!)
# weights = weights - learning_rate * gradients
# # [0.499, -0.2995, 0.7985, 0.1998]
# 
# # Without vectorization, you'd need:
# # for i in range(len(weights)):
# #     weights[i] = weights[i] - learning_rate * gradients[i]
# # Much slower and less readable!
#
# HINGLISH:
# Real-world scenario: Gradient descent ka ek step (vectorized)
#
# # Hyperparameters
# learning_rate = 0.01
# 
# # Model parameters (weights)
# weights = np.array([0.5, -0.3, 0.8, 0.2])
# 
# # Backpropagation se computed gradients
# gradients = np.array([0.1, -0.05, 0.15, 0.02])
# 
# # Gradient descent update (fully vectorized - loops nahi!)
# weights = weights - learning_rate * gradients
# # [0.499, -0.2995, 0.7985, 0.1998]
# 
# # Vectorization ke bina, aapko yeh karna padega:
# # for i in range(len(weights)):
# #     weights[i] = weights[i] - learning_rate * gradients[i]
# # Bahut slower aur kam readable!
# =============================================================================

[ 1  4  9 16 25]
[3 4 5 6 7]
[ 6  8 10 12 14]
[[ 2  4  6  8 10]
 [ 3  5  7  9 11]]


In [25]:
# =============================================================================
# DATA NORMALIZATION (STANDARDIZATION/Z-SCORE NORMALIZATION)
# =============================================================================

# =============================================================================
# WHAT IS NORMALIZATION?
# =============================================================================
# ENGLISH:
# Normalization (specifically Z-score normalization/standardization) transforms
# data to have:
# - Mean = 0 (centered around zero)
# - Standard deviation = 1 (unit variance)
#
# This is one of the MOST IMPORTANT preprocessing steps in ML because:
# - Features with different scales can dominate learning (e.g., age vs income)
# - Gradient descent converges faster with normalized data
# - Many ML algorithms (SVM, Neural Networks, KNN) perform better
# - Prevents numerical instability in computations
#
# Formula: z = (x - μ) / σ
# where:
# - x = original value
# - μ (mu) = mean of the data
# - σ (sigma) = standard deviation of the data
# - z = normalized value (z-score)
#
# HINGLISH:
# Normalization (specifically Z-score normalization/standardization) data ko
# transform karta hai taaki:
# - Mean = 0 ho (zero ke around centered)
# - Standard deviation = 1 ho (unit variance)
#
# Yeh ML mein sabse IMPORTANT preprocessing steps mein se ek hai kyunki:
# - Alag scales wale features learning ko dominate kar sakte hain (jaise age vs income)
# - Normalized data ke saath gradient descent zyada fast converge hota hai
# - Bahut saare ML algorithms (SVM, Neural Networks, KNN) better perform karte hain
# - Computations mein numerical instability se bachata hai
#
# Formula: z = (x - μ) / σ
# jahan:
# - x = original value
# - μ (mu) = data ka mean
# - σ (sigma) = data ka standard deviation
# - z = normalized value (z-score)
# -----------------------------------------------------------------------------

# =============================================================================
# CREATING SAMPLE DATA FOR NORMALIZATION
# =============================================================================
# ENGLISH:
# - What it does: Creates a simple 2×2 matrix for demonstrating normalization
# - Values: [1, 2, 3, 4] - small range for easy calculation verification
# - Why this example: Simple numbers make it easy to understand the math
#   behind normalization
#
# HINGLISH:
# - Yeh kya karta hai: Normalization demonstrate karne ke liye ek simple
#   2×2 matrix banata hai
# - Values: [1, 2, 3, 4] - easy calculation verification ke liye small range
# - Yeh example kyun: Simple numbers normalization ke peeche ka math samajhne
#   mein aasaan banate hain
# -----------------------------------------------------------------------------

# Small 2x2 integer matrix used as the running example for the steps below.
arr = np.array([
    [1, 2],  # row 0
    [3, 4],  # row 1
])
# shape -> (2, 2), size -> 4, flattened values -> 1, 2, 3, 4

# =============================================================================
# STEP 1: COMPUTING THE MEAN (AVERAGE)
# =============================================================================
# ENGLISH:
# - What it does: Calculates the arithmetic mean (average) of ALL elements
#   in the array
# - Syntax: np.mean(array, axis=None) - axis=None means compute over all elements
# - Formula: mean = (sum of all values) / (count of values)
# - Why compute mean: It represents the "center" of the data. We subtract
#   this to center the data around zero.
# - How it works:
#   * Sums all elements: 1 + 2 + 3 + 4 = 10
#   * Divides by count: 10 / 4 = 2.5
#   * Returns single scalar value
# - Common use cases:
#   * Data centering for normalization
#   * Computing average performance metrics
#   * Finding typical values in datasets
#   * Baseline for variance calculations
#   * Feature scaling in preprocessing
# - Mean computation options:
#   * np.mean(arr) - mean of all elements (what we use here)
#   * np.mean(arr, axis=0) - mean per column (feature-wise)
#   * np.mean(arr, axis=1) - mean per row (sample-wise)
# - Example:
#   # Global mean (all elements)
#   data = np.array([[10, 20], [30, 40]])
#   global_mean = np.mean(data)  # 25.0
#   
#   # Per-feature mean (column-wise)
#   X = np.array([[1, 10, 100],
#                 [2, 20, 200],
#                 [3, 30, 300]])
#   feature_means = np.mean(X, axis=0)  # [2., 20., 200.]
#   
#   # Per-sample mean (row-wise)
#   sample_means = np.mean(X, axis=1)  # [37., 74., 111.]
#
# HINGLISH:
# - Yeh kya karta hai: Array ke SAARE elements ka arithmetic mean (average)
#   calculate karta hai
# - Syntax: np.mean(array, axis=None) - axis=None matlab saare elements par compute
# - Formula: mean = (saari values ka sum) / (values ki count)
# - Mean kyun compute kare: Yeh data ke "center" ko represent karta hai. Hum
#   ise subtract karte hain taaki data zero ke around center ho jaye.
# - Kaise kaam karta hai:
#   * Saare elements ko sum karta hai: 1 + 2 + 3 + 4 = 10
#   * Count se divide karta hai: 10 / 4 = 2.5
#   * Single scalar value return karta hai
# - Common use cases:
#   * Normalization ke liye data centering
#   * Average performance metrics compute karna
#   * Datasets mein typical values dhoondhna
#   * Variance calculations ke liye baseline
#   * Preprocessing mein feature scaling
# - Mean computation options:
#   * np.mean(arr) - saare elements ka mean (yahan hum yeh use kar rahe hain)
#   * np.mean(arr, axis=0) - har column ka mean (feature-wise)
#   * np.mean(arr, axis=1) - har row ka mean (sample-wise)
# - Example:
#   # Global mean (saare elements)
#   data = np.array([[10, 20], [30, 40]])
#   global_mean = np.mean(data)  # 25.0
#   
#   # Per-feature mean (column-wise)
#   X = np.array([[1, 10, 100],
#                 [2, 20, 200],
#                 [3, 30, 300]])
#   feature_means = np.mean(X, axis=0)  # [2., 20., 200.]
#   
#   # Per-sample mean (row-wise)
#   sample_means = np.mean(X, axis=1)  # [37., 74., 111.]
# -----------------------------------------------------------------------------

# Global mean: axis=None (spelled out here) averages over every element.
mean = np.mean(arr, axis=None)
# Worked by hand: (1 + 2 + 3 + 4) / 4 = 10 / 4 = 2.5

print(mean)
# Prints: 2.5
# 2.5 is the center the data is shifted around in the normalization step.

# =============================================================================
# STEP 2: COMPUTING STANDARD DEVIATION
# =============================================================================
# ENGLISH:
# - What it does: Calculates how spread out the data is from the mean.
#   It is the root-mean-square (RMS) distance of the points from the mean.
# - Syntax: np.std(array, axis=None, ddof=0)
# - Formula: σ = sqrt(mean((x - μ)²))
#   Step-by-step:
#   1. Subtract mean from each value: (x - μ)
#   2. Square each difference: (x - μ)²
#   3. Take mean of squared differences: mean((x - μ)²) = variance
#   4. Take square root: sqrt(variance) = standard deviation
# - Why compute std: It measures the "scale" of the data. We divide by this
#   to scale the data to unit variance (std = 1).
# - ddof parameter (Degrees of Freedom):
#   * ddof=0 (default): Population std (divide by N) - use for entire dataset
#   * ddof=1: Sample std (divide by N-1) - use for sample statistics
#   * For ML normalization, typically use ddof=0
# - How it works for our data:
#   Step 1: Differences from mean: [1-2.5, 2-2.5, 3-2.5, 4-2.5]
#                                 = [-1.5, -0.5, 0.5, 1.5]
#   Step 2: Square differences: [2.25, 0.25, 0.25, 2.25]
#   Step 3: Mean of squares: (2.25 + 0.25 + 0.25 + 2.25) / 4 = 5.0 / 4 = 1.25
#   Step 4: Square root: sqrt(1.25) ≈ 1.118
# - Interpretation:
#   * Small std (close to 0): Data points are close to mean (little variation)
#   * Large std: Data points are spread out (high variation)
#   * std = 0: All values are identical
# - Common use cases:
#   * Measuring data variability/spread
#   * Normalizing data to unit variance
#   * Detecting outliers (values > 3 std from mean)
#   * Comparing variability across features
#   * Risk assessment in finance
# - Example:
#   # Low variability
#   low_var = np.array([10, 10.5, 9.5, 10])
#   np.std(low_var)  # ≈ 0.35 - values close to mean
#   
#   # High variability
#   high_var = np.array([1, 50, 100, 200])
#   np.std(high_var)  # ≈ 73.7 - values spread out
#   
#   # Per-feature std (important for ML)
#   X = np.array([[1, 100],
#                 [2, 200],
#                 [3, 300]])
#   feature_stds = np.std(X, axis=0)  # [0.816, 81.65]
#   # Feature 2 has much higher variability
#
# HINGLISH:
# - Yeh kya karta hai: Calculate karta hai ki data mean se kitna spread out
#   hai. Yeh points ki mean se root-mean-square (RMS) distance measure karta hai.
# - Syntax: np.std(array, axis=None, ddof=0)
# - Formula: σ = sqrt(mean((x - μ)²))
#   Step-by-step:
#   1. Har value se mean subtract karo: (x - μ)
#   2. Har difference ko square karo: (x - μ)²
#   3. Squared differences ka mean lo: mean((x - μ)²) = variance
#   4. Square root lo: sqrt(variance) = standard deviation
# - Std kyun compute kare: Yeh data ke "scale" ko measure karta hai. Hum isse
#   divide karte hain taaki data unit variance (std = 1) tak scale ho jaye.
# - ddof parameter (Degrees of Freedom):
#   * ddof=0 (default): Population std (N se divide) - poore dataset ke liye use
#   * ddof=1: Sample std (N-1 se divide) - sample statistics ke liye use
#   * ML normalization ke liye, typically ddof=0 use karte hain
# - Hamare data ke liye kaise kaam karta hai:
#   Step 1: Mean se differences: [1-2.5, 2-2.5, 3-2.5, 4-2.5]
#                               = [-1.5, -0.5, 0.5, 1.5]
#   Step 2: Differences square karo: [2.25, 0.25, 0.25, 2.25]
#   Step 3: Squares ka mean: (2.25 + 0.25 + 0.25 + 2.25) / 4 = 5.0 / 4 = 1.25
#   Step 4: Square root: sqrt(1.25) ≈ 1.118
# - Interpretation:
#   * Chhota std (0 ke paas): Data points mean ke paas hain (kam variation)
#   * Bada std: Data points spread out hain (zyada variation)
#   * std = 0: Saari values same hain
# - Common use cases:
#   * Data variability/spread measure karna
#   * Data ko unit variance tak normalize karna
#   * Outliers detect karna (values > mean se 3 std)
#   * Features across variability compare karna
#   * Finance mein risk assessment
# - Example:
#   # Kam variability
#   low_var = np.array([10, 10.5, 9.5, 10])
#   np.std(low_var)  # ≈ 0.35 - values mean ke paas
#   
#   # Zyada variability
#   high_var = np.array([1, 50, 100, 200])
#   np.std(high_var)  # ≈ 73.7 - values spread out
#   
#   # Per-feature std (ML ke liye important)
#   X = np.array([[1, 100],
#                 [2, 200],
#                 [3, 300]])
#   feature_stds = np.std(X, axis=0)  # [0.816, 81.65]
#   # Feature 2 mein bahut zyada variability hai
# -----------------------------------------------------------------------------

# Population standard deviation over all elements (axis=None and ddof=0 are
# the defaults, written out explicitly here).
std_dev = np.std(arr, axis=None, ddof=0)
# Worked by hand:
#   deviations from the mean : [-1.5, -0.5, 0.5, 1.5]
#   squared deviations       : [2.25, 0.25, 0.25, 2.25]
#   variance (mean of those) : 5.0 / 4 = 1.25
#   standard deviation       : sqrt(1.25) ≈ 1.118

print(std_dev)
# Prints: 1.118033988749895
# This is the root-mean-square distance of the values from the mean.

# =============================================================================
# STEP 3: NORMALIZATION (Z-SCORE TRANSFORMATION)
# =============================================================================
# ENGLISH:
# - What it does: Transforms the data using the formula: z = (x - μ) / σ
#   This creates a new dataset where:
#   * Mean = 0 (centered)
#   * Standard deviation = 1 (unit variance)
# - Why this works: By subtracting mean and dividing by std, we:
#   1. Center the data around zero (subtract mean)
#   2. Scale to unit variance (divide by std)
# - Benefits of normalization:
#   * All features on same scale (prevents feature dominance)
#   * Faster gradient descent convergence
#   * Better performance for distance-based algorithms (KNN, K-Means)
#   * Prevents numerical instability
#   * Required for many ML algorithms (Neural Networks, SVM, PCA)
# - Vectorized operation: Uses broadcasting
#   * arr - mean: Subtracts scalar from entire array (broadcasting)
#   * (arr - mean) / std_dev: Divides entire array by scalar (broadcasting)
# - Interpretation of normalized values (z-scores):
#   * z = 0: Value is exactly at the mean
#   * z = 1: Value is 1 standard deviation above mean
#   * z = -1: Value is 1 standard deviation below mean
#   * z = 2: Value is 2 standard deviations above mean
#   * |z| > 3: Often considered an outlier
# - Common use cases:
#   * Preprocessing features for ML models
#   * Comparing features with different units (age vs salary)
#   * Neural network input normalization
#   * PCA (Principal Component Analysis)
#   * SVM kernel tricks
# - When NOT to use:
#   * Tree-based models (Random Forest, XGBoost) - they don't need it
#   * When preserving original scale is important
#   * When data is already on similar scales
# - Example:
#   # Feature scaling for ML
#   # Without normalization - BAD!
#   X = np.array([[25, 50000],      # Age, Salary
#                 [30, 60000],
#                 [35, 80000]])
#   # Salary dominates because of large values
#   
#   # With normalization - GOOD!
#   X_normalized = (X - np.mean(X, axis=0)) / np.std(X, axis=0)
#   # Now both features have similar scale
#   
#   # Neural network input normalization
#   images = np.random.rand(1000, 28, 28)  # MNIST-like
#   # Pixel values in range [0, 1]
#   mean = np.mean(images)
#   std = np.std(images)
#   images_normalized = (images - mean) / std
#   # Now centered around 0 with std=1
#
# HINGLISH:
# - Yeh kya karta hai: Formula use karke data transform karta hai: z = (x - μ) / σ
#   Yeh naya dataset banata hai jahan:
#   * Mean = 0 (centered)
#   * Standard deviation = 1 (unit variance)
# - Yeh kyun kaam karta hai: Mean subtract karke aur std se divide karke, hum:
#   1. Data ko zero ke around center karte hain (mean subtract)
#   2. Unit variance tak scale karte hain (std se divide)
# - Normalization ke benefits:
#   * Saare features same scale par (feature dominance se bachata hai)
#   * Gradient descent zyada fast converge hota hai
#   * Distance-based algorithms ke liye better performance (KNN, K-Means)
#   * Numerical instability se bachata hai
#   * Bahut saare ML algorithms ke liye zaroori (Neural Networks, SVM, PCA)
# - Vectorized operation: Broadcasting use karta hai
#   * arr - mean: Poore array se scalar subtract (broadcasting)
#   * (arr - mean) / std_dev: Poore array ko scalar se divide (broadcasting)
# - Normalized values (z-scores) ka interpretation:
#   * z = 0: Value exactly mean par hai
#   * z = 1: Value mean se 1 standard deviation upar hai
#   * z = -1: Value mean se 1 standard deviation neeche hai
#   * z = 2: Value mean se 2 standard deviations upar hai
#   * |z| > 3: Aksar outlier consider kiya jata hai
# - Common use cases:
#   * ML models ke liye features preprocess karna
#   * Alag units wale features compare karna (age vs salary)
#   * Neural network input normalization
#   * PCA (Principal Component Analysis)
#   * SVM kernel tricks
# - Kab use NAHI karna:
#   * Tree-based models (Random Forest, XGBoost) - unhe zaroorat nahi
#   * Jab original scale preserve karna important ho
#   * Jab data pehle se similar scales par ho
# - Example:
#   # ML ke liye feature scaling
#   # Normalization ke bina - BURA!
#   X = np.array([[25, 50000],      # Age, Salary
#                 [30, 60000],
#                 [35, 80000]])
#   # Salary dominate karti hai badi values ki wajah se
#   
#   # Normalization ke saath - ACCHA!
#   X_normalized = (X - np.mean(X, axis=0)) / np.std(X, axis=0)
#   # Ab dono features similar scale par hain
#   
#   # Neural network input normalization
#   images = np.random.rand(1000, 28, 28)  # MNIST-jaisa
#   # Pixel values range [0, 1] mein
#   mean = np.mean(images)
#   std = np.std(images)
#   images_normalized = (images - mean) / std
#   # Ab 0 ke around centered with std=1
# -----------------------------------------------------------------------------

# Z-score in two explicit steps: center on the mean, then rescale by the std.
normalized_arr = arr - mean                # step 1: shift so the mean becomes 0
normalized_arr = normalized_arr / std_dev  # step 2: scale so the std becomes 1
# Per-element check (std_dev ≈ 1.118):
#   [0, 0]: (1 - 2.5) / 1.118 = -1.5 / 1.118 ≈ -1.342
#   [0, 1]: (2 - 2.5) / 1.118 = -0.5 / 1.118 ≈ -0.447
#   [1, 0]: (3 - 2.5) / 1.118 =  0.5 / 1.118 ≈  0.447
#   [1, 1]: (4 - 2.5) / 1.118 =  1.5 / 1.118 ≈  1.342

print(normalized_arr)
# Output:
# [[-1.34164079 -0.4472136 ]
#  [ 0.4472136   1.34164079]]
#
# Interpretation:
# - Negative values: Below the original mean
# - Positive values: Above the original mean
# - Values near 0: Close to the original mean
# - Magnitude indicates distance from mean in units of std

# =============================================================================
# VERIFYING NORMALIZATION WORKED
# =============================================================================
# ENGLISH:
# After normalization, we can verify it worked correctly:
#
# # Check mean of normalized data (should be ≈ 0)
# normalized_mean = np.mean(normalized_arr)
# print(f"Mean of normalized data: {normalized_mean}")
# # Output: Mean of normalized data: 0.0 (or very close, like 1e-16)
#
# # Check std of normalized data (should be ≈ 1)
# normalized_std = np.std(normalized_arr)
# print(f"Std of normalized data: {normalized_std}")
# # Output: Std of normalized data: 1.0
#
# This confirms the normalization was successful!
#
# HINGLISH:
# Normalization ke baad, hum verify kar sakte hain ki yeh sahi kaam kiya:
#
# # Normalized data ka mean check karo (≈ 0 hona chahiye)
# normalized_mean = np.mean(normalized_arr)
# print(f"Mean of normalized data: {normalized_mean}")
# # Output: Mean of normalized data: 0.0 (ya bahut paas, jaise 1e-16)
#
# # Normalized data ka std check karo (≈ 1 hona chahiye)
# normalized_std = np.std(normalized_arr)
# print(f"Std of normalized data: {normalized_std}")
# # Output: Std of normalized data: 1.0
#
# Yeh confirm karta hai ki normalization successful raha!
# -----------------------------------------------------------------------------

# =============================================================================
# FEATURE-WISE VS GLOBAL NORMALIZATION
# =============================================================================
# ENGLISH:
# IMPORTANT: In ML, we typically normalize EACH FEATURE independently!
#
# # BAD: Global normalization (what we did above)
# X = np.array([[1, 100],
#               [2, 200],
#               [3, 300]])
# global_mean = np.mean(X)  # Mean across ALL values
# global_std = np.std(X)
# X_global_norm = (X - global_mean) / global_std
# # Problem: Treats all features together, doesn't account for different scales
#
# # GOOD: Feature-wise normalization (correct for ML)
# X = np.array([[1, 100],
#               [2, 200],
#               [3, 300]])
# feature_means = np.mean(X, axis=0)  # Mean per feature: [2, 200]
# feature_stds = np.std(X, axis=0)    # Std per feature: [0.816, 81.65]
# X_feature_norm = (X - feature_means) / feature_stds
# # Each feature normalized independently
# # [[(-1/0.816), (-100/81.65)],
# #  [(0/0.816), (0/81.65)],
# #  [(1/0.816), (100/81.65)]]
#
# Why feature-wise?
# - Each feature has its own distribution
# - Preserves relationships within each feature
# - Standard practice in ML preprocessing
#
# HINGLISH:
# IMPORTANT: ML mein, hum typically HAR FEATURE ko independently normalize karte hain!
#
# # BURA: Global normalization (jo humne upar kiya)
# X = np.array([[1, 100],
#               [2, 200],
#               [3, 300]])
# global_mean = np.mean(X)  # SAARI values ka mean
# global_std = np.std(X)
# X_global_norm = (X - global_mean) / global_std
# # Problem: Saare features ko ek saath treat karta hai, alag scales account nahi karta
#
# # ACCHA: Feature-wise normalization (ML ke liye sahi)
# X = np.array([[1, 100],
#               [2, 200],
#               [3, 300]])
# feature_means = np.mean(X, axis=0)  # Har feature ka mean: [2, 200]
# feature_stds = np.std(X, axis=0)    # Har feature ka std: [0.816, 81.65]
# X_feature_norm = (X - feature_means) / feature_stds
# # Har feature independently normalized
# # [[(-1/0.816), (-100/81.65)],
# #  [(0/0.816), (0/81.65)],
# #  [(1/0.816), (100/81.65)]]
#
# Feature-wise kyun?
# - Har feature ka apna distribution hota hai
# - Har feature ke andar relationships preserve karta hai
# - ML preprocessing mein standard practice hai
# =============================================================================

# =============================================================================
# OTHER NORMALIZATION TECHNIQUES
# =============================================================================
# ENGLISH:
# Besides Z-score normalization, there are other techniques:
#
# 1. MIN-MAX NORMALIZATION (Scaling to [0, 1])
#    Formula: x_scaled = (x - min) / (max - min)
#    data = np.array([1, 2, 3, 4, 5])
#    min_val = np.min(data)
#    max_val = np.max(data)
#    minmax_scaled = (data - min_val) / (max_val - min_val)
#    # Result: [0, 0.25, 0.5, 0.75, 1.0]
#    Use when: Need specific range [0,1] or [-1,1]
#
# 2. ROBUST SCALING (using median and IQR)
#    Formula: x_scaled = (x - median) / IQR
#    data = np.array([1, 2, 3, 4, 100])  # Has outlier
#    median = np.median(data)
#    q75, q25 = np.percentile(data, [75, 25])
#    iqr = q75 - q25
#    robust_scaled = (data - median) / iqr
#    Use when: Data has outliers (robust to outliers)
#
# 3. L2 NORMALIZATION (Unit norm)
#    Formula: x_normalized = x / ||x||₂
#    vector = np.array([3, 4])
#    l2_norm = np.sqrt(np.sum(vector ** 2))  # 5.0
#    unit_vector = vector / l2_norm  # [0.6, 0.8]
#    Use when: Need unit length vectors (text embeddings, cosine similarity)
#
# 4. LOG TRANSFORMATION
#    Formula: x_transformed = log(x + 1)
#    data = np.array([1, 10, 100, 1000])
#    log_transformed = np.log1p(data)  # log(x+1) to handle 0
#    Use when: Data is heavily skewed (income, populations)
#
# HINGLISH:
# Z-score normalization ke alawa, aur bhi techniques hain:
#
# 1. MIN-MAX NORMALIZATION ([0, 1] tak scaling)
#    Formula: x_scaled = (x - min) / (max - min)
#    data = np.array([1, 2, 3, 4, 5])
#    min_val = np.min(data)
#    max_val = np.max(data)
#    minmax_scaled = (data - min_val) / (max_val - min_val)
#    # Result: [0, 0.25, 0.5, 0.75, 1.0]
#    Kab use karein: Jab specific range [0,1] ya [-1,1] chahiye
#
# 2. ROBUST SCALING (median aur IQR use karke)
#    Formula: x_scaled = (x - median) / IQR
#    data = np.array([1, 2, 3, 4, 100])  # Outlier hai
#    median = np.median(data)
#    q75, q25 = np.percentile(data, [75, 25])
#    iqr = q75 - q25
#    robust_scaled = (data - median) / iqr
#    Kab use karein: Jab data mein outliers hain (outliers ke against robust)
#
# 3. L2 NORMALIZATION (Unit norm)
#    Formula: x_normalized = x / ||x||₂
#    vector = np.array([3, 4])
#    l2_norm = np.sqrt(np.sum(vector ** 2))  # 5.0
#    unit_vector = vector / l2_norm  # [0.6, 0.8]
#    Kab use karein: Jab unit length vectors chahiye (text embeddings, cosine similarity)
#
# 4. LOG TRANSFORMATION
#    Formula: x_transformed = log(x + 1)
#    data = np.array([1, 10, 100, 1000])
#    log_transformed = np.log1p(data)  # log(x+1) taaki 0 handle ho
#    Kab use karein: Jab data heavily skewed ho (income, populations)
# =============================================================================

# =============================================================================
# PRACTICAL ML EXAMPLE: COMPLETE PREPROCESSING PIPELINE
# =============================================================================
# ENGLISH:
# Real-world scenario: Preprocessing a dataset for neural network
#
# # Simulate dataset: 100 samples, 3 features
# np.random.seed(42)
# X_train = np.random.randn(100, 3) * [10, 100, 1000] + [50, 500, 5000]
# # Features have very different scales!
#
# # Step 1: Compute statistics on TRAINING data only
# train_means = np.mean(X_train, axis=0)
# train_stds = np.std(X_train, axis=0)
#
# # Step 2: Normalize training data
# X_train_normalized = (X_train - train_means) / train_stds
#
# # Step 3: Normalize test data using TRAINING statistics (CRITICAL!)
# X_test = np.random.randn(20, 3) * [10, 100, 1000] + [50, 500, 5000]
# X_test_normalized = (X_test - train_means) / train_stds
# # Use training mean/std, NOT test mean/std!
#
# # Step 4: Verify normalization
# print("Training data - Mean:", np.mean(X_train_normalized, axis=0))
# # Should be close to [0, 0, 0]
# print("Training data - Std:", np.std(X_train_normalized, axis=0))
# # Should be close to [1, 1, 1]
#
# WHY use training statistics for test data?
# - Prevents data leakage
# - Ensures same transformation as training
# - Models expect same scale as training
#
# HINGLISH:
# Real-world scenario: Neural network ke liye dataset preprocess karna
#
# # Dataset simulate karo: 100 samples, 3 features
# np.random.seed(42)
# X_train = np.random.randn(100, 3) * [10, 100, 1000] + [50, 500, 5000]
# # Features ki bahut alag scales hain!
#
# # Step 1: Sirf TRAINING data par statistics compute karo
# train_means = np.mean(X_train, axis=0)
# train_stds = np.std(X_train, axis=0)
#
# # Step 2: Training data normalize karo
# X_train_normalized = (X_train - train_means) / train_stds
#
# # Step 3: Test data ko TRAINING statistics use karke normalize karo (CRITICAL!)
# X_test = np.random.randn(20, 3) * [10, 100, 1000] + [50, 500, 5000]
# X_test_normalized = (X_test - train_means) / train_stds
# # Training mean/std use karo, test mean/std NAHI!
#
# # Step 4: Normalization verify karo
# print("Training data - Mean:", np.mean(X_train_normalized, axis=0))
# # [0, 0, 0] ke paas hona chahiye
# print("Training data - Std:", np.std(X_train_normalized, axis=0))
# # [1, 1, 1] ke paas hona chahiye
#
# Test data ke liye training statistics kyun use karein?
# - Data leakage se bachata hai
# - Training jaisa hi transformation ensure karta hai
# - Models training jaisi hi scale expect karte hain
# =============================================================================

2.5
1.118033988749895
[[-1.34164079 -0.4472136 ]
 [ 0.4472136   1.34164079]]


In [27]:
# =============================================================================
# NUMPY MATHEMATICAL FUNCTIONS - AGGREGATION OPERATIONS
# =============================================================================

# =============================================================================
# WHAT ARE AGGREGATION FUNCTIONS?
# =============================================================================
# ENGLISH:
# Aggregation functions reduce an array to a single value (or fewer values)
# by performing calculations across all elements or along specific axes.
# These are ESSENTIAL for:
# - Computing statistics on datasets
# - Loss function calculations in ML
# - Performance metrics evaluation
# - Data analysis and exploration
# - Feature engineering
#
# HINGLISH:
# Aggregation functions ek array ko single value (ya kam values) mein reduce
# karte hain saare elements par ya specific axes ke along calculations
# perform karke. Yeh zaroori hain:
# - Datasets par statistics compute karne ke liye
# - ML mein loss function calculations ke liye
# - Performance metrics evaluate karne ke liye
# - Data analysis aur exploration ke liye
# - Feature engineering ke liye
# -----------------------------------------------------------------------------

# =============================================================================
# CREATING SAMPLE ARRAY FOR DEMONSTRATIONS
# =============================================================================
# ENGLISH:
# - What it does: Creates a simple 1D array with 6 elements for demonstrating
#   various mathematical aggregation functions
# - Values: [1, 2, 3, 4, 5, 6] - sequential for easy verification
# - Why this example: Small, simple numbers make it easy to understand and
#   verify each operation's result
#
# HINGLISH:
# - Yeh kya karta hai: Various mathematical aggregation functions demonstrate
#   karne ke liye 6 elements wala simple 1D array banata hai
# - Values: [1, 2, 3, 4, 5, 6] - easy verification ke liye sequential
# - Yeh example kyun: Chhote, simple numbers har operation ka result samajhne
#   aur verify karne mein aasaan banate hain
# -----------------------------------------------------------------------------

arr = np.array([1, 2, 3, 4, 5, 6])  # shared 1D input reused by the aggregation calls below

# =============================================================================
# FUNCTION 1: np.sum() - SUMMATION
# =============================================================================
# ENGLISH:
# - What it does: Computes the sum (total) of all elements in the array
# - Syntax: np.sum(array, axis=None)
# - Formula: sum = a₁ + a₂ + a₃ + ... + aₙ
# - Why use it: Fundamental operation for:
#   * Computing total loss across all samples
#   * Calculating total counts/amounts
#   * L1 regularization: sum(|weights|)
#   * Feature aggregation
#   * Batch processing totals
# - How it works:
#   * Adds all elements together
#   * Returns single scalar (if axis=None)
#   * Can work along specific axes for multi-dimensional arrays
# - Common use cases:
#   * Total loss: np.sum((y_pred - y_true) ** 2)
#   * Count occurrences: np.sum(arr == value)
#   * L1 norm: np.sum(np.abs(weights))
#   * Probability verification: np.sum(probabilities) should equal 1
#   * Batch totals: np.sum(batch_scores, axis=0)
# - Example:
#   # Counting elements meeting condition
#   data = np.array([1, 5, 3, 8, 2, 9])
#   count_above_5 = np.sum(data > 5)  # 2 (values 8 and 9)
#   
#   # Total loss calculation
#   predictions = np.array([0.8, 0.6, 0.9])
#   targets = np.array([1.0, 0.5, 1.0])
#   total_loss = np.sum((predictions - targets) ** 2)  # 0.06 (0.04 + 0.01 + 0.01)
#   
#   # Summing along axis
#   matrix = np.array([[1, 2, 3], [4, 5, 6]])
#   col_sums = np.sum(matrix, axis=0)  # [5, 7, 9] - sum each column
#   row_sums = np.sum(matrix, axis=1)  # [6, 15] - sum each row
#
# HINGLISH:
# - Yeh kya karta hai: Array ke saare elements ka sum (total) compute karta hai
# - Syntax: np.sum(array, axis=None)
# - Formula: sum = a₁ + a₂ + a₃ + ... + aₙ
# - Kab use karein: Fundamental operation hai:
#   * Saare samples across total loss compute karne ke liye
#   * Total counts/amounts calculate karne ke liye
#   * L1 regularization: sum(|weights|)
#   * Feature aggregation
#   * Batch processing totals
# - Kaise kaam karta hai:
#   * Saare elements ko ek saath add karta hai
#   * Single scalar return karta hai (agar axis=None)
#   * Multi-dimensional arrays ke liye specific axes ke along kaam kar sakta hai
# - Common use cases:
#   * Total loss: np.sum((y_pred - y_true) ** 2)
#   * Occurrences count karna: np.sum(arr == value)
#   * L1 norm: np.sum(np.abs(weights))
#   * Probability verification: np.sum(probabilities) 1 ke barabar hona chahiye
#   * Batch totals: np.sum(batch_scores, axis=0)
# - Example:
#   # Condition meet karne wale elements count karna
#   data = np.array([1, 5, 3, 8, 2, 9])
#   count_above_5 = np.sum(data > 5)  # 2 (values 8 aur 9)
#   
#   # Total loss calculation
#   predictions = np.array([0.8, 0.6, 0.9])
#   targets = np.array([1.0, 0.5, 1.0])
#   total_loss = np.sum((predictions - targets) ** 2)  # 0.06 (0.04 + 0.01 + 0.01)
#   
#   # Axis ke along sum karna
#   matrix = np.array([[1, 2, 3], [4, 5, 6]])
#   col_sums = np.sum(matrix, axis=0)  # [5, 7, 9] - har column ka sum
#   row_sums = np.sum(matrix, axis=1)  # [6, 15] - har row ka sum
# -----------------------------------------------------------------------------

# No axis argument is passed, so all six elements are folded into one scalar total.
print(np.sum(arr))
# Output: 21
# Calculation: 1 + 2 + 3 + 4 + 5 + 6 = 21

# =============================================================================
# FUNCTION 2: np.prod() - PRODUCT
# =============================================================================
# ENGLISH:
# - What it does: Computes the product (multiplication) of all elements
# - Syntax: np.prod(array, axis=None)
# - Formula: product = a₁ × a₂ × a₃ × ... × aₙ
# - Why use it: Less common than sum, but useful for:
#   * Calculating joint probabilities in naive Bayes, where features are assumed independent: P(A∩B) = P(A) × P(B)
#   * Computing geometric mean: (prod(values))^(1/n)
#   * Factorial calculations: np.prod(np.arange(1, n+1))
#   * Dimensionality calculations: total_size = np.prod(shape)
#   * Compound growth: final = initial × np.prod(1 + rates)
# - Warning: 
#   * Can overflow quickly with large numbers
#   * Returns 0 if any element is 0
#   * May cause underflow with many small decimals
# - Common use cases:
#   * Independent probability: P(all) = P(1) × P(2) × P(3)
#   * Array size calculation: np.prod(arr.shape)
#   * Factorial: np.prod(range(1, n+1))
#   * Geometric mean: np.prod(arr) ** (1/len(arr))
#   * Scaling factors: total_scale = np.prod(scale_factors)
# - Example:
#   # Calculate factorial
#   n = 5
#   factorial_5 = np.prod(np.arange(1, n+1))  # 120
#   
#   # Independent probabilities
#   probs = np.array([0.9, 0.8, 0.95])  # Success rates
#   prob_all_succeed = np.prod(probs)  # 0.684
#   
#   # Array size from shape
#   shape = (10, 20, 30)
#   total_elements = np.prod(shape)  # 6000
#   
#   # Geometric mean
#   values = np.array([2, 8, 4])
#   geom_mean = np.prod(values) ** (1/len(values))  # 4.0
#
# HINGLISH:
# - Yeh kya karta hai: Saare elements ka product (multiplication) compute karta hai
# - Syntax: np.prod(array, axis=None)
# - Formula: product = a₁ × a₂ × a₃ × ... × aₙ
# - Kab use karein: Sum se kam common, lekin useful hai:
#   * Naive Bayes mein joint probabilities calculate karne (features independent maane jaate hain): P(A∩B) = P(A) × P(B)
#   * Geometric mean compute karna: (prod(values))^(1/n)
#   * Factorial calculations: np.prod(np.arange(1, n+1))
#   * Dimensionality calculations: total_size = np.prod(shape)
#   * Compound growth: final = initial × np.prod(1 + rates)
# - Warning: 
#   * Bade numbers ke saath jaldi overflow ho sakta hai
#   * Agar koi element 0 hai to 0 return karta hai
#   * Bahut saare small decimals ke saath underflow ho sakta hai
# - Common use cases:
#   * Independent probability: P(all) = P(1) × P(2) × P(3)
#   * Array size calculation: np.prod(arr.shape)
#   * Factorial: np.prod(range(1, n+1))
#   * Geometric mean: np.prod(arr) ** (1/len(arr))
#   * Scaling factors: total_scale = np.prod(scale_factors)
# - Example:
#   # Factorial calculate karna
#   n = 5
#   factorial_5 = np.prod(np.arange(1, n+1))  # 120
#   
#   # Independent probabilities
#   probs = np.array([0.9, 0.8, 0.95])  # Success rates
#   prob_all_succeed = np.prod(probs)  # 0.684
#   
#   # Shape se array size
#   shape = (10, 20, 30)
#   total_elements = np.prod(shape)  # 6000
#   
#   # Geometric mean
#   values = np.array([2, 8, 4])
#   geom_mean = np.prod(values) ** (1/len(values))  # 4.0
# -----------------------------------------------------------------------------

# Multiplies all six elements together; for the values 1..6 this equals 6! (factorial).
print(np.prod(arr))
# Output: 720
# Calculation: 1 × 2 × 3 × 4 × 5 × 6 = 720

# =============================================================================
# FUNCTION 3: np.min() - MINIMUM VALUE
# =============================================================================
# ENGLISH:
# - What it does: Finds the smallest (minimum) value in the array
# - Syntax: np.min(array, axis=None)
# - Why use it: Essential for:
#   * Finding worst-case scenarios
#   * Min-max normalization: (x - min) / (max - min)
#   * Detecting lower bounds in data
#   * Validation checks (ensuring values above threshold)
#   * Clipping operations: np.clip(arr, min_val, max_val)
# - How it works:
#   * Compares all elements
#   * Returns the smallest value found
#   * Can work along specific axes
# - Common use cases:
#   * Min-max scaling: min_val = np.min(data)
#   * Finding lowest score/error
#   * Data validation: assert np.min(probabilities) >= 0
#   * Lower bound detection
#   * Finding minimum distance in clustering
# - Example:
#   # Min-max normalization
#   data = np.array([10, 20, 30, 40, 50])
#   min_val = np.min(data)  # 10
#   max_val = np.max(data)  # 50
#   normalized = (data - min_val) / (max_val - min_val)
#   # [0.0, 0.25, 0.5, 0.75, 1.0]
#   
#   # Finding worst performance
#   scores = np.array([0.85, 0.92, 0.78, 0.95])
#   worst_score = np.min(scores)  # 0.78
#   
#   # Per-feature minimum
#   X = np.array([[1, 10], [2, 5], [3, 15]])
#   feature_mins = np.min(X, axis=0)  # [1, 5]
#
# HINGLISH:
# - Yeh kya karta hai: Array mein sabse chhoti (minimum) value dhoondhta hai
# - Syntax: np.min(array, axis=None)
# - Kab use karein: Zaroori hai:
#   * Worst-case scenarios dhoondhne ke liye
#   * Min-max normalization: (x - min) / (max - min)
#   * Data mein lower bounds detect karne
#   * Validation checks (values threshold se upar ensure karna)
#   * Clipping operations: np.clip(arr, min_val, max_val)
# - Kaise kaam karta hai:
#   * Saare elements compare karta hai
#   * Sabse chhoti value return karta hai
#   * Specific axes ke along kaam kar sakta hai
# - Common use cases:
#   * Min-max scaling: min_val = np.min(data)
#   * Lowest score/error dhoondhna
#   * Data validation: assert np.min(probabilities) >= 0
#   * Lower bound detection
#   * Clustering mein minimum distance dhoondhna
# - Example:
#   # Min-max normalization
#   data = np.array([10, 20, 30, 40, 50])
#   min_val = np.min(data)  # 10
#   max_val = np.max(data)  # 50
#   normalized = (data - min_val) / (max_val - min_val)
#   # [0.0, 0.25, 0.5, 0.75, 1.0]
#   
#   # Worst performance dhoondhna
#   scores = np.array([0.85, 0.92, 0.78, 0.95])
#   worst_score = np.min(scores)  # 0.78
#   
#   # Har feature ka minimum
#   X = np.array([[1, 10], [2, 5], [3, 15]])
#   feature_mins = np.min(X, axis=0)  # [1, 5]
# -----------------------------------------------------------------------------

# Prints the smallest VALUE itself; np.argmin (FUNCTION 6 below) gives its position instead.
print(np.min(arr))
# Output: 1
# The smallest value in [1, 2, 3, 4, 5, 6] is 1

# =============================================================================
# FUNCTION 4: np.max() - MAXIMUM VALUE
# =============================================================================
# ENGLISH:
# - What it does: Finds the largest (maximum) value in the array
# - Syntax: np.max(array, axis=None)
# - Why use it: Essential for:
#   * Finding best-case scenarios
#   * Min-max normalization (upper bound)
#   * Detecting upper bounds/outliers
#   * Numerically stable softmax: exp(x - max(x)) / sum(exp(x - max(x)))
#   * Gradient clipping: prevent exploding gradients
# - How it works:
#   * Compares all elements
#   * Returns the largest value found
#   * Can work along specific axes
# - Common use cases:
#   * Min-max scaling: max_val = np.max(data)
#   * Finding highest score/accuracy
#   * Confidence scores: max_prob = np.max(probabilities)
#   * Data range checking
#   * Peak detection in signals
# - Example:
#   # Finding best model
#   accuracies = np.array([0.85, 0.92, 0.88, 0.95])
#   best_accuracy = np.max(accuracies)  # 0.95
#   
#   # Softmax normalization (numerical stability)
#   logits = np.array([1.0, 2.0, 3.0])
#   max_logit = np.max(logits)  # 3.0
#   exp_logits = np.exp(logits - max_logit)  # Subtract max for stability
#   softmax = exp_logits / np.sum(exp_logits)
#   
#   # Per-sample maximum (highest class probability in each row)
#   predictions = np.array([[0.7, 0.2, 0.1],
#                           [0.1, 0.8, 0.1],
#                           [0.2, 0.3, 0.5]])
#   max_per_sample = np.max(predictions, axis=1)  # [0.7, 0.8, 0.5]
#
# HINGLISH:
# - Yeh kya karta hai: Array mein sabse badi (maximum) value dhoondhta hai
# - Syntax: np.max(array, axis=None)
# - Kab use karein: Zaroori hai:
#   * Best-case scenarios dhoondhne ke liye
#   * Min-max normalization (upper bound)
#   * Upper bounds/outliers detect karne
#   * Numerically stable softmax: exp(x - max(x)) / sum(exp(x - max(x)))
#   * Gradient clipping: exploding gradients se bachna
# - Kaise kaam karta hai:
#   * Saare elements compare karta hai
#   * Sabse badi value return karta hai
#   * Specific axes ke along kaam kar sakta hai
# - Common use cases:
#   * Min-max scaling: max_val = np.max(data)
#   * Highest score/accuracy dhoondhna
#   * Confidence scores: max_prob = np.max(probabilities)
#   * Data range checking
#   * Signals mein peak detection
# - Example:
#   # Best model dhoondhna
#   accuracies = np.array([0.85, 0.92, 0.88, 0.95])
#   best_accuracy = np.max(accuracies)  # 0.95
#   
#   # Softmax normalization (numerical stability)
#   logits = np.array([1.0, 2.0, 3.0])
#   max_logit = np.max(logits)  # 3.0
#   exp_logits = np.exp(logits - max_logit)  # Stability ke liye max subtract
#   softmax = exp_logits / np.sum(exp_logits)
#   
#   # Har sample ka maximum (har row ki sabse badi class probability)
#   predictions = np.array([[0.7, 0.2, 0.1],
#                           [0.1, 0.8, 0.1],
#                           [0.2, 0.3, 0.5]])
#   max_per_sample = np.max(predictions, axis=1)  # [0.7, 0.8, 0.5]
# -----------------------------------------------------------------------------

# Prints the largest VALUE itself; np.argmax (FUNCTION 7 below) gives its position instead.
print(np.max(arr))
# Output: 6
# The largest value in [1, 2, 3, 4, 5, 6] is 6

# =============================================================================
# FUNCTION 5: np.mean() - ARITHMETIC MEAN (AVERAGE)
# =============================================================================
# ENGLISH:
# - What it does: Computes the arithmetic mean (average) of all elements
# - Syntax: np.mean(array, axis=None)
# - Formula: mean = (sum of all values) / (count of values)
# - Why use it: MOST COMMONLY USED statistic for:
#   * Data centering in normalization
#   * Computing average performance
#   * Expected value calculations
#   * Baseline comparisons
#   * Loss function aggregation
# - How it works:
#   * Sums all elements
#   * Divides by number of elements
#   * Returns average value
# - Common use cases:
#   * Normalization: (data - mean) / std
#   * Average accuracy/loss
#   * Expected returns in finance
#   * Center of mass calculations
#   * Feature statistics
# - Example covered in normalization section above
#
# HINGLISH:
# - Yeh kya karta hai: Saare elements ka arithmetic mean (average) compute karta hai
# - Syntax: np.mean(array, axis=None)
# - Formula: mean = (saari values ka sum) / (values ki count)
# - Kab use karein: Statistics mein SABSE ZYADA use hone wala:
#   * Normalization mein data centering
#   * Average performance compute karna
#   * Expected value calculations
#   * Baseline comparisons
#   * Loss function aggregation
# - Kaise kaam karta hai:
#   * Saare elements ko sum karta hai
#   * Elements ki sankhya se divide karta hai
#   * Average value return karta hai
# - Common use cases:
#   * Normalization: (data - mean) / std
#   * Average accuracy/loss
#   * Finance mein expected returns
#   * Center of mass calculations
#   * Feature statistics
# - Example upar normalization section mein covered hai
# -----------------------------------------------------------------------------

# The result is a float (3.5) even though arr holds only integers.
print(np.mean(arr))
# Output: 3.5
# Calculation: (1 + 2 + 3 + 4 + 5 + 6) / 6 = 21 / 6 = 3.5

# =============================================================================
# FUNCTION 6: np.argmin() - INDEX OF MINIMUM VALUE
# =============================================================================
# ENGLISH:
# - What it does: Returns the INDEX (position) of the minimum value, not the
#   value itself
# - Syntax: np.argmin(array, axis=None)
# - Why use it: Essential for:
#   * Finding which sample/feature has lowest value
#   * Selecting best model (lowest loss)
#   * Identifying nearest neighbor in KNN
#   * Finding optimal hyperparameters
#   * Locating minimum distance
# - How it works:
#   * Finds minimum value
#   * Returns its position/index (0-based)
#   * If multiple minimums exist, returns first occurrence
# - Difference from np.min():
#   * np.min() → returns the VALUE (e.g., 1)
#   * np.argmin() → returns the INDEX (e.g., 0)
# - Common use cases:
#   * Best model selection: best_idx = np.argmin(losses)
#   * Nearest neighbor: nearest = np.argmin(distances)
#   * Optimal hyperparameter: best_lr_idx = np.argmin(validation_losses)
#   * Classification: predicted_class = np.argmin(distances_to_centers)
#   * Finding minimum along axis: np.argmin(matrix, axis=1)
# - Example:
#   # Finding best model by loss
#   losses = np.array([0.45, 0.32, 0.28, 0.35])
#   best_model_idx = np.argmin(losses)  # 2 (third model has lowest loss)
#   
#   # Nearest neighbor (KNN)
#   test_point = np.array([2.5, 3.5])
#   train_points = np.array([[1, 1], [2, 3], [4, 5]])
#   distances = np.sqrt(np.sum((train_points - test_point)**2, axis=1))
#   nearest_idx = np.argmin(distances)  # Index of closest point
#   
#   # Per-row minimum index
#   matrix = np.array([[5, 2, 8],
#                      [1, 9, 3],
#                      [7, 4, 6]])
#   min_indices = np.argmin(matrix, axis=1)  # [1, 0, 1]
#   # Row 0: index 1 (value 2), Row 1: index 0 (value 1), Row 2: index 1 (value 4)
#
# HINGLISH:
# - Yeh kya karta hai: Minimum value ka INDEX (position) return karta hai,
#   value khud nahi
# - Syntax: np.argmin(array, axis=None)
# - Kab use karein: Zaroori hai:
#   * Dhoondhne ke liye ki kaun sa sample/feature lowest value rakhta hai
#   * Best model select karna (lowest loss)
#   * KNN mein nearest neighbor identify karna
#   * Optimal hyperparameters dhoondhna
#   * Minimum distance locate karna
# - Kaise kaam karta hai:
#   * Minimum value dhoondhta hai
#   * Uska position/index return karta hai (0-based)
#   * Agar multiple minimums hain, to pehli occurrence return karta hai
# - np.min() se difference:
#   * np.min() → VALUE return karta hai (jaise 1)
#   * np.argmin() → INDEX return karta hai (jaise 0)
# - Common use cases:
#   * Best model selection: best_idx = np.argmin(losses)
#   * Nearest neighbor: nearest = np.argmin(distances)
#   * Optimal hyperparameter: best_lr_idx = np.argmin(validation_losses)
#   * Classification: predicted_class = np.argmin(distances_to_centers)
#   * Axis ke along minimum: np.argmin(matrix, axis=1)
# - Example:
#   # Loss se best model dhoondhna
#   losses = np.array([0.45, 0.32, 0.28, 0.35])
#   best_model_idx = np.argmin(losses)  # 2 (teesre model ka loss sabse kam)
#   
#   # Nearest neighbor (KNN)
#   test_point = np.array([2.5, 3.5])
#   train_points = np.array([[1, 1], [2, 3], [4, 5]])
#   distances = np.sqrt(np.sum((train_points - test_point)**2, axis=1))
#   nearest_idx = np.argmin(distances)  # Sabse paas point ka index
#   
#   # Har row ka minimum index
#   matrix = np.array([[5, 2, 8],
#                      [1, 9, 3],
#                      [7, 4, 6]])
#   min_indices = np.argmin(matrix, axis=1)  # [1, 0, 1]
#   # Row 0: index 1 (value 2), Row 1: index 0 (value 1), Row 2: index 1 (value 4)
# -----------------------------------------------------------------------------

# Prints a 0-based position, not a value: arr[0] is where the minimum lives.
print(np.argmin(arr))
# Output: 0
# The minimum value (1) is at index 0

# =============================================================================
# FUNCTION 7: np.argmax() - INDEX OF MAXIMUM VALUE
# =============================================================================
# ENGLISH:
# - What it does: Returns the INDEX (position) of the maximum value, not the
#   value itself
# - Syntax: np.argmax(array, axis=None)
# - Why use it: EXTREMELY IMPORTANT in ML for:
#   * Classification predictions: predicted_class = np.argmax(probabilities)
#   * Finding best performing model/epoch
#   * Selecting highest confidence prediction
#   * Winner-takes-all mechanisms
#   * One-hot encoding to labels
# - How it works:
#   * Finds maximum value
#   * Returns its position/index (0-based)
#   * If multiple maximums exist, returns first occurrence
# - Difference from np.max():
#   * np.max() → returns the VALUE (e.g., 6)
#   * np.argmax() → returns the INDEX (e.g., 5)
# - Common use cases:
#   * Classification: class = np.argmax(predictions, axis=1)
#   * Best epoch selection: best_epoch = np.argmax(val_accuracies)
#   * Converting softmax to labels
#   * Finding most confident prediction
#   * Attention mechanism: attend_to = np.argmax(attention_weights)
# - Example:
#   # Multi-class classification
#   predictions = np.array([[0.1, 0.7, 0.2],   # Sample 1
#                           [0.8, 0.1, 0.1],   # Sample 2
#                           [0.2, 0.3, 0.5]])  # Sample 3
#   predicted_classes = np.argmax(predictions, axis=1)
#   # [1, 0, 2] - Each sample's predicted class
#   
#   # Finding best training epoch
#   val_accs = np.array([0.75, 0.82, 0.88, 0.85, 0.87])
#   best_epoch = np.argmax(val_accs)  # 2 (third epoch has highest accuracy)
#   
#   # Attention mechanism
#   attention_scores = np.array([0.1, 0.6, 0.2, 0.1])
#   focus_on = np.argmax(attention_scores)  # 1 - attend to second element
#
# HINGLISH:
# - Yeh kya karta hai: Maximum value ka INDEX (position) return karta hai,
#   value khud nahi
# - Syntax: np.argmax(array, axis=None)
# - Kab use karein: ML mein BAHUT IMPORTANT:
#   * Classification predictions: predicted_class = np.argmax(probabilities)
#   * Best performing model/epoch dhoondhna
#   * Highest confidence prediction select karna
#   * Winner-takes-all mechanisms
#   * One-hot encoding ko labels mein convert karna
# - Kaise kaam karta hai:
#   * Maximum value dhoondhta hai
#   * Uska position/index return karta hai (0-based)
#   * Agar multiple maximums hain, to pehli occurrence return karta hai
# - np.max() se difference:
#   * np.max() → VALUE return karta hai (jaise 6)
#   * np.argmax() → INDEX return karta hai (jaise 5)
# - Common use cases:
#   * Classification: class = np.argmax(predictions, axis=1)
#   * Best epoch selection: best_epoch = np.argmax(val_accuracies)
#   * Softmax ko labels mein convert karna
#   * Sabse confident prediction dhoondhna
#   * Attention mechanism: attend_to = np.argmax(attention_weights)
# - Example:
#   # Multi-class classification
#   predictions = np.array([[0.1, 0.7, 0.2],   # Sample 1
#                           [0.8, 0.1, 0.1],   # Sample 2
#                           [0.2, 0.3, 0.5]])  # Sample 3
#   predicted_classes = np.argmax(predictions, axis=1)
#   # [1, 0, 2] - Har sample ki predicted class
#   
#   # Best training epoch dhoondhna
#   val_accs = np.array([0.75, 0.82, 0.88, 0.85, 0.87])
#   best_epoch = np.argmax(val_accs)  # 2 (teesre epoch ka accuracy sabse zyada)
#   
#   # Attention mechanism
#   attention_scores = np.array([0.1, 0.6, 0.2, 0.1])
#   focus_on = np.argmax(attention_scores)  # 1 - doosre element par attend
# -----------------------------------------------------------------------------

# Prints a 0-based position, not a value: arr[5] is where the maximum lives.
print(np.argmax(arr))
# Output: 5
# The maximum value (6) is at index 5 (last position)

# =============================================================================
# FUNCTION 8: np.std() - STANDARD DEVIATION
# =============================================================================
# ENGLISH:
# - Covered in detail in the normalization section above
# - Measures spread/variability of data
# - Formula: σ = sqrt(mean((x - μ)²))
# - Essential for normalization, outlier detection, and understanding data distribution
#
# HINGLISH:
# - Upar normalization section mein detail mein covered hai
# - Data ke spread/variability ko measure karta hai
# - Formula: σ = sqrt(mean((x - μ)²))
# - Normalization, outlier detection, aur data distribution samajhne ke liye essential
# -----------------------------------------------------------------------------

# No ddof is passed, so this is the population standard deviation: the squared
# deviations are averaged over all N = 6 elements (Step 4), not over N - 1.
print(np.std(arr))
# Output: 1.707825127659933
# Calculation:
# Step 1: mean = 3.5
# Step 2: differences = [-2.5, -1.5, -0.5, 0.5, 1.5, 2.5]
# Step 3: squared = [6.25, 2.25, 0.25, 0.25, 2.25, 6.25]
# Step 4: variance = 17.5 / 6 ≈ 2.917
# Step 5: std = sqrt(2.917) ≈ 1.708

# =============================================================================
# FUNCTION 9: np.median() - MEDIAN (MIDDLE VALUE)
# =============================================================================
# ENGLISH:
# - What it does: Finds the middle value when data is sorted. If even number
#   of elements, returns average of two middle values.
# - Syntax: np.median(array, axis=None)
# - Why use it: Better than mean for data with outliers:
#   * ROBUST to outliers (not affected by extreme values)
#   * Represents "typical" value better when data is skewed
#   * Used in robust statistics
#   * Better for income, house prices (skewed distributions)
# - How it works:
#   * Sorts the array
#   * If odd length: returns middle element
#   * If even length: returns average of two middle elements
# - Median vs Mean:
#   * Mean: Affected by outliers (e.g., [1,2,3,1000] → mean=251.5)
#   * Median: NOT affected by outliers (e.g., [1,2,3,1000] → median=2.5)
# - Common use cases:
#   * Income statistics (Bill Gates doesn't skew median)
#   * House price analysis
#   * Robust statistics in presence of outliers
#   * Percentile calculations (median = 50th percentile)
#   * Data with heavy tails
# - Example:
#   # With outlier - median is robust
#   salaries = np.array([30000, 35000, 40000, 42000, 1000000])
#   mean_salary = np.mean(salaries)    # 229,400 (skewed by outlier!)
#   median_salary = np.median(salaries)  # 40,000 (more representative)
#   
#   # Even vs odd length
#   odd_arr = np.array([1, 3, 5])
#   np.median(odd_arr)  # 3 (middle element)
#   
#   even_arr = np.array([1, 2, 3, 4])
#   np.median(even_arr)  # 2.5 (average of 2 and 3)
#   
#   # Along axis
#   matrix = np.array([[1, 10, 2], [5, 3, 8]])
#   col_medians = np.median(matrix, axis=0)  # [3, 6.5, 5]
#
# HINGLISH:
# - Yeh kya karta hai: Data sort karne par beech ki value dhoondhta hai. Agar
#   even number of elements hain, to do beech ki values ka average return karta hai.
# - Syntax: np.median(array, axis=None)
# - Kab use karein: Outliers wale data ke liye mean se better:
#   * Outliers se ROBUST (extreme values affect nahi karte)
#   * Jab data skewed ho to "typical" value better represent karta hai
#   * Robust statistics mein use hota hai
#   * Income, house prices ke liye better (skewed distributions)
# - Kaise kaam karta hai:
#   * Array ko sort karta hai
#   * Agar odd length: beech ka element return karta hai
#   * Agar even length: do beech ke elements ka average return karta hai
# - Median vs Mean:
#   * Mean: Outliers se affect hota hai (jaise [1,2,3,1000] → mean=251.5)
#   * Median: Outliers se affect NAHI hota (jaise [1,2,3,1000] → median=2.5)
# - Common use cases:
#   * Income statistics (Bill Gates median ko skew nahi karta)
#   * House price analysis
#   * Outliers ki presence mein robust statistics
#   * Percentile calculations (median = 50th percentile)
#   * Heavy tails wala data
# - Example:
#   # Outlier ke saath - median robust hai
#   salaries = np.array([30000, 35000, 40000, 42000, 1000000])
#   mean_salary = np.mean(salaries)    # 229,400 (outlier se skewed!)
#   median_salary = np.median(salaries)  # 40,000 (zyada representative)
#   
#   # Even vs odd length
#   odd_arr = np.array([1, 3, 5])
#   np.median(odd_arr)  # 3 (beech ka element)
#   
#   even_arr = np.array([1, 2, 3, 4])
#   np.median(even_arr)  # 2.5 (2 aur 3 ka average)
#   
#   # Axis ke along
#   matrix = np.array([[1, 10, 2], [5, 3, 8]])
#   col_medians = np.median(matrix, axis=0)  # [3, 6.5, 5]
# -----------------------------------------------------------------------------

# For this array the median equals the mean (both 3.5): the values are
# symmetric around the centre and contain no outliers.
print(np.median(arr))
# Output: 3.5
# For [1, 2, 3, 4, 5, 6]:
# - Even length (6 elements)
# - Two middle values: 3 and 4
# - Median = (3 + 4) / 2 = 3.5

# =============================================================================
# FUNCTION 10: np.var() - VARIANCE
# =============================================================================
# ENGLISH:
# - What it does: Computes the variance - the average of squared deviations
#   from the mean. Variance = (std)²
# - Syntax: np.var(array, axis=None, ddof=0)
# - Formula: variance = mean((x - μ)²)
# - Why use it:
#   * Measures how spread out data is (like std, but squared)
#   * Used in many statistical formulas
#   * Basis for standard deviation: std = sqrt(variance)
#   * Feature selection: high variance = informative feature
#   * ANOVA (Analysis of Variance)
# - Relationship to std:
#   * variance = std²
#   * std = sqrt(variance)
# - ddof parameter (Degrees of Freedom):
#   * ddof=0 (default): Population variance (divide by N)
#   * ddof=1: Sample variance (divide by N-1)
# - Units issue:
#   * Variance is in squared units (harder to interpret)
#   * Std is in original units (easier to interpret)
#   * Both measure same thing (spread)
# - Common use cases:
#   * ANOVA tests
#   * Feature selection (remove low-variance features)
#   * Calculating standard deviation
#   * Statistical hypothesis testing
#   * Quality control (process variation)
# - Example:
#   # Feature selection - remove low variance features
#   X = np.array([[1, 10, 100],
#                 [1, 20, 200],
#                 [1, 30, 300],
#                 [1, 40, 400]])
#   feature_vars = np.var(X, axis=0)  # [0, 125, 12500]
#   # Feature 0 has zero variance (constant) - can be removed!
#   
#   # Relationship to std
#   data = np.array([2, 4, 6, 8])
#   variance = np.var(data)  # 5.0
#   std = np.std(data)       # 2.236... = sqrt(5.0)
#   # Verify: std² = variance
#   np.std(data) ** 2  # 5.0 ✓
#
# HINGLISH:
# - Yeh kya karta hai: Variance compute karta hai - mean se squared deviations
#   ka average. Variance = (std)²
# - Syntax: np.var(array, axis=None, ddof=0)
# - Formula: variance = mean((x - μ)²)
# - Kab use karein:
#   * Data kitna spread out hai measure karna (std jaisa, lekin squared)
#   * Bahut saare statistical formulas mein use hota hai
#   * Standard deviation ki basis: std = sqrt(variance)
#   * Feature selection: high variance = informative feature
#   * ANOVA (Analysis of Variance)
# - Std ke saath relationship:
#   * variance = std²
#   * std = sqrt(variance)
# - ddof parameter (Degrees of Freedom):
#   * ddof=0 (default): Population variance (N se divide)
#   * ddof=1: Sample variance (N-1 se divide)
# - Units issue:
#   * Variance squared units mein hai (interpret karna mushkil)
#   * Std original units mein hai (interpret karna aasaan)
#   * Dono same cheez measure karte hain (spread)
# - Common use cases:
#   * ANOVA tests
#   * Feature selection (low-variance features remove karna)
#   * Standard deviation calculate karna
#   * Statistical hypothesis testing
#   * Quality control (process variation)
# - Example:
#   # Feature selection - low variance features remove karna
#   X = np.array([[1, 10, 100],
#                 [1, 20, 200],
#                 [1, 30, 300],
#                 [1, 40, 400]])
#   feature_vars = np.var(X, axis=0)  # [0, 125, 12500]
#   # Feature 0 ka zero variance hai (constant) - remove kar sakte hain!
#   
#   # Std ke saath relationship
#   data = np.array([2, 4, 6, 8])
#   variance = np.var(data)  # 5.0
#   std = np.std(data)       # 2.236... = sqrt(5.0)
#   # Verify: std² = variance
#   np.std(data) ** 2  # 5.0 ✓
# -----------------------------------------------------------------------------

# Demo: population variance of `arr` (np.var defaults to ddof=0, i.e. divide by N).
print(np.var(arr))
# Output: 2.9166666666666665
# Calculation for [1, 2, 3, 4, 5, 6]:
# Step 1: mean = 3.5
# Step 2: differences = [-2.5, -1.5, -0.5, 0.5, 1.5, 2.5]
# Step 3: squared = [6.25, 2.25, 0.25, 0.25, 2.25, 6.25]
# Step 4: variance = (6.25 + 2.25 + 0.25 + 0.25 + 2.25 + 6.25) / 6
#                  = 17.5 / 6 ≈ 2.917
# Note: std² = 1.708² ≈ 2.917 ✓
# With ddof=1 (sample variance) the divisor would be N-1 = 5: 17.5 / 5 = 3.5

# =============================================================================
# SUMMARY TABLE: ALL FUNCTIONS AT A GLANCE
# =============================================================================
# ENGLISH:
# Function    | Returns        | Use Case
# ------------|----------------|------------------------------------------
# np.sum()    | Sum            | Total, counting, L1 norm
# np.prod()   | Product        | Probabilities, factorial, geometric mean
# np.min()    | Min value      | Lower bound, worst case, min-max scaling
# np.max()    | Max value      | Upper bound, best case, min-max scaling
# np.mean()   | Average        | Normalization, expected value, baseline
# np.argmin() | Min index      | Best model (low loss), nearest neighbor
# np.argmax() | Max index      | Classification, best epoch, attention
# np.std()    | Std deviation  | Normalization, variability, outliers
# np.median() | Middle value   | Robust statistics, skewed data
# np.var()    | Variance       | Spread measure, feature selection, ANOVA
#
# HINGLISH:
# Function    | Returns        | Use Case
# ------------|----------------|------------------------------------------
# np.sum()    | Sum            | Total, counting, L1 norm
# np.prod()   | Product        | Probabilities, factorial, geometric mean
# np.min()    | Min value      | Lower bound, worst case, min-max scaling
# np.max()    | Max value      | Upper bound, best case, min-max scaling
# np.mean()   | Average        | Normalization, expected value, baseline
# np.argmin() | Min index      | Best model (kam loss), nearest neighbor
# np.argmax() | Max index      | Classification, best epoch, attention
# np.std()    | Std deviation  | Normalization, variability, outliers
# np.median() | Beech ki value | Robust statistics, skewed data
# np.var()    | Variance       | Spread measure, feature selection, ANOVA
# =============================================================================

21
720
1
6
3.5
0
5
1.707825127659933
3.5
2.9166666666666665


In [29]:
# =============================================================================
# NUMPY POWER AND MATHEMATICAL FUNCTIONS
# =============================================================================

# =============================================================================
# WHAT ARE POWER FUNCTIONS?
# =============================================================================
# ENGLISH:
# Power functions perform mathematical operations involving exponents, roots,
# and logarithms. These are FUNDAMENTAL in ML for:
# - Activation functions (sigmoid, tanh, ReLU variants)
# - Loss functions (log loss, cross-entropy)
# - Feature transformations (polynomial features, log transforms)
# - Gradient calculations (exponentials in backprop)
# - Probability distributions (softmax uses exp)
#
# HINGLISH:
# Power functions mathematical operations perform karte hain jo exponents,
# roots, aur logarithms se related hain. ML mein yeh FUNDAMENTAL hain:
# - Activation functions (sigmoid, tanh, ReLU variants)
# - Loss functions (log loss, cross-entropy)
# - Feature transformations (polynomial features, log transforms)
# - Gradient calculations (backprop mein exponentials)
# - Probability distributions (softmax exp use karta hai)
# -----------------------------------------------------------------------------

# Sample integer input shared by the print() demos that follow.
arr = np.array([1, 2, 3, 4, 5, 6])

# =============================================================================
# FUNCTION 1: np.square() - ELEMENT-WISE SQUARING
# =============================================================================
# ENGLISH:
# - What it does: Squares each element (raises to power 2). Equivalent to
#   arr ** 2 but more explicit and readable.
# - Syntax: np.square(array)
# - Formula: square(x) = x²
# - Why use it: Common in ML for:
#   * Mean Squared Error (MSE): np.mean(np.square(y_pred - y_true))
#   * L2 regularization: lambda * np.sum(np.square(weights))
#   * Euclidean distance: np.sqrt(np.sum(np.square(x - y)))
#   * Variance calculation: np.mean(np.square(x - mean))
#   * Polynomial features: creating x² features
# - Performance: Practically the same as ** 2 for a scalar exponent; pick whichever reads better
# - Common use cases:
#   * MSE loss: mse = np.mean(np.square(predictions - targets))
#   * L2 norm squared: l2_squared = np.sum(np.square(vector))
#   * Distance calculations: dist² = np.sum(np.square(p1 - p2))
#   * Ridge regression penalty: penalty = alpha * np.sum(np.square(weights))
#   * Creating polynomial features for regression
# - Example:
#   # Mean Squared Error
#   y_true = np.array([3.0, 5.0, 2.0, 7.0])
#   y_pred = np.array([2.5, 5.5, 2.0, 6.8])
#   mse = np.mean(np.square(y_pred - y_true))
#   # [(0.5)², (0.5)², (0)², (0.2)²] → mean → 0.135
#   
#   # L2 regularization term
#   weights = np.array([0.5, -0.3, 0.8, -0.2])
#   l2_penalty = 0.01 * np.sum(np.square(weights))
#   # 0.01 * (0.25 + 0.09 + 0.64 + 0.04) = 0.0102
#   
#   # Euclidean distance
#   point1 = np.array([1, 2, 3])
#   point2 = np.array([4, 6, 8])
#   distance = np.sqrt(np.sum(np.square(point1 - point2)))
#   # sqrt((3)² + (4)² + (5)²) = sqrt(50) ≈ 7.07
#
# HINGLISH:
# - Yeh kya karta hai: Har element ko square karta hai (power 2 tak raise).
#   arr ** 2 ke equivalent hai lekin zyada explicit aur readable.
# - Syntax: np.square(array)
# - Formula: square(x) = x²
# - Kab use karein: ML mein common hai:
#   * Mean Squared Error (MSE): np.mean(np.square(y_pred - y_true))
#   * L2 regularization: lambda * np.sum(np.square(weights))
#   * Euclidean distance: np.sqrt(np.sum(np.square(x - y)))
#   * Variance calculation: np.mean(np.square(x - mean))
#   * Polynomial features: x² features banana
# - Performance: Scalar exponent ke liye ** 2 ke lagbhag barabar; jo zyada readable lage woh use karo
# - Common use cases:
#   * MSE loss: mse = np.mean(np.square(predictions - targets))
#   * L2 norm squared: l2_squared = np.sum(np.square(vector))
#   * Distance calculations: dist² = np.sum(np.square(p1 - p2))
#   * Ridge regression penalty: penalty = alpha * np.sum(np.square(weights))
#   * Regression ke liye polynomial features banana
# - Example:
#   # Mean Squared Error
#   y_true = np.array([3.0, 5.0, 2.0, 7.0])
#   y_pred = np.array([2.5, 5.5, 2.0, 6.8])
#   mse = np.mean(np.square(y_pred - y_true))
#   # [(0.5)², (0.5)², (0)², (0.2)²] → mean → 0.135
#   
#   # L2 regularization term
#   weights = np.array([0.5, -0.3, 0.8, -0.2])
#   l2_penalty = 0.01 * np.sum(np.square(weights))
#   # 0.01 * (0.25 + 0.09 + 0.64 + 0.04) = 0.0102
#   
#   # Euclidean distance
#   point1 = np.array([1, 2, 3])
#   point2 = np.array([4, 6, 8])
#   distance = np.sqrt(np.sum(np.square(point1 - point2)))
#   # sqrt((3)² + (4)² + (5)²) = sqrt(50) ≈ 7.07
# -----------------------------------------------------------------------------

# Demo: squaring the integer array; the printed result is still integer-typed.
print(np.square(arr))
# Output: [ 1  4  9 16 25 36]
# Calculation: [1², 2², 3², 4², 5², 6²]

# =============================================================================
# FUNCTION 2: np.sqrt() - SQUARE ROOT
# =============================================================================
# ENGLISH:
# - What it does: Computes the square root of each element. Equivalent to
#   arr ** 0.5 but more readable and optimized.
# - Syntax: np.sqrt(array)
# - Formula: sqrt(x) = √x = x^(1/2)
# - Why use it: Essential for:
#   * Euclidean distance: dist = np.sqrt(np.sum((x - y)**2))
#   * Standard deviation: std = np.sqrt(variance)
#   * RMSE (Root Mean Squared Error): np.sqrt(mse)
#   * L2 norm: np.sqrt(np.sum(x**2))
#   * Normalizing vectors to unit length
# - Important notes:
#   * Returns NaN for negative numbers (use np.sqrt(np.abs(x)) if needed)
#   * Always returns float, even for perfect squares
#   * Element-wise operation (vectorized)
# - Common use cases:
#   * RMSE: rmse = np.sqrt(np.mean((y_pred - y_true)**2))
#   * Euclidean norm: norm = np.sqrt(np.sum(vector**2))
#   * Standard deviation from variance: std = np.sqrt(var)
#   * Normalizing embeddings: normalized = vec / np.sqrt(np.sum(vec**2))
#   * Distance metrics in clustering/KNN
# - Example:
#   # Root Mean Squared Error
#   errors = np.array([0.5, -0.3, 0.2, -0.1])
#   mse = np.mean(np.square(errors))  # 0.0975
#   rmse = np.sqrt(mse)  # 0.312
#   
#   # L2 normalization (unit vector)
#   vector = np.array([3, 4])
#   norm = np.sqrt(np.sum(np.square(vector)))  # 5.0
#   unit_vector = vector / norm  # [0.6, 0.8]
#   
#   # Standard deviation from variance
#   data = np.array([2, 4, 6, 8])
#   variance = np.var(data)  # 5.0
#   std = np.sqrt(variance)  # 2.236...
#   # Verify:
#   np.std(data)  # 2.236... ✓
#
# HINGLISH:
# - Yeh kya karta hai: Har element ka square root compute karta hai.
#   arr ** 0.5 ke equivalent hai lekin zyada readable aur optimized.
# - Syntax: np.sqrt(array)
# - Formula: sqrt(x) = √x = x^(1/2)
# - Kab use karein: Zaroori hai:
#   * Euclidean distance: dist = np.sqrt(np.sum((x - y)**2))
#   * Standard deviation: std = np.sqrt(variance)
#   * RMSE (Root Mean Squared Error): np.sqrt(mse)
#   * L2 norm: np.sqrt(np.sum(x**2))
#   * Vectors ko unit length tak normalize karna
# - Important notes:
#   * Negative numbers ke liye NaN return karta hai (zaroorat ho to np.sqrt(np.abs(x)))
#   * Hamesha float return karta hai, perfect squares ke liye bhi
#   * Element-wise operation (vectorized)
# - Common use cases:
#   * RMSE: rmse = np.sqrt(np.mean((y_pred - y_true)**2))
#   * Euclidean norm: norm = np.sqrt(np.sum(vector**2))
#   * Variance se standard deviation: std = np.sqrt(var)
#   * Embeddings normalize karna: normalized = vec / np.sqrt(np.sum(vec**2))
#   * Clustering/KNN mein distance metrics
# - Example:
#   # Root Mean Squared Error
#   errors = np.array([0.5, -0.3, 0.2, -0.1])
#   mse = np.mean(np.square(errors))  # 0.0975
#   rmse = np.sqrt(mse)  # 0.312
#   
#   # L2 normalization (unit vector)
#   vector = np.array([3, 4])
#   norm = np.sqrt(np.sum(np.square(vector)))  # 5.0
#   unit_vector = vector / norm  # [0.6, 0.8]
#   
#   # Variance se standard deviation
#   data = np.array([2, 4, 6, 8])
#   variance = np.var(data)  # 5.0
#   std = np.sqrt(variance)  # 2.236...
#   # Verify:
#   np.std(data)  # 2.236... ✓
# -----------------------------------------------------------------------------

# Demo: element-wise square root; the integer input comes back as floats,
# even for the perfect squares 1 and 4.
print(np.sqrt(arr))
# Output: [1.         1.41421356 1.73205081 2.         2.23606798 2.44948975]
# Calculation: [√1, √2, √3, √4, √5, √6]

# =============================================================================
# FUNCTION 3: np.power() - ELEMENT-WISE POWER
# =============================================================================
# ENGLISH:
# - What it does: Raises first array elements to powers specified in second
#   array (or scalar). More flexible than ** operator.
# - Syntax: np.power(base, exponent)
# - Formula: power(x, n) = x^n
# - Why use it: Flexible exponentiation for:
#   * Polynomial features: x, x², x³, x⁴
#   * Custom transformations
#   * Element-wise different powers
#   * Box-Cox transformations
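# - Compared with **:
#   * Same computation: on ndarrays, a ** b is shorthand for np.power(a, b),
#     so both accept two arrays (element-wise powers) and broadcast alike
#   * More explicit in code
#   * Function form also accepts ufunc keywords (out=, where=, dtype=)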
# - Common use cases:
#   * Polynomial features: np.power(X[:, None], [1, 2, 3])  # shape (n, 3): x, x², x³
#   * Custom scaling: np.power(data, 0.5)  # Same as sqrt
#   * Box-Cox transform: (x^λ - 1) / λ
#   * Nonlinear transformations
# - Example:
#   # Polynomial feature creation
#   X = np.array([2, 3, 4])
#   X_poly = np.column_stack([
#       np.power(X, 1),  # [2, 3, 4]
#       np.power(X, 2),  # [4, 9, 16]
#       np.power(X, 3)   # [8, 27, 64]
#   ])
#   
#   # Element-wise different powers
#   bases = np.array([2, 3, 4, 5])
#   exponents = np.array([1, 2, 3, 4])
#   results = np.power(bases, exponents)
#   # [2^1, 3^2, 4^3, 5^4] = [2, 9, 64, 625]
#   
#   # Fractional powers (roots)
#   data = np.array([4, 9, 16, 25])
#   square_roots = np.power(data, 0.5)  # [2, 3, 4, 5]
#   cube_roots = np.power(data, 1/3)    # [1.587, 2.080, 2.520, 2.924]
#
# HINGLISH:
# - Yeh kya karta hai: Pehle array ke elements ko doosre array (ya scalar)
#   mein specify kiye powers tak raise karta hai. ** operator se zyada flexible.
# - Syntax: np.power(base, exponent)
# - Formula: power(x, n) = x^n
# - Kab use karein: Flexible exponentiation ke liye:
#   * Polynomial features: x, x², x³, x⁴
#   * Custom transformations
#   * Element-wise alag powers
#   * Box-Cox transformations
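# - ** ke saath comparison:
#   * Same computation: ndarrays par a ** b sirf np.power(a, b) ka shorthand hai,
#     isliye dono do arrays lete hain (element-wise powers) aur same broadcast karte hain
#   * Code mein zyada explicit
#   * Function form ufunc keywords bhi leta hai (out=, where=, dtype=)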
# - Common use cases:
#   * Polynomial features: np.power(X[:, None], [1, 2, 3])  # shape (n, 3): x, x², x³
#   * Custom scaling: np.power(data, 0.5)  # sqrt jaisa
#   * Box-Cox transform: (x^λ - 1) / λ
#   * Nonlinear transformations
# - Example:
#   # Polynomial feature creation
#   X = np.array([2, 3, 4])
#   X_poly = np.column_stack([
#       np.power(X, 1),  # [2, 3, 4]
#       np.power(X, 2),  # [4, 9, 16]
#       np.power(X, 3)   # [8, 27, 64]
#   ])
#   
#   # Element-wise alag powers
#   bases = np.array([2, 3, 4, 5])
#   exponents = np.array([1, 2, 3, 4])
#   results = np.power(bases, exponents)
#   # [2^1, 3^2, 4^3, 5^4] = [2, 9, 64, 625]
#   
#   # Fractional powers (roots)
#   data = np.array([4, 9, 16, 25])
#   square_roots = np.power(data, 0.5)  # [2, 3, 4, 5]
#   cube_roots = np.power(data, 1/3)    # [1.587, 2.080, 2.520, 2.924]
# -----------------------------------------------------------------------------

# Demo: a scalar exponent is applied to every element of `arr`.
print(np.power(arr, 2))
# Output: [ 1  4  9 16 25 36]
# Same as np.square(arr)

print(np.power(arr, 3))
# Output: [  1   8  27  64 125 216]
# Cubes: [1³, 2³, 3³, 4³, 5³, 6³]
# Note: an integer array raised to a negative integer power raises ValueError;
# convert to float first (e.g. arr.astype(float)) when negative exponents are needed.

# =============================================================================
# FUNCTION 4: np.exp() - EXPONENTIAL (e^x)
# =============================================================================
# ENGLISH:
# - What it does: Computes e^x (Euler's number raised to power x) for each
#   element. e ≈ 2.71828...
# - Syntax: np.exp(array)
# - Formula: exp(x) = e^x
# - Why use it: EXTREMELY IMPORTANT in ML for:
#   * Sigmoid activation: σ(x) = 1 / (1 + e^(-x))
#   * Softmax function: exp(x) / sum(exp(x))
#   * Log-likelihood calculations
#   * Exponential distributions
#   * Gradient calculations in backpropagation
# - Warning: Grows very fast (e^100 ≈ 2.7e43); float64 overflows to inf once x exceeds ~709.78
# - Numerical stability trick: For softmax, subtract max before exp
# - Common use cases:
#   * Sigmoid: 1 / (1 + np.exp(-x))
#   * Softmax: np.exp(x) / np.sum(np.exp(x))
#   * Log probability to probability: np.exp(log_prob)
#   * Exponential decay: value * np.exp(-decay_rate * time)
#   * Gaussian distributions: np.exp(-(x-μ)²/(2σ²))
# - Example:
#   # Sigmoid activation function
#   z = np.array([-2, -1, 0, 1, 2])
#   sigmoid = 1 / (1 + np.exp(-z))
#   # [0.119, 0.269, 0.5, 0.731, 0.881]
#   
#   # Softmax (with numerical stability)
#   logits = np.array([1.0, 2.0, 3.0])
#   max_logit = np.max(logits)
#   exp_logits = np.exp(logits - max_logit)  # Subtract max for stability
#   softmax = exp_logits / np.sum(exp_logits)
#   # [0.090, 0.245, 0.665]
#   
#   # Converting log probabilities
#   log_probs = np.array([-0.5, -1.0, -1.5])
#   probs = np.exp(log_probs)
#   # [0.607, 0.368, 0.223]
#   
#   # Exponential decay (learning rate scheduling)
#   initial_lr = 0.1
#   decay_rate = 0.05
#   epochs = np.arange(10)
#   lr_schedule = initial_lr * np.exp(-decay_rate * epochs)
#
# HINGLISH:
# - Yeh kya karta hai: Har element ke liye e^x compute karta hai (Euler's
#   number ko power x tak raise). e ≈ 2.71828...
# - Syntax: np.exp(array)
# - Formula: exp(x) = e^x
# - Kab use karein: ML mein BAHUT IMPORTANT:
#   * Sigmoid activation: σ(x) = 1 / (1 + e^(-x))
#   * Softmax function: exp(x) / sum(exp(x))
#   * Log-likelihood calculations
#   * Exponential distributions
#   * Backpropagation mein gradient calculations
# - Warning: Bahut tezi se badhta hai (e^100 ≈ 2.7e43); x ~709.78 se upar ho to float64 overflow hokar inf ho jaata hai
# - Numerical stability trick: Softmax ke liye exp se pehle max subtract karo
# - Common use cases:
#   * Sigmoid: 1 / (1 + np.exp(-x))
#   * Softmax: np.exp(x) / np.sum(np.exp(x))
#   * Log probability ko probability: np.exp(log_prob)
#   * Exponential decay: value * np.exp(-decay_rate * time)
#   * Gaussian distributions: np.exp(-(x-μ)²/(2σ²))
# - Example:
#   # Sigmoid activation function
#   z = np.array([-2, -1, 0, 1, 2])
#   sigmoid = 1 / (1 + np.exp(-z))
#   # [0.119, 0.269, 0.5, 0.731, 0.881]
#   
#   # Softmax (numerical stability ke saath)
#   logits = np.array([1.0, 2.0, 3.0])
#   max_logit = np.max(logits)
#   exp_logits = np.exp(logits - max_logit)  # Stability ke liye max subtract
#   softmax = exp_logits / np.sum(exp_logits)
#   # [0.090, 0.245, 0.665]
#   
#   # Log probabilities convert karna
#   log_probs = np.array([-0.5, -1.0, -1.5])
#   probs = np.exp(log_probs)
#   # [0.607, 0.368, 0.223]
#   
#   # Exponential decay (learning rate scheduling)
#   initial_lr = 0.1
#   decay_rate = 0.05
#   epochs = np.arange(10)
#   lr_schedule = initial_lr * np.exp(-decay_rate * epochs)
# -----------------------------------------------------------------------------

# Demo: e raised to each element of `arr`; the result is floating point.
print(np.exp(arr))
# Output: [  2.71828183   7.3890561   20.08553692  54.59815003 148.4131591  403.42879349]
# Calculation: [e^1, e^2, e^3, e^4, e^5, e^6]

# =============================================================================
# FUNCTION 5: np.log() - NATURAL LOGARITHM (ln)
# =============================================================================
# ENGLISH:
# - What it does: Computes natural logarithm (base e) of each element.
#   Inverse of np.exp()
# - Syntax: np.log(array)
# - Formula: log(x) = ln(x) = log_e(x)
# - Why use it: Critical in ML for:
#   * Cross-entropy loss: -sum(y * log(p))
#   * Log-likelihood in probabilistic models
#   * Log transformations for skewed data
#   * Information theory (entropy, KL divergence)
#   * Numerical stability (log-sum-exp trick)
# - Important notes:
#   * Only defined for positive numbers (x > 0)
#   * Returns -inf for 0, NaN for negative numbers
#   * Converts multiplication to addition: log(ab) = log(a) + log(b)
# - Common use cases:
#   * Binary cross-entropy: -y*log(p) - (1-y)*log(1-p)
#   * Log-likelihood: sum(log(probabilities))
#   * Log transform skewed features: np.log(income + 1)
#   * KL divergence: sum(p * log(p/q))
#   * Numerical stability in products of small numbers
# - Example:
#   # Binary cross-entropy loss
#   y_true = np.array([1, 0, 1, 1])
#   y_pred = np.array([0.9, 0.1, 0.8, 0.7])
#   bce = -np.mean(y_true * np.log(y_pred) + (1-y_true) * np.log(1-y_pred))
#   # ≈ 0.198  (= -(ln 0.9 + ln 0.9 + ln 0.8 + ln 0.7) / 4)
#   
#   # Log transformation for skewed data
#   income = np.array([30000, 50000, 100000, 500000, 1000000])
#   log_income = np.log(income)  # Reduces skewness
#   
#   # Converting probabilities to log space
#   probs = np.array([0.5, 0.3, 0.2])
#   log_probs = np.log(probs)
#   # Product in original space = sum in log space
#   product = np.prod(probs)  # 0.03
#   log_sum = np.sum(log_probs)  # -3.507
#   np.exp(log_sum)  # 0.03 ✓
#
# HINGLISH:
# - Yeh kya karta hai: Har element ka natural logarithm (base e) compute
#   karta hai. np.exp() ka inverse.
# - Syntax: np.log(array)
# - Formula: log(x) = ln(x) = log_e(x)
# - Kab use karein: ML mein critical:
#   * Cross-entropy loss: -sum(y * log(p))
#   * Probabilistic models mein log-likelihood
#   * Skewed data ke liye log transformations
#   * Information theory (entropy, KL divergence)
#   * Numerical stability (log-sum-exp trick)
# - Important notes:
#   * Sirf positive numbers ke liye defined (x > 0)
#   * 0 ke liye -inf return karta hai, negative numbers ke liye NaN
#   * Multiplication ko addition mein convert: log(ab) = log(a) + log(b)
# - Common use cases:
#   * Binary cross-entropy: -y*log(p) - (1-y)*log(1-p)
#   * Log-likelihood: sum(log(probabilities))
#   * Skewed features ko log transform: np.log(income + 1)
#   * KL divergence: sum(p * log(p/q))
#   * Chote numbers ke products mein numerical stability
# - Example:
#   # Binary cross-entropy loss
#   y_true = np.array([1, 0, 1, 1])
#   y_pred = np.array([0.9, 0.1, 0.8, 0.7])
#   bce = -np.mean(y_true * np.log(y_pred) + (1-y_true) * np.log(1-y_pred))
#   # ≈ 0.198  (= -(ln 0.9 + ln 0.9 + ln 0.8 + ln 0.7) / 4)
#   
#   # Skewed data ke liye log transformation
#   income = np.array([30000, 50000, 100000, 500000, 1000000])
#   log_income = np.log(income)  # Skewness kam karta hai
#   
#   # Probabilities ko log space mein convert karna
#   probs = np.array([0.5, 0.3, 0.2])
#   log_probs = np.log(probs)
#   # Original space mein product = log space mein sum
#   product = np.prod(probs)  # 0.03
#   log_sum = np.sum(log_probs)  # -3.507
#   np.exp(log_sum)  # 0.03 ✓
# -----------------------------------------------------------------------------

# Demo: natural log of each element; every entry of `arr` is positive,
# so no -inf or NaN appears in the result.
print(np.log(arr))
# Output: [0.         0.69314718 1.09861229 1.38629436 1.60943791 1.79175947]
# Calculation: [ln(1), ln(2), ln(3), ln(4), ln(5), ln(6)]

# =============================================================================
# FUNCTION 6: np.log10() - BASE-10 LOGARITHM
# =============================================================================
# ENGLISH:
# - What it does: Computes logarithm base 10 of each element
# - Syntax: np.log10(array)
# - Formula: log10(x) = log₁₀(x)
# - Why use it: Useful for:
#   * Measuring orders of magnitude
#   * Scientific data (pH, decibels, Richter scale)
#   * Converting to log scale for visualization
#   * Power law relationships
# - Relationship to ln: log10(x) = ln(x) / ln(10)
# - Common use cases:
#   * pH calculations
#   * Decibel scale (sound intensity)
#   * Earthquake magnitude
#   * Log-scale plots
# - Example:
#   # Orders of magnitude
#   values = np.array([1, 10, 100, 1000, 10000])
#   log10_vals = np.log10(values)  # [0, 1, 2, 3, 4]
#   
#   # Converting between natural and base-10 log
#   x = 100
#   ln_x = np.log(x)      # 4.605
#   log10_x = np.log10(x) # 2.0
#   # Verify: ln(x) = log10(x) * ln(10)
#   log10_x * np.log(10)  # 4.605 ✓
#
# HINGLISH:
# - Yeh kya karta hai: Har element ka base 10 logarithm compute karta hai
# - Syntax: np.log10(array)
# - Formula: log10(x) = log₁₀(x)
# - Kab use karein: Useful hai:
#   * Orders of magnitude measure karne ke liye
#   * Scientific data (pH, decibels, Richter scale)
#   * Visualization ke liye log scale mein convert karna
#   * Power law relationships
# - ln ke saath relationship: log10(x) = ln(x) / ln(10)
# - Common use cases:
#   * pH calculations
#   * Decibel scale (sound intensity)
#   * Earthquake magnitude
#   * Log-scale plots
# - Example:
#   # Orders of magnitude
#   values = np.array([1, 10, 100, 1000, 10000])
#   log10_vals = np.log10(values)  # [0, 1, 2, 3, 4]
#   
#   # Natural aur base-10 log ke beech convert karna
#   x = 100
#   ln_x = np.log(x)      # 4.605
#   log10_x = np.log10(x) # 2.0
#   # Verify: ln(x) = log10(x) * ln(10)
#   log10_x * np.log(10)  # 4.605 ✓
# -----------------------------------------------------------------------------

# Demo: base-10 log of each element; all values are below 1 because every input is below 10.
print(np.log10(arr))
# Output: [0.         0.30103    0.47712125 0.60205999 0.69897    0.77815125]
# Calculation: [log₁₀(1), log₁₀(2), log₁₀(3), log₁₀(4), log₁₀(5), log₁₀(6)]

# =============================================================================
# FUNCTION 7: np.log2() - BASE-2 LOGARITHM
# =============================================================================
# ENGLISH:
# - What it does: Computes logarithm base 2 of each element
# - Syntax: np.log2(array)
# - Formula: log2(x) = log₂(x)
# - Why use it: Important in:
#   * Information theory (entropy, bits)
#   * Computer science (binary search complexity)
#   * Image processing (bit depth)
#   * Binary trees (depth calculations)
# - Common use cases:
#   * Entropy: -sum(p * log2(p))
#   * Bit requirements: ceil(log2(n)) bits to represent n values
#   * Binary search complexity: O(log₂(n))
# - Example:
#   # Information entropy
#   probs = np.array([0.5, 0.25, 0.25])
#   entropy = -np.sum(probs * np.log2(probs))  # 1.5 bits
#   
#   # Bits needed to represent values
#   n_classes = 256
#   bits_needed = np.log2(n_classes)  # 8 bits
#
# HINGLISH:
# - Yeh kya karta hai: Har element ka base 2 logarithm compute karta hai
# - Syntax: np.log2(array)
# - Formula: log2(x) = log₂(x)
# - Kab use karein: Important hai:
#   * Information theory (entropy, bits)
#   * Computer science (binary search complexity)
#   * Image processing (bit depth)
#   * Binary trees (depth calculations)
# - Common use cases:
#   * Entropy: -sum(p * log2(p))
#   * Bit requirements: ceil(log2(n)) bits n values represent karne ke liye
#   * Binary search complexity: O(log₂(n))
# - Example:
#   # Information entropy
#   probs = np.array([0.5, 0.25, 0.25])
#   entropy = -np.sum(probs * np.log2(probs))  # 1.5 bits
#   
#   # Values represent karne ke liye kitne bits chahiye
#   n_classes = 256
#   bits_needed = np.log2(n_classes)  # 8 bits
# -----------------------------------------------------------------------------

# Demo: base-2 log of each element; the powers of two in `arr` (1, 2, 4)
# map to the whole numbers 0, 1, 2.
print(np.log2(arr))
# Output: [0.        1.        1.5849625 2.        2.32192809 2.5849625 ]
# Calculation: [log₂(1), log₂(2), log₂(3), log₂(4), log₂(5), log₂(6)]

# =============================================================================
# FUNCTION 8: np.cbrt() - CUBE ROOT
# =============================================================================
# ENGLISH:
# - What it does: Computes cube root (∛x) of each element. Equivalent to
#   x^(1/3) but handles negative numbers correctly.
# - Syntax: np.cbrt(array)
# - Formula: cbrt(x) = ∛x = x^(1/3)
# - Advantage over power: Works with negative numbers
#   * np.power(-8, 1/3) → nan (RuntimeWarning; real inputs never produce complex output)
#   * np.cbrt(-8) → -2 (correct!)
# - Common use cases:
#   * Volume calculations
#   * Statistical transformations
#   * Feature engineering
# - Example:
#   # Cube roots
#   cubes = np.array([1, 8, 27, 64, 125])
#   roots = np.cbrt(cubes)  # [1, 2, 3, 4, 5]
#   
#   # Handles negatives
#   np.cbrt(-8)  # -2.0 ✓
#   np.power(-8, 1/3)  # nan, with a RuntimeWarning ✗
#
# HINGLISH:
# - Yeh kya karta hai: Har element ka cube root (∛x) compute karta hai.
#   x^(1/3) ke equivalent hai lekin negative numbers sahi handle karta hai.
# - Syntax: np.cbrt(array)
# - Formula: cbrt(x) = ∛x = x^(1/3)
# - Power se advantage: Negative numbers ke saath kaam karta hai
#   * np.power(-8, 1/3) → nan (RuntimeWarning; real inputs se complex output nahi aata)
#   * np.cbrt(-8) → -2 (sahi!)
# - Common use cases:
#   * Volume calculations
#   * Statistical transformations
#   * Feature engineering
# - Example:
#   # Cube roots
#   cubes = np.array([1, 8, 27, 64, 125])
#   roots = np.cbrt(cubes)  # [1, 2, 3, 4, 5]
#   
#   # Negatives handle karta hai
#   np.cbrt(-8)  # -2.0 ✓
#   np.power(-8, 1/3)  # nan, RuntimeWarning ke saath ✗
# -----------------------------------------------------------------------------

# Demo: cube root of each element; the integer input yields float output.
print(np.cbrt(arr))
# Output: [1.         1.25992105 1.44224957 1.58740105 1.70997595 1.81712059]
# Calculation: [∛1, ∛2, ∛3, ∛4, ∛5, ∛6]

# =============================================================================
# FUNCTION 9: np.abs() / np.absolute() - ABSOLUTE VALUE
# =============================================================================
# ENGLISH:
# - What it does: Computes absolute value (magnitude) of each element.
#   Removes negative sign.
# - Syntax: np.abs(array) or np.absolute(array)
# - Formula: abs(x) = |x|
# - Why use it: Essential for:
#   * L1 regularization: sum(|weights|)
#   * Mean Absolute Error: mean(|y_pred - y_true|)
#   * Distance calculations
#   * Removing sign information
# - Works with complex numbers: returns magnitude
# - Common use cases:
#   * MAE loss: np.mean(np.abs(predictions - targets))
#   * L1 norm: np.sum(np.abs(vector))
#   * Lasso regression: alpha * np.sum(np.abs(weights))
#   * Gradient clipping by value: np.clip(grads, -max_val, max_val)
# - Example:
#   # Mean Absolute Error
#   y_true = np.array([3, 5, 2, 7])
#   y_pred = np.array([2.5, 5.5, 2, 6.8])
#   mae = np.mean(np.abs(y_pred - y_true))  # 0.3
#   
#   # L1 regularization
#   weights = np.array([0.5, -0.3, 0.8, -0.2])
#   l1_penalty = 0.01 * np.sum(np.abs(weights))  # 0.018
#   
#   # Handling errors (always positive)
#   errors = np.array([0.5, -0.3, 0.2, -0.1])
#   absolute_errors = np.abs(errors)  # [0.5, 0.3, 0.2, 0.1]
#
# HINGLISH:
# - Yeh kya karta hai: Har element ka absolute value (magnitude) compute
#   karta hai. Negative sign hata deta hai.
# - Syntax: np.abs(array) ya np.absolute(array)
# - Formula: abs(x) = |x|
# - Kab use karein: Zaroori hai:
#   * L1 regularization: sum(|weights|)
#   * Mean Absolute Error: mean(|y_pred - y_true|)
#   * Distance calculations
#   * Sign information remove karna
# - Complex numbers ke saath kaam karta hai: magnitude return karta hai
# - Common use cases:
#   * MAE loss: np.mean(np.abs(predictions - targets))
#   * L1 norm: np.sum(np.abs(vector))
#   * Lasso regression: alpha * np.sum(np.abs(weights))
#   * Gradient clipping by value: np.clip(grads, -max_val, max_val)
#     (har element ko limit karta hai taaki np.abs(grads) <= max_val rahe)
# - Example:
#   # Mean Absolute Error
#   y_true = np.array([3, 5, 2, 7])
#   y_pred = np.array([2.5, 5.5, 2, 6.8])
#   mae = np.mean(np.abs(y_pred - y_true))  # 0.3
#   
#   # L1 regularization
#   weights = np.array([0.5, -0.3, 0.8, -0.2])
#   l1_penalty = 0.01 * np.sum(np.abs(weights))  # 0.018
#   
#   # Errors handle karna (hamesha positive)
#   errors = np.array([0.5, -0.3, 0.2, -0.1])
#   absolute_errors = np.abs(errors)  # [0.5, 0.3, 0.2, 0.1]
# -----------------------------------------------------------------------------

# Demo input mixing negative and positive integers; it is reused by the
# np.sign() example later in this cell.
negative_arr = np.array([-1, -2, 3, -4, 5, -6])
print(np.abs(negative_arr))
# Output: [1 2 3 4 5 6]
# Negative entries lose their sign; the positive entries (3, 5) are unchanged

# =============================================================================
# FUNCTION 10: np.sign() - SIGN FUNCTION
# =============================================================================
# ENGLISH:
# - What it does: Returns the sign of each element: -1, 0, or +1
# - Syntax: np.sign(array)
# - Formula: sign(x) = -1 if x<0, 0 if x=0, +1 if x>0
# - Why use it: Useful for:
#   * Direction information (ignoring magnitude)
#   * Binary encoding
#   * Gradient sign (sign descent)
#   * Activation functions (sign activation)
# - Common use cases:
#   * Sign gradient descent: weights -= lr * np.sign(gradients)
#   * Binary classification helper
#   * Direction vectors
# - Example:
#   mixed = np.array([-5, -2, 0, 3, 7])
#   signs = np.sign(mixed)  # [-1, -1, 0, 1, 1]
#   
#   # Sign gradient descent (robust to outliers)
#   gradients = np.array([0.1, -5.0, 0.3, -0.05])
#   weight_update = 0.01 * np.sign(gradients)
#   # [0.01, -0.01, 0.01, -0.01] - all same magnitude!
#
# HINGLISH:
# - Yeh kya karta hai: Har element ka sign return karta hai: -1, 0, ya +1
# - Syntax: np.sign(array)
# - Formula: sign(x) = -1 agar x<0, 0 agar x=0, +1 agar x>0
# - Kab use karein: Useful hai:
#   * Direction information (magnitude ignore karke)
#   * Binary encoding
#   * Gradient sign (sign descent)
#   * Activation functions (sign activation)
# - Common use cases:
#   * Sign gradient descent: weights -= lr * np.sign(gradients)
#   * Binary classification helper
#   * Direction vectors
# - Example:
#   mixed = np.array([-5, -2, 0, 3, 7])
#   signs = np.sign(mixed)  # [-1, -1, 0, 1, 1]
#   
#   # Sign gradient descent (outliers ke against robust)
#   gradients = np.array([0.1, -5.0, 0.3, -0.05])
#   weight_update = 0.01 * np.sign(gradients)
#   # [0.01, -0.01, 0.01, -0.01] - sab same magnitude!
# -----------------------------------------------------------------------------

# negative_arr contains no zeros, so only -1 and +1 appear in this output
print(np.sign(negative_arr))
# Output: [-1 -1  1 -1  1 -1]
# Negative → -1, Positive → +1, Zero → 0

# =============================================================================
# BONUS: ADDITIONAL USEFUL MATHEMATICAL FUNCTIONS
# =============================================================================

# FUNCTION 11: np.ceil() - CEILING (ROUND UP)
print("np.ceil():", np.ceil(np.array([1.2, 2.5, 3.8, 4.1])))
# Output: np.ceil(): [2. 3. 4. 5.]
# Always rounds UP to nearest integer (the result is still a float array)

# FUNCTION 12: np.floor() - FLOOR (ROUND DOWN)
print("np.floor():", np.floor(np.array([1.2, 2.5, 3.8, 4.1])))
# Output: np.floor(): [1. 2. 3. 4.]
# Always rounds DOWN to nearest integer (the result is still a float array)

# FUNCTION 13: np.round() - ROUNDING
print("np.round():", np.round(np.array([1.2, 2.5, 3.8, 4.1])))
# Output: np.round(): [1. 2. 4. 4.]
# Rounds to nearest integer; exact halves go to the nearest EVEN value,
# which is why 2.5 becomes 2. here rather than 3.

# FUNCTION 14: np.clip() - CLIPPING VALUES
# NOTE: `arr` comes from an earlier cell; the recorded output implies it is
# [1 2 3 4 5 6] at this point.
print("np.clip():", np.clip(arr, 2, 5))
# Output: np.clip(): [2 2 3 4 5 5]
# Clips values to [min, max] range
# Values < 2 become 2, values > 5 become 5

# FUNCTION 15: np.reciprocal() - RECIPROCAL (1/x)
# The array is cast to float first: NumPy documents np.reciprocal as not
# designed to work with integer arrays.
print("np.reciprocal():", np.reciprocal(arr.astype(float)))
# Output: np.reciprocal(): [1.         0.5        0.33333333 0.25       0.2        0.16666667]
# Computes 1/x for each element

# =============================================================================
# PRACTICAL ML EXAMPLE: COMPLETE ACTIVATION FUNCTION LIBRARY
# =============================================================================
# ENGLISH:
# Real-world scenario: Implementing common neural network activation functions
#
# def sigmoid(x):
#     """Sigmoid: σ(x) = 1 / (1 + e^(-x))"""
#     return 1 / (1 + np.exp(-x))
#
# def tanh(x):
#     """Hyperbolic tangent (NumPy has np.tanh built-in)"""
#     return np.tanh(x)
#
# def relu(x):
#     """ReLU: max(0, x)"""
#     return np.maximum(0, x)
#
# def leaky_relu(x, alpha=0.01):
#     """Leaky ReLU: max(alpha*x, x)"""
#     return np.where(x > 0, x, alpha * x)
#
# def softmax(x):
#     """Softmax with numerical stability"""
#     exp_x = np.exp(x - np.max(x, axis=-1, keepdims=True))
#     return exp_x / np.sum(exp_x, axis=-1, keepdims=True)
#
# def swish(x):
#     """Swish: x * sigmoid(x)"""
#     return x * sigmoid(x)
#
# # Test
# x = np.array([-2, -1, 0, 1, 2])
# print("Sigmoid:", sigmoid(x))
# print("ReLU:", relu(x))
# print("Softmax:", softmax(x))
#
# HINGLISH:
# Real-world scenario: Neural network activation functions implement karna
#
# (Same code as English)
# =============================================================================

[ 1  4  9 16 25 36]
[1.         1.41421356 1.73205081 2.         2.23606798 2.44948974]
[ 1  4  9 16 25 36]
[  1   8  27  64 125 216]
[  2.71828183   7.3890561   20.08553692  54.59815003 148.4131591
 403.42879349]
[0.         0.69314718 1.09861229 1.38629436 1.60943791 1.79175947]
[0.         0.30103    0.47712125 0.60205999 0.69897    0.77815125]
[0.         1.         1.5849625  2.         2.32192809 2.5849625 ]
[1.         1.25992105 1.44224957 1.58740105 1.70997595 1.81712059]
[1 2 3 4 5 6]
[-1 -1  1 -1  1 -1]
np.ceil(): [2. 3. 4. 5.]
np.floor(): [1. 2. 3. 4.]
np.round(): [1. 2. 4. 4.]
np.clip(): [2 2 3 4 5 5]
np.reciprocal(): [1.         0.5        0.33333333 0.25       0.2        0.16666667]


In [31]:
# =============================================================================
# NUMPY LOGARITHMIC AND EXPONENTIAL FUNCTIONS - DEEP DIVE
# =============================================================================

# =============================================================================
# WHY LOGARITHMS AND EXPONENTIALS ARE CRITICAL IN ML
# =============================================================================
# ENGLISH:
# Logarithms (log) and exponentials (exp) are INVERSE operations and form
# the mathematical foundation of modern machine learning:
#
# KEY RELATIONSHIPS:
# - exp(log(x)) = x  (they cancel each other out)
# - log(exp(x)) = x  (inverse relationship)
# - log(a × b) = log(a) + log(b)  (converts multiplication to addition)
# - log(a / b) = log(a) - log(b)  (converts division to subtraction)
# - log(a^n) = n × log(a)  (brings exponents down)
#
# WHY THEY MATTER IN ML:
# 1. Numerical Stability: Working in log space prevents overflow/underflow
# 2. Loss Functions: Cross-entropy uses log for stable gradient computation
# 3. Probability: Converting products to sums (log-likelihood)
# 4. Activation Functions: Sigmoid and softmax use exp
# 5. Optimization: Many algorithms work better in log space
#
# HINGLISH:
# Logarithms (log) aur exponentials (exp) INVERSE operations hain aur modern
# machine learning ki mathematical foundation banate hain:
#
# KEY RELATIONSHIPS:
# - exp(log(x)) = x  (ek doosre ko cancel kar dete hain)
# - log(exp(x)) = x  (inverse relationship)
# - log(a × b) = log(a) + log(b)  (multiplication ko addition mein convert)
# - log(a / b) = log(a) - log(b)  (division ko subtraction mein convert)
# - log(a^n) = n × log(a)  (exponents ko neeche laata hai)
#
# ML MEIN KYUN IMPORTANT HAIN:
# 1. Numerical Stability: Log space mein kaam karne se overflow/underflow nahi hota
# 2. Loss Functions: Cross-entropy stable gradient computation ke liye log use karta hai
# 3. Probability: Products ko sums mein convert karna (log-likelihood)
# 4. Activation Functions: Sigmoid aur softmax exp use karte hain
# 5. Optimization: Bahut saare algorithms log space mein better kaam karte hain
# -----------------------------------------------------------------------------

# Shared demo input for the np.log() and np.exp() examples in this cell.
arr = np.array([1, 2, 3, 4, 5, 6])

# =============================================================================
# FUNCTION: np.log() - NATURAL LOGARITHM (DETAILED)
# =============================================================================
# ENGLISH:
# - What it does: Computes the natural logarithm (base e, where e ≈ 2.71828)
#   of each element. This is the INVERSE of np.exp()
# - Syntax: np.log(array)
# - Mathematical notation: ln(x) or log_e(x)
# - Formula: If y = e^x, then x = log(y)
# - Domain: Only defined for POSITIVE numbers (x > 0)
#   * log(0) = -∞ (negative infinity)
#   * log(negative) = NaN (not a number)
#   * log(1) = 0 (important property!)
#   * log(e) = 1 (by definition)
# - Why use it: EXTREMELY important in ML for:
#   * Cross-entropy loss (classification)
#   * Log-likelihood (probabilistic models)
#   * Numerical stability (log-space computations)
#   * Information theory (entropy, KL divergence)
#   * Feature transformations (reducing skewness)
#   * Converting multiplications to additions
# - Key properties:
#   * Monotonic increasing: if x₁ < x₂, then log(x₁) < log(x₂)
#   * Compresses large values: log(1000) = 6.9, log(1000000) = 13.8
#   * Stretches small values: makes differences more visible
#   * Converts exponential growth to linear
# - Common use cases:
#   * BINARY CROSS-ENTROPY LOSS:
#     loss = -[y*log(p) + (1-y)*log(1-p)]
#     Used in binary classification (logistic regression)
#   
#   * CATEGORICAL CROSS-ENTROPY:
#     loss = -sum(y_true * log(y_pred))
#     Used in multi-class classification
#   
#   * LOG-LIKELIHOOD (Probabilistic Models):
#     log_likelihood = sum(log(P(data|model)))
#     Instead of product which can underflow
#   
#   * LOG TRANSFORMATION (Skewed Features):
#     log_income = np.log(income + 1)  # +1 to handle zeros
#     Reduces right skewness in data
#   
#   * KL DIVERGENCE (Distribution Comparison):
#     kl_div = sum(P * log(P/Q))
#     Measures difference between distributions
#   
#   * ENTROPY (Information Content):
#     entropy = -sum(p * log(p))
#     Measures uncertainty in distribution
# - Numerical stability considerations:
#   * Always add small epsilon to avoid log(0):
#     np.log(probs + 1e-15)
#   * For very small probabilities, use log-space arithmetic
#   * NumPy has np.log1p(x) = log(1+x) for better precision near 0
# - Example:
#   # Binary Cross-Entropy Loss (detailed)
#   y_true = np.array([1, 0, 1, 1, 0])  # Actual labels
#   y_pred = np.array([0.9, 0.1, 0.8, 0.7, 0.2])  # Predicted probabilities
#   
#   # Avoid log(0) by clipping
#   epsilon = 1e-15
#   y_pred_safe = np.clip(y_pred, epsilon, 1 - epsilon)
#   
#   # Binary cross-entropy formula
#   bce = -np.mean(
#       y_true * np.log(y_pred_safe) + 
#       (1 - y_true) * np.log(1 - y_pred_safe)
#   )
#   # Result: ~0.203
#   
#   # Why it works:
#   # - When y=1: loss = -log(p) → penalizes low predictions
#   # - When y=0: loss = -log(1-p) → penalizes high predictions
#   # - Log makes gradient smooth and well-behaved
#   
#   # Categorical Cross-Entropy (multi-class)
#   y_true_onehot = np.array([[1, 0, 0],  # Class 0
#                             [0, 1, 0],  # Class 1
#                             [0, 0, 1]]) # Class 2
#   y_pred_probs = np.array([[0.7, 0.2, 0.1],
#                            [0.1, 0.8, 0.1],
#                            [0.2, 0.2, 0.6]])
#   
#   cce = -np.mean(np.sum(y_true_onehot * np.log(y_pred_probs + 1e-15), axis=1))
#   # Result: ~0.364
#   
#   # Log-likelihood for Naive Bayes
#   # Instead of: P(data) = P(x₁) × P(x₂) × P(x₃) × ... (underflow risk!)
#   # Use: log P(data) = log P(x₁) + log P(x₂) + log P(x₃) + ... (stable!)
#   probabilities = np.array([0.8, 0.7, 0.9, 0.6, 0.85])
#   
#   # Bad way (can underflow):
#   joint_prob = np.prod(probabilities)  # 0.2570
#   
#   # Good way (numerically stable):
#   log_joint_prob = np.sum(np.log(probabilities))  # -1.359
#   # Convert back if needed:
#   joint_prob_from_log = np.exp(log_joint_prob)  # 0.2570 ✓
#   
#   # Feature transformation (reduce skewness)
#   income = np.array([30000, 50000, 100000, 500000, 1000000, 5000000])
#   # Income is heavily right-skewed
#   
#   log_income = np.log(income)
#   # [10.31, 10.82, 11.51, 13.12, 13.82, 15.42]
#   # Much more evenly distributed!
#   
#   # Entropy calculation (information theory)
#   probs = np.array([0.5, 0.3, 0.2])
#   entropy = -np.sum(probs * np.log(probs))
#   # Result: ≈ 1.030 nats (natural units)
#   # High entropy = high uncertainty
#
# HINGLISH:
# - Yeh kya karta hai: Har element ka natural logarithm (base e, jahan e ≈ 2.71828)
#   compute karta hai. Yeh np.exp() ka INVERSE hai
# - Syntax: np.log(array)
# - Mathematical notation: ln(x) ya log_e(x)
# - Formula: Agar y = e^x, to x = log(y)
# - Domain: Sirf POSITIVE numbers ke liye defined (x > 0)
#   * log(0) = -∞ (negative infinity)
#   * log(negative) = NaN (not a number)
#   * log(1) = 0 (important property!)
#   * log(e) = 1 (definition se)
# - Kab use karein: ML mein BAHUT important:
#   * Cross-entropy loss (classification)
#   * Log-likelihood (probabilistic models)
#   * Numerical stability (log-space computations)
#   * Information theory (entropy, KL divergence)
#   * Feature transformations (skewness kam karna)
#   * Multiplications ko additions mein convert karna
# - Key properties:
#   * Monotonic increasing: agar x₁ < x₂, to log(x₁) < log(x₂)
#   * Badi values ko compress karta: log(1000) = 6.9, log(1000000) = 13.8
#   * Chhoti values ko stretch karta: differences zyada visible
#   * Exponential growth ko linear mein convert
# - Common use cases:
#   * BINARY CROSS-ENTROPY LOSS:
#     loss = -[y*log(p) + (1-y)*log(1-p)]
#     Binary classification mein use (logistic regression)
#   
#   * CATEGORICAL CROSS-ENTROPY:
#     loss = -sum(y_true * log(y_pred))
#     Multi-class classification mein use
#   
#   * LOG-LIKELIHOOD (Probabilistic Models):
#     log_likelihood = sum(log(P(data|model)))
#     Product ki jagah jo underflow kar sakta hai
#   
#   * LOG TRANSFORMATION (Skewed Features):
#     log_income = np.log(income + 1)  # zeros handle karne ke liye +1
#     Data mein right skewness kam karta hai
#   
#   * KL DIVERGENCE (Distribution Comparison):
#     kl_div = sum(P * log(P/Q))
#     Distributions ke beech difference measure karta hai
#   
#   * ENTROPY (Information Content):
#     entropy = -sum(p * log(p))
#     Distribution mein uncertainty measure karta hai
# - Numerical stability considerations:
#   * Hamesha log(0) se bachne ke liye chhota epsilon add karo:
#     np.log(probs + 1e-15)
#   * Bahut chhoti probabilities ke liye log-space arithmetic use karo
#   * NumPy mein np.log1p(x) = log(1+x) hai jo 0 ke paas better precision deta hai
# - Examples upar English section mein detail se diye gaye hain
# -----------------------------------------------------------------------------

# Element-wise natural logarithm of arr = [1, 2, 3, 4, 5, 6]
print(np.log(arr))
# Output: [0.         0.69314718 1.09861229 1.38629436 1.60943791 1.79175947]
#
# Detailed calculation:
# ln(1) = 0         (because e^0 = 1)
# ln(2) ≈ 0.693     (because e^0.693 ≈ 2)
# ln(3) ≈ 1.099     (because e^1.099 ≈ 3)
# ln(4) ≈ 1.386     (because e^1.386 ≈ 4)
# ln(5) ≈ 1.609     (because e^1.609 ≈ 5)
# ln(6) ≈ 1.792     (because e^1.792 ≈ 6)
#
# Notice: Values grow slowly - log compresses large numbers

# =============================================================================
# FUNCTION: np.exp() - EXPONENTIAL FUNCTION (DETAILED)
# =============================================================================
# ENGLISH:
# - What it does: Computes e^x (Euler's number raised to power x) for each
#   element. This is the INVERSE of np.log()
# - Syntax: np.exp(array)
# - Mathematical constant: e ≈ 2.718281828459045...
# - Formula: If x = log(y), then y = exp(x)
# - Domain: Defined for ALL real numbers (-∞ to +∞)
# - Range: Always POSITIVE (0 to +∞), never negative or zero
# - Why e is special:
#   * Derivative of e^x is e^x (the only function, up to a constant factor, that equals its own derivative!)
#   * Natural base for continuous growth/decay
#   * Appears everywhere in calculus and probability
# - Why use it: ABSOLUTELY CRITICAL in ML for:
#   * SIGMOID ACTIVATION: σ(x) = 1/(1 + e^(-x))
#     Used in binary classification, gates in LSTM/GRU
#   
#   * SOFTMAX ACTIVATION: softmax(x) = e^x / sum(e^x)
#     Used in multi-class classification output layer
#   
#   * PROBABILITY DISTRIBUTIONS:
#     - Gaussian/Normal: (1/√(2πσ²)) × e^(-(x-μ)²/(2σ²))
#     - Exponential: λe^(-λx)
#     - Poisson: (λ^k × e^(-λ)) / k!
#   
#   * LOG-SPACE TO NORMAL-SPACE:
#     Converting log probabilities back to probabilities
#   
#   * GRADIENT COMPUTATIONS:
#     Derivatives in backpropagation involve exp
#   
#   * GROWTH/DECAY MODELS:
#     Exponential learning rate decay, population growth
# - Warning - NUMERICAL ISSUES:
#   * OVERFLOW: e^100 ≈ 2.7 × 10^43 (huge!)
#     e^1000 causes overflow (returns inf)
#   
#   * Solution: For softmax, subtract max before exp:
#     exp(x - max(x)) / sum(exp(x - max(x)))
#   
#   * Never compute exp of very large numbers directly
# - Common use cases:
#   # SIGMOID ACTIVATION FUNCTION (detailed)
#   def sigmoid(x):
#       """
#       Sigmoid: Maps any input to (0, 1) range
#       σ(x) = 1 / (1 + e^(-x))
#       
#       Properties:
#       - Output range: (0, 1) - perfect for probabilities
#       - σ(0) = 0.5 (midpoint)
#       - σ(large positive) → 1
#       - σ(large negative) → 0
#       - Derivative: σ'(x) = σ(x) × (1 - σ(x))
#       """
#       return 1 / (1 + np.exp(-x))
#   
#   z = np.array([-5, -2, 0, 2, 5])
#   probs = sigmoid(z)
#   # [0.007, 0.119, 0.5, 0.881, 0.993]
#   
#   # Use in logistic regression:
#   # 1. Linear combination: z = w·x + b
#   # 2. Sigmoid: p = sigmoid(z)
#   # 3. Prediction: class = 1 if p > 0.5 else 0
#   
#   # SOFTMAX ACTIVATION (with numerical stability)
#   def softmax(x):
#       """
#       Softmax: Converts logits to probability distribution
#       
#       Properties:
#       - Output sums to 1.0
#       - All outputs in (0, 1)
#       - Differentiable everywhere
#       - Used in multi-class classification
#       """
#       # Numerical stability: subtract max
#       exp_x = np.exp(x - np.max(x, axis=-1, keepdims=True))
#       return exp_x / np.sum(exp_x, axis=-1, keepdims=True)
#   
#   # Example: 3-class classification
#   logits = np.array([[2.0, 1.0, 0.1],    # Sample 1
#                      [0.5, 2.5, 1.0],    # Sample 2
#                      [3.0, 0.5, 2.0]])   # Sample 3
#   
#   probs = softmax(logits)
#   # [[0.659, 0.242, 0.099],  # Class 0 most likely
#   #  [0.100, 0.736, 0.164],  # Class 1 most likely
#   #  [0.690, 0.057, 0.254]]  # Class 0 most likely
#   
#   # Verify: sums to 1
#   np.sum(probs, axis=1)  # [1.0, 1.0, 1.0] ✓
#   
#   # Converting log probabilities to probabilities
#   log_probs = np.array([-0.5, -1.0, -1.5, -2.0])
#   # These might come from log-space computations
#   
#   probs = np.exp(log_probs)
#   # [0.607, 0.368, 0.223, 0.135]
#   
#   # Exponential decay (learning rate scheduling)
#   def exponential_decay(initial_lr, decay_rate, step):
#       """
#       Learning rate decay: lr = lr₀ × e^(-decay_rate × step)
#       Smooth continuous decay
#       """
#       return initial_lr * np.exp(-decay_rate * step)
#   
#   initial_lr = 0.1
#   decay_rate = 0.05
#   steps = np.arange(0, 100, 10)
#   lr_schedule = exponential_decay(initial_lr, decay_rate, steps)
#   # [0.1, 0.061, 0.037, 0.022, 0.014, 0.008, ...]
#   
#   # Gaussian (Normal) distribution
#   def gaussian(x, mu=0, sigma=1):
#       """Normal distribution PDF"""
#       return (1 / np.sqrt(2 * np.pi * sigma**2)) * \
#              np.exp(-(x - mu)**2 / (2 * sigma**2))
#   
#   x = np.linspace(-3, 3, 100)
#   pdf = gaussian(x, mu=0, sigma=1)
#   # Bell curve centered at 0
#
# HINGLISH:
# - Yeh kya karta hai: Har element ke liye e^x compute karta hai (Euler's
#   number ko power x tak raise). Yeh np.log() ka INVERSE hai
# - Syntax: np.exp(array)
# - Mathematical constant: e ≈ 2.718281828459045...
# - Formula: Agar x = log(y), to y = exp(x)
# - Domain: SAARE real numbers ke liye defined (-∞ se +∞ tak)
# - Range: Hamesha POSITIVE (0 se +∞ tak), kabhi negative ya zero nahi
# - e kyun special hai:
#   * e^x ka derivative e^x hai (constant multiple c·e^x ko chhodkar yeh property sirf isi function mein!)
#   * Continuous growth/decay ke liye natural base
#   * Calculus aur probability mein har jagah aata hai
# - Kab use karein: ML mein BILKUL CRITICAL:
#   * SIGMOID ACTIVATION: σ(x) = 1/(1 + e^(-x))
#     Binary classification, LSTM/GRU mein gates
#   
#   * SOFTMAX ACTIVATION: softmax(x) = e^x / sum(e^x)
#     Multi-class classification output layer
#   
#   * PROBABILITY DISTRIBUTIONS:
#     - Gaussian/Normal: (1/√(2πσ²)) × e^(-(x-μ)²/(2σ²))
#     - Exponential: λe^(-λx)
#     - Poisson: (λ^k × e^(-λ)) / k!
#   
#   * LOG-SPACE SE NORMAL-SPACE:
#     Log probabilities ko wapas probabilities mein convert karna
#   
#   * GRADIENT COMPUTATIONS:
#     Backpropagation mein derivatives exp involve karte hain
#   
#   * GROWTH/DECAY MODELS:
#     Exponential learning rate decay, population growth
# - Warning - NUMERICAL ISSUES:
#   * OVERFLOW: e^100 ≈ 2.7 × 10^43 (bahut bada!)
#     e^1000 overflow karta hai (inf return)
#   
#   * Solution: Softmax ke liye, exp se pehle max subtract:
#     exp(x - max(x)) / sum(exp(x - max(x)))
#   
#   * Kabhi bahut bade numbers ka directly exp compute mat karo
# - Common use cases ke examples upar English section mein detail se hain
# -----------------------------------------------------------------------------

# Element-wise e^x for arr = [1, 2, 3, 4, 5, 6]
print(np.exp(arr))
# Output: [  2.71828183   7.3890561   20.08553692  54.59815003 148.4131591  403.42879349]
# (the notebook wraps this onto two lines when printing)
#
# Detailed calculation:
# e^1 ≈ 2.718      (definition of e)
# e^2 ≈ 7.389      (e × e)
# e^3 ≈ 20.086     (e × e × e)
# e^4 ≈ 54.598
# e^5 ≈ 148.413
# e^6 ≈ 403.429
#
# Notice: Values grow VERY FAST - exponential growth!

# =============================================================================
# VERIFYING INVERSE RELATIONSHIP
# =============================================================================
# ENGLISH:
# Demonstrating that log and exp are inverse operations:
#
# Property 1: exp(log(x)) = x
# Round trip 1: take the log, then exponentiate, and compare with the input
original = np.array([2.0, 5.0, 10.0, 100.0])
log_values = np.log(original)
back_to_original = np.exp(log_values)
print("exp(log(x)) =", back_to_original)
# Output: exp(log(x)) = [  2.   5.  10. 100.]  ✓ (same as original)

# Property 2: log(exp(x)) = x
# Round trip 2: exponentiate, then take the log, and compare with the input
original2 = np.array([1.0, 2.0, 3.0, 4.0])
exp_values = np.exp(original2)
back_to_original2 = np.log(exp_values)
print("log(exp(x)) =", back_to_original2)
# Output: log(exp(x)) = [1. 2. 3. 4.]  ✓ (same as original)
#
# HINGLISH:
# Demonstrate kar rahe hain ki log aur exp inverse operations hain:
# (Same code as above)
# -----------------------------------------------------------------------------

# =============================================================================
# PRACTICAL ML EXAMPLE: COMPLETE LOSS FUNCTION IMPLEMENTATIONS
# =============================================================================
# ENGLISH:
# Real-world scenario: Implementing common ML loss functions

# 1. BINARY CROSS-ENTROPY LOSS (Logistic Regression)
def binary_crossentropy(y_true, y_pred, epsilon=1e-15):
    """
    Mean binary cross-entropy between labels and predicted probabilities.

    Args:
        y_true: Array-like of true labels (0 or 1)
        y_pred: Array-like of predicted probabilities (0 to 1), same shape
            as (or broadcastable with) y_true
        epsilon: Clipping margin that keeps both log arguments away from 0

    Returns:
        Average loss across samples
    """
    # Accept plain lists/tuples as well as ndarrays; without the conversion
    # `1 - y_true` raises TypeError for a Python list.
    y_true = np.asarray(y_true)

    # Clip into [epsilon, 1 - epsilon] so neither log(p) nor log(1 - p)
    # is ever evaluated at 0 (which would give -inf).
    y_pred = np.clip(y_pred, epsilon, 1 - epsilon)

    # BCE formula: -[y*log(p) + (1-y)*log(1-p)], averaged over all samples
    loss = -np.mean(
        y_true * np.log(y_pred)
        + (1 - y_true) * np.log(1 - y_pred)
    )
    return loss

# Test: five samples, each predicted on the correct side of 0.5
y_true_binary = np.array([1, 0, 1, 1, 0])
y_pred_binary = np.array([0.9, 0.1, 0.8, 0.7, 0.2])
bce_loss = binary_crossentropy(y_true_binary, y_pred_binary)
print(f"Binary Cross-Entropy Loss: {bce_loss:.4f}")
# Output: Binary Cross-Entropy Loss: 0.2027

# 2. CATEGORICAL CROSS-ENTROPY (Multi-class Classification)
def categorical_crossentropy(y_true, y_pred, epsilon=1e-15):
    """
    Mean categorical cross-entropy for one-hot labels.

    The class dimension is taken to be the LAST axis, matching the
    axis=-1 convention used by softmax_stable in this notebook. For the
    usual (n_samples, n_classes) input this is the same as axis=1; it
    also lets a single 1-D sample of shape (n_classes,) be scored.

    Args:
        y_true: True labels (one-hot encoded along the last axis)
        y_pred: Predicted probabilities (softmax output), same shape
        epsilon: Small value to avoid log(0)

    Returns:
        Average loss across samples
    """
    # Keep probabilities inside [epsilon, 1 - epsilon] so log never sees 0
    y_pred = np.clip(y_pred, epsilon, 1 - epsilon)

    # CCE formula: -sum(y_true * log(y_pred)) over classes, per sample
    per_sample = np.sum(y_true * np.log(y_pred), axis=-1)
    loss = -np.mean(per_sample)
    return loss

# Test: three samples, one per class, each with the true class ranked highest
y_true_cat = np.array([[1, 0, 0],   # Class 0
                       [0, 1, 0],   # Class 1
                       [0, 0, 1]])  # Class 2
y_pred_cat = np.array([[0.7, 0.2, 0.1],
                       [0.1, 0.8, 0.1],
                       [0.2, 0.2, 0.6]])
cce_loss = categorical_crossentropy(y_true_cat, y_pred_cat)
print(f"Categorical Cross-Entropy Loss: {cce_loss:.4f}")
# Output: Categorical Cross-Entropy Loss: 0.3635

# 3. SOFTMAX FUNCTION (with numerical stability)
def softmax_stable(x):
    """
    Softmax along the last axis, computed in an overflow-safe way.

    The largest entry of each row is subtracted before exponentiating, so
    every exponent is <= 0. The shift cancels in the ratio and leaves the
    result unchanged.
    """
    row_max = np.max(x, axis=-1, keepdims=True)
    numerator = np.exp(x - row_max)
    denominator = np.sum(numerator, axis=-1, keepdims=True)
    return numerator / denominator

# Test: the second row has large logits; softmax_stable subtracts each row's
# maximum before exponentiating.
logits = np.array([[2.0, 1.0, 0.1],
                   [100.0, 99.0, 98.0]])  # Large values!
probs = softmax_stable(logits)
print("Softmax (stable):")
print(probs)
print("Sums:", np.sum(probs, axis=1))  # Should be [1.0, 1.0]
# Output:
# Softmax (stable):
# [[0.65900114 0.24243297 0.09856589]
#  [0.66524096 0.24472847 0.09003057]]
# Sums: [1. 1.]

# 4. SIGMOID FUNCTION
def sigmoid(x):
    """Sigmoid activation σ(x) = 1 / (1 + e^(-x)) with real overflow protection.

    np.where evaluates BOTH candidate arrays for every element before it
    selects between them. Branching on the sign of x alone therefore still
    lets np.exp overflow (and emit a RuntimeWarning) in the branch that is
    later discarded. Here the exponential is computed once as e^(-|x|),
    whose argument is never positive, so it cannot overflow.

    Args:
        x: Scalar or array-like of real numbers.

    Returns:
        ndarray of sigmoid values with the same shape as x (a 0-d array
        for scalar input).
    """
    x = np.asarray(x)
    # e^(-|x|) lies in (0, 1]; for huge |x| it underflows harmlessly to 0
    decay = np.exp(-np.abs(x))
    return np.where(
        x >= 0,
        1 / (1 + decay),       # x >= 0: 1 / (1 + e^(-x))
        decay / (1 + decay)    # x <  0: e^x / (1 + e^x)
    )

# Test: integer inputs placed symmetrically around 0
z_values = np.array([-10, -2, 0, 2, 10])
sig_output = sigmoid(z_values)
print("Sigmoid:", sig_output)
# Output (rounded): [0.0000454, 0.119, 0.5, 0.881, 0.99995]

#
# HINGLISH:
# Real-world scenario: Common ML loss functions implement karna
# (Same code as English - code is universal!)
# =============================================================================

# =============================================================================
# ADDITIONAL USEFUL VARIATIONS
# =============================================================================

# np.log1p(x) - Computes log(1 + x) with better precision for small x
small_values = np.array([0.001, 0.01, 0.1])
print("log1p (better for small x):", np.log1p(small_values))
# Output: log1p (better for small x): [0.0009995  0.00995033 0.09531018]
# More accurate than np.log(1 + small_values) for tiny values

# np.expm1(x) - Computes exp(x) - 1 with better precision for small x
print("expm1 (better for small x):", np.expm1(small_values))
# Output: expm1 (better for small x): [0.0010005  0.01005017 0.10517092]
# Inverse of log1p: expm1(log1p(x)) = x

# np.logaddexp(x1, x2) - Computes log(exp(x1) + exp(x2)) stably
# Useful for combining log probabilities
log_p1 = np.array([-2.0, -1.0])
log_p2 = np.array([-1.5, -0.5])
log_sum = np.logaddexp(log_p1, log_p2)
print("Log add exp:", log_sum)
# Output: Log add exp: [-1.02592302 -0.02592302]
# Equivalent to: np.log(np.exp(log_p1) + np.exp(log_p2))
# But numerically stable!

[0.         0.69314718 1.09861229 1.38629436 1.60943791 1.79175947]
[  2.71828183   7.3890561   20.08553692  54.59815003 148.4131591
 403.42879349]
exp(log(x)) = [  2.   5.  10. 100.]
log(exp(x)) = [1. 2. 3. 4.]
Binary Cross-Entropy Loss: 0.2027
Categorical Cross-Entropy Loss: 0.3635
Softmax (stable):
[[0.65900114 0.24243297 0.09856589]
 [0.66524096 0.24472847 0.09003057]]
Sums: [1. 1.]
Sigmoid: [4.53978687e-05 1.19202922e-01 5.00000000e-01 8.80797078e-01
 9.99954602e-01]
log1p (better for small x): [0.0009995  0.00995033 0.09531018]
expm1 (better for small x): [0.0010005  0.01005017 0.10517092]
Log add exp: [-1.02592302 -0.02592302]


In [33]:
# Rounding helpers on scalars. All three print 3.0 for these inputs, so this
# example does not show how round / floor / trunc differ from one another.
print(np.round(3.13))
print(np.floor(3.44))
print(np.trunc(3.22))

# NOTE: `arr` is rebound here to a new array containing a duplicate (6) and a
# negative value (-2); it no longer holds [1..6] after this cell runs.
arr = np.array([1,2,3,4,5,6,6,-2])
print(np.unique(arr))  # Output: [-2  1  2  3  4  5  6]     (duplicate 6 removed, sorted)
print(np.sort(arr))    # Output: [-2  1  2  3  4  5  6  6]  (sorted, duplicates kept)
print(np.abs(arr))     # Output: [1 2 3 4 5 6 6 2]          (original order, signs dropped)


3.0
3.0
3.0
[-2  1  2  3  4  5  6]
[-2  1  2  3  4  5  6  6]
[1 2 3 4 5 6 6 2]
