In [2]:
# Understanding Numpy

import numpy as np
import matplotlib.pyplot as plt
import time

In [3]:
# Check Numpy version
print(f"Numpy version: {np.__version__}")

Numpy version: 2.3.2


In [9]:
# Display setting for clearer output
np.set_printoptions(precision=3, suppress=True)

Creating Numpy Arrays

In [7]:
# Creating arrays from Python lists
# 1D array: A simple sequence of numbers

arr1d = np.array([1, 2, 3, 4, 5])

# 2D array: Think of this as a matrix or table with rows and columns
arr2d = np.array([[1, 2, 3], [4, 5, 6]])

# 3D array: Like a steak of 2D arrays - useful for images, time series, etc.
arr3d = np.array([[[1, 2], [3, 4]], [[5, 6], [7, 8]]])

print("1D array:", arr1d)
print("2D array:\n", arr2d)
print("3D array:\n", arr3d)

1D array: [1 2 3 4 5]
2D array:
 [[1 2 3]
 [4 5 6]]
3D array:
 [[[1 2]
  [3 4]]

 [[5 6]
  [7 8]]]


Creating Special Arrays in Numpy

In [11]:
# Creating arrays filled with zeros - useful for initializing arrays
# Shape (3, 4) means 3 rows and 4 columns
zeros = np.zeros((3, 4))

# Creating arrays filled with ones - often used as starting points
ones = np.ones((2, 3, 4))      # 3D array: 2 layers, 3 rows, 4 columns

# Empty array - faster than zeros/ones but contains random values
# Use when you'll immediately fill the array with real data
empty = np.empty((2, 2))

print("Zeros array (3x4):\n", zeros)
print("Ones array shape:", ones.shape)
print("Empty array (contains random values):\n", empty)

Zeros array (3x4):
 [[0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]]
Ones array shape: (2, 3, 4)
Empty array (contains random values):
 [[0. 0.]
 [0. 0.]]


NOTE: zeros() and ones() are memory-efficient ways to create arrays of specific sizes. empty() is fastest but contains garbage values, so only use it when you'll immediately overwrite the cotents.

In [12]:
# Range arrays - like Python's range() but more powerful
range_arr = np.arange(0, 10, 2)  # Start at 0, end before 10, step by 2: [0, 2, 4, 6, 8]
print("Range array:", range_arr)

# Linearly spaced arrays - divide a range into equal parts
# From 0 to 1 with exactly 5 points (including endpoints)
linspace_arr = np.linspace(0, 1, 5)
print("Linspace array:", linspace_arr)

# Logarithmically spaced arrays - useful for scientific data
# From 10^0 to 10^2 (1 to 100) with 5 points
logspace_arr = np.logspace(0, 2, 5)
print("Logspace array:", logspace_arr)

Range array: [0 2 4 6 8]
Linspace array: [0.   0.25 0.5  0.75 1.  ]
Logspace array: [  1.      3.162  10.     31.623 100.   ]


arange() works like Pyhton's range() but returns a Numpy array and works with floats.
linspace() divides a range into equal segments - useful for plotting smooth curves.
logspace() creates points that are evenly spaced on a logarithmic scale.

In [13]:
# Identity matrix - diagonal of ones, zeros elsewhere
# Essential for linear algebra operations
identity = np.eye(4)    # 4x4 identity matrix

# Diagonal matrix - put values on the diagonal
diagonal = np.diag([1, 2, 3, 4]) # Diagonal matrix with specified diagonal values

# Array filled with a specific value
full_arr = np.full((3, 3), 7)      # 3x3 array filled with 7s

print("Identity matrix:\n", identity)
print("Diagonal matrix:\n", diagonal)
print("Full array (filled with 7s):\n", full_arr)

Identity matrix:
 [[1. 0. 0. 0.]
 [0. 1. 0. 0.]
 [0. 0. 1. 0.]
 [0. 0. 0. 1.]]
Diagonal matrix:
 [[1 0 0 0]
 [0 2 0 0]
 [0 0 3 0]
 [0 0 0 4]]
Full array (filled with 7s):
 [[7 7 7]
 [7 7 7]
 [7 7 7]]


Numpy Data types (dtypes)


Understanding data types is crucial for memory efficiency and numerical precision.

In [14]:
# Explicit data types - control memory usage and precision
int_arr = np.array([1, 2, 3], dtype=np.int32)    # 32-bit integers
float_arr = np.array([1, 2, 3], dtype=np.float64) # 64-bit floats (double precision)
bool_arr = np.array([True, False, True], dtype=np.bool_)     # Boolean values

# Types conversion - control dtype of existing array
converted = int_arr.astype(np.float32)   # Convert to 32-bit floats

print("Integer array:", int_arr.dtype)
print("Float array dtype:", float_arr.dtype)
print("Boolean array dtype:", bool_arr)
print("Converted array dtype:", converted.dtype)

# Memory usage comparison
print(f"int32 uses {int_arr.itemsize} bytes per element")
print(f"float64 uses {float_arr.itemsize} bytes per element")

Integer array: int32
Float array dtype: float64
Boolean array dtype: [ True False  True]
Converted array dtype: float32
int32 uses 4 bytes per element
float64 uses 8 bytes per element


Array Properties & Attributes

Understanding array properties helps you work effectively with your data and debug issue

In [15]:
# Create a sample 3D array for demonstration
# Think of this as 3 layers, each with 4 rows 5 columns
arr = np.random.randn(3, 4, 5)

# Shape: The dimensions of the array (layers, rows, columns)
print("Shape:", arr.shape)

# Size: Total number of elements (3 x 4 x 5 = 60)
print("Size:", arr.size)

# Ndim: Number of dimensions (3D in this case)
print("Ndim:", arr.ndim)

# Dtype: Data type of elements
print("Dtype:", arr.dtype)

# Itemize: Memory size of each element in bytes
print("Itemsize:", arr.itemsize)   # 8 bytes for float64

# Total memory usage in bytes
print("Memory usage:", arr.nbytes, "bytes")     # size x itemsize
print("Memory usage:", arr.nbytes / 1024, "KB")   # Convert to KB

Shape: (3, 4, 5)
Size: 60
Ndim: 3
Dtype: float64
Itemsize: 8
Memory usage: 480 bytes
Memory usage: 0.46875 KB


NOTE: These properties are essential for understanding your data's structure and memory requirements. Large datasets require careful attention to memory usage.

Array Indexing & Slicing

Basic Indexing - Accessing Individual Elements

>>> Numpy indexing is similar to Python lists but more powerful for multi-dimensional arrays

In [None]:
# 1D array indexing - similar to Python lists
arr1d = np.array([10, 20, 30, 40, 50])

print("First element:", arr1d[0])    # Index 0: 10
print("Last element:", arr1d[-1])    # Negative indexing: 50
print("Slicing [1:4]:", arr1d[1:4])   # Elements 1, 2, 3: [20, 30, 40]
print("Every 2nd element:", arr1d[::2])  # Step of 2: [10, 30, 50]