# Numpy

In [2]:
import numpy as np
import time

In [3]:
arr = np.array([1,2,3,4,5])
print (arr, type(arr))

[1 2 3 4 5] <class 'numpy.ndarray'>


In [4]:
# list vs numpy

size = 10_000_000
py_list =  list(range(size))

start = time.time()
sq=[x**2 for x in py_list]
end = time.time()
print(f"python list time = {end-start}")

np_arr = np.array(py_list)
start = time.time()
# vectorization 
squ = np_arr**2

end = time.time()

print(f"python array time = {end-start}")

python list time = 2.5700933933258057
python array time = 0.06739330291748047


In [5]:
# memory
import sys

print(f"python list size = {sys.getsizeof(py_list)*len(py_list)}")
print(f"python list size = {np_arr.nbytes}")

python list size = 800000560000000
python list size = 80000000


In [6]:
# create nparray from list

lst = (1,2,3,4,5,6,7,8)

arr = np.array(lst)
print(arr, arr.dtype, arr.shape)


arr2 = np.array([1,2,3,4,5,6,7,"hello"])
print(arr2, arr2.dtype)

arr3 = np.array([[[1,2,3,4],[1,2,3,4]],[[3,4,5,6],[0,1,2,3]],[[1,2,3,1],[0,1,2,3]]])
print(arr3, arr3.dtype,arr3.shape)


[1 2 3 4 5 6 7 8] int64 (8,)
['1' '2' '3' '4' '5' '6' '7' 'hello'] <U21
[[[1 2 3 4]
  [1 2 3 4]]

 [[3 4 5 6]
  [0 1 2 3]]

 [[1 2 3 1]
  [0 1 2 3]]] int64 (3, 2, 4)


In [7]:
arr1 = np.zeros((2,3),dtype="int")      # prefill
print(arr1)
arr2 = np.ones((9,),dtype="bool")       # prefill
print(arr2)
arr3 = np.full((2,3),100,dtype="int64") # prefill with val
print(arr3)
arr4 = np.eye((5))  #identity matrix
print(arr4)
arr5 = np.arange(0,10,2) #start,stop,step   # arange the value
print(arr5)
arr6 = np.linspace(0,10,9) #start,stop,step   # arange the value in evenly space
print(arr6)

[[0 0 0]
 [0 0 0]]
[ True  True  True  True  True  True  True  True  True]
[[100 100 100]
 [100 100 100]]
[[1. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0.]
 [0. 0. 1. 0. 0.]
 [0. 0. 0. 1. 0.]
 [0. 0. 0. 0. 1.]]
[0 2 4 6 8]
[ 0.    1.25  2.5   3.75  5.    6.25  7.5   8.75 10.  ]


In [8]:
# array properties

arr = np.array([[1,2,3],[1,2,3],[4,5,6]])
print(arr, arr.shape, arr.size, arr.dtype, arr.ndim)
# shape - shape, dimension = m x n
# size - total element
# dtype - data type
# ndim - dimension

[[1 2 3]
 [1 2 3]
 [4 5 6]] (3, 3) 9 int64 2


In [9]:
# type casting

float_arr = arr.astype(np.float64)
print(float_arr, float_arr.dtype)
int_arr = arr.astype(np.int32)
print(int_arr, int_arr.dtype)



[[1. 2. 3.]
 [1. 2. 3.]
 [4. 5. 6.]] float64
[[1 2 3]
 [1 2 3]
 [4 5 6]] int32


In [10]:
arr = np.array([1,2,3,4,3,"satr"])
print(arr, arr.dtype)

['1' '2' '3' '4' '3' 'satr'] <U21


### operation in np arrays

- reshape   -   2 x 3 = 3 x 2
- indexin (1D & 2D arr)
- fancy & Boolean indexing
- slicing


In [11]:
arr = np.array([
    [1,2,3],
    [1,2,3],
    [1,2,3],
    [1,2,3]
    ])
print(arr,arr.shape)
reshaped= arr.reshape((3,4))
print(reshaped)

[[1 2 3]
 [1 2 3]
 [1 2 3]
 [1 2 3]] (4, 3)
[[1 2 3 1]
 [2 3 1 2]
 [3 1 2 3]]


In [12]:
# Basic array creation from Python lists
arr_1d = np.array([1, 2, 3, 4, 5])
arr_2d = np.array([[1, 2, 3], [4, 5, 6]])
arr_3d = np.array([[[1, 2], [3, 4]], [[5, 6], [7, 8]]])

In [13]:
print(f"1D array shape: {arr_1d.shape}")    # (5,) - 5 elements in one dimension
print(f"2D array shape: {arr_2d.shape}")    # (2, 3) - 2 rows, 3 columns
print(f"3D array shape: {arr_3d.shape}")    # (2, 2, 2) - 2 matrices of 2x2

1D array shape: (5,)
2D array shape: (2, 3)
3D array shape: (2, 2, 2)


In [14]:
print(arr_1d.ndim)
print(f"Number of dimensions: {arr_2d.ndim}") # 2 - two axes (rows, columns)
print(arr_3d.ndim)

1
Number of dimensions: 2
3


In [15]:
print(arr_1d.size)
print(f"Total elements: {arr_2d.size}")      # 6 - total elements in array
print(arr_3d.size)

5
Total elements: 6
8


In [16]:
print(f"Bytes per element: {arr_1d.itemsize}") # 8 - for int64 dtype
print(f"Bytes per element: {arr_2d.itemsize}") # 8 - for int64 dtype
print(f"Bytes per element: {arr_3d.itemsize}") # 8 - for int64 dtype


Bytes per element: 8
Bytes per element: 8
Bytes per element: 8


In [17]:
print(f"Data type: {arr_1d.dtype}")           # int64 - default integer type
print(f"Data type: {arr_2d.dtype}")           # int64 - default integer type
print(f"Data type: {arr_3d.dtype}")           # int64 - default integer type

Data type: int64
Data type: int64
Data type: int64


In [18]:
zeros = np.zeros(5)
print(zeros)
zeros = np.zeros((5,2))
print(f"\n{zeros}")
zeros = np.zeros((2, 3), dtype=int)  # Specify data type
print(f"\n{zeros}")


[0. 0. 0. 0. 0.]

[[0. 0.]
 [0. 0.]
 [0. 0.]
 [0. 0.]
 [0. 0.]]

[[0 0 0]
 [0 0 0]]


In [19]:
ones = np.ones(5)
print(ones)
ones = np.ones((5,2))
print(f"\n{ones}")
ones = np.ones_like(arr_3d, dtype=int)  # Specify data type
print(f"\n{ones}")

[1. 1. 1. 1. 1.]

[[1. 1.]
 [1. 1.]
 [1. 1.]
 [1. 1.]
 [1. 1.]]

[[[1 1]
  [1 1]]

 [[1 1]
  [1 1]]]


In [20]:
# Create empty arrays (with uninitialized values) - fastest creation

empty = np.empty(7) # Values are whatever was in memory
print(empty)
empty = np.empty((2,6))
print(f"\n{empty}")
empty = np.empty_like(arr_3d, dtype=int)  # Specify data type
print(f"\n{empty}")

[0.0000000e+000 0.0000000e+000 1.3951719e-311 1.3951719e-311
 1.9762626e-323 1.4821969e-322 0.0000000e+000]

[[1.39516641e-311 2.81617418e-322 0.00000000e+000 0.00000000e+000
  4.24512503e+175 6.82116729e-043]
 [4.22374551e-090 1.42648538e-071 1.56150678e+184 2.83484896e-032
  6.48224659e+170 4.93432906e+257]]

[[[4608308318706860032 4612811918334230528]
  [4615626668101337088 4617315517961601024]]

 [[4618722892845154304 4620130267728707584]
  [4621115430147194880 4621819117588971520]]]


In [21]:
# Create identity matrices - crucial for linear algebra operations

eye = np.eye(5)
print(eye)
eye = np.eye(3, dtype=int)  # Specify data type
print(f"\n{eye}")


[[1. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0.]
 [0. 0. 1. 0. 0.]
 [0. 0. 0. 1. 0.]
 [0. 0. 0. 0. 1.]]

[[1 0 0]
 [0 1 0]
 [0 0 1]]


In [22]:
# Create evenly spaced arrays - essential for feature engineering

even = np.arange(2,9,2)
even

array([2, 4, 6, 8])

In [23]:
arange = np.arange(0, 10, 2)   # [0, 2, 4, 6, 8] - start, stop (exclusive), step
print(arange)

[0 2 4 6 8]


In [24]:
linespace = np.linspace(0, 1, 5)     # [0.  0.25 0.5  0.75 1. ] - start, stop (inclusive), num_points
linespace

array([0.  , 0.25, 0.5 , 0.75, 1.  ])

In [25]:
random_uniform = np.random.rand(3, 3)    # Uniform distribution [0, 1)
random_uniform

array([[0.9391712 , 0.91279381, 0.47703998],
       [0.6545489 , 0.33843725, 0.02898968],
       [0.05504629, 0.50846643, 0.95388442]])

In [26]:
rand_int = np.random.randint(1,10,2)    
print(rand_int)

[3 2]


In [27]:
# type conversion

int_arr = np.array([1,2,3])
float_arr = int_arr+ 1.5        # auto convert
float_arr

array([2.5, 3.5, 4.5])

In [28]:
# Explicit type conversion using astype() - use carefully to avoid overflow

float_arr = float_arr.astype(np.int64)  
float_arr       # convert   

array([2, 3, 4])

In [29]:
# Downcasting for memory optimization - risky but can save memory in large datasets
int_arr = np.array(float_arr + 1.2, dtype=np.int32)
print(int_arr)

[3 4 5]


In [30]:
a = np.full((3,6), 5)
a

array([[5, 5, 5, 5, 5, 5],
       [5, 5, 5, 5, 5, 5],
       [5, 5, 5, 5, 5, 5]])

In [31]:
a =np.arange(0,10,2).reshape(5,1)
a

array([[0],
       [2],
       [4],
       [6],
       [8]])

In [32]:
a = np.ones(5)
a

array([1., 1., 1., 1., 1.])

In [33]:
X = np.arange(0,10,3)
X

array([0, 3, 6, 9])

In [34]:
a = np.arange(0, 1, 0.1)
a

array([0. , 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9])

In [35]:
# Normalize feature into 10 bins
a = np.linspace(-10, 10, 10)
a

array([-10.        ,  -7.77777778,  -5.55555556,  -3.33333333,
        -1.11111111,   1.11111111,   3.33333333,   5.55555556,
         7.77777778,  10.        ])

In [36]:
arr = np.array([1,2,3,4,5,6,7])
arr[::2]

array([1, 3, 5, 7])

In [37]:
# indexing in 2d array

arr2d = np.array([[1,2,3],[4,5,6]])
print(arr2d[1,1])
print(arr2d[1])
print(arr2d[1, :])
print(arr2d[:, 1])

5
[4 5 6]
[4 5 6]
[2 5]


In [38]:
X = np.array([
    [180, 75, 22],
    [165, 60, 20],
    [170, 68, 25],
    [175, 72, 23]
])
Y = np.array([[[1,2,3],[4,5,6]],
              [[8,7,6],[1,3,4]],
              [[1,2,4],[5,2,7]]
              ])
X[:][2:]


array([[170,  68,  25],
       [175,  72,  23]])

In [39]:
x = np.array([[1,2,3],[4,5,6],[7,8,9],[10,11,12],[13,14,15]])
x[:2,1]

array([2, 5])

In [40]:
x[:,0]

array([ 1,  4,  7, 10, 13])

In [41]:
x[0:3, 0:2]

array([[1, 2],
       [4, 5],
       [7, 8]])

In [42]:
x[0:,:].ndim        

2

In [43]:
x.ndim

2

In [44]:
x[1:,:2]

array([[ 4,  5],
       [ 7,  8],
       [10, 11],
       [13, 14]])

In [45]:
# 2d array with different name, ages, roll no

names = np.array(["A", "B", "C", "D", "E", "F", "G", "H", "I", "J"])
age = np.array([25, 30, 35, 40, 45, 50, 55, 60, 65, 70])
roll_no = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
array = np.array([names, age, roll_no])
print(f"name is : {names}")
print(f"age is : {age}")
print(f"roll no is : {roll_no}")
print(f"array is : {array}")

name is : ['A' 'B' 'C' 'D' 'E' 'F' 'G' 'H' 'I' 'J']
age is : [25 30 35 40 45 50 55 60 65 70]
roll no is : [ 1  2  3  4  5  6  7  8  9 10]
array is : [['A' 'B' 'C' 'D' 'E' 'F' 'G' 'H' 'I' 'J']
 ['25' '30' '35' '40' '45' '50' '55' '60' '65' '70']
 ['1' '2' '3' '4' '5' '6' '7' '8' '9' '10']]


In [46]:
X = np.array([
    [180, 75, 22],
    [165, 60, 20],
    [170, 68, 25],
    [175, 72, 23]
])

ages = X[:, :]

adults = X[(ages >= 22) & (ages <= 75)]
adults


array([75, 22, 60, 68, 25, 72, 23])

In [47]:
a = np.array([1,2,3,4])
b = np.array([5,6,7,8])
x = np.array([[1,2,3],[4,5,6]])
y = np.array([[7,8,9],[10,11,12]])

c = x%y
c

array([[1, 2, 3],
       [4, 5, 6]])

In [48]:
arr = np.array([1,2,3])
np.sqrt(arr)

array([1.        , 1.41421356, 1.73205081])

In [49]:
x = x*x
x

array([[ 1,  4,  9],
       [16, 25, 36]])

In [50]:
X = np.array([
    [180, 75, 22],
    [165, 60, 20],
    [170, 68, 25]
])

mask = X[:, 2] > 21      # vectorized comparison
filtered = X[mask]      # vectorized filtering
filtered

array([[180,  75,  22],
       [170,  68,  25]])

In [51]:
arr = np.array([1,2,3])
np.sqrt(arr)

array([1.        , 1.41421356, 1.73205081])

In [52]:
np.log(arr)

array([0.        , 0.69314718, 1.09861229])

In [53]:
np.exp(arr)

array([ 2.71828183,  7.3890561 , 20.08553692])

In [54]:
np.sin(arr)

array([0.84147098, 0.90929743, 0.14112001])

In [55]:
X = np.array([
    [1, 2],
    [3, 4],
    [5, 6]
])

weights = np.array([0.5, 0.2])


In [56]:
y_pred = X @ weights
y_pred


array([0.9, 2.3, 3.7])

In [57]:
# normalization and standard deviation

data = np.array([2,6,4,8,10])

np.mean(data)   # mean is the sum of all elements divided by the number of elements

np.float64(6.0)

In [58]:
np.median(data) # median is the middle element of a sorted array

np.float64(6.0)

In [59]:
np.std(data)    # standard deviation is the square root of the variance

np.float64(2.8284271247461903)

In [60]:
np.var(data)    # variance is the average of the squared differences from the mean

np.float64(8.0)

In [61]:
# normalization

data = np.array([2,6,4,8,10])

normalized_data = (data - np.mean(data)) / np.std(data)
normalized_data

array([-1.41421356,  0.        , -0.70710678,  0.70710678,  1.41421356])

In [62]:
# logical operation

data = np.array([2,6,4,8,10])
data[data > 5]

array([ 6,  8, 10])

In [63]:
arr = np.array([[1,2],[3,4]])

normalze = (arr - np.mean(arr)) / np.std(arr)
normalze

array([[-1.34164079, -0.4472136 ],
       [ 0.4472136 ,  1.34164079]])

In [64]:
np.mean(arr)

np.float64(2.5)

In [65]:
np.std(arr)

np.float64(1.118033988749895)