# NumPy

In [2]:
# List -> slow, NumPy -> fast
# List stores data with significant overhead, NumPy only stores the data
# List does not use contiguous memory, list contains the pointers of all the data stored in
# different memory locations
# numPy uses contiguous memory- this can utilize SIMD vector processing units for faster processing, 
# effective cache utilization
# Lists and NumPy: insertion, deletion, appending, concatenation, 
# Only NumPy: matrix multiplications, element wise multiplication
# Applications of NumPy: mathematics, plotting, backend, Machine learning

import numpy as np

In [3]:
# 1D array
# By default, it is int32
# But we can specify the type if we want to
# int8, int16, int32, int64
# uint8, uint16, uint32, uint64
# int, uint
a = np.array([1, 80000, 3, 5/8], dtype = 'int32')# -> 0000...0000 -> 1111....1111: 2^16 -> 
print(a)

[    1 80000     3     0]


In [4]:
strings = np.array(["aaa", "abc", "c"], dtype='<U2')
print(strings)

['aa' 'ab' 'c']


In [5]:
# 2D array
b = np.array([[2.3, 4.5, 5], [9.2, 3.0, 4/3]], dtype = 'float64')
print(b)

[[2.3        4.5        5.        ]
 [9.2        3.         1.33333333]]


In [6]:
# Get dimension and shape
print(a.ndim)
print(a.shape)

print(b.ndim)
print(b.shape)

1
(4,)
2
(2, 3)


In [7]:
# Get the type of the array
print(a.dtype)
print(b.dtype)
print(strings.dtype)

int32
float64
<U2


In [8]:
# Size of items in bytes
print(a.itemsize)
print(b.itemsize)

4
8


In [9]:
# Find the total size: how many items in the array * itemsize
total_size = a.size * a.itemsize
print(total_size)

# Or
print(a.nbytes)

16
16


# Accessing/changing specific elements, rows, columns etc

In [10]:
a = np.array([[1, 2, 3, 4, 5, 6, 7], [8, 9, 10, 11, 12, 13, 14], [15, 16, 17, 18, 19, 20, 21]])
print(a)

[[ 1  2  3  4  5  6  7]
 [ 8  9 10 11 12 13 14]
 [15 16 17 18 19 20 21]]


In [11]:
# Access a row
print(a[1, :])

[ 8  9 10 11 12 13 14]


In [12]:
# Access a column
print(a[:, 2])

[ 3 10 17]


In [13]:
# Access a particular value [row, column]
print(a[1, 2])

10


In [14]:
# Slicing: [startindex:endindex:stepsize]
print(a[1, 2:5])
print(a[0:2, 3:7])

[10 11 12]
[[ 4  5  6  7]
 [11 12 13 14]]


In [15]:
a[1, 5] = 20
print(a)

a[1, :] = 3
print(a)

a[1, :] = [3, 4, 5, 6, 8, 9, 40]
print(a)

b = np.array([3, 1, 99, 34, 45, 66, 88])
a[1, :] = b
print(a)

[[ 1  2  3  4  5  6  7]
 [ 8  9 10 11 12 20 14]
 [15 16 17 18 19 20 21]]
[[ 1  2  3  4  5  6  7]
 [ 3  3  3  3  3  3  3]
 [15 16 17 18 19 20 21]]
[[ 1  2  3  4  5  6  7]
 [ 3  4  5  6  8  9 40]
 [15 16 17 18 19 20 21]]
[[ 1  2  3  4  5  6  7]
 [ 3  1 99 34 45 66 88]
 [15 16 17 18 19 20 21]]


In [17]:
# 3D array
b = np.array([[[1, 2], [3, 4]], [[5, 6], [7, 8]]])
print(b)

[[[1 2]
  [3 4]]

 [[5 6]
  [7 8]]]


In [27]:
# All slicing configuration works
print(b[:, 1, :])

[[3 4]
 [7 8]]


In [28]:
b[:, 1, :] = 30
print(b)

b[:, 1, :] = [[40, 50], [60, 70]]
print(b)

[[[ 1  2]
  [30 30]]

 [[ 5  6]
  [30 30]]]
[[[ 1  2]
  [40 50]]

 [[ 5  6]
  [60 70]]]


In [29]:
a = np.array([1, 2, 3, 4, 5, 6, 7])
print(a)
print(a[[0, 3, 6]])

[1 2 3 4 5 6 7]
[1 4 7]


# Initializing different kinds of arrays

In [18]:
# All zero matrix
import numpy as np
a = np.zeros((3, 4))
print(a)

[[0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]]


In [19]:
# All 1 matrix
a = np.ones((2, 3))
print(a)

b = np.ones((2, 3), dtype='int32')
print(b)

[[1. 1. 1.]
 [1. 1. 1.]]
[[1 1 1]
 [1 1 1]]


In [18]:
# Any other number - full
a = np.full((2, 2), 99)
print(a)

b = np.full((2, 2), 99, dtype='float32')
print(b)

[[99 99]
 [99 99]]
[[99. 99.]
 [99. 99.]]


In [21]:
# Any other number - full_like
a = np.array([[1, 2, 3, 4, 5, 6, 7], [8, 9, 10, 11, 12, 13, 14], [15, 16, 17, 18, 19, 20, 21]])
print(a)

a = np.full_like(a, 5)
print(a)

b = np.full(a.shape, 5)
print(b)

[[ 1  2  3  4  5  6  7]
 [ 8  9 10 11 12 13 14]
 [15 16 17 18 19 20 21]]
[[5 5 5 5 5 5 5]
 [5 5 5 5 5 5 5]
 [5 5 5 5 5 5 5]]
[[5 5 5 5 5 5 5]
 [5 5 5 5 5 5 5]
 [5 5 5 5 5 5 5]]


In [31]:
# Random decimal numbers
import random
print(random.random())
a = np.random.rand(2, 2, 2)
print(a)

0.5891107162253139
[[[0.66187361 0.19088118]
  [0.80305215 0.60199349]]

 [[0.72938137 0.01506943]
  [0.1365603  0.32161367]]]


In [32]:
a = np.random.random_sample(a.shape)
print(a)

[[[0.82816444 0.40470616]
  [0.02011316 0.2757555 ]]

 [[0.13149092 0.10792597]
  [0.28483942 0.24460795]]]


In [36]:
# Random integers
a = np.random.randint(3) # Starts at 0, goes until 3, excluding 3
print(a)

a = np.random.randint(3, size = (3, 5))
print(a)

a = np.random.randint(-4, 5, size = (3, 5))
print(a)

2
[[1 1 0 1 1]
 [2 0 2 0 1]
 [0 0 2 0 2]]
[[ 3  2 -2  4  2]
 [-3  1  1  3 -4]
 [ 2  2 -1  4 -3]]


In [28]:
# The identity matrix
a = np.identity(4, dtype='int32')
print(a)

[[1 0 0 0]
 [0 1 0 0]
 [0 0 1 0]
 [0 0 0 1]]


In [3]:
# repeat array
import numpy as np
arr = np.array([[1, 2, 3], [4, 5, 6]])
print("This is the original array:\n", arr)

a = np.repeat(arr, 3)
print(a)

a = np.repeat(arr, 3, axis = 1) # axis 1 is column
print(a)

a = np.repeat(arr, 3, axis = 0) # axis 0 is row
print(a)

This is the original array:
 [[1 2 3]
 [4 5 6]]
[1 1 1 2 2 2 3 3 3 4 4 4 5 5 5 6 6 6]
[[1 1 1 2 2 2 3 3 3]
 [4 4 4 5 5 5 6 6 6]]
[[1 2 3]
 [1 2 3]
 [1 2 3]
 [4 5 6]
 [4 5 6]
 [4 5 6]]


In [30]:
# Recreate the matrix
a = np.ones((5, 5), dtype='int32')
a[1:4, 1:4] = 0
a[2, 2] = 9
print(a)

[[1 1 1 1 1]
 [1 0 0 0 1]
 [1 0 9 0 1]
 [1 0 0 0 1]
 [1 1 1 1 1]]


In [31]:
# Careful when copying arrays
a = np.array([1, 2, 3])
b = a
print("This is a:", a)
print("This is b:", b)

This is a: [1 2 3]
This is b: [1 2 3]


In [32]:
b[0] = 48
print("This is a:", a)
print("This is b:", b)

This is a: [48  2  3]
This is b: [48  2  3]


In [33]:
# To properly copy, use the copy method
a = np.array([1, 2, 3])
b = a.copy()
print("This is a:", a)
print("This is b:", b)

This is a: [1 2 3]
This is b: [1 2 3]


In [34]:
b[0] = 33
print("This is a:", a)
print("This is b:", b)

This is a: [1 2 3]
This is b: [33  2  3]


# Maths

In [4]:
a = np.array([1, 2, 3, 4])
print(a)

[1 2 3 4]


In [9]:
print(a + 2)

[3 4 5 6]


In [6]:
a - 2

array([-1,  0,  1,  2])

In [7]:
a += 2
print(a)

[3 4 5 6]


In [8]:
a -= 2
print(a)

[1 2 3 4]


In [12]:
a / 2

array([0.5, 1. , 1.5, 2. ])

In [13]:
a * 2

array([2, 4, 6, 8])

In [10]:
b = np.array([1, 0, 1, 0])
a * b

array([1, 0, 3, 0])

In [20]:
a ** 2

array([ 1,  4,  9, 16])

In [21]:
np.sin(a)

array([ 0.84147098,  0.90929743,  0.14112001, -0.7568025 ])

In [22]:
np.cos(a)

array([ 0.54030231, -0.41614684, -0.9899925 , -0.65364362])

# Linear algebra

In [11]:
a = np.ones((2, 3))
print(a)

b = np.full((3, 2), 2)
print(b)

a * b

[[1. 1. 1.]
 [1. 1. 1.]]
[[2 2]
 [2 2]
 [2 2]]


ValueError: operands could not be broadcast together with shapes (2,3) (3,2) 

In [12]:
np.matmul(a, b)

array([[6., 6.],
       [6., 6.]])

In [29]:
# Identity matrices have a determinant of 1
c = np.identity(3)
np.linalg.det(c)

1.0

In [30]:
# Some other things that can be done:

# Trace: Sum of the matrix in its main diagonal
a = np.array([[1, 2, 3],
             [4, 5, 6],
            [7, 8, 9]])
print("This is a:\n", a)
trace = np.trace(a)
print("\n\nTrace of the matrix is:", trace)

# Singular Value Decomposition: factorization of a matrix into 3 matrices
U, S, Vt = np.linalg.svd(a)
print(f"\n\nU, S and Vt:\n\n{U}\n\n{S}\n\n{Vt}")

# Eigenvalues: scales associated with a liner system that provide insights into a systems properties
values, vectors = np.linalg.eig(a)
print(f"\n\nThe eigenvalues, eigenvectores are:\n\n{values}\n\n{vectors}")


# Matrix Norm: measure of its size or length
norm = np.linalg.norm(a, "fro") # calculate the frobenius norm
print(f"\n\nFrobenius norm is:\n{norm}")


# Inverse: when multiplied with the original matrix, yeild identity matrix
inverse = np.linalg.inv(a)
print(f"\n\nInverse:\n{inverse}")

This is a:
 [[1 2 3]
 [4 5 6]
 [7 8 9]]


Trace of the matrix is: 15


U, S and Vt:

[[-0.21483724  0.88723069  0.40824829]
 [-0.52058739  0.24964395 -0.81649658]
 [-0.82633754 -0.38794278  0.40824829]]

[1.68481034e+01 1.06836951e+00 1.47280825e-16]

[[-0.47967118 -0.57236779 -0.66506441]
 [-0.77669099 -0.07568647  0.62531805]
 [ 0.40824829 -0.81649658  0.40824829]]


The eigenvalues, eigenvectores are:

[ 1.61168440e+01 -1.11684397e+00 -1.30367773e-15]

[[-0.23197069 -0.78583024  0.40824829]
 [-0.52532209 -0.08675134 -0.81649658]
 [-0.8186735   0.61232756  0.40824829]]


Frobenius norm is:
16.881943016134134


Inverse:
[[-4.50359963e+15  9.00719925e+15 -4.50359963e+15]
 [ 9.00719925e+15 -1.80143985e+16  9.00719925e+15]
 [-4.50359963e+15  9.00719925e+15 -4.50359963e+15]]


# Statistics

In [1]:
import numpy as np
stats = np.array([[1, 2, 3], [4, 5, 6]])
stats

array([[1, 2, 3],
       [4, 5, 6]])

In [2]:
np.min(stats)

1

In [3]:
np.max(stats)

6

In [33]:
np.max(stats, axis=1) # Axis = 1, for rows

array([3, 6])

In [34]:
np.max(stats, axis=0) # Axis = 0, for columns

array([4, 5, 6])

In [35]:
np.sum(stats, axis=1)

array([ 6, 15])

In [36]:
np.sum(stats, axis=0)

array([5, 7, 9])

# Reorganizing array

In [11]:
import numpy as np
before = np.array([[1, 2, 3, 4], [5, 6, 7, 8]])
print(before)

after = before.reshape((8, 1)) # This shape needs to match the total number of items available in the array
print(after)

[[1 2 3 4]
 [5 6 7 8]]
[[1]
 [2]
 [3]
 [4]
 [5]
 [6]
 [7]
 [8]]


In [12]:
after = before.reshape((4, 2))
print(after)

[[1 2]
 [3 4]
 [5 6]
 [7 8]]


In [13]:
after = before.reshape((3, 2)) # This will not work since the number of items does not match the shape

ValueError: cannot reshape array of size 8 into shape (3,2)

In [53]:
# horizontal stacking arrays
h1 = np.ones((2, 4))
h2 = np.zeros((2, 2))
print(h1)
print(h2)

np.hstack((h1, h2))

[[1. 1. 1. 1.]
 [1. 1. 1. 1.]]
[[0. 0.]
 [0. 0.]]


array([[1., 1., 1., 1., 0., 0.],
       [1., 1., 1., 1., 0., 0.]])

In [11]:
in_arr1 = np.array([[ 1, 2, 3], [ -1, -2, -3]] ) 
print (in_arr1)  
  
in_arr2 = np.array([[ 4, 5, 6], [ -4, -5, -6]] ) 
print (in_arr2)  
 
out_arr = np.vstack((in_arr1, in_arr2)) 
print (out_arr) 

[[ 1  2  3]
 [-1 -2 -3]]
[[ 4  5  6]
 [-4 -5 -6]]
[[ 1  2  3]
 [-1 -2 -3]
 [ 4  5  6]
 [-4 -5 -6]]


# Miscellaneous

#### Load data from file

In [17]:
!touch test.txt

In [19]:
with open("test.txt", "w") as f:
    f.write("1,2,3,4,5\n6,7,8,9,10\n11,12,13,14,15")

In [20]:
!cat test.txt

1,2,3,4,5
6,7,8,9,10
11,12,13,14,15

In [21]:
filedata = np.genfromtxt("test.txt", delimiter=",")
print(filedata.dtype)
filedata = filedata.astype('int32')
filedata

float64


array([[ 1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10],
       [11, 12, 13, 14, 15]], dtype=int32)

In [28]:
# Boolean masking
filedata > 5

array([[False, False, False, False, False],
       [ True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True]])

In [27]:
filedata[(filedata > 5)]

array([ 6,  7,  8,  9, 10, 11, 12, 13, 14, 15], dtype=int32)

In [22]:
filedata

array([[ 1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10],
       [11, 12, 13, 14, 15]], dtype=int32)

In [29]:
np.any(filedata < 8)

True

In [31]:
np.any(filedata > 12, axis = 0) # 0 For the column

array([False, False,  True,  True,  True])

In [32]:
np.any(filedata > 5, axis = 1) # 1 For the rows

array([False,  True,  True])

In [33]:
np.all(filedata > 5)

False

In [35]:
np.all(filedata > 5, axis = 0) #columns

array([False, False, False, False, False])

In [36]:
np.all(filedata > 5, axis = 1) #rows

array([False,  True,  True])

In [37]:
(filedata > 5) & (filedata < 10)

array([[False, False, False, False, False],
       [ True,  True,  True,  True, False],
       [False, False, False, False, False]])

In [40]:
~((filedata > 5) & (filedata < 10))

array([[ True,  True,  True,  True,  True],
       [False, False, False, False,  True],
       [ True,  True,  True,  True,  True]])

## Index mapping

In [41]:
import numpy as np

arr = np.array([1, 2, 3, 2, 4, 2, 5, 3])

# Get unique values, their counts, and index mapping
unique_values, counts = np.unique(arr, return_counts=True)
print(unique_values, counts)

# Identify values that appear more than once
duplicate_values = unique_values[counts > 1]  # This should now work correctly

# Get indices of these duplicate values in the original array
duplicate_indices = np.where(np.isin(arr, duplicate_values))

print("Duplicate values:", duplicate_values)
print("Indices of duplicate values:", duplicate_indices)

[1 2 3 4 5] [1 3 2 1 1]
Duplicate values: [2 3]
Indices of duplicate values: (array([1, 2, 3, 5, 7]),)
