# Numpy Workshop

## Numpy Loading

In [2]:
import numpy as np

## The Basics

In [None]:
a = np.array([1,2,3])
print(a)

[1 2 3]


In [None]:
b = np.array([[4.0,5.0,6.0],[7.0,8.0,9.0]])
print(b)

[[4. 5. 6.]
 [7. 8. 9.]]


In [None]:
# Get dimensions
a.ndim

1

In [None]:
b.ndim

2

In [None]:
# Get shapes
a.shape

(3,)

In [None]:
b.shape

(2, 3)

In [None]:
# Plain Python integers get NumPy's default integer dtype, which is 'int64'
# here. NOTE(review): the default width may differ on other platforms.
a.dtype

dtype('int64')

In [None]:
a.itemsize # for 'int64', single item takes 8 bytes

8

In [None]:
a = np.array([1,2,3], dtype='int32')
print(a)

[1 2 3]


In [None]:
a.dtype # explicitly specified in the cell above

dtype('int32')

In [None]:
a.itemsize # for 'int32', single item takes 4 bytes

4

In [None]:
# Get total size
a.nbytes # 3 items each taking 4 bytes. So, total is 12 bytes

12

In [None]:
b.dtype

dtype('float64')

In [None]:
b.itemsize

8

In [None]:
b.nbytes # 6 items each taking 8 bytes. Total 48 bytes.

48

## Accessing / Changing specific elements, rows, columns etc.

In [None]:
a = np.array([[1,2,3,4,5],[6,7,8,9,10]])
print(a)

[[ 1  2  3  4  5]
 [ 6  7  8  9 10]]


In [None]:
# Get a specific element [r,c] 
# Suppose, we want to select '7' 
a[1,1] 
# a[1,-4] can be also used.

7

In [None]:
# Get a specific row
# Suppose, the first row [1,2,3,4,5]
a[0,:]

array([1, 2, 3, 4, 5])

In [None]:
# Get a specific column
# Suppose, the column [4,9]
a[:,3]

array([4, 9])

In [None]:
# Getting a little more fancy (startindex:endindex:stepsize)
# Suppose, we want to get [1,3,5] from the first row
a[0, 0::2]

array([1, 3, 5])

In [None]:
a[1,2] = 18
print(a)

[[ 1  2  3  4  5]
 [ 6  7 18  9 10]]


In [None]:
a[:,3] = 50
print(a)

[[ 1  2  3 50  5]
 [ 6  7 18 50 10]]


In [None]:
a[:,3] = [72, 27]
print(a)

[[ 1  2  3 72  5]
 [ 6  7 18 27 10]]


## 3-D Example

In [None]:
b = np.array([[[1,2],[3,4]],[[5,6],[7,8]]])
print(b)

[[[1 2]
  [3 4]]

 [[5 6]
  [7 8]]]


In [None]:
# Get specific element (work outside in)
b[1,1,0]

7

In [None]:
# Replace a whole slice rather than a single element: index 1 along the
# middle axis is overwritten in every sub-array (assumes b is the 2x2x2
# array created in the earlier cell).
b[:,1,:] = [[3,3],[7,7]]
print(b)

[[[1 2]
  [3 3]]

 [[5 6]
  [7 7]]]


## Initializing different types of arrays

In [None]:
# All 0's matrix
np.zeros((2,3))

array([[0., 0., 0.],
       [0., 0., 0.]])

In [None]:
# All 1's matrix
np.ones((4,3))

array([[1., 1., 1.],
       [1., 1., 1.],
       [1., 1., 1.],
       [1., 1., 1.]])

In [None]:
# Any other number
np.full((2,3), 5)

array([[5, 5, 5],
       [5, 5, 5]])

In [None]:
# Any other number (using full_like)
np.full_like(a, 5)

array([[5, 5, 5, 5, 5],
       [5, 5, 5, 5, 5]])

In [None]:
# Random different values
np.random.rand(4,2) # random values between 0 and 1

array([[0.44076617, 0.05162454],
       [0.38625045, 0.57319044],
       [0.27015502, 0.03337979],
       [0.24745238, 0.48599827]])

In [None]:
# Random floats in [0, 1) with the same shape as an existing array.
# A new array is returned; `a` itself is not modified.
np.random.random_sample(a.shape)

array([[0.1698318 , 0.05477319, 0.29072035, 0.28803744, 0.218159  ],
       [0.07339056, 0.57016213, 0.14987029, 0.25499618, 0.31311407]])

In [None]:
# Random integer values
np.random.randint(0,5, size=(2,3))

array([[3, 4, 2],
       [4, 2, 1]])

In [None]:
# The identity matrix
np.identity(5)

array([[1., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0.],
       [0., 0., 1., 0., 0.],
       [0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 1.]])

In [None]:
# Repeat an array along the row axis.
# The source array is kept two-dimensional so that every repetition prints
# as its own row.
arr = np.array([[1, 2, 3, 4, 5]])
rep = arr.repeat(3, axis=0)  # axis=0 -> vertical repetition
print(rep)

[[1 2 3 4 5]
 [1 2 3 4 5]
 [1 2 3 4 5]]


In [None]:
# axis=1 -> horizontal repetition: each element is repeated in place,
# so the single row grows wider.
arr = np.array([[1, 2, 3, 4, 5]])
rep = arr.repeat(3, axis=1)
print(rep)

[[1 1 1 2 2 2 3 3 3 4 4 4 5 5 5]]


In [None]:
# Build a 5x5 frame of ones around a 3x3 block of zeros with a 9 in the centre:
# 1 1 1 1 1
# 1 0 0 0 1
# 1 0 9 0 1
# 1 0 0 0 1
# 1 1 1 1 1

# Inner 3x3 block: all zeros except for the centre value.
inner = np.zeros((3, 3))
inner[1, 1] = 9

# Start from all ones and overwrite everything except the outer border.
output = np.ones((5, 5))
output[1:-1, 1:-1] = inner
print(output)

[[1. 1. 1. 1. 1.]
 [1. 0. 0. 0. 1.]
 [1. 0. 9. 0. 1.]
 [1. 0. 0. 0. 1.]
 [1. 1. 1. 1. 1.]]


## Be careful when copying arrays!

In [None]:
a = np.array([1, 2, 3])
# Make b an independent copy of a.
# Do NOT write "b = a": that only binds a second name to the same array, so
# a change made through b would also show up in a.
b = np.copy(a)
b[0] = 100
print(a)
print(b)

[1 2 3]
[100   2   3]


## Mathematics

In [None]:
a = np.array([1,2,3,4,5])
print(a)

[1 2 3 4 5]


In [None]:
a + 5 # elementwise arithmetic

array([ 6,  7,  8,  9, 10])

In [None]:
a - 3

array([-2, -1,  0,  1,  2])

In [None]:
a * 4

array([ 4,  8, 12, 16, 20])

In [None]:
a / 2

array([0.5, 1. , 1.5, 2. , 2.5])

In [None]:
b = np.array([4,0,3,1,2])
a + b

array([5, 2, 6, 5, 7])

In [None]:
a ** 2

array([ 1,  4,  9, 16, 25])

In [None]:
# Take the trigonometric values from the array
print(a)
np.sin(a)

[1 2 3 4 5]


array([ 0.84147098,  0.90929743,  0.14112001, -0.7568025 , -0.95892427])

In [None]:
print(a)
np.cos(a)

[1 2 3 4 5]


array([ 0.54030231, -0.41614684, -0.9899925 , -0.65364362,  0.28366219])

## Linear Algebra

In [None]:
# Matrix product of a (2, 3) array of ones with a (3, 2) array of fives.
a = np.ones((2, 3))
b = np.full((3, 2), 5)
print(a)
print(b)

a @ b  # the @ operator performs the same product as np.matmul(a, b)

[[1. 1. 1.]
 [1. 1. 1.]]
[[5 5]
 [5 5]
 [5 5]]


array([[15., 15.],
       [15., 15.]])

In [None]:
# Determinant of the 5x5 identity matrix
c = np.eye(5)
np.linalg.det(c)

1.0

## Statistics

In [None]:
a = np.array([[1,2,3],[4,5,6]])
print(a)

[[1 2 3]
 [4 5 6]]


In [None]:
np.min(a)

1

In [None]:
np.max(a)

6

In [None]:
np.sum(a)

21

## Reorganizing arrays

In [None]:
a = np.array([[1, 2, 3, 4], [5, 6, 7, 8]])
print(a)

# The same eight values, laid out as 4 rows of 2 instead of 2 rows of 4.
b = np.reshape(a, (4, 2))
print(b)

[[1 2 3 4]
 [5 6 7 8]]
[[1 2]
 [3 4]
 [5 6]
 [7 8]]


In [None]:
# Stack 1-D arrays on top of each other: each input becomes one row.
a = np.array([1, 2, 3, 4, 5])
b = np.array([6, 7, 8, 9, 10])
np.vstack((a, b, b, a))

array([[ 1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10],
       [ 6,  7,  8,  9, 10],
       [ 1,  2,  3,  4,  5]])

In [None]:
# Join 2-D arrays side by side: every row is extended with the matching
# row of the next array.
a = np.array([[1, 2, 3], [4, 5, 6]])
b = np.array([[7, 8, 9], [10, 11, 12]])
np.hstack((a, b, b, a))

array([[ 1,  2,  3,  7,  8,  9,  7,  8,  9,  1,  2,  3],
       [ 4,  5,  6, 10, 11, 12, 10, 11, 12,  4,  5,  6]])

## Miscellaneous

## Load Data from file

In [3]:
# genfromtxt parses the fields as floats by default, so the result is
# converted to int32 afterwards. Every line of the file should contain the
# same number of data points.
filedata = np.genfromtxt('data.txt', delimiter=',').astype('int32')
print(filedata)

[[  1  13  21  11 196  75   4   3  34   6   7   8   0   1   2   3   4   5]
 [  3  42  12  33 766  75   4  55   6   4   3   4   5   6   7   0  11  12]
 [  1  22  33  11 999  11   2   1  78   0   1   2   9   8   7   1  76  88]]


## Boolean masking and advanced indexing

In [None]:
filedata > 50 # elementwise comparison: True where the value is greater than
#               50, False where it is 50 or less


array([[False, False, False, False,  True,  True, False, False, False,
        False, False, False, False, False, False, False, False, False],
       [False, False, False, False,  True,  True, False,  True, False,
        False, False, False, False, False, False, False, False, False],
       [False, False, False, False,  True, False, False, False,  True,
        False, False, False, False, False, False, False,  True,  True]])

In [None]:
filedata[filedata > 50] # shows the data that are greater than 50

array([196,  75, 766,  75,  55, 999,  78,  76,  88], dtype=int32)

In [None]:
# A NumPy array can be indexed with a list of positions.
a = np.arange(1, 10)
# Pick out the values 1, 4, 6 and 8 through their positions 0, 3, 5 and 7.
a[[0, 3, 5, 7]]

array([1, 4, 6, 8])

In [5]:
((filedata > 50) & (filedata < 100))

array([[False, False, False, False, False,  True, False, False, False,
        False, False, False, False, False, False, False, False, False],
       [False, False, False, False, False,  True, False,  True, False,
        False, False, False, False, False, False, False, False, False],
       [False, False, False, False, False, False, False, False,  True,
        False, False, False, False, False, False, False,  True,  True]])

In [7]:
np.all(filedata > 50, axis=0) # True for a column only if every element in
#                               that column is greater than 50.

array([False, False, False, False,  True, False, False, False, False,
       False, False, False, False, False, False, False, False, False])

In [8]:
# Reload the file: the fields are parsed as floats by default, then converted
# to int32. Each line should hold the same number of data points.
filedata = np.genfromtxt('data.txt', delimiter=',').astype('int32')
print(filedata)

[[  1  13  21  11 196  75   4   3  34   6   7   8   0   1   2   3   4   5]
 [  3  42  12  33 766  75   4  55   6   4   3   4   5   6   7   0  11  12]
 [  1  22  33  11 999  11   2   1  78   0   1   2   9   8   7   1  76  88]]


In [9]:
# '~' is the elementwise NOT operator. It binds tighter than '&', so this
# evaluates (~(filedata > 50)) & (filedata < 100), which for this integer
# data is the same as filedata <= 50.
# NOTE(review): if "NOT (between 50 and 100)" was intended, the expression
# needs an extra pair of parentheses: ~((filedata > 50) & (filedata < 100)).
(~(filedata > 50) & (filedata < 100))

array([[ True,  True,  True,  True, False, False,  True,  True,  True,
         True,  True,  True,  True,  True,  True,  True,  True,  True],
       [ True,  True,  True,  True, False, False,  True, False,  True,
         True,  True,  True,  True,  True,  True,  True,  True,  True],
       [ True,  True,  True,  True, False,  True,  True,  True, False,
         True,  True,  True,  True,  True,  True,  True, False, False]])

# Exercise

In [10]:
# The integers 1..30 arranged as 6 rows of 5 consecutive values.
a = np.arange(1, 31).reshape(6, 5)
print(a)

[[ 1  2  3  4  5]
 [ 6  7  8  9 10]
 [11 12 13 14 15]
 [16 17 18 19 20]
 [21 22 23 24 25]
 [26 27 28 29 30]]


## Find [[11,12],[16,17]]

In [13]:
a[2:4, 0:2]

array([[11, 12],
       [16, 17]])

## Find [2,8,14,20]

In [16]:
a[[0,1,2,3], [1,2,3,4]]

array([ 2,  8, 14, 20])

## Find [[4,5],[24,25],[29,30]]

In [17]:
a[[0,4,5], 3:]

array([[ 4,  5],
       [24, 25],
       [29, 30]])

# Thank You So Much