In [1]:
# NumPy provides a multidimensional array data structure; it can store 1D arrays, 2D arrays, and so on

In [2]:
# Python lists are slow, whereas NumPy arrays are fast. The reasons are:
# 1) NumPy stores fixed-size elements, so each value uses fewer bytes of memory
# 2) No type checking is needed when iterating through the elements
# 3) NumPy stores its data in contiguous memory

In [3]:
# Benefits of Numpy
# 1) SIMD vector processing
# 2) Effective Cache utilization

In [4]:
# Applications of Numpy
# 1) Mathematics (MATLAB replacement)
# 2) Plotting (Matplotlib)
# 3) Backend of many different applications (Pandas, Connect 4, digital photography)
# 4) Machine learning

In [5]:
import numpy as np

##  The Basics (creating arrays, shape, size, data type)

In [6]:
# Create a 1-D array from a flat Python list
a = np.array([2, 5, 1])
print(a)

[2 5 1]


In [7]:
# Create a 2-D array (2 rows, 3 columns) from a nested list of floats
b = np.array([[0.2, 2.4, 1.56], [4.23, 1.32, 9.43]])
print(b)

[[0.2  2.4  1.56]
 [4.23 1.32 9.43]]


In [8]:
# Get the number of dimensions (axes) of each array
print(a.ndim)
print(b.ndim)

1
2


In [9]:
# Get the shape: a tuple holding the length of each dimension
print(a.shape)
print(b.shape)

(3,)
(2, 3)


In [10]:
# Get the element type (dtype -> data type)
# NumPy infers it from the input values: integers for `a`, floats for `b`.
# NOTE: the default integer width is platform-dependent (int32 was recorded here; many systems give int64).
print(a.dtype)
print(b.dtype)

int32
float64


In [11]:
# Get the size of a single element, in bytes
print(a.itemsize)
print(b.itemsize)

4
8


In [12]:
# Get the total number of elements (across all dimensions)
print(a.size)
print(b.size)

3
6


In [13]:
# Total size of the data in bytes = number of elements * bytes per element
print(a.size * a.itemsize)
print(b.size * b.itemsize)

# or, equivalently, read the nbytes attribute

print(a.nbytes)
print(b.nbytes)

12
48
12
48


## Accessing/Changing Specific Elements, Rows, Columns, etc (slicing)

In [14]:
# Rebind `a` to a 2 x 7 integer array used by the indexing examples in this section
a = np.array([ [1, 2, 3, 4, 5, 6, 7], [8, 9, 10, 11, 12, 13, 14]])
print(a)
print(a.shape)

[[ 1  2  3  4  5  6  7]
 [ 8  9 10 11 12 13 14]]
(2, 7)


In [15]:
# To get a specific cell: a[row, column], both indices zero-based
a[0, 4]

5

In [16]:
# To get a specific row: fix the row index and use `:` to take every column
print(a[1, : ])

[ 8  9 10 11 12 13 14]


In [17]:
# To get a specific column: use `:` to take every row and fix the column index
print(a[ : , 3])

[ 4 11]


In [18]:
# Getting a little more fancy a[startindex : endindex : stepsize]
# endindex is exclusive, so 1:6:2 selects columns 1, 3 and 5 of row 0
a[0, 1:6:2]

array([2, 4, 6])

In [19]:
# Negative indices count from the end: with 7 columns, -4:-1:2 selects columns 3 and 5
a[0, -4:-1:2]

array([4, 6])

In [20]:
# Overwrite a single element (row 0, column 5) in place
a[0, 5] = 2432
print(a)

[[   1    2    3    4    5 2432    7]
 [   8    9   10   11   12   13   14]]


In [21]:
# Assigning a scalar to a row slice fills the whole row with that value
a[0, : ] = 34
print(a)

[[34 34 34 34 34 34 34]
 [ 8  9 10 11 12 13 14]]


In [22]:
# Assigning a list with one value per column replaces the row element by element
a[0, : ] = [1, 2, 3, 4, 5, 6, 7]
print(a)

[[ 1  2  3  4  5  6  7]
 [ 8  9 10 11 12 13 14]]


In [23]:
# Assigning a scalar to a column slice sets that column in every row
a[ : , 2] = 345
print(a)

[[  1   2 345   4   5   6   7]
 [  8   9 345  11  12  13  14]]


In [24]:
# Assigning a list to a column slice: the list supplies one value per row
a[ : , 2] = [2345, 4041]
print(a)

[[   1    2 2345    4    5    6    7]
 [   8    9 4041   11   12   13   14]]


In [25]:
# 3D example: two 2 x 2 blocks, i.e. shape (2, 2, 2)
c = np.array([ [ [1, 2], [3, 4] ], [ [5, 6], [7, 8] ] ])
print(c)

[[[1 2]
  [3 4]]

 [[5 6]
  [7 8]]]


In [26]:
# First block (index 0), its second row (index 1), every column
c[0, 1, : ]

array([3, 4])

In [27]:
# The second row (index 1) of every block
c[ : , 1, :]

array([[3, 4],
       [7, 8]])

In [29]:
# Replace the second row of every block; the right-hand side has the same
# (2, 2) shape as the slice being assigned
c[ : , 1, : ] = [[0, 0], [0, 0]]
print(c)

[[[1 2]
  [0 0]]

 [[5 6]
  [0 0]]]


## Initializing Different Arrays (1s, 0s, full, random, etc...)

In [32]:
# All 0s matrix
# np.zeros accepts an int for a 1-D array or a tuple giving the full shape;
# the elements are floats by default (note the `0.` in the output)
z = np.zeros(5)
print(z)

z = np.zeros((2,3))
print(z)

z = np.zeros((2,3,3,2))
print(z)

[0. 0. 0. 0. 0.]
[[0. 0. 0.]
 [0. 0. 0.]]
[[[[0. 0.]
   [0. 0.]
   [0. 0.]]

  [[0. 0.]
   [0. 0.]
   [0. 0.]]

  [[0. 0.]
   [0. 0.]
   [0. 0.]]]


 [[[0. 0.]
   [0. 0.]
   [0. 0.]]

  [[0. 0.]
   [0. 0.]
   [0. 0.]]

  [[0. 0.]
   [0. 0.]
   [0. 0.]]]]


In [33]:
# Array of all 1s: two 3 x 3 blocks stored as 64-bit integers
one = np.ones(shape=(2, 3, 3), dtype=np.int64)
print(one)

[[[1 1 1]
  [1 1 1]
  [1 1 1]]

 [[1 1 1]
  [1 1 1]
  [1 1 1]]]


In [35]:
# Array filled with an arbitrary constant (here 99, stored as float32)
a = np.full(shape=(2, 3), fill_value=99, dtype=np.float32)
print(a)

[[99. 99. 99.]
 [99. 99. 99.]]


In [36]:
# Show `b` again: its shape and dtype serve as the template for np.full / np.full_like
print(b)

[[0.2  2.4  1.56]
 [4.23 1.32 9.43]]


In [40]:
# np.full with another array's shape: the dtype comes from the fill value (integers here)
a = np.full(b.shape, 5)
print(a)

# np.full_like takes both the shape and the dtype from `b` (floats, hence the `4.`)
a = np.full_like(b, 4)
print(a)

[[5 5 5]
 [5 5 5]]
[[4. 4. 4.]
 [4. 4. 4.]]


In [42]:
# Random decimal numbers in [0, 1)
# Note: unlike np.zeros / np.ones, np.random.rand takes the dimensions as
# separate arguments (2, 4) rather than as a single shape tuple ((2, 4))
np.random.rand(2,4)

array([[0.74547161, 0.33855607, 0.92205046, 0.57717183],
       [0.75392417, 0.09950819, 0.51283337, 0.95080582]])

In [44]:
# To pass the shape as a tuple, use np.random.random_sample instead
np.random.random_sample((2,5))

array([[0.46356394, 0.33462751, 0.16420107, 0.61098523, 0.4581942 ],
       [0.15028921, 0.11452544, 0.615442  , 0.61440701, 0.15760479]])

In [45]:
# Random integer values
# With a single bound the values are drawn from [0, bound): the bound itself is excluded
print(np.random.randint(7))

# `size` sets the shape of the returned array
print(np.random.randint(4, size = (3, 3)))

3
[[3 3 0]
 [0 3 1]
 [0 2 3]]


In [46]:
# Random integers from the half-open interval [4, 7), i.e. 4, 5 or 6
print(np.random.randint(4,7,size = (2,4)))

[[5 4 6 5]
 [4 6 4 6]]


In [51]:
# Identity matrix: 4 x 4, ones on the main diagonal and zeros elsewhere
np.identity(4)

array([[1., 0., 0., 0.],
       [0., 1., 0., 0.],
       [0., 0., 1., 0.],
       [0., 0., 0., 1.]])

In [48]:
# Repeat an array: each element is repeated 3 times in turn, giving a 1-D result
arr =np.array([1, 2, 3])
r1 = np.repeat(arr, 3)
print(r1)

[1 1 1 2 2 2 3 3 3]


In [52]:
# Repeating a 1-D array: every element appears 3 times consecutively
arr =np.array([1, 2, 3])
r1 = np.repeat(arr, 3)
print(r1)

[1 1 1 2 2 2 3 3 3]


In [50]:

# With a 2-D input (one row) and axis = 0, the whole row is repeated 3 times
arr =np.array([ [1, 2, 3] ])
r1 = np.repeat(arr, 3, axis = 0)
print(r1)

[[1 2 3]
 [1 2 3]
 [1 2 3]]


## Problem #1 (How do you initialize this array?)

In [55]:
# Build the pattern: a border of 1s, an interior of 0s and a 9 in the centre
arr = np.ones((5, 5))
arr[1:-1, 1:-1] = 0  # zero everything except the outermost rows and columns
arr[2, 2] = 9  # one multi-dimensional index instead of the chained arr[2][2]
print(arr)

[[1. 1. 1. 1. 1.]
 [1. 0. 0. 0. 1.]
 [1. 0. 9. 0. 1.]
 [1. 0. 0. 0. 1.]
 [1. 1. 1. 1. 1.]]


## Be careful when copying variables!

In [56]:
# `b = a` does not copy anything: both names refer to the same array,
# so writing through `b` also changes what `a` shows
a = np.array([1, 2, 3])
b = a
b[0] = 100
print(a)

[100   2   3]


In [57]:
# .copy() creates an independent array, so changing `b` leaves `a` untouched
a = np.array([1, 2, 3])
b = a.copy()
b[0] = 100
print(a)

[1 2 3]


## Basic Mathematics (arithmetic, trigonometry, etc.)

In [58]:
# Rebind `a` to a small integer vector used by the arithmetic examples
a = np.array([1, 2, 3, 4])
print(a)

[1 2 3 4]


In [59]:
# The scalar is applied to every element; `a` itself is not modified
a + 2

array([3, 4, 5, 6])

In [60]:
# Element-wise subtraction of a scalar
a - 2

array([-1,  0,  1,  2])

In [61]:
# Element-wise multiplication by a scalar
a * 2

array([2, 4, 6, 8])

In [62]:
# Element-wise true division: the result is a float array even for integer input
a / 2

array([0.5, 1. , 1.5, 2. ])

In [63]:
# Element-wise addition of two arrays of the same length; += updates `a` in place
b = np.array([1,0,1,0])
a+=b
print(a)

[2 2 4 4]


In [64]:
# Element-wise power: every element is squared
a ** 2

array([ 4,  4, 16, 16], dtype=int32)

In [65]:
# Element-wise trigonometric functions; the inputs are interpreted as radians
print(np.sin(a))
print(np.cos(a))

[ 0.90929743  0.90929743 -0.7568025  -0.7568025 ]
[-0.41614684 -0.41614684 -0.65364362 -0.65364362]


## Linear Algebra

In [67]:
# Two matrices with compatible inner dimensions: (2, 3) and (3, 2)
a = np.ones((2,3))
print(a)

b  = np.full((3,2), 2)
print(b)

# print(a*b) does not work: `*` is element-wise, and shapes (2, 3) and (3, 2) cannot be broadcast together

# Matrix product: (2, 3) times (3, 2) gives a (2, 2) result
print(np.matmul(a, b))

[[1. 1. 1.]
 [1. 1. 1.]]
[[2 2]
 [2 2]
 [2 2]]
[[6. 6.]
 [6. 6.]]


In [69]:
# To get determinant of a matrix
# np.identity needs the matrix size n; the determinant of an identity matrix is 1.0

np.linalg.det(np.identity(3))

1.0

## Statistics

In [70]:
# Small 2 x 3 sample array used by the statistics examples
stats = np.array([[1,2,3],[4,5,6]])
stats

array([[1, 2, 3],
       [4, 5, 6]])

In [71]:
# Minimum over all elements, then per column (axis = 0), then per row (axis = 1)
print(np.min(stats))
print(np.min(stats, axis = 0))
print(np.min(stats, axis = 1))

1
[1 2 3]
[1 4]


In [72]:
# Maximum over all elements, then per column (axis = 0), then per row (axis = 1)
print(np.max(stats))
print(np.max(stats, axis = 0))
print(np.max(stats, axis = 1))

6
[4 5 6]
[3 6]


In [73]:
# Sum down each column (axis = 0), giving one total per column
np.sum(stats, axis = 0)

array([5, 7, 9])

## Reorganizing Arrays (reshape, vstack, hstack)

In [76]:
# Start from a 2 x 4 array holding the integers 1..8
before = np.array([
    [1, 2, 3, 4],
    [5, 6, 7, 8],
])
print(before)

# The same 8 elements laid out as 4 rows x 2 columns
after = np.reshape(before, (4, 2))
print(after)

# ...and as a 2 x 2 x 2 array; the element count must stay the same
print(np.reshape(before, (2, 2, 2)))

[[1 2 3 4]
 [5 6 7 8]]
[[1 2]
 [3 4]
 [5 6]
 [7 8]]
[[[1 2]
  [3 4]]

 [[5 6]
  [7 8]]]


In [77]:
# Vertically stacking vectors
v1 = np.array([1,2,3,4])
v2 = np.array([5,6,7,8])

np.vstack([v1,v2,v1,v2])

array([[1, 2, 3, 4],
       [5, 6, 7, 8],
       [1, 2, 3, 4],
       [5, 6, 7, 8]])

In [78]:
# Horizontal  stack
h1 = np.ones((2,4))
h2 = np.zeros((2,2))

np.hstack((h1,h2))

array([[1., 1., 1., 1., 0., 0.],
       [1., 1., 1., 1., 0., 0.]])

## Load data in from a file

In [82]:

# The string literal below only illustrates the expected file contents; the code does not use it
'''

data.txt looks like ::

1,13,21,11,196,75,4,3,34,6,7,8,0,1,2,3,4,5
3,42,12,33,766,75,4,55,6,4,3,4,5,6,7,0,11,12
1,22,33,11,999,11,2,1,78,0,1,2,9,8,7,1,76,88



'''

# Parse the comma-separated file; the values come back as floats (see the first printed array)
filedata = np.genfromtxt('data.txt', delimiter=',')
print(filedata)
# astype returns a converted array, so rebind the name to keep the int32 version
filedata = filedata.astype('int32')
print(filedata)

[[  1.  13.  21.  11. 196.  75.   4.   3.  34.   6.   7.   8.   0.   1.
    2.   3.   4.   5.]
 [  3.  42.  12.  33. 766.  75.   4.  55.   6.   4.   3.   4.   5.   6.
    7.   0.  11.  12.]
 [  1.  22.  33.  11. 999.  11.   2.   1.  78.   0.   1.   2.   9.   8.
    7.   1.  76.  88.]]
[[  1  13  21  11 196  75   4   3  34   6   7   8   0   1   2   3   4   5]
 [  3  42  12  33 766  75   4  55   6   4   3   4   5   6   7   0  11  12]
 [  1  22  33  11 999  11   2   1  78   0   1   2   9   8   7   1  76  88]]


## Advanced Indexing and Boolean Masking

In [83]:
# Comparing an array with a scalar is element-wise: the result is a boolean
# array with the same shape as `filedata`
filedata > 50

array([[False, False, False, False,  True,  True, False, False, False,
        False, False, False, False, False, False, False, False, False],
       [False, False, False, False,  True,  True, False,  True, False,
        False, False, False, False, False, False, False, False, False],
       [False, False, False, False,  True, False, False, False,  True,
        False, False, False, False, False, False, False,  True,  True]])

In [84]:
# Indexing with a boolean mask returns a 1-D array of the elements where the mask is True
filedata[ filedata> 50]

array([196,  75, 766,  75,  55, 999,  78,  76,  88])

In [85]:
# You can index with a list in numpy: this picks the elements at positions 1, 2 and 4
a = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9])

a[[1, 2, 4]]

array([2, 3, 5])

In [86]:
# Check, column by column (axis = 0), whether any value in that column exceeds a given threshold

np.any(filedata > 50, axis = 0)

array([False, False, False, False,  True,  True, False,  True,  True,
       False, False, False, False, False, False, False,  True,  True])

In [87]:
# True only for the columns in which every value is greater than 50
np.all( filedata > 50 , axis = 0)

array([False, False, False, False,  True, False, False, False, False,
       False, False, False, False, False, False, False, False, False])

In [91]:
# Combine masks element-wise with & (and) and | (or); Python's `and` / `or` keywords do not work on arrays.
# Each comparison needs its own parentheses because & binds more tightly than > and <
( (filedata > 50) & (filedata < 100))

array([[False, False, False, False, False,  True, False, False, False,
        False, False, False, False, False, False, False, False, False],
       [False, False, False, False, False,  True, False,  True, False,
        False, False, False, False, False, False, False, False, False],
       [False, False, False, False, False, False, False, False,  True,
        False, False, False, False, False, False, False,  True,  True]])

## Problem #2 (How do you index these values?)

In [95]:
# Build the integers 1..25 (the end value 26 is exclusive) ...
a = np.arange(1,26)
print(a)

# ... and lay them out as a 5 x 5 grid
a = a.reshape((5,5))
print(a)

[ 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
 25]
[[ 1  2  3  4  5]
 [ 6  7  8  9 10]
 [11 12 13 14 15]
 [16 17 18 19 20]
 [21 22 23 24 25]]


In [96]:
# Rows 2 and 3 (2:-1 stops before the last row), columns 0 and 1
print(a[ 2:-1 , 0:2])

[[11 12]
 [16 17]]


In [97]:
# The two index lists are paired element by element: (0, 1), (1, 2), (2, 3), (3, 4)
print(a[[0, 1, 2, 3] , [1, 2, 3, 4]])

[ 2  8 14 20]


In [98]:
# Rows 0, 3 and 4 (picked by list) combined with a slice taking columns 3 onward
print(a[[0, 3, 4] , 3: ])

[[ 4  5]
 [19 20]
 [24 25]]
