# Computation on NumPy Arrays: Universal Functions

In [13]:
import numpy as np

In [2]:
x = np.arange(4)

In [3]:
x

array([0, 1, 2, 3])

In [7]:
# Addition

print("Addition:", x + 5)

Addition: [5 6 7 8]


In [6]:
# Subtraction

print("Subtraction:", x-5)

Subtraction: [-5 -4 -3 -2]


In [8]:
# Multiply

print("Multiply: ", x*5)

Multiply:  [ 0  5 10 15]


In [9]:
# Division

print("Division: ", x/5)

Division:  [0.  0.2 0.4 0.6]


In [13]:
# Power
print("Power/exponential:", x**2)

Power/exponential: [0 1 4 9]


In [14]:
# Modulus

print("Modulus: ", x%2)

Modulus:  [0 1 0 1]


In [15]:
# NumPy arithmetic ufuncs: function forms of the operators used above
# Addition
np.add(x, 2)

array([2, 3, 4, 5])

In [16]:
# Subtraction

np.subtract(x, 2)

array([-2, -1,  0,  1])

In [17]:
# Negation of array

np.negative(x)

array([ 0, -1, -2, -3])

In [18]:
# Multiply

np.multiply(x, 4)

array([ 0,  4,  8, 12])

In [19]:
# Divide

np.divide(x, 2)

array([0. , 0.5, 1. , 1.5])

In [20]:
# floor divide

np.floor_divide(x, 2)

array([0, 0, 1, 1], dtype=int32)

In [21]:
# Modulus

np.mod(x, 2)

array([0, 1, 0, 1], dtype=int32)

In [22]:
# Mixed-sign integers -2..2 for the absolute-value examples
x = np.arange(-2, 3)

In [23]:
x

array([-2, -1,  0,  1,  2])

In [24]:
# Absolute

abs(x)

array([2, 1, 0, 1, 2])

In [26]:
print("Abs:", np.abs(x))
print("Absolute:", np.absolute(x))

Abs: [2 1 0 1 2]
Absolute: [2 1 0 1 2]


# Aggregations: Min, Max and Other

In [27]:
L = np.random.random(100)

In [28]:
sum(L) #Python in-built aggregation function

52.17273722101692

In [29]:
np.sum(L) #Numpy in-built aggregation function

52.172737221016895

In [30]:
%timeit sum(L)
%timeit np.sum(L)

6.64 µs ± 56.9 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)
2.76 µs ± 7.57 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)


In [31]:
big_array = np.random.random(1000000)

In [32]:
%timeit sum(big_array)
%timeit np.sum(big_array)

54.8 ms ± 122 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
349 µs ± 6.75 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)


In [33]:
#Python in-built min and max

print(min(L))
print(max(L))

0.01559934505695959
0.987579448128065


In [34]:
# Numpy in-built min and max

print(np.min(L))
print(np.max(L))

0.01559934505695959
0.987579448128065


In [35]:
# NumPy aggregates short syntax

print(big_array.min(), big_array.max(), big_array.sum())

7.878817986917142e-08 0.9999963429854416 499405.88695242745


# Multi-dimensional aggregates
- Aggregation against rows or columns

In [36]:
M = np.random.random((3,4))

In [37]:
M

array([[0.97987918, 0.69789217, 0.97812341, 0.66392729],
       [0.56935697, 0.09413005, 0.70346412, 0.86695607],
       [0.07012882, 0.36246085, 0.96182725, 0.64959436]])

In [38]:
# Python's built-in sum iterates over the first axis of a 2-D array,
# so this adds the rows together and yields one total per column.
sum(M)

array([1.61936497, 1.15448307, 2.64341479, 2.18047772])

In [39]:
M.sum()

7.597740552044026

In [42]:
M

array([[0.97987918, 0.69789217, 0.97812341, 0.66392729],
       [0.56935697, 0.09413005, 0.70346412, 0.86695607],
       [0.07012882, 0.36246085, 0.96182725, 0.64959436]])

In [40]:
M.min(axis=0) #min number column wise

array([0.07012882, 0.09413005, 0.70346412, 0.64959436])

In [41]:
M.max(axis=1) #max number row wise

array([0.97987918, 0.86695607, 0.96182725])

# Broadcasting

In [43]:
a = np.array([1,2,3])
b = np.array([4,5,6])

In [44]:
a + b

array([5, 7, 9])

- Addition of a scalar value

In [45]:
a + 5

array([6, 7, 8])

- Broadcasting single array

In [46]:
M = np.ones((3,3))

In [47]:
M

array([[1., 1., 1.],
       [1., 1., 1.],
       [1., 1., 1.]])

In [48]:
a

array([1, 2, 3])

In [49]:
M.shape

(3, 3)

In [50]:
a.shape

(3,)

In [51]:
M + a 

array([[2., 3., 4.],
       [2., 3., 4.],
       [2., 3., 4.]])

- Broadcasting of both arrays

In [52]:
# A 1-D vector of shape (3,) and a column vector of shape (3, 1) with the same values
a = np.arange(0, 3)
b = np.arange(0, 3).reshape(3, 1)

In [53]:
print(a.shape)
print(b.shape)


(3,)
(3, 1)


In [55]:
print(a)
print(b)

[0 1 2]
[[0]
 [1]
 [2]]


In [54]:
a + b

array([[0, 1, 2],
       [1, 2, 3],
       [2, 3, 4]])

In [56]:
# Broadcasting of incompatible arrays: M has shape (3, 2), a has shape (3,)

M = np.ones((3,2))
a = np.arange(3)

In [57]:
print(M.shape)
print(a.shape)

(3, 2)
(3,)


In [58]:
# Adding the (3, 2) matrix M to the (3,) vector a is an intentional
# broadcasting failure. Catch and display the error so that
# "Restart & Run All" continues past this cell instead of stopping here.
try:
    M + a
except ValueError as err:
    print(f"{type(err).__name__}: {err}")

ValueError: operands could not be broadcast together with shapes (3,2) (3,) 

In [59]:
a[:,np.newaxis].shape

(3, 1)

In [60]:
M + a[:, np.newaxis]

array([[1., 1.],
       [2., 2.],
       [3., 3.]])

# Comparisons, Masks, and Boolean Logic

- Comparison operators

1. less than <
2. greater than >
3. less than or equal to <=
4. greater than or equal to >=
5. not equal !=
6. equal ==

In [63]:
# Integers 1..6 used by the comparison-operator examples
x = np.arange(1, 7)

In [64]:
x

array([1, 2, 3, 4, 5, 6])

In [65]:
print(x<3) #less than

[ True  True False False False False]


In [66]:
np.less(x, 3)

array([ True,  True, False, False, False, False])

In [68]:
# greater than

print("Python greater: ", x>3)
print("NumPy greater: ", np.greater(x,3))

Python greater:  [False False False  True  True  True]
NumPy greater:  [False False False  True  True  True]


In [69]:
# less than equal

print("Python less than equal:", x<=3)
print("NumPy less than equal:", np.less_equal(x,3))


Python less than equal: [ True  True  True False False False]
NumPy less than equal: [ True  True  True False False False]


In [70]:
# greater than equal
print("Python greater than equal:", x>=3)
print("NumPy greater than equal:", np.greater_equal(x,3))

Python greater than equal: [False False  True  True  True  True]
NumPy greater than equal: [False False  True  True  True  True]


- Two-dimensional array

In [74]:
# Seeded legacy generator so the 3x4 grid of digits 0-9 is reproducible
rng = np.random.RandomState(seed=0)
x = rng.randint(low=0, high=10, size=(3, 4))

In [75]:
x

array([[5, 0, 3, 3],
       [7, 9, 3, 5],
       [2, 4, 7, 6]])

In [76]:
x<6

array([[ True,  True,  True,  True],
       [False, False,  True,  True],
       [ True,  True, False, False]])

In [77]:
print(x)

[[5 0 3 3]
 [7 9 3 5]
 [2 4 7 6]]


In [78]:
np.count_nonzero(x<6)

8

In [79]:
np.sum(x<6)

8

In [80]:
np.sum(x<6, axis=1) # values in each row with less than 6

array([4, 2, 2])

In [81]:
np.sum(x<6, axis=0) #values in each column with less than 6

array([2, 2, 2, 2])

In [82]:
print(x)

[[5 0 3 3]
 [7 9 3 5]
 [2 4 7 6]]


In [83]:
np.any(x>8)

True

In [84]:
np.any(x<0)

False

In [85]:
np.all(x<8, axis=1)

array([ True, False,  True])

# Example: Seattle Rainfall

In [1]:
import pandas as pd

In [2]:
df = pd.read_csv(r"seattle-weather.csv", encoding='utf8', engine='python')

In [3]:
df

Unnamed: 0,date,precipitation,temp_max,temp_min,wind,weather
0,2012/1/1,0.0,12.8,5.0,4.7,drizzle
1,2012/1/2,10.9,10.6,2.8,4.5,rain
2,2012/1/3,0.8,11.7,7.2,2.3,rain
3,2012/1/4,20.3,12.2,5.6,4.7,rain
4,2012/1/5,1.3,8.9,2.8,6.1,rain
...,...,...,...,...,...,...
1456,2015/12/27,8.6,4.4,1.7,2.9,fog
1457,2015/12/28,1.5,5.0,1.7,1.3,fog
1458,2015/12/29,0.0,7.2,0.6,2.6,fog
1459,2015/12/30,0.0,5.6,-1.0,3.4,sun


In [97]:
df['precipitation']

0        0.0
1       10.9
2        0.8
3       20.3
4        1.3
        ... 
1456     8.6
1457     1.5
1458     0.0
1459     0.0
1460     0.0
Name: precipitation, Length: 1461, dtype: float64

In [98]:
# Days without rain

np.sum(df['precipitation']==0)

838

In [99]:
# Number of days with rain

np.sum(df['precipitation']!=0)

623

In [100]:
df['precipitation']

0        0.0
1       10.9
2        0.8
3       20.3
4        1.3
        ... 
1456     8.6
1457     1.5
1458     0.0
1459     0.0
1460     0.0
Name: precipitation, Length: 1461, dtype: float64

# Boolean Operator

- & bitwise_and
- | bitwise_or
- ~ bitwise_not

In [104]:
# All rainy days with less than 15mm and more than 0mm of precipitation

In [107]:
np.sum((df['precipitation']<15) & (df['precipitation']>0))

533

# Boolean Arrays as Masks

In [108]:
x

array([[5, 0, 3, 3],
       [7, 9, 3, 5],
       [2, 4, 7, 6]])

In [109]:
x<5

array([[False,  True,  True,  True],
       [False, False,  True, False],
       [ True,  True, False, False]])

In [110]:
x[x<5]

array([0, 3, 3, 3, 2, 4])

In [111]:
# Construct a mask on all rainy days

rain = df['precipitation']>0

In [112]:
rain

0       False
1        True
2        True
3        True
4        True
        ...  
1456     True
1457     True
1458    False
1459    False
1460    False
Name: precipitation, Length: 1461, dtype: bool

In [113]:
df['precipitation'][rain]

1       10.9
2        0.8
3       20.3
4        1.3
5        2.5
        ... 
1452     6.1
1453     2.5
1454     5.8
1456     8.6
1457     1.5
Name: precipitation, Length: 623, dtype: float64

In [4]:
# Parse the 'date' column and keep only the calendar year in a new 'year'
# column (this adds the column to df in place).
df['year'] = pd.DatetimeIndex(df['date']).year

In [5]:
df

Unnamed: 0,date,precipitation,temp_max,temp_min,wind,weather,year
0,2012/1/1,0.0,12.8,5.0,4.7,drizzle,2012
1,2012/1/2,10.9,10.6,2.8,4.5,rain,2012
2,2012/1/3,0.8,11.7,7.2,2.3,rain,2012
3,2012/1/4,20.3,12.2,5.6,4.7,rain,2012
4,2012/1/5,1.3,8.9,2.8,6.1,rain,2012
...,...,...,...,...,...,...,...
1456,2015/12/27,8.6,4.4,1.7,2.9,fog,2015
1457,2015/12/28,1.5,5.0,1.7,1.3,fog,2015
1458,2015/12/29,0.0,7.2,0.6,2.6,fog,2015
1459,2015/12/30,0.0,5.6,-1.0,3.4,sun,2015


In [6]:
df2012 = df[df['year']==2012]

In [7]:
df2012

Unnamed: 0,date,precipitation,temp_max,temp_min,wind,weather,year
0,2012/1/1,0.0,12.8,5.0,4.7,drizzle,2012
1,2012/1/2,10.9,10.6,2.8,4.5,rain,2012
2,2012/1/3,0.8,11.7,7.2,2.3,rain,2012
3,2012/1/4,20.3,12.2,5.6,4.7,rain,2012
4,2012/1/5,1.3,8.9,2.8,6.1,rain,2012
...,...,...,...,...,...,...,...
361,2012/12/27,4.1,7.8,3.3,3.2,rain,2012
362,2012/12/28,0.0,8.3,3.9,1.7,rain,2012
363,2012/12/29,1.5,5.0,3.3,1.7,rain,2012
364,2012/12/30,0.0,4.4,0.0,1.8,drizzle,2012


In [10]:
# Extract precipitation data from the year 2012

rainfall = df2012['precipitation']

In [11]:
rainfall

0       0.0
1      10.9
2       0.8
3      20.3
4       1.3
       ... 
361     4.1
362     0.0
363     1.5
364     0.0
365     0.0
Name: precipitation, Length: 366, dtype: float64

In [14]:
# Count the 2012 days with more than 5mm and less than 15mm of precipitation
# (both bounds are exclusive)

np.sum((rainfall<15) & (rainfall>5))

55

In [None]:
# Stray, never-executed cell removed: it contained only the bare name `n`,
# which is not assigned anywhere in the visible notebook and would raise
# NameError on "Restart & Run All".

In [8]:
# Construct a mask on all rainy days

rain2012 = df2012['precipitation']>0

In [9]:
rain2012

0      False
1       True
2       True
3       True
4       True
       ...  
361     True
362    False
363     True
364    False
365    False
Name: precipitation, Length: 366, dtype: bool

In [18]:
# Calculate number of days without rain in year 2012.
# The boolean mask is summed directly (each True counts as 1); no list
# wrapper is needed, matching the whole-dataset counts earlier in the notebook.

withoutrain2012 = np.sum(df2012['precipitation'] == 0)

In [19]:
withoutrain2012

189

In [20]:
# Calculate number of days with rain in year 2012.
# The boolean mask is summed directly (each True counts as 1); no list
# wrapper is needed, matching the whole-dataset counts earlier in the notebook.

withrain2012 = np.sum(df2012['precipitation'] != 0)

In [21]:
withrain2012

177

In [23]:
# Calculate number of days with more than 5mm of rain in year 2012.
# The boolean mask is summed directly (each True counts as 1); no list
# wrapper is needed, matching the whole-dataset counts earlier in the notebook.

greater5 = np.sum(df2012['precipitation'] > 5)

In [24]:
greater5

78

# Fancy Indexing

In [25]:
# Fancy indexing on a 1-D array: ten reproducible integers drawn from [0, 100)
rand = np.random.RandomState(seed=42)
x = rand.randint(low=0, high=100, size=(10,))

In [26]:
x

array([51, 92, 14, 71, 60, 20, 82, 86, 74, 74])

In [28]:
x[2], x[3], x[7]

(14, 71, 86)

In [29]:
ind = [2,7,3]

x[ind]

array([14, 86, 71])

In [31]:
# Index array of shape (2, 2); indexing x with it yields a result of that shape
ind = np.array([3, 7, 4, 5]).reshape(2, 2)

In [32]:
x[ind]

array([[71, 86],
       [60, 20]])

# Fancy indexing in multiple dimension

In [33]:
# 3x4 grid holding 0..11 in row-major order
X = np.arange(0, 12).reshape((3, 4))

In [34]:
X

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

In [35]:
# Row and column index arrays used to pick elements out of X
row, col = np.array([0, 1, 2]), np.array([2, 1, 3])

In [36]:
X[row,col]

array([ 2,  5, 11])

In [38]:
# Create a new array that is a submatrix of x
X[row[:,np.newaxis], col]

array([[ 2,  1,  3],
       [ 6,  5,  7],
       [10,  9, 11]])

In [39]:
X

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

# Combined Indexing

In [40]:
# Combine fancy indices and simple indices

X[2, [2, 0, 2]]

array([10,  8, 10])

In [42]:
# Combine fancy indices with slicing

X[1:,[2,0,1]]

array([[ 6,  4,  5],
       [10,  8,  9]])

In [43]:
# Boolean mask to combine with fancy indices
mask = np.array([True, False, True, False])

In [44]:
mask

array([ True, False,  True, False])

In [45]:
X[row[:,np.newaxis], mask]

array([[ 0,  2],
       [ 4,  6],
       [ 8, 10]])

# Modify values with Fancy indexing

In [46]:
# Target array 0..9 and the positions to modify through fancy indexing
x = np.arange(0, 10)
i = np.array((2, 1, 8, 4))

In [47]:
print(x)

[0 1 2 3 4 5 6 7 8 9]


In [48]:
print(i)

[2 1 8 4]


In [49]:
x[i]

array([2, 1, 8, 4])

In [50]:
x[i] =99

In [51]:
x

array([ 0, 99, 99,  3, 99,  5,  6,  7, 99,  9])

In [52]:
# adding values

x[i]+=10

In [56]:
# `x[i] += 10` in the previous cell is shorthand for `x[i] = x[i] + 10`.
# The long form is shown here as a comment only: executing it as well would
# add 10 a second time when the notebook is run top to bottom, and the
# outputs displayed below would no longer match.

In [57]:
x

array([  0, 109, 109,   3, 109,   5,   6,   7, 109,   9])

In [58]:
# Subtracting value

x[i]-=10

In [55]:
x

array([ 0, 99, 99,  3, 99,  5,  6,  7, 99,  9])

In [60]:
x[[0,0]] = [4,6]

In [61]:
x

array([ 6, 99, 99,  3, 99,  5,  6,  7, 99,  9])

In [62]:
x = np.zeros(10)

In [63]:
x

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [64]:
i = [1,2,2,3,3,3,4,4,4,4]

In [65]:
# Repeated indices do not accumulate here: each listed position ends up
# incremented once, regardless of how many times it appears in i.
x[i]+=1

In [66]:
x

array([0., 1., 1., 1., 1., 0., 0., 0., 0., 0.])

In [67]:
# np.add.at applies the addition once per occurrence of each index,
# so repeated indices accumulate (index 4 appears four times -> +4).

np.add.at(x,i,1)

In [68]:
x

array([0., 2., 3., 4., 5., 0., 0., 0., 0., 0.])

# Sorting Arrays

In [69]:
x = np.array([5,4,32,1])

In [70]:
np.sort(x) #NumPy built-in function np.sort()

array([ 1,  4,  5, 32])

In [72]:
x.sort() # ndarray.sort() method: sorts x in place
print(x)

[ 1  4  5 32]


In [73]:
# To get the indices of sorted elements
# (x was already sorted in place above, so this yields 0..n-1 in order)

i = np.argsort(x)

In [75]:
print("Sorted indices:", i)
print("Sorted elements:", x)

Sorted indices: [0 1 2 3]
Sorted elements: [ 1  4  5 32]


In [76]:
# Reproducible 4x6 grid of digits 0-9 for sorting along rows and columns
rand = np.random.RandomState(seed=42)
x = rand.randint(low=0, high=10, size=(4, 6))

In [77]:
x

array([[6, 3, 7, 4, 6, 9],
       [2, 6, 7, 4, 3, 7],
       [7, 2, 5, 4, 1, 7],
       [5, 1, 4, 0, 9, 5]])

In [78]:
# Sort the values within each column of x (axis=0)

np.sort(x, axis=0)

array([[2, 1, 4, 0, 1, 5],
       [5, 2, 5, 4, 3, 7],
       [6, 3, 7, 4, 6, 7],
       [7, 6, 7, 4, 9, 9]])

In [79]:
# Sort each row of x

np.sort(x, axis=1)

array([[3, 4, 6, 6, 7, 9],
       [2, 3, 4, 6, 7, 7],
       [1, 2, 4, 5, 7, 7],
       [0, 1, 4, 5, 5, 9]])

# Partial sorting: Partitioning

In [80]:
# Descending multiples of 11 (99 down to 11), used to find the K smallest values
x = np.arange(99, 0, -11)

In [84]:
x

array([99, 88, 77, 66, 55, 44, 33, 22, 11])

In [82]:
# Give smallest values to the left of the partition, and 
# remaining values to the right in arbitrary order
np.partition(x,3)

array([22, 11, 33, 44, 55, 88, 66, 77, 99])

## Two dimensional array sorting

In [89]:
# Seeded 4x6 grid of digits 0-9 for the two-dimensional partition examples
rand = np.random.RandomState(seed=42)
x = rand.randint(10, size=(4, 6))

In [91]:
x

array([[6, 3, 7, 4, 6, 9],
       [2, 6, 7, 4, 3, 7],
       [7, 2, 5, 4, 1, 7],
       [5, 1, 4, 0, 9, 5]])

In [94]:
np.partition(x,2, axis=1) # partition each row: smallest values move to the left, the rest stay in arbitrary order (not a full sort)

array([[3, 4, 6, 7, 6, 9],
       [2, 3, 4, 7, 6, 7],
       [1, 2, 4, 5, 7, 7],
       [0, 1, 4, 5, 9, 5]])

In [92]:
# Partition along each column (axis=0): the smallest values move to the top
np.partition(x,2, axis=0)

array([[2, 1, 4, 0, 1, 5],
       [5, 2, 5, 4, 3, 7],
       [6, 3, 7, 4, 6, 7],
       [7, 6, 7, 4, 9, 9]])

# Structured Data


In [96]:
# Create a compound dtype by specifying them as a list(square brackets)
# of tuples(parenthesis)

In [97]:
datatype = [('name', 'U10'), ('age', 'i4'), ('weight', 'f4')]

In [98]:
datatype

[('name', 'U10'), ('age', 'i4'), ('weight', 'f4')]

In [99]:
# create values by specifying them as a list(square brackets)
# of tuples(parenthesis)

values = [('Rex', 9, 81.0), ('Fido', 3, 27.0), ('Bob', 45, 85.5)]

In [100]:
F = np.array(values, dtype= datatype)

In [102]:
print(F)

[('Rex',  9, 81. ) ('Fido',  3, 27. ) ('Bob', 45, 85.5)]


In [103]:
F['name']

array(['Rex', 'Fido', 'Bob'], dtype='<U10')