# Computation on NumPy arrays: universal functions

In [1]:
import numpy as np

In [3]:
# Build a small integer array to use in the arithmetic examples that follow.
x = np.arange(4)
x

array([0, 1, 2, 3])

In [7]:
# Arithmetic operators act element-wise; the scalar operand is applied to every element.
print(x + 5)
print(x - 5)
print(x * 5)
# True division always produces a float array, even for integer input.
print(x / 5)

[5 6 7 8]
[-5 -4 -3 -2]
[ 0  5 10 15]
[0.  0.2 0.4 0.6]


In [9]:
print(x**2) # element-wise power
print(x%2) # element-wise modulo (remainder after dividing by 2)

[0 1 4 9]
[0 1 0 1]


In [10]:
# np.add is the ufunc behind the + operator: same result as x + 2.
np.add(x, 2)

array([2, 3, 4, 5])

In [12]:
# np.subtract is the ufunc behind the - operator: same result as x - 2.
np.subtract(x, 2)

array([-2, -1,  0,  1])

In [13]:
# np.negative is the ufunc behind unary minus: same result as -x.
np.negative(x)

array([ 0, -1, -2, -3])

In [14]:
# np.multiply is the ufunc behind the * operator: same result as x * 3.
np.multiply(x, 3)

array([0, 3, 6, 9])

In [16]:
# np.divide is the ufunc behind the / operator (true division), so the result is a float array.
np.divide(x, 0.5)

array([0., 2., 4., 6.])

In [17]:
# Floor division (the // operator): divide element-wise, then round down to an integer.
np.floor_divide(x, 2)

# 0/2=0   -> 0
# 1/2=0.5 -> 0
# 2/2=1   -> 1
# 3/2=1.5 -> 1

array([0, 0, 1, 1], dtype=int32)

In [18]:
# np.mod is the ufunc behind the % operator: element-wise remainder, same result as x % 2.
np.mod(x, 2)

array([0, 1, 0, 1], dtype=int32)

In [19]:
# Python's built-in abs() also works element-wise on a NumPy array.
x = np.array([2, -1, 0, 1, 2])
abs(x)

array([2, 1, 0, 1, 2])

In [21]:
# np.abs and np.absolute are two names for the same ufunc, so both lines print the same result.
print(np.abs(x))
print(np.absolute(x))

[2 1 0 1 2]
[2 1 0 1 2]


# Aggregations: min, max, and others

In [6]:
# Produce an array of 100 random floats drawn uniformly from the half-open interval [0, 1).
# No seed is set, so the values (and every result computed from L) differ between runs.
L = np.random.random(100)
type(L)

numpy.ndarray

In [8]:
# Python's built-in sum: iterates over the array one element at a time.
sum(L)

52.463544585484016

In [7]:
# NumPy's sum: same total as the built-in, up to floating-point rounding in the last digits.
np.sum(L)

52.46354458548402

In [24]:
# Time the built-in sum against np.sum on the 100-element array.
%timeit sum(L)
%timeit np.sum(L) # NumPy's version is faster

15.5 µs ± 599 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)
5.94 µs ± 292 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)


In [12]:
# One million random floats, so the timing difference is easier to see.
big_array = np.random.random(1000000)

In [27]:
# With one million elements, the gap between the built-in sum and np.sum becomes obvious.
%timeit sum(big_array)
%timeit np.sum(big_array)

122 ms ± 11.3 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
1.35 ms ± 25.1 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)


In [9]:
# Python's built-in min and max

print(min(L))
print(max(L))

0.005359131605988332
0.996201701919808


In [10]:
# NumPy's min and max functions: same values as the built-ins

print(np.min(L))
print(np.max(L))

0.005359131605988332
0.996201701919808


In [13]:
# The same aggregates are also available as methods on the array itself.
print(big_array.min(), big_array.max(), big_array.sum())

1.4254474171027098e-06 0.9999970770903721 499556.37221217394


# Multi-dimensional aggregates
Aggregation along rows and columns

In [5]:
# 3x4 array of random floats (unseeded, so the values differ between runs).
m = np.random.random((3, 4))
m

array([[0.04956284, 0.55509981, 0.47327781, 0.42612472],
       [0.59458573, 0.05878395, 0.08771825, 0.09188336],
       [0.6547474 , 0.84888779, 0.53722206, 0.38123456]])

In [8]:
# Sum of each column: axis=0 collapses the rows, leaving one total per column.
# The array method is used instead of Python's built-in sum(), which loops over
# the rows in Python and only implicitly adds them column by column.
m.sum(axis=0)

array([1.29889597, 1.46277155, 1.09821812, 0.89924264])

In [9]:
m.sum() # with no axis argument (the default), every value in the array is summed

4.759128280672045

In [10]:
m.min(axis=0) # the smallest value in each column

array([0.04956284, 0.05878395, 0.08771825, 0.09188336])

In [11]:
m.max(axis=1) # the largest value in each row

array([0.55509981, 0.59458573, 0.84888779])

# Broadcasting

In [15]:
a = np.array([1, 2, 3])
b = np.array([4, 5, 6])

# Arrays of the same shape are added element by element.
print(a + b)
# The scalar 3 is broadcast: it is added to every element of a.
print(a + 3)

[5 7 9]
[4 5 6]


In [16]:
# Compare the shapes of a 2-D array of ones and the 1-D array a before adding them.
m = np.ones((3, 3))
print(m.shape)
print(a.shape)

(3, 3)
(3,)


In [17]:
# a has shape (3,), so it is broadcast across every row of the (3, 3) array m.
m + a

#[[1., 1., 1.]    [[1, 2, 3]
# [1., 1., 1.]  +  [1, 2, 3]
# [1., 1., 1.]]    [1, 2, 3]]

array([[2., 3., 4.],
       [2., 3., 4.],
       [2., 3., 4.]])

In [23]:
a = np.arange(3)
b = np.arange(3)[:, np.newaxis] # add a trailing axis: shape (3,) becomes (3, 1), i.e. a column
c = np.arange(3).reshape(3, 1) # same result as b, built with reshape instead

In [27]:
# Print each array followed by a blank line, then one shape per line, to compare
# the 1-D array a with the two (3, 1) column arrays b and c.
print(*(str(arr) + ' \n' for arr in (a, b, c)), sep='\n')
print(*(arr.shape for arr in (a, b, c)), sep='\n')

[0 1 2] 

[[0]
 [1]
 [2]] 

[[0]
 [1]
 [2]] 

(3,)
(3, 1)
(3, 1)


In [50]:
# Both operands are stretched to a common (3, 3) shape before being added.
a + b

# How does it add up?
# a -> [[0, 1, 2],   the single row is repeated down 3 rows
#       [0, 1, 2],
#       [0, 1, 2]]
#
# b -> [[0, 0, 0],   the single column is repeated across 3 columns
#       [1, 1, 1],
#       [2, 2, 2]]
#
# The two same-sized arrays are then added element by element.

array([[0, 1, 2],
       [1, 2, 3],
       [2, 3, 4]])

In [58]:
# A (3, 2) array and a (3,) array: the shapes are printed to show why the addition in the next cell fails.
m = np.ones((3, 2))
a = np.arange(3)
print(m, a, '\n')
print(m.shape)
print(a.shape)

[[1. 1.]
 [1. 1.]
 [1. 1.]] [0 1 2] 

(3, 2)
(3,)


In [56]:
# Deliberate failure: shapes (3, 2) and (3,) cannot be broadcast together, because
# the trailing dimensions (2 and 3) differ and neither of them is 1.
# The error is caught and printed so that "Run All" continues past this cell.
try:
    m + a
except ValueError as err:
    print(f"ValueError: {err}")

ValueError: operands could not be broadcast together with shapes (3,2) (3,) 

In [61]:
# Adding a trailing axis turns a into a (3, 1) column.
print(a[:, np.newaxis])
a[:, np.newaxis].shape

[[0]
 [1]
 [2]]


(3, 1)

In [60]:
# The (3, 1) column is broadcast across both columns of the (3, 2) array m.
m + a[:, np.newaxis]

array([[1., 1.],
       [2., 2.],
       [3., 3.]])

# Comparisons, masks, and Boolean logic

In [62]:
# Small array used for the comparison examples.
x = np.array([1, 2, 3, 4, 5])
x

array([1, 2, 3, 4, 5])

In [66]:
# The < operator and np.less give the same element-wise Boolean result.
print(x < 3)
print(np.less(x, 3))

[ True  True False False False]
[ True  True False False False]


In [68]:
# The >= operator and np.greater_equal give the same element-wise Boolean result.
print(x >= 3)
print(np.greater_equal(x, 3))

[False False  True  True  True]
[False False  True  True  True]


In [28]:
# Two-dimensional array

rng = np.random.RandomState(0) # random generator seeded with 0, so the same values are drawn on every run
x = rng.randint(10, size=(3, 4)) # 3x4 integers from 0 to 9, drawn from the seeded generator rather than the global np.random.randint
x

array([[5, 0, 3, 3],
       [7, 9, 3, 5],
       [2, 4, 7, 6]])

In [76]:
# The comparison is applied element-wise and yields a Boolean array with the same shape as x.
x < 6

array([[ True,  True,  True,  True],
       [False, False,  True,  True],
       [ True,  True, False, False]])

In [77]:
# Two equivalent ways to count the entries that are less than 6
print(np.count_nonzero(x<6)) # True counts as nonzero
print(np.sum(x<6)) # True is summed as 1, False as 0

8
8


In [78]:
print(np.sum(x<6, axis=1)) # number of values less than 6 in each row
print(np.sum(x<6, axis=0)) # number of values less than 6 in each column

[4 2 2]
[2 2 2 2]


In [33]:
print(np.any(x>8)) # True if at least one element in the array is greater than 8
print(np.any(x<0)) # False: no element is negative

True
False


In [32]:
print(np.any(x>8, axis=0)) # for each column: is any element greater than 8?

[False  True False False]


In [83]:
# Row-wise check: axis=1 scans across the columns, so a row is True only if all of its values are < 8.
np.all(x<8, axis=1)

array([ True, False,  True])

In [84]:
# Column-wise check: axis=0 scans down the rows, so a column is True only if all of its values are < 8.
np.all(x<8, axis=0)

array([ True, False,  True,  True])

# Example: Seattle rainfall

In [35]:
import pandas as pd

In [36]:
# Load the Seattle weather data. The path is relative to the notebook's working directory,
# and engine='python' selects pandas' Python-based CSV parser.
df = pd.read_csv(r"../data/seattle-Weather.csv", encoding='utf8', engine='python')
df

Unnamed: 0,date,precipitation,temp_max,temp_min,wind,weather
0,2012/1/1,0.0,12.8,5.0,4.7,drizzle
1,2012/1/2,10.9,10.6,2.8,4.5,rain
2,2012/1/3,0.8,11.7,7.2,2.3,rain
3,2012/1/4,20.3,12.2,5.6,4.7,rain
4,2012/1/5,1.3,8.9,2.8,6.1,rain
...,...,...,...,...,...,...
1456,2015/12/27,8.6,4.4,1.7,2.9,fog
1457,2015/12/28,1.5,5.0,1.7,1.3,fog
1458,2015/12/29,0.0,7.2,0.6,2.6,fog
1459,2015/12/30,0.0,5.6,-1.0,3.4,sun


In [91]:
# Select the precipitation column as a Series.
df['precipitation']

0        0.0
1       10.9
2        0.8
3       20.3
4        1.3
        ... 
1456     8.6
1457     1.5
1458     0.0
1459     0.0
1460     0.0
Name: precipitation, Length: 1461, dtype: float64

In [92]:
# Number of days without rain (precipitation == 0): each True in the mask counts as 1.
np.sum(df['precipitation'] == 0)

838

In [93]:
# Number of days with rain (precipitation != 0).
np.sum(df['precipitation'] != 0)

623

# Boolean operators
- `&` → `np.bitwise_and`
- `|` → `np.bitwise_or`
- `~` → `np.bitwise_not`

In [95]:
# Number of rainy days with more than 5 mm but less than 15 mm of precipitation.
# & combines the two masks element-wise; each comparison needs its own parentheses
# because & binds more tightly than < and >.
np.sum((df['precipitation'] < 15) & (df['precipitation'] > 5))

173

In [101]:
# Show the array, then the Boolean mask, then the values the mask selects.
print(x, '\n')
print(x < 5, '\n')
print(x[x < 5]) # indexing with the mask returns a 1-D array of the values where it is True

[[5 0 3 3]
 [7 9 3 5]
 [2 4 7 6]] 

[[False  True  True  True]
 [False False  True False]
 [ True  True False False]] 

[0 3 3 3 2 4]


In [102]:
rain = df['precipitation'] > 0 # Boolean mask: True on days with any precipitation
rain

0       False
1        True
2        True
3        True
4        True
        ...  
1456     True
1457     True
1458    False
1459    False
1460    False
Name: precipitation, Length: 1461, dtype: bool

In [103]:
# Apply the mask to the data: keep only the precipitation values of rainy days.
# .loc selects the rows and the column in a single lookup instead of chaining two [] operations.
df.loc[rain, 'precipitation']

1       10.9
2        0.8
3       20.3
4        1.3
5        2.5
        ... 
1452     6.1
1453     2.5
1454     5.8
1456     8.6
1457     1.5
Name: precipitation, Length: 623, dtype: float64

In [41]:
# pd.DatetimeIndex parses the date strings into datetimes (year, month and day);
# its .year attribute extracts the year of each entry.
# Note: this adds a 'year' column to df in place.
df['year'] = pd.DatetimeIndex(df['date']).year
df

Unnamed: 0,date,precipitation,temp_max,temp_min,wind,weather,year
0,2012/1/1,0.0,12.8,5.0,4.7,drizzle,2012
1,2012/1/2,10.9,10.6,2.8,4.5,rain,2012
2,2012/1/3,0.8,11.7,7.2,2.3,rain,2012
3,2012/1/4,20.3,12.2,5.6,4.7,rain,2012
4,2012/1/5,1.3,8.9,2.8,6.1,rain,2012
...,...,...,...,...,...,...,...
1456,2015/12/27,8.6,4.4,1.7,2.9,fog,2015
1457,2015/12/28,1.5,5.0,1.7,1.3,fog,2015
1458,2015/12/29,0.0,7.2,0.6,2.6,fog,2015
1459,2015/12/30,0.0,5.6,-1.0,3.4,sun,2015


In [108]:
# Keep only the rows recorded in 2012 by applying a Boolean mask on the 'year' column.
df2012 = df.loc[df['year'] == 2012]
df2012

Unnamed: 0,date,precipitation,temp_max,temp_min,wind,weather,year
0,2012/1/1,0.0,12.8,5.0,4.7,drizzle,2012
1,2012/1/2,10.9,10.6,2.8,4.5,rain,2012
2,2012/1/3,0.8,11.7,7.2,2.3,rain,2012
3,2012/1/4,20.3,12.2,5.6,4.7,rain,2012
4,2012/1/5,1.3,8.9,2.8,6.1,rain,2012
...,...,...,...,...,...,...,...
361,2012/12/27,4.1,7.8,3.3,3.2,rain,2012
362,2012/12/28,0.0,8.3,3.9,1.7,rain,2012
363,2012/12/29,1.5,5.0,3.3,1.7,rain,2012
364,2012/12/30,0.0,4.4,0.0,1.8,drizzle,2012
