# Python Day 2

In [12]:
# List comprehension: build the squares of 0..4 in a single expression
l1 = [i**2 for i in range(5)]
l2 = [5, 9.4, 4.4, 8.7, 9.7]

print(l1)
print(l2)
print(l1 + l2)  # `+` on two lists concatenates them
print(3*l1)  # `*` on a list repeats its contents three times

In [20]:
l3 = l2*5

In [26]:
# calculate the mean: accumulate a running total, then divide by the element count
total = 0
for value in l3:
    total += value

mean = total / len(l3)
print(f'{mean:.5e}')

7.44000e+00


NumPy = Numerical Python

In [29]:
import numpy as np

The most important object in NumPy is the array (`ndarray`). It behaves much like a list, but adds features that make it faster and more practical for numerical work.

In [33]:
a1 = np.array(l1)
a2 = np.array(l2)

In [35]:
print(a1)
print(a2)

[ 0  1  4  9 16]
[5.  9.4 4.4 8.7 9.7]


In [48]:
print(l1 + l2)
print(a1 + a2) # adding arrays does the addition element by element instead of concatenating

[0, 1, 4, 9, 16, 5, 9.4, 4.4, 8.7, 9.7]
[ 5.  10.4  8.4 17.7 25.7]


In [50]:
a1 + np.array([1, 2, 3])  # element-wise addition needs matching (or broadcast-compatible) shapes; lengths 5 and 3 raise ValueError

ValueError: operands could not be broadcast together with shapes (5,) (3,) 

In [52]:
# element-wise sum of two plain lists: pair the items up and add each pair
lnew = []

for left, right in zip(l1, l2):
    lnew.append(left + right)

print(lnew)

[5, 10.4, 8.4, 17.7, 25.7]


In [54]:
a1 + a2

array([ 5. , 10.4,  8.4, 17.7, 25.7])

In [56]:
# this is vectorisation: the loop is implicit and you can do all operations element-wise
np.sin(a1)

array([ 0.        ,  0.84147098, -0.7568025 ,  0.41211849, -0.28790332])

In [60]:
matrix = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]

In [62]:
np_matrix = np.array(matrix)

In [66]:
np_matrix

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [68]:
np_matrix.shape

(3, 3)

In [70]:
np.cos(np_matrix) + np_matrix**2 - 6

array([[-4.45969769, -2.41614684,  2.0100075 ],
       [ 9.34635638, 19.28366219, 30.96017029],
       [43.75390225, 57.85449997, 74.08886974]])

We can query an array for some of its properties:

In [77]:
np_matrix.dtype

dtype('float64')

In [77]:
np.sin(matrix).dtype

dtype('float64')

In [79]:
np_matrix.shape

(3, 3)

In [85]:
np_matrix.ndim

2

In [89]:
print(np_matrix)
print(np_matrix.T)

[[1 2 3]
 [4 5 6]
 [7 8 9]]
[[1 4 7]
 [2 5 8]
 [3 6 9]]


In [91]:
print(np_matrix.reshape(1, 9))

[[1 2 3 4 5 6 7 8 9]]


In [93]:
mat1 = np.array([[1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
                 [11, 12, 13, 14, 15, 16, 17, 18, 19, 20]])

In [95]:
mat1

array([[ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10],
       [11, 12, 13, 14, 15, 16, 17, 18, 19, 20]])

In [101]:
print(mat1.reshape(4, 5))
print(mat1.reshape(5, 4))

[[ 1  2  3  4  5]
 [ 6  7  8  9 10]
 [11 12 13 14 15]
 [16 17 18 19 20]]
[[ 1  2  3  4]
 [ 5  6  7  8]
 [ 9 10 11 12]
 [13 14 15 16]
 [17 18 19 20]]


# Three properties of Numpy arrays
## Vectorization

In [108]:
a = np.random.normal(0, 1, size = 10000)
print(a)

[ 0.69134007 -1.05370678 -0.18749959 ...  2.13920277  0.26902399
 -0.07458608]


In [None]:
# we want to get the list of 1/x

In [110]:
# method 1 : custom function
def explicit_loop(array):
    """Return a NumPy array holding 1/value for every value in `array`.

    The reciprocals are computed one at a time in a plain Python loop and
    collected in a list, which is converted to an array at the end.
    """
    reciprocals = []
    for value in array:
        reciprocals.append(1 / value)
    return np.array(reciprocals)

In [114]:
%timeit explicit_loop(a)

3.39 ms ± 117 μs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [120]:
# method 2 : list comprehension
# NOTE(review): an assignment made inside %timeit is not expected to persist,
# so the `a2` defined earlier in the notebook should be unchanged — TODO confirm
%timeit a2 = np.array([1/x for x in a])

2.31 ms ± 493 μs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [122]:
# method 3 : numpy array vectorisation
%timeit a3 = 1/a

27 μs ± 2.47 μs per loop (mean ± std. dev. of 7 runs, 10,000 loops each)


In [130]:
# Use NumPy's own reduction: the built-in max() would walk the array
# element by element in Python, which is what vectorisation avoids.
np.max(1/a)

566.849659265584

In [132]:
np.argmax(1/a)

2547

## Broadcasting

In [155]:
# np.random.normal already returns an ndarray of the requested shape,
# so wrapping it in np.array() would only make a redundant copy.
vector3D = np.random.normal(5, 2, size=(3, 100000))
vector3D

array([[7.14312319, 5.89656329, 6.70717189, ..., 6.29287278, 3.65116048,
        5.6672553 ],
       [6.05578042, 2.24316258, 2.32360283, ..., 5.43026786, 3.64868116,
        5.43547817],
       [9.17693622, 6.39195847, 4.21422374, ..., 8.24317679, 2.38855569,
        4.23479477]])

In [139]:
vector3D*0.5

array([[0.85549142, 3.58644956, 2.04928878, ..., 2.51140058, 1.70387665,
        4.34711028],
       [2.18310922, 2.95583294, 3.24973766, ..., 1.76146835, 2.58887221,
        2.45390601],
       [1.96061471, 3.01520252, 4.12182507, ..., 1.25988637, 1.52928198,
        2.49018322]])

In [180]:
vector3D.shape

(3, 100000)

In [186]:
np.array([1.5, 7.4, -0.6]).shape

(3,)

In [188]:
# the (3, 1) column is broadcast along the second axis: one offset per row of vector3D
vector3D + np.array([[1.5], [7.4], [-0.6]])

array([[ 8.64312319,  7.39656329,  8.20717189, ...,  7.79287278,
         5.15116048,  7.1672553 ],
       [13.45578042,  9.64316258,  9.72360283, ..., 12.83026786,
        11.04868116, 12.83547817],
       [ 8.57693622,  5.79195847,  3.61422374, ...,  7.64317679,
         1.78855569,  3.63479477]])

In [190]:
vector3D + np.array([6, 8])

ValueError: operands could not be broadcast together with shapes (3,100000) (2,) 

In [194]:
vector3D + np.array([[1], [2], [3]])

array([[ 8.14312319,  6.89656329,  7.70717189, ...,  7.29287278,
         4.65116048,  6.6672553 ],
       [ 8.05578042,  4.24316258,  4.32360283, ...,  7.43026786,
         5.64868116,  7.43547817],
       [12.17693622,  9.39195847,  7.21422374, ..., 11.24317679,
         5.38855569,  7.23479477]])

## Fancy indexing

In [209]:
a = np.random.normal(4, 3, size = (100, 3))

In [211]:
a

array([[ 4.99085034e+00,  5.36874099e+00,  5.98392208e+00],
       [ 8.50658502e-01,  3.75257209e+00,  3.25602626e+00],
       [ 3.94846660e+00, -1.13444091e+00, -3.94878842e-01],
       [ 2.60829364e+00,  4.18383443e+00,  3.74578486e+00],
       [ 5.40402714e+00, -4.01026915e+00,  7.43046454e+00],
       [ 7.65472284e-01,  6.51283344e+00,  2.28595669e+00],
       [ 1.13416138e+01,  4.72665331e+00,  7.76670748e+00],
       [ 8.90437999e+00,  4.25408150e+00,  2.77724649e+00],
       [ 8.19137975e+00, -9.62780231e-01,  3.09805888e+00],
       [-1.80783239e+00,  4.98880557e+00,  4.92040839e+00],
       [ 1.72156651e-01,  8.62680670e-01,  2.67354688e+00],
       [ 2.96217787e+00,  3.10454961e+00,  3.81741722e+00],
       [ 6.86161643e+00,  2.40102510e+00,  3.89191375e+00],
       [ 4.30210545e+00,  4.29791255e+00,  5.94436363e+00],
       [ 1.90335579e+00,  2.96108617e+00,  6.29868376e+00],
       [ 6.62729927e+00,  6.00181875e+00,  4.68989613e+00],
       [ 3.14035570e-01, -7.32885771e-01

In [213]:
a[1]  # access a row

array([0.8506585 , 3.75257209, 3.25602626])

In [223]:
a[5,1]

6.512833440548625

In [225]:
l = [1, 2, 3, 8, 5, 8, 9]

In [227]:
l[4:7]

[5, 8, 9]

In [229]:
b = np.random.normal(0, 1, size = 10)

In [231]:
b

array([-0.27858357,  1.2880122 , -0.15590617, -0.24955726,  0.28358629,
       -2.02089566, -2.11758006, -0.54873616, -0.12613032,  0.42531999])

In [233]:
b[[1, 2, 3, 7]]

array([ 1.2880122 , -0.15590617, -0.24955726, -0.54873616])

In [237]:
b[[7, 3, 1, 2]]

array([-0.54873616, -0.24955726,  1.2880122 , -0.15590617])

### Masking

In [240]:
b

array([-0.27858357,  1.2880122 , -0.15590617, -0.24955726,  0.28358629,
       -2.02089566, -2.11758006, -0.54873616, -0.12613032,  0.42531999])

In [244]:
mask = b < 0

In [246]:
b[mask]

array([-0.27858357, -0.15590617, -0.24955726, -2.02089566, -2.11758006,
       -0.54873616, -0.12613032])

In [248]:
b[b < 0]

array([-0.27858357, -0.15590617, -0.24955726, -2.02089566, -2.11758006,
       -0.54873616, -0.12613032])

# Exercise

In [281]:
import pandas as pd
import numpy as np

In [275]:
# skiprows=14 skips the first 14 lines of the file (presumably a metadata header — TODO confirm)
pulse = pd.read_csv('lecture-python/data/pulse.csv', skiprows=14)
# Alternative kept for reference: load the same file as a plain NumPy array instead of a DataFrame
# pulse = np.loadtxt('lecture-python/data/pulse.csv', skiprows = 15, delimiter = ',')

In [277]:
pulse.head()

Unnamed: 0,TIME,CH1
0,-4e-08,-0.43775
1,-3.96e-08,-0.437547
2,-3.92e-08,-0.437109
3,-3.88e-08,-0.437016
4,-3.84e-08,-0.436938


In [342]:
# Mean of the 'TIME' column: running total divided by the number of samples

time_total = 0
for sample in pulse['TIME']:
    time_total += sample

time_mean = time_total / len(pulse['TIME'])

# Mean of the 'CH1' column, computed the same way

ch1_total = 0
for sample in pulse['CH1']:
    ch1_total += sample

ch1_mean = ch1_total / len(pulse['CH1'])

6e-08
-0.2856137573353294


TIME    6.000000e-08
CH1    -2.856138e-01
dtype: float64

In [364]:
# Population standard deviation of 'TIME': square root of the mean squared deviation from time_mean

n_time = len(pulse['TIME'])
time_diff = 0
for sample in pulse['TIME']:
    time_diff += (sample - time_mean)**2

time_std = np.sqrt(time_diff / n_time)

5.785038173311103e-08


TIME    5.785038e-08
CH1     2.068782e-01
dtype: float64

In [368]:
# Population standard deviation of 'CH1': square root of the mean squared deviation from ch1_mean

n_ch1 = len(pulse['CH1'])
ch1_diff = 0
for sample in pulse['CH1']:
    ch1_diff += (sample - ch1_mean)**2

ch1_std = np.sqrt(ch1_diff / n_ch1)

0.20687816110734641


TIME    5.785038e-08
CH1     2.068782e-01
dtype: float64

In [386]:
# Calculate standard deviations with vectorisation:
# deviations from the mean are computed for the whole column at once

time_dev = pulse['TIME'] - np.mean(pulse['TIME'])
ch1_dev = pulse['CH1'] - np.mean(pulse['CH1'])

time_std = np.sqrt(np.sum(time_dev**2) / len(pulse['TIME']))
ch1_std = np.sqrt(np.sum(ch1_dev**2) / len(pulse['CH1']))

print(time_std, ch1_std)

5.785038173311103e-08 0.20687816110734647


TIME    5.785038e-08
CH1     2.068782e-01
dtype: float64

In [388]:
# Calculate column-wise standard deviations with np.std (default ddof=0, i.e. the population form)
np.std(pulse, axis = 0)

TIME    5.785038e-08
CH1     2.068782e-01
dtype: float64

In [409]:
# Mean of 'TIME' with an index-based loop.
# The column is fetched once instead of on every iteration, and .iloc makes the
# lookup positional: plain pulse['TIME'][i] is label-based and only works when
# the index happens to be exactly 0..n-1.
time_column = pulse['TIME']
mean = 0
for i in range(len(time_column)):
    mean = mean + time_column.iloc[i]
mean = mean / len(time_column)
print(mean)

6e-08


In [417]:
# Mean of 'TIME' built incrementally: each sample contributes its 1/N share
n_samples = len(pulse['TIME'])
mean = 0
for t in pulse['TIME']:
    mean += t / n_samples
print(mean, np.mean(pulse['TIME']))

6.000000000000001e-08 6e-08


In [419]:
sum(pulse['TIME']) / len(pulse['TIME'])

6e-08

In [421]:
# Population standard deviation (divide by N); relies on the `mean` left in scope by an earlier cell
std = 0
for t in pulse['TIME']:
    std += (t - mean)**2 / len(pulse['TIME'])
std = np.sqrt(std)

# The two printed values differ: pandas' Series.std() defaults to ddof=1 (divide by N - 1)
print(std, pulse['TIME'].std())

5.785038173311104e-08 5.7908203218542355e-08


In [453]:
def std_dev(x):
    """Return the population standard deviation (divide by N) of `x`.

    Parameters
    ----------
    x : sized iterable of numbers (list, NumPy array, pandas Series, ...)

    Returns
    -------
    The square root of the mean squared deviation from the mean.

    Raises
    ------
    ValueError
        If `x` is empty, since the mean is undefined.
    """
    n = len(x)
    if n == 0:
        raise ValueError("std_dev requires at least one value")

    mean = sum(x) / n

    # The accumulator must exist before the loop; without this initialisation
    # the first `std +=` raises UnboundLocalError.
    std = 0.0
    for i in x:
        std += (i - mean)**2 / n

    return np.sqrt(std)