7.1 NumPy (Numerical Python) is the preferred Python array implementation. It offers a high-performance, richly functional n-dimensional array type called ndarray.

7.2 

In [2]:
import numpy as np

In [3]:
numbers = np.array([2, 3, 5, 6, 11])

In [4]:
type(numbers)

numpy.ndarray

In [5]:
numbers

array([ 2,  3,  5,  6, 11])

In [6]:
np.array([[1, 2, 4], [4, 5, 6]])

array([[1, 2, 4],
       [4, 5, 6]])

Function array creates an array from an array or other collection of elements. 

In [7]:
import numpy as np

In [8]:
np.array([x for x in range(2, 21, 2)])

array([ 2,  4,  6,  8, 10, 12, 14, 16, 18, 20])

In [9]:
np.array([x for x in range(2, 50, 3)])

array([ 2,  5,  8, 11, 14, 17, 20, 23, 26, 29, 32, 35, 38, 41, 44, 47])

In [10]:
np.array([[2, 4, 6, 8, 10], [1, 3, 5, 7, 9]])

array([[ 2,  4,  6,  8, 10],
       [ 1,  3,  5,  7,  9]])

7.3 An array object provides attributes that enable you to discover information about its structure and contents.

In [11]:
import numpy as np

In [13]:
integers = np.array([[1, 2, 3], [4, 5, 6]])

In [14]:
integers

array([[1, 2, 3],
       [4, 5, 6]])

In [15]:
floats = np.array([0.0, 0.1, 0.2, 0.3, 0.4])

In [16]:
floats

array([0. , 0.1, 0.2, 0.3, 0.4])

In [17]:
integers.dtype

dtype('int64')

In [18]:
floats.dtype

dtype('float64')

In [19]:
integers.ndim

2

In [20]:
floats.ndim

1

In [21]:
integers.shape

(2, 3)

In [22]:
floats.shape

(5,)

In [23]:
integers.size

6

In [25]:
integers.itemsize

8

In [26]:
floats.size

5

In [27]:
floats.itemsize

8

In [28]:
for row in integers:
    for column in row:
        print(column, end='  ')
    print()

1  2  3  
4  5  6  


In [29]:
for i in integers.flat:
    print(i, end='  ')

1  2  3  4  5  6  

By default, NumPy does not display trailing 0s in the fractional part of a floating-point value

In [30]:
import numpy as np 

In [31]:
a = np.array([[2, 4, 6, 8, 10], [1, 3, 5, 7, 9]])

In [32]:
a.ndim

2

In [33]:
a.shape

(2, 5)

7.4 NumPy provides functions zeros, ones and full for creating arrays containing 0s, 1s, or a specified value, respectively. By default, zeros and ones create arrays containing float64 values.

In [34]:
import numpy as np

In [35]:
np.zeros(5)

array([0., 0., 0., 0., 0.])

In [36]:
np.ones((2, 4), dtype=int)

array([[1, 1, 1, 1],
       [1, 1, 1, 1]])

In [37]:
np.full((3, 5), 13)

array([[13, 13, 13, 13, 13],
       [13, 13, 13, 13, 13],
       [13, 13, 13, 13, 13]])

7.5 NumPy's arange function creates integer ranges similar to using built-in function range.

In [38]:
import numpy as np

In [39]:
np.arange(5)

array([0, 1, 2, 3, 4])

In [40]:
np.arange(5, 10)

array([5, 6, 7, 8, 9])

In [41]:
np.arange(10, 1, -2)

array([10,  8,  6,  4,  2])

In [42]:
np.linspace(0.0, 1.0, num=5)

array([0.  , 0.25, 0.5 , 0.75, 1.  ])

In [43]:
np.arange(1, 21).reshape(4, 5)

array([[ 1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10],
       [11, 12, 13, 14, 15],
       [16, 17, 18, 19, 20]])

In [45]:
np.arange(1, 21)

array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,
       18, 19, 20])

In [46]:
np.arange(1, 100001).reshape(4, 25000)

array([[     1,      2,      3, ...,  24998,  24999,  25000],
       [ 25001,  25002,  25003, ...,  49998,  49999,  50000],
       [ 50001,  50002,  50003, ...,  74998,  74999,  75000],
       [ 75001,  75002,  75003, ...,  99998,  99999, 100000]])

In [47]:
np.arange(1, 100001).reshape(100, 1000)

array([[     1,      2,      3, ...,    998,    999,   1000],
       [  1001,   1002,   1003, ...,   1998,   1999,   2000],
       [  2001,   2002,   2003, ...,   2998,   2999,   3000],
       ...,
       [ 97001,  97002,  97003, ...,  97998,  97999,  98000],
       [ 98001,  98002,  98003, ...,  98998,  98999,  99000],
       [ 99001,  99002,  99003, ...,  99998,  99999, 100000]])

NumPy function linspace returns an ndarray containing evenly spaced floating-point values.

In [48]:
import numpy as np

In [49]:
np.arange(2, 41, 2).reshape(4, 5)

array([[ 2,  4,  6,  8, 10],
       [12, 14, 16, 18, 20],
       [22, 24, 26, 28, 30],
       [32, 34, 36, 38, 40]])

7.6 Most array operations execute significantly faster than corresponding list operations. IPython's %timeit magic command times the average duration of operations.

In [51]:
import random 

In [52]:
%timeit rolls_list = [random.randrange(1, 7) for i in range(0, 6_000_000)]

2.92 s ± 15.2 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [53]:
import numpy as np

In [54]:
%timeit rolls_array = np.random.randint(1, 7, 6_000_000)

67 ms ± 976 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [55]:
%timeit rolls_array = np.random.randint(1, 7, 60_000_000)

754 ms ± 7.23 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [56]:
%timeit rolls_array = np.random.randint(1, 7, 600_000_000)

7.59 s ± 85.9 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [57]:
import numpy as np

In [58]:
%timeit sum([x for x in range(10_000_000)])

348 ms ± 14.1 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [60]:
%timeit np.arange(10_000_000).sum()

5.64 ms ± 32.1 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


NumPy provides many operators which enable you to write simple expressions that perform operations on entire arrays. Below is a demonstration of arithmetic between arrays and numeric values and between arrays of the same shape. 

In [61]:
import numpy as np 

In [62]:
numbers = np.arange(1, 6)

In [63]:
numbers

array([1, 2, 3, 4, 5])

In [64]:
numbers * 2 

array([ 2,  4,  6,  8, 10])

In [65]:
numbers ** 3

array([  1,   8,  27,  64, 125])

In [66]:
numbers

array([1, 2, 3, 4, 5])

In [67]:
numbers += 10

In [68]:
numbers

array([11, 12, 13, 14, 15])

In [70]:
numbers2 = np.linspace(1.1, 5.5, 5)

In [71]:
numbers2

array([1.1, 2.2, 3.3, 4.4, 5.5])

In [72]:
numbers * numbers2

array([12.1, 26.4, 42.9, 61.6, 82.5])

In [73]:
numbers 

array([11, 12, 13, 14, 15])

In [74]:
numbers >= 13

array([False, False,  True,  True,  True])

In [75]:
numbers2

array([1.1, 2.2, 3.3, 4.4, 5.5])

In [76]:
numbers2 < numbers

array([ True,  True,  True,  True,  True])

In [77]:
numbers == numbers2

array([False, False, False, False, False])

In [78]:
numbers == numbers

array([ True,  True,  True,  True,  True])

When one of the operands of an array operator is a scalar, NumPy uses broadcasting to perform the calculation as if the scalar were an array of the same shape as the other operand, but containing the scalar value in all its elements.

In [79]:
import numpy as np

In [80]:
np.arange(1, 6) ** 2

array([ 1,  4,  9, 16, 25])

You can calculate on each dimension of an array

In [81]:
import numpy as np

In [82]:
grades = np.array([[87, 96, 70], [100, 87, 90], 
                   [94, 77, 90], [100, 81, 82]])

In [84]:
grades

array([[ 87,  96,  70],
       [100,  87,  90],
       [ 94,  77,  90],
       [100,  81,  82]])

In [85]:
grades.sum()

1054

In [86]:
grades.min()

70

In [87]:
grades.max()

100

In [88]:
grades.mean()

87.83333333333333

In [89]:
grades.std()

8.792357792739987

In [90]:
grades.var()

77.30555555555556

In [91]:
grades.mean(axis=0)

array([95.25, 85.25, 83.  ])

In [92]:
grades.mean(axis=1)

array([84.33333333, 92.33333333, 87.        , 87.66666667])

NumPy functions var and std calculate variance and standard deviation, respectively.

In [93]:
import numpy as np

In [94]:
grades = np.random.randint(60, 101, 12).reshape(3, 4)

In [95]:
grades

array([[ 63,  98, 100,  73],
       [ 63,  89,  67,  86],
       [ 70,  96,  98,  92]])

In [96]:
grades.mean()

82.91666666666667

In [97]:
grades.mean(axis=0)

array([65.33333333, 94.33333333, 88.33333333, 83.66666667])

In [98]:
grades.mean(axis=1)

array([83.5 , 76.25, 89.  ])

7.9 NumPy offers dozens of standalone universal functions (ufuncs) that perform various element-wise operations, such as the sqrt universal function that calculates the square root of its values.

In [99]:
import numpy as np

In [108]:
numbers = np.array([1, 4, 9, 16, 25, 36])

In [109]:
np.sqrt(numbers)

array([1., 2., 3., 4., 5., 6.])

In [102]:
numbers2 = np.arange(1, 7) * 10

In [103]:
numbers2

array([10, 20, 30, 40, 50, 60])

In [106]:
numbers

array([ 1,  4,  9, 25, 36])

In [110]:
np.add(numbers, numbers2)

array([11, 24, 39, 56, 75, 96])

In [111]:
np.multiply(numbers2, 5)

array([ 50, 100, 150, 200, 250, 300])

In [113]:
numbers3 = numbers2.reshape(2, 3)

In [114]:
numbers3

array([[10, 20, 30],
       [40, 50, 60]])

In [115]:
numbers4 = np.array([2, 4, 6])

In [116]:
np.multiply(numbers3, numbers4)

array([[ 20,  80, 180],
       [ 80, 200, 360]])

7.10 One-dimensional arrays can be indexed and sliced using the same syntax and techniques as lists and tuples.

In [117]:
import numpy as np

In [118]:
grades = np.array([[87, 96, 70], [100, 87, 90], 
                   [94, 77, 90], [100, 81, 82]])

In [119]:
grades

array([[ 87,  96,  70],
       [100,  87,  90],
       [ 94,  77,  90],
       [100,  81,  82]])

In [120]:
grades[0, 1]

96

In [121]:
grades[0, 0]

87

In [122]:
grades[1]

array([100,  87,  90])

In [123]:
grades[0:2]

array([[ 87,  96,  70],
       [100,  87,  90]])

In [124]:
grades[[1, 3]]

array([[100,  87,  90],
       [100,  81,  82]])

In [125]:
grades[:, 0]

array([ 87, 100,  94, 100])

In [126]:
grades[:, 1:3]

array([[96, 70],
       [87, 90],
       [77, 90],
       [81, 82]])

In [127]:
grades[:, 1:2]

array([[96],
       [87],
       [77],
       [81]])

In [128]:
grades[:, 1:1]

array([], shape=(4, 0), dtype=int64)

In [129]:
grades[:, 2:3]

array([[70],
       [90],
       [90],
       [82]])

In [130]:
grades[:, 1:4]

array([[96, 70],
       [87, 90],
       [77, 90],
       [81, 82]])

In [131]:
grades[:, [0, 2]]

array([[ 87,  70],
       [100,  90],
       [ 94,  90],
       [100,  82]])

In [132]:
import numpy as np

In [133]:
a = np.arange(1, 16).reshape(3, 5)

In [134]:
a

array([[ 1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10],
       [11, 12, 13, 14, 15]])

In [135]:
a[1]

array([ 6,  7,  8,  9, 10])

In [136]:
a[[0, 2]]

array([[ 1,  2,  3,  4,  5],
       [11, 12, 13, 14, 15]])

In [137]:
a[:, 1:4]

array([[ 2,  3,  4],
       [ 7,  8,  9],
       [12, 13, 14]])

7.11 Views are also known as shallow copies. The array method view returns a new array object with a view of the original object's data.

In [138]:
import numpy as np

In [139]:
numbers = np.arange(1, 6)

In [140]:
numbers

array([1, 2, 3, 4, 5])

In [141]:
numbers2 = numbers.view()

In [142]:
numbers2

array([1, 2, 3, 4, 5])

In [143]:
id(numbers)

140654408696528

In [144]:
id(numbers2)

140654408697584

In [145]:
numbers[1] *= 10

In [146]:
numbers2

array([ 1, 20,  3,  4,  5])

In [147]:
numbers

array([ 1, 20,  3,  4,  5])

In [148]:
numbers2[1] /= 10

In [149]:
numbers

array([1, 2, 3, 4, 5])

In [150]:
numbers2

array([1, 2, 3, 4, 5])

In [151]:
numbers2 = numbers[0:3]

In [152]:
numbers2

array([1, 2, 3])

In [153]:
id(numbers)

140654408696528

In [154]:
id(numbers2)

140654408696048

In [155]:
numbers2[3]

IndexError: index 3 is out of bounds for axis 0 with size 3

In [156]:
numbers[1] *= 20

In [157]:
numbers

array([ 1, 40,  3,  4,  5])

In [158]:
numbers2

array([ 1, 40,  3])

A view is also known as a shallow copy

7.12 Deep copies are independent copies of the original data - important when sharing mutable values. This is especially important with multi-core programming, where separate parts of your program could attempt to modify your data at the same time, possibly corrupting it. The array method copy returns a new array object with a deep copy of the original array object's data.

In [159]:
import numpy as np

In [160]:
numbers = np.arange(1, 6)

In [161]:
numbers

array([1, 2, 3, 4, 5])

In [162]:
numbers2 = numbers.copy()

In [163]:
numbers2

array([1, 2, 3, 4, 5])

In [164]:
numbers[1] *= 10

In [165]:
numbers

array([ 1, 20,  3,  4,  5])

In [166]:
numbers2

array([1, 2, 3, 4, 5])

The array method copy produces a deep copy of the original array

Module copy provides function deepcopy, which returns a deep copy of its argument.

In [167]:
import numpy as np

In [168]:
grades = np.array([[87, 96, 79], [100, 87, 90]])

In [169]:
grades

array([[ 87,  96,  79],
       [100,  87,  90]])

In [170]:
grades.reshape(1, 6)

array([[ 87,  96,  79, 100,  87,  90]])

In [171]:
grades

array([[ 87,  96,  79],
       [100,  87,  90]])

In [172]:
grades.resize(1, 6)

In [173]:
grades

array([[ 87,  96,  79, 100,  87,  90]])

In [174]:
grades = np.array([[87, 96, 79], [100, 87, 90]])

In [175]:
grades

array([[ 87,  96,  79],
       [100,  87,  90]])

In [176]:
flattened = grades.flatten()

In [177]:
flattened

array([ 87,  96,  79, 100,  87,  90])

In [178]:
grades

array([[ 87,  96,  79],
       [100,  87,  90]])

In [179]:
flattened[0] = 100

In [180]:
flattened

array([100,  96,  79, 100,  87,  90])

In [181]:
grades

array([[ 87,  96,  79],
       [100,  87,  90]])

In [182]:
raveled = grades.ravel()

In [183]:
raveled

array([ 87,  96,  79, 100,  87,  90])

In [184]:
grades

array([[ 87,  96,  79],
       [100,  87,  90]])

In [185]:
raveled[0] = 100

In [186]:
raveled

array([100,  96,  79, 100,  87,  90])

In [187]:
grades

array([[100,  96,  79],
       [100,  87,  90]])

In [188]:
grades.T

array([[100, 100],
       [ 96,  87],
       [ 79,  90]])

In [189]:
grades

array([[100,  96,  79],
       [100,  87,  90]])

In [190]:
grades2 = np.array([[94, 77, 90], [100, 81, 82]])

In [191]:
np.hstack((grades, grades2))

array([[100,  96,  79,  94,  77,  90],
       [100,  87,  90, 100,  81,  82]])

In [192]:
np.vstack((grades, grades2))

array([[100,  96,  79],
       [100,  87,  90],
       [ 94,  77,  90],
       [100,  81,  82]])

In [193]:
import numpy as np 

In [194]:
a = np.arange(1, 7).reshape(2, 3)

In [195]:
a = np.hstack((a, a))

In [196]:
a = np.vstack((a, a))

In [197]:
a

array([[1, 2, 3, 1, 2, 3],
       [4, 5, 6, 4, 5, 6],
       [1, 2, 3, 1, 2, 3],
       [4, 5, 6, 4, 5, 6]])

pandas Series is an enhanced one-dimensional array. Arrays only use zero-based integer indices, while Series support custom indexing, including even non-integer indices like strings.

In [198]:
import pandas as pd

In [200]:
grades = pd.Series([87, 100, 94])

In [201]:
grades

0     87
1    100
2     94
dtype: int64

In [203]:
pd.Series(98.6, range(3))

0    98.6
1    98.6
2    98.6
dtype: float64

In [204]:
pd.Series(98.6, range(10))

0    98.6
1    98.6
2    98.6
3    98.6
4    98.6
5    98.6
6    98.6
7    98.6
8    98.6
9    98.6
dtype: float64

In [205]:
grades[0]

87

In [206]:
grades.count()

3

In [207]:
grades.mean()

93.66666666666667

In [208]:
grades.min()

87

In [209]:
grades.max()

100

In [210]:
grades.std()

6.506407098647712

In [211]:
grades.describe()

count      3.000000
mean      93.666667
std        6.506407
min       87.000000
25%       90.500000
50%       94.000000
75%       97.000000
max      100.000000
dtype: float64

In [212]:
grades = pd.Series([87, 100, 94], index=['Wally', 'Eva', 'Sam'])

In [213]:
grades

Wally     87
Eva      100
Sam       94
dtype: int64

In [214]:
grades = pd.Series({'Wally' : 87, 'Eva' : 100, 'Sam' : 94})

In [215]:
grades

Wally     87
Eva      100
Sam       94
dtype: int64

In [216]:
grades['Eva']

100

In [217]:
grades.Wally

87

In [218]:
grades.dtype

dtype('int64')

In [219]:
grades.values

array([ 87, 100,  94])

In [220]:
hardware = pd.Series(['Hammer', 'Saw', 'Wrench'])

In [221]:
hardware

0    Hammer
1       Saw
2    Wrench
dtype: object

In [222]:
hardware.str.contains('a')

0     True
1     True
2    False
dtype: bool

In [223]:
hardware.str.upper()

0    HAMMER
1       SAW
2    WRENCH
dtype: object

In [224]:
import numpy as np

In [225]:
import pandas as pd

In [232]:
temps = np.random.randint(60, 101, 6)

In [233]:
temperatures = pd.Series(temps)

In [234]:
temperatures

0    74
1    75
2    90
3    87
4    70
5    71
dtype: int64

In [236]:
temperatures.min()

70

In [237]:
temperatures.max()

90

In [238]:
temperatures.mean()

77.83333333333333

In [239]:
temperatures.describe()

count     6.000000
mean     77.833333
std       8.518607
min      70.000000
25%      71.750000
50%      74.500000
75%      84.000000
max      90.000000
dtype: float64

7.14.2 DataFrames

In [240]:
import pandas as pd

In [241]:
temps = {'Mon': [68, 89], 'Tue': [71, 93], 'Wed': [66, 82], 'Thu': [75, 97], 'Fri': [62, 79]}

In [242]:
temperatures = pd.DataFrame(temps, index=['Low', 'High'])

In [243]:
temperatures

Unnamed: 0,Mon,Tue,Wed,Thu,Fri
Low,68,71,66,75,62
High,89,93,82,97,79


In [244]:
temperatures.loc[:, 'Mon':'Wed']

Unnamed: 0,Mon,Tue,Wed
Low,68,71,66
High,89,93,82


In [246]:
temperatures.loc['Low']

Mon    68
Tue    71
Wed    66
Thu    75
Fri    62
Name: Low, dtype: int64

In [247]:
pd.set_option("display.precision", 2)

In [249]:
temperatures.mean()

Mon    78.5
Tue    82.0
Wed    74.0
Thu    86.0
Fri    70.5
dtype: float64

In [250]:
temperatures.mean(axis=1)

Low     68.4
High    88.0
dtype: float64