In [58]:
import numpy as np
import sys

## A basic exploration of numpy, following the FreeCodeCamp tutuorial for Data Analysis
[issue]https://www.freecodecamp.org/learn/data-analysis-with-python

How many bits to store a number up to 120?
Critically important for processing data, less so for storage (but still has an impact).

In [3]:
2**7

128

Numpy will allow us to explicitly select number of bits an integer requires. Python is very high level & object orientated, so even a simple integer will take up 20 bytes (20*8bits)

In [5]:
x=5

In [8]:
np.int8

numpy.int8

We can select 8,16,32,64 with numpy.
List, arrays etc. in python are not built for efficient computing. Python won't place array numbers for instances in none-contiguous positions. Additionally, cpus may not be strong at array computations.
With numpy taking control, numbers will be now int8 long, stored together in the memory, and very fast computationally.
The larger the dataset, the more important numpy becomes.

In [11]:
A = np.array([
    ['a', 'b', 'c'],
    ['d', 'e', 'f'],
    ['g', 'h', 'i']
])

print(A[:,:2])

[['a' 'b']
 ['d' 'e']
 ['g' 'h']]


In [22]:
print(A[(1,1)])

e


In [24]:
print(A[[1,0]])

[['d' 'e' 'f']
 ['a' 'b' 'c']]


In [25]:
A.dtype

dtype('<U1')

Numpy is usually used for numeric processing. We can create multi-dimensional arrays efficiently.  
These martrices will be processed -very- efficiently compared to python

In [26]:
B = np.array([
    [
        [12,11,10],
        [9,8,7]
    ],
    [
        [6,5,4],
        [3,2,1]
    ]
])

In [27]:
B

array([[[12, 11, 10],
        [ 9,  8,  7]],

       [[ 6,  5,  4],
        [ 3,  2,  1]]])

In [28]:
B.shape

(2, 2, 3)

In [30]:
B.ndim

3

In [31]:
B.size

12

Dimensions must match. If incorrect, objects will be made instead.  
### Splicing Matricies

In [36]:
A[1]

array(['d', 'e', 'f'], dtype='<U1')

Displays first row of a 2D Array.

In [37]:
A[1][0]

'd'

Explicitly select first element of 2nd row.

In [39]:
A[1,0]

'd'

Multidemsional selection /w numpy.  
Sytax selects dim1, dim2, dim3, dim4...

In [40]:
B[0,0,0]

12

Allows slicing

In [43]:
B[0:1]

array([[[12, 11, 10],
        [ 9,  8,  7]]])

In [46]:
B[:,0:1]

array([[[12, 11, 10]],

       [[ 6,  5,  4]]])

In [52]:
B[:,:,2:]

array([[[10],
        [ 7]],

       [[ 4],
        [ 1]]])

Last example selects from each matrix, from each array, the 3rd digit and beyond.  
Additionally we can modify arrays utilizing a similar syntax.

In [61]:
A[2] = np.array(['j','k','l'])

In [60]:
A

array([['a', 'b', 'c'],
       ['d', 'e', 'f'],
       ['j', 'k', 'l']], dtype='<U1')

In [62]:
A[2] = 'z'

In [63]:
A

array([['a', 'b', 'c'],
       ['d', 'e', 'f'],
       ['z', 'z', 'z']], dtype='<U1')

In [64]:
B.sum()

78

In [65]:
B.mean()

6.5

In [66]:
B.std()

3.452052529534663

In [67]:
B.var()

11.916666666666666

In [70]:
B

array([[[12, 11, 10],
        [ 9,  8,  7]],

       [[ 6,  5,  4],
        [ 3,  2,  1]]])

In [72]:
B.sum(axis=0)

array([[18, 16, 14],
       [12, 10,  8]])

With this 3d array, adds first element in each 2d array together.  
i.e. 12+6=18, 9+3=12

In [74]:
B.sum(axis=1)

array([[21, 19, 17],
       [ 9,  7,  5]])

In [75]:
B.sum(axis=2)

array([[33, 24],
       [15,  6]])

All of the stats commands also accept axes as inputs

### Numpy Operations

In [4]:
C = np.arange(4)

In [5]:
C

array([0, 1, 2, 3])

In [6]:
C+10

array([10, 11, 12, 13])

In [7]:
C*10

array([ 0, 10, 20, 30])

In [9]:
C+=100

In [10]:
C

array([200, 201, 202, 203])

In [11]:
L = [0,1,2,3]

In [14]:
[i * 10 for i in L]

[0, 10, 20, 30]

In [16]:
C=np.arange(4)

In [17]:
C

array([0, 1, 2, 3])

In [18]:
D=np.array([10,10,10,10])

In [19]:
C+D

array([10, 11, 12, 13])

In [20]:
C*D

array([ 0, 10, 20, 30])

Numpy is an immutable first library, i.e. C|D will not update unless set to a new value.

### Boolean Arrays

In [21]:
E = np.arange(4)

In [23]:
E

array([0, 1, 2, 3])

In [24]:
E[[0,-1]]

array([0, 3])

In [25]:
E[[1,-2]]

array([1, 2])

In [26]:
E[[True,False,False,True]]

array([0, 3])

These arrays are the result of broadcasting boolean operations

In [27]:
E >= 2

array([False, False,  True,  True])

In [28]:
E[E>=2]

array([2, 3])

Above: Use this boolean array to construct a list of values in an array that match the statement

In [30]:
E.mean()

1.5

In [31]:
E[E>E.mean()]

array([2, 3])

In [32]:
E[~(E>E.mean())]

array([0, 1])

~: NOT

In [34]:
E[(E ==0) | (E==1)]

array([0, 1])

|: OR

In [38]:
E[(E <= 2)&(E%2==0)]

array([0, 2])

&: AND  
%: REMAINDER of x/y

In [40]:
F = np.random.randint(100,size=(3,3))

In [41]:
F

array([[36, 48, 86],
       [17, 27,  7],
       [35, 82, 53]])

In [42]:
F[np.array([
    [True,False,True],
    [False,True,False],
    [True,False,True]
])]

array([36, 86, 27, 35, 53])

In [43]:
F > 30

array([[ True,  True,  True],
       [False, False, False],
       [ True,  True,  True]])

In [44]:
F[F>30]

array([36, 48, 86, 35, 82, 53])

### Linear Algebra

In [46]:
G = np.array([
    [1,2,3],
    [4,5,6],
    [7,8,9]
])

In [47]:
H=np.array([
    [6,5],
    [4,3],
    [2,1]
])

In [48]:
G.dot(H)

array([[20, 14],
       [56, 41],
       [92, 68]])

In [50]:
G @ H

array([[20, 14],
       [56, 41],
       [92, 68]])

@: Linear Dot Product

In [52]:
H.T

array([[6, 4, 2],
       [5, 3, 1]])

.T: View of Transposed Array

In [53]:
G

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [54]:
H.T @ G

array([[36, 48, 60],
       [24, 33, 42]])

### Size of Objects in Memory
Or how I learned to stop worrying and love the numpy

In [59]:
# An Integer in Python is > 20bytes
sys.getsizeof(1)

28

In [60]:
# Longs are even larger
sys.getsizeof(10**100)

72

In [61]:
# Numpy size is much smaller
np.dtype(int).itemsize

4

In [62]:
np.dtype(float).itemsize

8

In [63]:
# Lists are Even LARGER
sys.getsizeof([1])

64

In [64]:
# When compared to an array of one element in numpy
np.array([1]).nbytes

4

In [70]:
# And performance is deeply impacted
J = list(range(100000))

In [71]:
K = np.arange(100000)

In [72]:
%time np.sum(K **2)

CPU times: total: 0 ns
Wall time: 989 μs


216474736

In [73]:
%time sum([x**2 for x in K])

CPU times: total: 31.2 ms
Wall time: 51.3 ms




216474736

### Useful Numpy Functions

In [76]:
np.random.random(size=2)

array([0.31991719, 0.55917306])

In [77]:
np.random.normal(size=2)

array([0.23039117, 0.22827436])

In [78]:
np.random.rand(2,4)

array([[0.25840284, 0.9898813 , 0.91126706, 0.35358998],
       [0.96152503, 0.44421928, 0.28034049, 0.51306507]])

In [79]:
np.arange(10)

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [80]:
np.arange(5,10)

array([5, 6, 7, 8, 9])

In [81]:
np.arange(0,1,.1)

array([0. , 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9])

In [82]:
np.arange(10).reshape(2,5)

array([[0, 1, 2, 3, 4],
       [5, 6, 7, 8, 9]])

In [83]:
np.arange(10).reshape(5,2)

array([[0, 1],
       [2, 3],
       [4, 5],
       [6, 7],
       [8, 9]])

In [84]:
np.linspace(0,1,5)

array([0.  , 0.25, 0.5 , 0.75, 1.  ])

In [85]:
np.linspace(0,1,20)

array([0.        , 0.05263158, 0.10526316, 0.15789474, 0.21052632,
       0.26315789, 0.31578947, 0.36842105, 0.42105263, 0.47368421,
       0.52631579, 0.57894737, 0.63157895, 0.68421053, 0.73684211,
       0.78947368, 0.84210526, 0.89473684, 0.94736842, 1.        ])

In [88]:
np.linspace(0,1,20,False)

array([0.  , 0.05, 0.1 , 0.15, 0.2 , 0.25, 0.3 , 0.35, 0.4 , 0.45, 0.5 ,
       0.55, 0.6 , 0.65, 0.7 , 0.75, 0.8 , 0.85, 0.9 , 0.95])

In [89]:
np.zeros(5)

array([0., 0., 0., 0., 0.])

In [91]:
np.zeros((3,3))

array([[0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.]])

In [95]:
np.zeros((3,3), dtype=np.int8)

array([[0, 0, 0],
       [0, 0, 0],
       [0, 0, 0]], dtype=int8)

In [96]:
np.ones(5)

array([1., 1., 1., 1., 1.])

In [97]:
np.ones((3,3))

array([[1., 1., 1.],
       [1., 1., 1.],
       [1., 1., 1.]])

In [98]:
np.empty(5)

array([1., 1., 1., 1., 1.])

In [99]:
np.empty((2,2))

array([[0.25, 0.5 ],
       [0.75, 1.  ]])

In [100]:
np.identity(3)

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

In [101]:
np.eye(3,3)

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

In [103]:
np.eye(8,4)

array([[1., 0., 0., 0.],
       [0., 1., 0., 0.],
       [0., 0., 1., 0.],
       [0., 0., 0., 1.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.]])

In [109]:
np.eye(8,4,k=1)

array([[0., 1., 0., 0.],
       [0., 0., 1., 0.],
       [0., 0., 0., 1.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.]])

In [107]:
np.eye(8,4, k=-3)

array([[0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [1., 0., 0., 0.],
       [0., 1., 0., 0.],
       [0., 0., 1., 0.],
       [0., 0., 0., 1.],
       [0., 0., 0., 0.]])

In [110]:
"Hello World"[6]

'W'