# Numpy
**Note:** From here on out I will not post links to the documentation for every piece of code unless there is something outstanding, or something that is not covered by the built-in docstrings you can view by evaluating a function/object/value followed by "?". I will assume that you check the documentation of any function you see in order to get fully acquainted with it.

In [3]:
import numpy as np
from scipy import stats # can be accessed stats.*

## Creating Arrays

In [10]:
np.array([1, 2, 3]) # Creating an array through a list

array([1, 2, 3])

In [11]:
np.array([[1, 2], [3, 4]]) # 2D array

array([[1, 2],
       [3, 4]])

In [12]:
# Build the 16 integers 0..15 directly with NumPy instead of converting a
# Python range object; dtype=int keeps the same integer dtype as before.
a = np.arange(16, dtype=int)
print(a)

[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15]


In [13]:
np.ones((2, 2)) # 2x2 array of ones; the shape is passed as a single tuple

array([[1., 1.],
       [1., 1.]])

In [14]:
np.zeros((2,2)) # 2x2 array of zeros

array([[0., 0.],
       [0., 0.]])

In [15]:
np.eye(4) # 4x4 identity matrix

array([[1., 0., 0., 0.],
       [0., 1., 0., 0.],
       [0., 0., 1., 0.],
       [0., 0., 0., 1.]])

In [16]:
np.random.randint(0, 10, 30) # 30 random integers from 0-9 (the upper bound 10 is exclusive)


array([9, 1, 9, 1, 9, 6, 5, 3, 8, 6, 0, 7, 0, 0, 6, 0, 0, 7, 3, 9, 0, 9,
       8, 7, 4, 0, 6, 5, 6, 5])

In [17]:
np.random.normal(0, 1, size=30) # 30 samples from a normal distribution with mean 0 and standard deviation 1

array([ 0.32861415, -0.58632113, -0.26784669,  0.76437566, -2.2824553 ,
        0.57384571,  0.19590276, -1.06849754, -1.53251095, -0.34155401,
        1.28619541, -0.33550062, -0.33744718,  1.18052406,  0.88240534,
        0.65767089,  0.11672267,  1.0098026 ,  0.88812326,  0.46646279,
        0.32840954,  0.50915707,  1.24873982,  0.93898645, -0.51849067,
       -1.15075397, -0.3746418 ,  1.0808008 , -0.56912045,  0.19665593])

In [4]:
np.random.normal?

[0;31mDocstring:[0m
normal(loc=0.0, scale=1.0, size=None)

Draw random samples from a normal (Gaussian) distribution.

The probability density function of the normal distribution, first
derived by De Moivre and 200 years later by both Gauss and Laplace
independently [2]_, is often called the bell curve because of
its characteristic shape (see the example below).

The normal distributions occurs often in nature.  For example, it
describes the commonly occurring distribution of samples influenced
by a large number of tiny, random disturbances, each with its own
unique distribution [2]_.

.. note::
    New code should use the `~numpy.random.Generator.normal`
    method of a `~numpy.random.Generator` instance instead;
    please see the :ref:`random-quick-start`.

Parameters
----------
loc : float or array_like of floats
    Mean ("centre") of the distribution.
scale : float or array_like of floats
    Standard deviation (spread or "width") of the distribution. Must be
    non-negative.


## Loading from CSV

In [24]:
# Tip: to check the header and delimiter first, peek at the start of the raw
# file, e.g. print(open("data.csv").read(100)).


data = np.loadtxt("data.csv", skiprows=1, delimiter=",", dtype=int) # skip the first line since it's the header; parse every field as int
print(data)

[[1000025       5       2       2]
 [1002945       5       7       2]
 [1015425       3       2       2]
 ...
 [ 841769       2       2       2]
 [ 888820       5       7       4]
 [ 897471       4       3       4]]


## Accessing Arrays

In [25]:
data.shape # (number of rows, number of columns)

(645, 4)

In [51]:
data[0,0] # Accessing element in first row and first column

1000025

In [59]:
data[:,0] # Accessing all elements in column 0

array([ 1000025,  1002945,  1015425,  1016277,  1017023,  1017122,
        1018099,  1018561,  1033078,  1035283,  1036172,  1041801,
        1043999,  1044572,  1047630,  1048672,  1049815,  1050670,
        1050718,  1054590,  1054593,  1056784,  1057013,  1059552,
        1065726,  1066373,  1066979,  1067444,  1070935,  1071760,
        1072179,  1074610,  1075123,  1079304,  1080185,  1081791,
        1084584,  1091262,  1096800,  1099510,  1100524,  1102573,
        1103608,  1103722,  1105257,  1105524,  1106095,  1106829,
        1108370,  1108449,  1110102,  1110503,  1110524,  1111249,
        1112209,  1113038,  1113483,  1113906,  1115282,  1115293,
        1116116,  1116132,  1116192,  1116998,  1117152,  1118039,
        1120559,  1121732,  1121919,  1123061,  1124651,  1125035,
        1126417,  1131294,  1132347,  1133041,  1133136,  1136142,
        1137156,  1143978,  1147044,  1147699,  1147748,  1148278,
        1148873,  1152331,  1155546,  1156272,  1156948,  1157

In [53]:
data[0, :] # Accessing all elements in the first row (index 0), using :

array([1000025,       5,       2,       2])

In [35]:
data[0, 1:-1] # First row, columns 1 up to (but not including) the last column

array([5, 2])

## Changing shape
Arrays can be quickly changed from one shape to another, provided that both shapes have the same number of elements.

In [37]:
print(a)

[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15]


In [38]:
a.reshape((2,8)) # reshape to 2 rows and 8 columns, 2x8=16

array([[ 0,  1,  2,  3,  4,  5,  6,  7],
       [ 8,  9, 10, 11, 12, 13, 14, 15]])

In [40]:
a = a.reshape((-1, 4)) # -1 means: use as many rows as needed so that, with 4 columns, all elements fit
a

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15]])

In [41]:
a.reshape(-1) # flattens the 2-D array into a 1-D array

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15])

In [42]:
a.reshape((4,2,2)) # reshapes into a 3-D array: 4 blocks of 2x2

array([[[ 0,  1],
        [ 2,  3]],

       [[ 4,  5],
        [ 6,  7]],

       [[ 8,  9],
        [10, 11]],

       [[12, 13],
        [14, 15]]])

## Doing Math with Numpy

In [43]:
a

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15]])

In [48]:
b = np.arange(4)           # the integers 0..3 as a 1-D array
bCol = b.reshape((-1, 1))  # column vector: 1 column, -1 infers the row count (4)
bCol

array([[0],
       [1],
       [2],
       [3]])

In [45]:
bRow = b.reshape((1,-1)) # row vector: 1 row, -1 infers the column count (4)
bRow

array([[0, 1, 2, 3]])

In [46]:
a + 5 # the scalar is added to every element

array([[ 5,  6,  7,  8],
       [ 9, 10, 11, 12],
       [13, 14, 15, 16],
       [17, 18, 19, 20]])

In [47]:
a + bRow # bRow (1x4) is broadcast down the rows: it is added to every row of a

array([[ 0,  2,  4,  6],
       [ 4,  6,  8, 10],
       [ 8, 10, 12, 14],
       [12, 14, 16, 18]])

In [49]:
a + bCol # bCol (4x1) is broadcast across the columns: it is added to every column of a

array([[ 0,  1,  2,  3],
       [ 5,  6,  7,  8],
       [10, 11, 12, 13],
       [15, 16, 17, 18]])

In [50]:
a @ bCol # matrix multiplication: (4x4) @ (4x1) -> (4x1); each entry is the dot product of a row of a with bCol

array([[14],
       [38],
       [62],
       [86]])

## Using booleans as selectors (array masks)
Just like we can do int/float math on whole arrays at once, we can do boolean math on those same arrays. The resulting boolean arrays can then be used to mask, or select, individual elements of an array.

In [60]:
thirdCol = data[:, 2] # third column (index 2) of every row
print(thirdCol)
evens = (thirdCol % 2 == 0) # Boolean array: True where the third-column value is even, False where it is odd
evens

[ 2  7  2  3  2  7  2  2  2  1  2  2  2  7  6  2  2  4  2  5  6  2  2  2
  2  1  2  2  1  2  8  2  2  2  6  1  2  6  6  3  8 10  8  2  4  2  2  4
  2  2  3 10  8  4  3  5  6  2  3  2 10  5  2  3  2  8  4  2  2 10  2  6
  3  2  2  2  2  3  1  2  2  8 10  5  5  2  3  2  2  2  2  2  2  2  2 10
  5 10  2  2  6 10  3  2  8  2  2  5  2 10  2  2  4  4  2  2 10  8  9  2
  4  2  5 10  2  2  8  2  3  2  2  2  2  1  2  2  4  2  2  2  6  3  8 10
  1  6  4  2  2  3  2  2  3  6  5  2  2  1  2  2  8  6  2  1  2  2  2  8
  3 10  2  8  2  6  1  2  2  5  4  1  5  6  5  2  6 10  2  2  2  4  2  2
  2  5 10  2  2  2  6  7  1  1  5  4  2  7  3  5  2  2  6  2  2 10  1  3
  3  2  4  3  1 10  3  6  3  5  3  2  8  6  6  3  2  1  2  2  2  2  5  2
  2  2  2  2  5  3 10  5  6 10  3  3  2  3  2  3  2  2  2  2  2  3  2  2
  4  2  2  8 10  3  4  6  2  2  2  2  2  6  4  2  5  4  4  2  9  2  3  4
  2  2  4  2  3  2 10  2  1  7  5  6  5  6  5  2  2  2  2  2  2  6  5  2
  2  2  4  3  2  4  2  2  2  2  2  2 10  2  3  1  5

array([ True, False,  True, False,  True, False,  True,  True,  True,
       False,  True,  True,  True, False,  True,  True,  True,  True,
        True, False,  True,  True,  True,  True,  True, False,  True,
        True, False,  True,  True,  True,  True,  True,  True, False,
        True,  True,  True, False,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True, False,  True,  True,  True,
       False, False,  True,  True, False,  True,  True, False,  True,
       False,  True,  True,  True,  True,  True,  True,  True,  True,
       False,  True,  True,  True,  True, False, False,  True,  True,
        True,  True, False, False,  True, False,  True,  True,  True,
        True,  True,  True,  True,  True,  True, False,  True,  True,
        True,  True,  True, False,  True,  True,  True,  True, False,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True, False,  True,  True,  True, False,  True,  True,  True,
        True,  True,

In [57]:
print(evens[:5]) # View the first 5 elements

[ True False  True False  True]


In [None]:
print(np.sum(evens)) # when summing booleans True=1 False=0, so we count the # of trues here

In [62]:
even_data = data[evens] # Boolean-mask indexing: keeps only the rows of data where evens is True,
# i.e. the rows whose third column is even
print(even_data)

[[1000025       5       2       2]
 [1015425       3       2       2]
 [1017023       4       2       2]
 ...
 [ 714039       3       2       2]
 [ 763235       3       2       2]
 [ 841769       2       2       2]]


In [63]:
print(data.shape)
print(even_data.shape)

(645, 4)
(485, 4)


In [68]:
even_data[:5]

array([[1000025,       5,       2,       2],
       [1015425,       3,       2,       2],
       [1017023,       4,       2,       2],
       [1018099,       1,       2,       2],
       [1018561,       2,       2,       2]])

## Statistics

In [69]:
a

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15]])

In [70]:
np.min(a), np.max(a)

(0, 15)

In [71]:
a.mean()

7.5

In [72]:
a.std() # population standard deviation (ddof=0) over all elements of a

4.6097722286464435

In [85]:
b = np.random.randint(0,10,10) # 10 random integers from 0-9 (upper bound exclusive); note this rebinds b
b

array([0, 4, 3, 9, 8, 7, 5, 6, 0, 3])

In [86]:

# Compute the mode once and reuse the result rather than calling
# stats.mode(b) separately for each field.
mode_result = stats.mode(b)
print(mode_result.mode)   # the most frequent value in b
print(mode_result.count)  # how many times that value occurs


0
2
