This notebook is inspired from:
[Jake VanderPlas - Python Data Science Handbook](https://jakevdp.github.io/PythonDataScienceHandbook/index.html)

# Numpy

In [1]:
## importing numpy
import numpy as np

np.__version__

'1.16.5'

## Creating Numpy Arrays

__From lists__

In [15]:
my_array1 = np.array([4, 1, 7, 13, 2.76])

In [16]:
type(my_array1)

numpy.ndarray

In [18]:
my_array1.shape

(5,)

In [19]:
# if we want we can also specify the data types


In [20]:
# but aware that they have certain range and limitations.


Unlike lists, arrays can be multidimensional

In [21]:
multidim = np.array([[1,2,3,12],
          [4,5,6,11], 
          [7,8,9,10]])

In [22]:
multidim.shape

(3, 4)

__From Scratch__

In [33]:
# we can create a numpy array with zeros of any shape
np.zeros((6,2))
np.zeros(((4,5,3)))

array([[[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]],

       [[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]],

       [[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]],

       [[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]]])

In [160]:
## Again we can pass the dtype

np.zeros((2,6), dtype = float)

array([[0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0.]])

In [35]:
# we can create an array of any shape filled with any number:

np.full((3, 7), .23)

array([[0.23, 0.23, 0.23, 0.23, 0.23, 0.23, 0.23],
       [0.23, 0.23, 0.23, 0.23, 0.23, 0.23, 0.23],
       [0.23, 0.23, 0.23, 0.23, 0.23, 0.23, 0.23]])

In [39]:
## np.ones?
np.ones(((4,3,2)))

array([[[1., 1.],
        [1., 1.],
        [1., 1.]],

       [[1., 1.],
        [1., 1.],
        [1., 1.]],

       [[1., 1.],
        [1., 1.],
        [1., 1.]],

       [[1., 1.],
        [1., 1.],
        [1., 1.]]])

Other useful methods for creating arrays:

- `np.arrange`

- `np.linspace`

- `np.random.random`

In [43]:
np.arange(5, 25, 3)

array([ 5,  8, 11, 14, 17, 20, 23])

In [51]:
np.linspace(0,100,70)

array([  0.        ,   1.44927536,   2.89855072,   4.34782609,
         5.79710145,   7.24637681,   8.69565217,  10.14492754,
        11.5942029 ,  13.04347826,  14.49275362,  15.94202899,
        17.39130435,  18.84057971,  20.28985507,  21.73913043,
        23.1884058 ,  24.63768116,  26.08695652,  27.53623188,
        28.98550725,  30.43478261,  31.88405797,  33.33333333,
        34.7826087 ,  36.23188406,  37.68115942,  39.13043478,
        40.57971014,  42.02898551,  43.47826087,  44.92753623,
        46.37681159,  47.82608696,  49.27536232,  50.72463768,
        52.17391304,  53.62318841,  55.07246377,  56.52173913,
        57.97101449,  59.42028986,  60.86956522,  62.31884058,
        63.76811594,  65.2173913 ,  66.66666667,  68.11594203,
        69.56521739,  71.01449275,  72.46376812,  73.91304348,
        75.36231884,  76.8115942 ,  78.26086957,  79.71014493,
        81.15942029,  82.60869565,  84.05797101,  85.50724638,
        86.95652174,  88.4057971 ,  89.85507246,  91.30

In [62]:
np.random.random(size = 10)

array([0.40961547, 0.1433371 , 0.24737833, 0.76891211, 0.54328629,
       0.98715395, 0.30083621, 0.35289021, 0.05106573, 0.87548367])

In [63]:
np.random.randn(10)

array([-1.73524923,  0.625618  , -0.02357523,  0.83727519,  1.27179458,
        0.77986113,  0.60167931,  0.55450879,  0.02207911,  0.25755767])

In [69]:
np.random.normal(loc = 69, scale = 2, size = 20)

array([68.63505692, 69.84483998, 69.70076538, 70.76697615, 68.16294593,
       69.22105334, 72.19247315, 64.56918105, 68.36030197, 69.30925579,
       68.56255875, 70.96965475, 67.54442187, 70.56693316, 72.97217833,
       72.07087898, 68.92744956, 68.67363682, 69.87519039, 70.78371066])

## Descriptive Statistics with Numpy

In [None]:
## let's create a sample from normally distributed population of size = 10

In [138]:
np.random.seed(60120)
sample1 = np.random.normal(loc = 10, scale = 1, size = 10)

In [139]:
sample1

array([11.09688495,  9.82876882, 10.89658301,  7.64402997,  9.05907328,
       10.00666684,  9.72806703,  9.95520623, 12.27507952, 10.28825701])

10.077861664932225

In [None]:
## what is the mean of sample1?

In [145]:
sample1.mean()

10.077861664932225

In [None]:
## what is the median of sample1?

In [142]:
np.median(sample1)

9.980936535974632

In [None]:
## sorting sample1


In [150]:
sample1.sort()
np.sort(sample1)

array([ 7.64402997,  9.05907328,  9.72806703,  9.82876882,  9.95520623,
       10.00666684, 10.28825701, 10.89658301, 11.09688495, 12.27507952])

In [None]:
## what is the 0.1 percentile of sample1?

In [154]:
np.percentile(sample1, q=0.1, interpolation = 'lower')

7.6440299682270565

In [158]:
## Where is the max/min in sample1
np.max(sample1)
sample1.min()

7.6440299682270565

In [159]:
## We can use different formattings as we print values
print('Maximum of sample1 is %.2f'%sample1.max())
print('The index of the max in sample1 is {}'.format(sample1.argmax()))

Maximum of sample1 is 12.28
The index of the max in sample1 is 9


[Comparison between % and format](https://stackoverflow.com/questions/5082452/string-formatting-vs-format)

[Descriptive Statistics](https://www.hackerearth.com/blog/developers/descriptive-statistics-python-numpy/)

In [161]:
multidim

array([[ 1,  2,  3, 12],
       [ 4,  5,  6, 11],
       [ 7,  8,  9, 10]])

In [162]:
multidim.shape

(3, 4)