In [1]:
import numpy as np

## numpy attributes

In [4]:
x = np.array([[1, 2, 3], [2, 3, 4]])
print("ndim", x.ndim)
print("shape", x.shape)
print("size", x.size)
print("itemsize", x.itemsize)
print("nbytes", x.nbytes)
print("dtype", x.dtype)

ndim 2
shape (2, 3)
size 6
itemsize 4
nbytes 24
dtype int32


## Important points


Keep in mind that, unlike Python lists, NumPy arrays have a fixed type. This means, for
example, that if you attempt to insert a floating-point value to an integer array, the value
will be silently truncated. Don't be caught unaware by this behavior!

In [5]:
x[0, 0] = 3.14159
x

array([[3, 2, 3],
       [2, 3, 4]])

## indexing

In [13]:
a = np.random.randint(10, size=(3, 2, 4))
a

array([[[3, 6, 5, 8],
        [9, 8, 5, 9]],

       [[9, 2, 6, 3],
        [3, 0, 1, 0]],

       [[3, 1, 3, 1],
        [8, 1, 4, 3]]])

In [14]:
a[0, 1, 2]
# a[start:stop:step, start:stop:step, start:stop:step] from outermost dim to innermost dim

5

Keep in mind that, unlike Python lists, NumPy arrays have a fixed type. This means, for example, that if you attempt to insert a floating-point value to an integer array, the value will be silently truncated. Don't be caught unaware by this behavior!

In [16]:
a[0, 1, 2] = 100.14256
a

array([[[  3,   6,   5,   8],
        [  9,   8, 100,   9]],

       [[  9,   2,   6,   3],
        [  3,   0,   1,   0]],

       [[  3,   1,   3,   1],
        [  8,   1,   4,   3]]])

## Slicing

x[start:stop:step, sa:so:se, .....]
If any of these are unspecified, they default to the values start=0, stop=size of
dimension, step=1.

In [25]:
x = np.arange(10)
x

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [26]:
x[:5], x[5:]

(array([0, 1, 2, 3, 4]), array([5, 6, 7, 8, 9]))

In [28]:
x[4:7] # EXCLUDING 7th INDEX

array([4, 5, 6])

In [31]:
x[::2], x[1::2] # FOR ODD PLACES

(array([0, 2, 4, 6, 8]), array([1, 3, 5, 7, 9]))

In [33]:
x[::-1], x[5::-2]

(array([9, 8, 7, 6, 5, 4, 3, 2, 1, 0]), array([5, 3, 1]))

### multidimensional slicing

In [35]:
x = np.arange(12).reshape((3, 4))
x

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

In [36]:
x[:2, :3]

array([[0, 1, 2],
       [4, 5, 6]])

In [38]:
x[:3, ::2], x[:, ::2]

(array([[ 0,  2],
        [ 4,  6],
        [ 8, 10]]),
 array([[ 0,  2],
        [ 4,  6],
        [ 8, 10]]))

Finally, subarray dimensions can even be reversed together:

In [42]:
x[::-1, ::-1]

array([[11, 10,  9,  8],
       [ 7,  6,  5,  4],
       [ 3,  2,  1,  0]])

### accessing array rows and columns

One commonly needed routine is accessing of single rows or columns of an array. This can
be done by **combining indexing and slicing, using an empty slice marked by a single colon**
(:):

In [47]:
x[:, 0], x[0, :], x[0] # equivalent to x[0, :]

(array([0, 4, 8]), array([0, 1, 2, 3]), array([0, 1, 2, 3]))

# Subarrays as no-copy views

One important–and extremely useful–thing to know about **array slices is that they return
views rather than copies of the array data**. This is one area in which NumPy array slicing
differs from **Python list slicing: in lists, slices will be copies**. Consider our two-dimensional
array from before:

In [51]:
a = np.random.randint(10, size=(3, 4))
a

array([[8, 0, 3, 0],
       [7, 9, 3, 4],
       [7, 6, 4, 5]])

In [53]:
a2 = a[:2, :2]
a2

array([[8, 0],
       [7, 9]])

In [55]:
a2[0, 0] = 99
a2, a

(array([[99,  0],
        [ 7,  9]]),
 array([[99,  0,  3,  0],
        [ 7,  9,  3,  4],
        [ 7,  6,  4,  5]]))

**This default behavior is actually quite useful: it means that when we work with large
datasets, we can access and process pieces of these datasets without the need to copy the
underlying data buffer.**

## creating copies of arrays

In [57]:
a2_copy = a2.copy() # a[:2, :2].copy()
a2_copy[0, 0] = 555
a2_copy, a2

(array([[555,   0],
        [  7,   9]]),
 array([[99,  0],
        [ 7,  9]]))

## Reshaping of Arrays

In [59]:
x = np.arange(1, 10).reshape((3, 3))
x

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

**Note that for this to work, the size of the initial array must match the size of the reshaped
array. Where possible, the reshape method will use a no-copy view of the initial array, but
with non-contiguous memory buffers this is not always the case.**

In [64]:
x = np.arange(1, 10)
x.reshape((3, 3)) # IMPORTANT NOTE
x2 = x.reshape((3, 3))
print(x, x2, sep='\n')

# reshape does no-copy view and not copy of array

x2[0, 0] = 99
print(x, x2, sep='\n')

[1 2 3 4 5 6 7 8 9]
[[1 2 3]
 [4 5 6]
 [7 8 9]]
[99  2  3  4  5  6  7  8  9]
[[99  2  3]
 [ 4  5  6]
 [ 7  8  9]]


Another common reshaping pattern is the conversion of a one-dimensional array into a
two-dimensional row or column matrix. **This can be done with the reshape method, or
more easily done by making use of the newaxis keyword within a slice operation**:

In [66]:
x = np.array([1, 2, 3]) 
x.reshape((1, 3)), x[np.newaxis, :]

(array([[1, 2, 3]]), array([[1, 2, 3]]))

In [68]:
x.reshape((3, 1)), x[:, np.newaxis]

(array([[1],
        [2],
        [3]]),
 array([[1],
        [2],
        [3]]))

## Array Concatenation and Splitting

np.concatenate(), np.vstack, np.hstack, np.dstack

In [71]:
x = np.array([1, 2, 3])
y = np.array([3, 2, 1])
z = np.concatenate([x, y]) 
print(z)
z[[0, 3]] = [99, 88] # ****** VVV IIMMMPPPPP
print(z, x, y) # np.concatenate CREATED A NEW ARRAY

[1 2 3 3 2 1]
[99  2  3 88  2  1] [1 2 3] [3 2 1]


In [73]:
z = [99, 99, 99]
np.concatenate([x, y, z])

array([ 1,  2,  3,  3,  2,  1, 99, 99, 99])

In [76]:
a = np.array([[1, 2, 3], 
              [4, 5, 6]]) # for 2d, axis 0 is rows and axis 1 is cols
np.concatenate([a, a]), np.concatenate([a, a], axis=1)

(array([[1, 2, 3],
        [4, 5, 6],
        [1, 2, 3],
        [4, 5, 6]]),
 array([[1, 2, 3, 1, 2, 3],
        [4, 5, 6, 4, 5, 6]]))

In [78]:
b = np.array([1, 2, 3])
grid = np.array([[9, 8, 7], 
                [6, 5, 4]])
np.vstack([b, grid]), np.vstack([grid, b])

(array([[1, 2, 3],
        [9, 8, 7],
        [6, 5, 4]]),
 array([[9, 8, 7],
        [6, 5, 4],
        [1, 2, 3]]))

In [80]:
c = [[99], 
     [99]]
np.hstack([c, grid]), np.hstack([grid, c])

(array([[99,  9,  8,  7],
        [99,  6,  5,  4]]),
 array([[ 9,  8,  7, 99],
        [ 6,  5,  4, 99]]))

### Similarly, np.dstack will stack arrays along the third axis.

## Splitting the arrays

np.split(), np.vsplit(), np.hsplit(), np.dsplit()

In [82]:
x = [1, 2, 3, 99, 99, 3, 2, 1]
x1, x2, x3 = np.split(x, [3, 5])
x1, x2, x3

(array([1, 2, 3]), array([99, 99]), array([3, 2, 1]))

Notice that N split points lead to N + 1 subarrays. The related functions np.hsplit and
np.vsplit are similar:

In [83]:
a = np.arange(16).reshape((4, 4))
a

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15]])

In [85]:
upper, lower = np.vsplit(a, [2])
upper, lower

(array([[0, 1, 2, 3],
        [4, 5, 6, 7]]),
 array([[ 8,  9, 10, 11],
        [12, 13, 14, 15]]))

In [87]:
left, right = np.hsplit(a, [2])
left, right

(array([[ 0,  1],
        [ 4,  5],
        [ 8,  9],
        [12, 13]]),
 array([[ 2,  3],
        [ 6,  7],
        [10, 11],
        [14, 15]]))

#### Similarly, np.dsplit will split arrays along the third axis.

## Random in numpy

### np.random.randint()

random.randint(low, high=None, size=None, dtype=int)

Return random integers from low (inclusive) to high (exclusive).

Return random integers from the “discrete uniform” distribution of the specified dtype in the “half-open” interval [low, high). If high is None (the default), then results are from [0, low).

In [18]:
a = np.random.randint(10, size=(1, 2))
a

array([[0, 3]])

### np.arange()

numpy.arange([start, ]stop, [step, ]dtype=None, *, like=None)

Return evenly spaced values within a given interval.

Values are generated within the half-open interval [start, stop)

start: integer or **real**, optional
Start of interval. The interval includes this value. The default start value is 0.

stop: integer or **real**

step: integer or **real**, optional

dtype: dtype

like: array_like
Reference object to allow the creation of arrays which are not NumPy arrays. 

In [24]:
a = np.arange(15)
b = np.arange(5, 30, 3)
print(a, b, sep='\n')

[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14]
[ 5  8 11 14 17 20 23 26 29]


In [None]:
np.random.rand(), np.random.random(), np.random.randint(), np.random.seed()
np.zeros(), np.ones(), np.partition(), np.

### Aggregations: Min, Max, and Everything in Between

In [89]:
a = np.random.random(10)
a, sum(a), np.sum(a)

(array([0.53676964, 0.5463407 , 0.7785273 , 0.20295929, 0.69461177,
        0.0487415 , 0.50881843, 0.3538618 , 0.09843193, 0.35928817]),
 4.128350531963947,
 4.128350531963948)

However, because it executes the operation in compiled code, NumPy's version of the operation is computed much more quickly:

big_array = np.random.rand(1000000)

%timeit sum(big_array)

%timeit np.sum(big_array)

10 loops, best of 3: 104 ms per loop

1000 loops, best of 3: 442 µs per loop

Be careful, though: the sum function and the np.sum function are not identical, which can sometimes lead to confusion! In particular, their optional arguments have different meanings, and np.sum is aware of multiple array dimensions.

In [91]:
min(a), np.min(a), max(a), np.max(a)

(0.04874149876806089,
 0.04874149876806089,
 0.7785272981483864,
 0.7785272981483864)

NumPy's corresponding functions have similar syntax, and again operate much more quickly:

np.min(big_array), np.max(big_array)

(1.1717128136634614e-06, 0.9999976784968716)

%timeit min(big_array)

%timeit np.min(big_array)

10 loops, best of 3: 82.3 ms per loop

1000 loops, best of 3: 497 µs per loop

#### For min, max, sum, and several other NumPy aggregates, a shorter syntax is to use methods of the array object itself:

In [93]:
a.min(), a.max(), a.sum()

(0.04874149876806089, 0.7785272981483864, 4.128350531963948)

### Multi dimensional aggregates

In [94]:
a = np.random.random((3, 4))
a.sum(), a.min(axis=0), a.max(axis=1) # WE CAN USE axis FOR AGGREGATE FUNCS()

(5.4417102251277445,
 array([0.12865418, 0.37363031, 0.08335969, 0.00280732]),
 array([0.87379937, 0.88889216, 0.55743406]))

#### The axis keyword specifies the dimension of the array that will be collapsed, rather than the dimension that will be returned. So specifying axis=0 means that the first axis will be collapsed: for two-dimensional arrays, this means that values within each column will be aggregated.

## other aggregate functions

#### Function Name NaN-safe Version Description
np.sum np.nansum Compute sum of elements

np.prod np.nanprod Compute product of elements

np.mean np.nanmean Compute mean of elements

np.std np.nanstd Compute standard deviation

np.var np.nanvar Compute variance

np.min np.nanmin Find minimum value

np.max np.nanmax Find maximum value

np.argmin np.nanargmin Find index of minimum value

np.argmax np.nanargmax Find index of maximum value

np.median np.nanmedian Compute median of elements

np.percentile np.nanpercentile Compute rank-based statistics of elements

np.any N/A Evaluate whether any elements are true

np.all N/A Evaluate whether all elements are true

## Sorting Arrays

In [95]:
def selection_sort(x):
    """Sort ``x`` in place using selection sort and return the same object.

    For every position, the smallest element of the not-yet-sorted tail is
    located with ``np.argmin`` and swapped into that position.

    Parameters
    ----------
    x : mutable indexable sequence
        The values to sort; it is modified in place.
        (presumably a 1-D NumPy array, as in the example below the
        definition -- other shapes are not exercised here)

    Returns
    -------
    The same object ``x`` that was passed in, now reordered.
    """
    length = len(x)
    position = 0
    while position < length:
        # np.argmin indexes relative to the tail slice, so shift it back
        # to an index into the full sequence.
        smallest = position + np.argmin(x[position:])
        # Explicit three-step swap of the current slot and the minimum.
        held = x[smallest]
        current = x[position]
        x[position] = held
        x[smallest] = current
        position += 1
    return x

In [96]:
x = np.array([2, 1, 4, 3, 5])
selection_sort(x)

array([1, 2, 3, 4, 5])

In [97]:
def bogosort(x):
    """Shuffle ``x`` in place until it is in ascending order, then return it.

    Demonstration only -- never to be used for real sorting: the loop keeps
    reshuffling at random until no adjacent pair is out of order.

    Parameters
    ----------
    x : array
        Values to sort; shuffled in place via ``np.random.shuffle``.
        (assumes a 1-D NumPy array so that ``>`` compares elementwise --
        that is how it is called in the example below the definition)

    Returns
    -------
    The same object ``x`` that was passed in.
    """
    while True:
        # Compare each element with its right-hand neighbour
        # (index 0 vs 1, 1 vs 2, ...).
        out_of_order = x[:-1] > x[1:]
        if not np.any(out_of_order):
            return x
        np.random.shuffle(x)

In [98]:
x = np.array([2, 1, 4, 3, 5])
bogosort(x)

array([1, 2, 3, 4, 5])

Although Python has built-in sort and sorted functions to work with lists, we won't
discuss them here because NumPy's np.sort function turns out to be much more efficient
and useful for our purposes. By default np.sort uses an $\mathcal{O}[N \log N]$ quicksort algorithm, though
mergesort and heapsort are also available. For most applications, the default quicksort is
more than sufficient.

**To return a sorted version of the array without modifying the input, you can use np.sort:**

In [99]:
x = np.array([2, 1, 4, 3, 5])
np.sort(x)

array([1, 2, 3, 4, 5])

**If you prefer to sort the array in-place, you can instead use the sort method of arrays:**

In [101]:
x.sort()
x

array([1, 2, 3, 4, 5])

**A related function is argsort, which instead returns the indices of the sorted elements:**

In [103]:
x = np.array([2, 1, 4, 3, 5])
i = np.argsort(x)
print(i)

[1 0 3 2 4] [1 2 3 4 5]


**The first element of this result gives the index of the smallest element, the second value gives the index of the second smallest, and so on. These indices can then be used (via fancy indexing) to construct the sorted array if desired:**

In [105]:
x[i]

array([1, 2, 3, 4, 5])

### Sorting along rows and columns

In [106]:
rand = np.random.RandomState(42)
x = rand.randint(0, 10, (4, 6))
print(x)

[[6 3 7 4 6 9]
 [2 6 7 4 3 7]
 [7 2 5 4 1 7]
 [5 1 4 0 9 5]]


In [107]:
np.sort(x, axis=0)

array([[2, 1, 4, 0, 1, 5],
       [5, 2, 5, 4, 3, 7],
       [6, 3, 7, 4, 6, 7],
       [7, 6, 7, 4, 9, 9]])

In [108]:
np.sort(x, axis=1)

array([[3, 4, 6, 6, 7, 9],
       [2, 3, 4, 6, 7, 7],
       [1, 2, 4, 5, 7, 7],
       [0, 1, 4, 5, 5, 9]])

## Partial Sorts: Partitioning

#### Sometimes we're not interested in sorting the entire array, but simply want to find the K smallest values in the array. NumPy provides this in the np.partition function. np.partition takes an array and a number K; the result is a new array with the smallest K values to the left of the partition, and the remaining values to the right, in arbitrary order:

In [110]:
x = np.array([7, 2, 3, 1, 6, 5, 4])
np.partition(x, 3)

array([2, 1, 3, 4, 6, 5, 7])

#### Note that the first three values in the resulting array are the three smallest in the array, and the remaining array positions contain the remaining values. Within the two partitions, the elements have arbitrary order.

In [119]:
rand = np.random.RandomState(42)
x = rand.randint(0, 10, (4, 6))
np.partition(x, 3, axis=1), x

(array([[3, 4, 6, 6, 7, 9],
        [2, 3, 4, 6, 7, 7],
        [2, 1, 4, 5, 7, 7],
        [0, 4, 1, 5, 9, 5]]),
 array([[6, 3, 7, 4, 6, 9],
        [2, 6, 7, 4, 3, 7],
        [7, 2, 5, 4, 1, 7],
        [5, 1, 4, 0, 9, 5]]))

In [120]:
np.argpartition(x, 3, axis=1)

array([[1, 3, 4, 0, 2, 5],
       [0, 4, 3, 1, 2, 5],
       [1, 4, 3, 2, 0, 5],
       [3, 2, 1, 0, 4, 5]], dtype=int64)