In [1]:
import numpy as np

# source sequence 0..4 (a range object) and the 1d array built from it
list1 = range(0, 5)
arr1d = np.array(list1)

In [2]:
# inspecting the object: first its Python type, then its contents
type(arr1d)
arr1d

numpy.ndarray

array([0, 1, 2, 3, 4])

In [3]:
# unlike a list, an array applies operations/functions to every element
# rather than to the whole object
# another difference is that a numpy array cannot be resized after creation

#list1 + 2 # raises a TypeError: unsupported operand type(s)
arr1d + 2 # 2 is added to every element of the array

array([2, 3, 4, 5, 6])

In [4]:
# dtype controls how much memory each element takes (float64, float32, int8, ...)
list2 = [[0, 1, 2],
         [3, 4, 5],
         [6, 7, 8]]
arr2d_f = np.array(list2, dtype='float')  # 'bool' is another valid dtype
arr2d_f

array([[0., 1., 2.],
       [3., 4., 5.],
       [6., 7., 8.]])

In [5]:
# an array can be converted to another dtype; astype() returns a new
# array and leaves the original untouched
arr2d_f.astype('int')

array([[0, 1, 2],
       [3, 4, 5],
       [6, 7, 8]])

In [6]:
# conversions can be chained (float -> int -> str)
arr2d_f
arr2d_f.astype('int').astype('str')

array([[0., 1., 2.],
       [3., 4., 5.],
       [6., 7., 8.]])

array([['0', '1', '2'],
       ['3', '4', '5'],
       ['6', '7', '8']], dtype='<U11')

In [7]:
# an array can hold different types (like a number and a character) only
# after specifying dtype='object'
arr1d_obj = np.array([1, 'a', False], dtype='object')
arr1d_obj

array([1, 'a', False], dtype=object)

In [8]:
# an array can be converted back to a list with tolist(); the important
# advantage of an array is that it occupies much less space than a python list
arr1d
arr1d_obj.tolist()

array([0, 1, 2, 3, 4])

[1, 'a', False]

In [9]:
# 3x4 float array built from three ranges of length 4 starting at 1, 3 and 5
list3 = [range(start, start + 4) for start in (1, 3, 5)]
arr2 = np.array(list3, dtype='float')
arr2

array([[1., 2., 3., 4.],
       [3., 4., 5., 6.],
       [5., 6., 7., 8.]])

In [10]:
# getting basic information about the array
arr2
print('Shape: ', arr2.shape) # (rows, columns)
print('Datatype: ', arr2.dtype)
print('Size: ', arr2.size) # total number of elements
print('Num Dimmensions: ', arr2.ndim)

array([[1., 2., 3., 4.],
       [3., 4., 5., 6.],
       [5., 6., 7., 8.]])

Shape:  (3, 4)
Datatype:  float64
Size:  12
Num Dimmensions:  2


In [11]:
# extracting the first 2 rows and columns

arr2[:2, :2] # for a 2-dim array: arr2[rows, cols]

array([[1., 2.],
       [3., 4.]])

In [12]:
# getting a boolean output by applying a condition to each element

b = arr2 >= 4 # boolean mask with the same shape as arr2
b
arr2[b] # works like a filter: the matching values come back as a 1d array

array([[False, False, False,  True],
       [False,  True,  True,  True],
       [ True,  True,  True,  True]])

array([4., 4., 5., 6., 5., 6., 7., 8.])

In [13]:
# reversing only the row positions

arr2
arr2[::-1]

array([[1., 2., 3., 4.],
       [3., 4., 5., 6.],
       [5., 6., 7., 8.]])

array([[5., 6., 7., 8.],
       [3., 4., 5., 6.],
       [1., 2., 3., 4.]])

In [14]:
# reversing only the column positions

arr2
arr2[:, ::-1]

array([[1., 2., 3., 4.],
       [3., 4., 5., 6.],
       [5., 6., 7., 8.]])

array([[4., 3., 2., 1.],
       [6., 5., 4., 3.],
       [8., 7., 6., 5.]])

In [15]:
# reversing both the row and the column positions

arr2
arr2[::-1, ::-1]

array([[1., 2., 3., 4.],
       [3., 4., 5., 6.],
       [5., 6., 7., 8.]])

array([[8., 7., 6., 5.],
       [6., 5., 4., 3.],
       [4., 3., 2., 1.]])

In [16]:
# inserting a nan (not a number) and an inf (infinity)

arr2
arr2[1,1] = np.nan
arr2[1,2] = np.inf
arr2

array([[1., 2., 3., 4.],
       [3., 4., 5., 6.],
       [5., 6., 7., 8.]])

array([[ 1.,  2.,  3.,  4.],
       [ 3., nan, inf,  6.],
       [ 5.,  6.,  7.,  8.]])

In [17]:
# replacing nan and inf with -1 (do not use arr2==np.nan: nan never
# compares equal to anything, not even to itself)

missing_bool = np.isnan(arr2) | np.isinf(arr2)
arr2[missing_bool] = -1 # in-place assignment through the boolean mask
missing_bool
arr2

array([[False, False, False, False],
       [False,  True,  True, False],
       [False, False, False, False]])

array([[ 1.,  2.,  3.,  4.],
       [ 3., -1., -1.,  6.],
       [ 5.,  6.,  7.,  8.]])

In [18]:
# mean, max and min over all elements of the array

arr2
print("Mean value is: ", arr2.mean())
print('Max value is: ', arr2.max())
print('Min value is: ', arr2.min())

array([[ 1.,  2.,  3.,  4.],
       [ 3., -1., -1.,  6.],
       [ 5.,  6.,  7.,  8.]])

Mean value is:  3.5833333333333335
Max value is:  8.0
Min value is:  -1.0


In [19]:
# row-wise and column-wise minimum - one value per column (axis=0)
# or one value per row (axis=1)

arr2
print('Column wise minimum: ', np.amin(arr2, axis=0))
print('Row wise minimum: ', np.amin(arr2, axis=1))

array([[ 1.,  2.,  3.,  4.],
       [ 3., -1., -1.,  6.],
       [ 5.,  6.,  7.,  8.]])

Column wise minimum:  [ 1. -1. -1.  4.]
Row wise minimum:  [ 1. -1.  5.]


In [20]:
# cumulative sum (with no axis given it runs over the flattened array,
# so the result is 1d)

arr2
np.cumsum(arr2)

array([[ 1.,  2.,  3.,  4.],
       [ 3., -1., -1.,  6.],
       [ 5.,  6.,  7.,  8.]])

array([ 1.,  3.,  6., 10., 13., 12., 11., 17., 22., 28., 35., 43.])

In [21]:
# assigning a portion of arr2 to arr2a does not really create a new array,
# so any modification made through arr2a also changes arr2

arr2
arr2a = arr2[:2, :2]
arr2a
arr2a[:1, :1] = 100 # writes into arr2[0, 0] as well
arr2

array([[ 1.,  2.,  3.,  4.],
       [ 3., -1., -1.,  6.],
       [ 5.,  6.,  7.,  8.]])

array([[ 1.,  2.],
       [ 3., -1.]])

array([[100.,   2.,   3.,   4.],
       [  3.,  -1.,  -1.,   6.],
       [  5.,   6.,   7.,   8.]])

In [22]:
# to avoid the problem from the example above, a real copy has to be created
arr2
arr2b = arr2[:2, :2].copy()
arr2b[:1, :1] = 101 # 101 is not reflected in arr2
arr2

array([[100.,   2.,   3.,   4.],
       [  3.,  -1.,  -1.,   6.],
       [  5.,   6.,   7.,   8.]])

array([[100.,   2.,   3.,   4.],
       [  3.,  -1.,  -1.,   6.],
       [  5.,   6.,   7.,   8.]])

In [23]:
# reshaping a 3x4 array into a 4x3 array (element order is kept)
arr2
arr2.reshape(4,3)

array([[100.,   2.,   3.,   4.],
       [  3.,  -1.,  -1.,   6.],
       [  5.,   6.,   7.,   8.]])

array([[100.,   2.,   3.],
       [  4.,   3.,  -1.],
       [ -1.,   6.,   5.],
       [  6.,   7.,   8.]])

In [24]:
# flatten() and ravel() both give a 1d array, but ravel() returns a
# reference to the parent array (changes affect the parent), which makes
# it more memory efficient, while flatten() returns an independent copy

arr2
b1 = arr2.flatten()
b1
b1[0] = 101 # arr2 is not affected by this change
arr2

array([[100.,   2.,   3.,   4.],
       [  3.,  -1.,  -1.,   6.],
       [  5.,   6.,   7.,   8.]])

array([100.,   2.,   3.,   4.,   3.,  -1.,  -1.,   6.,   5.,   6.,   7.,
         8.])

array([[100.,   2.,   3.,   4.],
       [  3.,  -1.,  -1.,   6.],
       [  5.,   6.,   7.,   8.]])

In [25]:
# the same change made through ravel() does show up in arr2
b2 = arr2.ravel()
b2[0] = 101
b2
arr2

array([101.,   2.,   3.,   4.,   3.,  -1.,  -1.,   6.,   5.,   6.,   7.,
         8.])

array([[101.,   2.,   3.,   4.],
       [  3.,  -1.,  -1.,   6.],
       [  5.,   6.,   7.,   8.]])

In [26]:
# sequences, repetitions and random numbers

np.arange(10,1, -1) # (start, stop, step); the stop value is excluded
np.arange(10, 2, -1)
np.arange(10, 1, -2)

# evenly spaced numbers for given range
np.linspace(1, 30, 10, dtype=int) # (start, stop, amount, type)
                        # 'int' drops the fractional part of the numbers

array([10,  9,  8,  7,  6,  5,  4,  3,  2])

array([10,  9,  8,  7,  6,  5,  4,  3])

array([10,  8,  6,  4,  2])

array([ 1,  4,  7, 10, 13, 17, 20, 23, 26, 30])

In [27]:
# logspace() returns numbers spaced evenly on a logarithmic scale
# (here from 10**1 up to 10**50)

np.set_printoptions(precision=2) # limits number of digits after decimal

np.logspace(start=1, stop=50, num=10, base=10)

array([1.00e+01, 2.78e+06, 7.74e+11, 2.15e+17, 5.99e+22, 1.67e+28,
       4.64e+33, 1.29e+39, 3.59e+44, 1.00e+50])

In [28]:
# zeros and ones of the desired shape, given as [rows, columns]

np.zeros([2,6])
np.ones([3,3])

array([[0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0.]])

array([[1., 1., 1.],
       [1., 1., 1.],
       [1., 1., 1.]])

In [29]:
# repeating sequences

a = [1, 2, 3]
print('Tile:    ', np.tile(a, 2)) # repeats the whole of 'a' 2 times
print('Repeat: ', np.repeat(a, 2)) # repeats each element of 'a' 2 times

Tile:     [1 2 3 1 2 3]
Repeat:  [1 1 2 2 3 3]


In [30]:
# random module functions

np.random.rand(2,2) # uniform random numbers in [0, 1), shape (2,2)
np.random.randn(2,2) # normal distribution with mean=0 and var=1, shape (2,2)
np.random.randint(0, 10, size=[2,2]) # random integers from [0, 10), i.e. 0..9
np.random.random(size=[2,2]) # like rand(), but the shape is passed as 'size'
np.random.choice(['a','e','i','o','u'], size=10) # 10 items from the list
np.random.choice(['a', 'e', 'i','o','u'], size=10,
                 p=[0.3, .1, 0.1, 0.4, 0.1]) # picks more o's
np.random.choice(['a', 'e', 'i','o','u'], size=10,
                 p=[0.3, .1, 0.1, 0.1, 0.4]) # picks more u's

array([[0.7 , 0.8 ],
       [0.8 , 0.52]])

array([[ 1.44,  0.39],
       [-0.81,  1.2 ]])

array([[9, 6],
       [4, 3]])

array([[0.89, 0.32],
       [0.1 , 0.93]])

array(['o', 'e', 'u', 'u', 'a', 'a', 'o', 'u', 'a', 'i'], dtype='<U1')

array(['i', 'o', 'o', 'o', 'o', 'o', 'i', 'o', 'o', 'o'], dtype='<U1')

array(['a', 'i', 'i', 'i', 'u', 'a', 'u', 'a', 'u', 'o'], dtype='<U1')

In [31]:
# a RandomState object gives access to all the functions of the
# np.random module

rn = np.random.RandomState(100) # seed=100 makes the generated numbers
                                # reproducible from run to run
rn.rand(2,2)
rn.rand(3,3)

array([[0.54, 0.28],
       [0.42, 0.84]])

array([[0.  , 0.12, 0.67],
       [0.83, 0.14, 0.58],
       [0.89, 0.21, 0.19]])

In [32]:
# creating 10 random integers from 0 to 9 (the upper bound 10 is excluded)
np.random.seed(100)
arr_rand = np.random.randint(0,10, size=10)
arr_rand

array([8, 8, 3, 7, 7, 0, 4, 2, 5, 2])

In [33]:
# getting the unique items (in sorted order) and how often each one occurs

uniqs, counts =np.unique(arr_rand, return_counts=True)
print('Unique items : ', uniqs)
print('Counts     :', counts)

Unique items :  [0 2 3 4 5 7 8]
Counts     : [1 2 1 1 1 2 2]


In [34]:
# the same array shown through its repr and through print()
arr_rand
print('Array: ', arr_rand)

array([8, 8, 3, 7, 7, 0, 4, 2, 5, 2])

Array:  [8 8 3 7 7 0 4 2 5 2]


In [35]:
# positions where the value of an array item is > 5; for a 1d array
# np.where returns a tuple holding a single array of indexes

index_gt5 = np.where(arr_rand > 5)
print('Positions where value > 5: ', index_gt5)

Positions where value > 5:  (array([0, 1, 3, 4], dtype=int32),)


In [36]:
# extracting the items at the given indexes

arr_rand.take(index_gt5) # returns a new array with the indexed items
arr_rand # array is not changed by extraction

array([[8, 8, 7, 7]])

array([8, 8, 3, 7, 7, 0, 4, 2, 5, 2])

In [37]:
# with three arguments 'where' picks every value according to the condition
# in the first array: from the 2nd array where the condition is True,
# otherwise from the 3rd array

np.where([[True, False],[True, True]],
        [[1, 2], [3, 4]],
        [[9, 8], [7, 6]])

np.where(arr_rand > 5, 'gt5', 'lt5') # 2nd and 3rd params are the
                                    # equivalent of 'if - else'

array([[1, 8],
       [3, 4]])

array(['gt5', 'gt5', 'lt5', 'gt5', 'gt5', 'lt5', 'lt5', 'lt5', 'lt5',
       'lt5'], dtype='<U3')

In [38]:
# given only a condition on a 2d array, 'where' returns two arrays
# (row indexes and column indexes) locating the matching items

x = np.arange(9).reshape(3,3)
x
np.where(x<2)

array([[0, 1, 2],
       [3, 4, 5],
       [6, 7, 8]])

(array([0, 0], dtype=int32), array([0, 1], dtype=int32))

In [39]:
# location of the maximum and minimum

arr_rand
print('Position of minimum value: ', np.argmax(arr_rand))
print('Position of maximum value: ', np.argmin(arr_rand))

array([8, 8, 3, 7, 7, 0, 4, 2, 5, 2])

Position of minimum value:  0
Position of maximum value:  5


In [40]:
# Importing, exporting data as a csv file
# apart from 'genfromtxt' there is a 'loadtxt' function but it is less versatile
np.set_printoptions(suppress=True) # turns off scientific notation

path = 'http://raw.githubusercontent.com/selva86/datasets/master/Auto.csv'

# skip_header=1 skips the header line; filling_values=-999 is the value
# used for missing entries
data = np.genfromtxt(path, delimiter=',', skip_header=1,
                     filling_values=-999, dtype='float')

data[:3] # see the first 3 rows


array([[  18. ,    8. ,  307. ,  130. , 3504. ,   12. ,   70. ,    1. ,
        -999. ],
       [  15. ,    8. ,  350. ,  165. , 3693. ,   11.5,   70. ,    1. ,
        -999. ],
       [  18. ,    8. ,  318. ,  150. , 3436. ,   11. ,   70. ,    1. ,
        -999. ]])

In [41]:
# dtype=None lets genfromtxt choose a type for every column, so number
# columns and text columns can be kept together (as a structured array)
data2= np.genfromtxt(path, delimiter=',', skip_header=1, dtype=None)
data2[:2] # show the new array, not the all-float 'data' loaded earlier

array([(18., 8, 307., 130, 3504, 12. , 70, 1, b'"chevrolet chevelle malibu"'),
       (15., 8, 350., 165, 3693, 11.5, 70, 1, b'"buick skylark 320"')],
      dtype=[('f0', '<f8'), ('f1', '<i4'), ('f2', '<f8'), ('f3', '<i4'), ('f4', '<i4'), ('f5', '<f8'), ('f6', '<i4'), ('f7', '<i4'), ('f8', 'S38')])

In [42]:
# exporting the array as a csv file

np.savetxt('out.csv', data, delimiter=',')

In [43]:
# saving a single numpy array object as a .npy file
np.save('myarray.npy', arr2d_f)

# the same as above but with more than one array object, as a .npz file
np.savez('array.npz', arr2d_f, data2[:3])


In [44]:
# loading back the files saved above

a = np.load('myarray.npy')
print(a)

b = np.load('array.npz')
print(b.files) # arrays saved without names get the keys arr_0, arr_1, ...
b['arr_1']

[[0. 1. 2.]
 [3. 4. 5.]
 [6. 7. 8.]]
['arr_0', 'arr_1']


array([(18., 8, 307., 130, 3504, 12. , 70, 1, b'"chevrolet chevelle malibu"'),
       (15., 8, 350., 165, 3693, 11.5, 70, 1, b'"buick skylark 320"'),
       (18., 8, 318., 150, 3436, 11. , 70, 1, b'"plymouth satellite"')],
      dtype=[('f0', '<f8'), ('f1', '<i4'), ('f2', '<f8'), ('f3', '<i4'), ('f4', '<i4'), ('f5', '<f8'), ('f6', '<i4'), ('f7', '<i4'), ('f8', 'S38')])

In [45]:
# concatenating arrays column-wise and row-wise - three methods
# (concatenate, vstack/hstack, r_/c_)

a = np.zeros([4,4])
b = np.ones([4,4])

print(a)
print(b)

[[0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]]
[[1. 1. 1. 1.]
 [1. 1. 1. 1.]
 [1. 1. 1. 1.]
 [1. 1. 1. 1.]]


In [46]:
# each of the three calls below gives the same result
np.concatenate([a, b], axis=0) # axis=0 (the default) stacks the rows
#np.vstack([a, b]) # no need for axis
#np.r_[a, b] # square brackets only, no parentheses

array([[0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [1., 1., 1., 1.],
       [1., 1., 1., 1.],
       [1., 1., 1., 1.],
       [1., 1., 1., 1.]])

In [47]:
# horizontal stacking (the alternatives below give the same result)

np.concatenate([a,b], axis=1)
#np.hstack([a, b])
#np.c_[a, b]

array([[0., 0., 0., 0., 1., 1., 1., 1.],
       [0., 0., 0., 0., 1., 1., 1., 1.],
       [0., 0., 0., 0., 1., 1., 1., 1.],
       [0., 0., 0., 0., 1., 1., 1., 1.]])

In [48]:
# 'np.r_' can also build a 1d array from a mix of lists and single numbers

np.r_[[1,2,3], 0, 5, [6,7,8]]

array([1, 2, 3, 0, 5, 6, 7, 8])

In [49]:
# sorting an array - every column (or row) is sorted independently,
# so the original content of the rows is not kept together

arr = np.random.randint(1, 6, size=[4,3])
arr
np.sort(arr, axis=0) # each column sorted in ascending order
np.sort(arr, axis=1) # each row sorted in ascending order

array([[3, 3, 2],
       [1, 1, 5],
       [4, 5, 3],
       [1, 4, 2]])

array([[1, 1, 2],
       [1, 3, 2],
       [3, 4, 3],
       [4, 5, 5]])

array([[2, 3, 3],
       [1, 1, 5],
       [3, 4, 5],
       [1, 2, 4]])

In [50]:
# 'argsort' returns the positions of the items in ascending order of
# their values (1, 2, 5, 8, 9, 10) when applied to a 1d array

x = np.array([1, 10, 5, 2, 8, 9])
sort_index = np.argsort(x) # keeping the index in a variable lets it be reused
sort_index
x[sort_index]

array([0, 3, 2, 4, 5, 1], dtype=int32)

array([ 1,  2,  5,  8,  9, 10])

In [51]:
# using argsort to sort an array with more than 1 column
# sorting based on one column (the 1st)
arr
sorted_index_1stcol = arr[:, 0].argsort()
arr[sorted_index_1stcol] # whole rows are reordered, so their content is kept

array([[3, 3, 2],
       [1, 1, 5],
       [4, 5, 3],
       [1, 4, 2]])

array([[1, 1, 5],
       [1, 4, 2],
       [3, 3, 2],
       [4, 5, 3]])

In [52]:
# sorting based on 2 or more columns (keys are listed from last to first)
# sorting by column 0, then by column 1

arr2 = np.random.randint(1, 6, size=[8,4]) # note: rebinds the name arr2
arr2
lexsorted_index = np.lexsort((arr2[:,1], arr2[:,0]))
arr2[lexsorted_index] # if values in the 1st column are equal, the 2nd
                        # column decides the order

array([[3, 4, 5, 5],
       [2, 4, 5, 5],
       [4, 4, 4, 2],
       [2, 4, 1, 3],
       [2, 2, 4, 3],
       [4, 1, 2, 1],
       [5, 3, 1, 1],
       [3, 3, 2, 1]])

array([[2, 2, 4, 3],
       [2, 4, 5, 5],
       [2, 4, 1, 3],
       [3, 3, 2, 1],
       [3, 4, 5, 5],
       [4, 1, 2, 1],
       [4, 4, 4, 2],
       [5, 3, 1, 1]])

In [53]:
# working with dates: a datetime64 object is created from a
# 'YYYY-MM-DD hh:mm:ss' formatted string

date64 = np.datetime64('2018-09-06 13:19:00')
date64

numpy.datetime64('2018-09-06T13:19:00')

In [54]:
# dropping the time part from the datetime64 object by converting it to days

dt64 = np.datetime64(date64 , 'D')
dt64 + 10 # adding a plain integer adds that many days

numpy.datetime64('2018-09-16')

In [55]:
# creating timedeltas (an amount of a given unit of time)

tenminutes = np.timedelta64(10, 'm')
tenseconds = np.timedelta64(10, 's')
tennanoseconds = np.timedelta64(10, 'ns') # '10 nanoseconds'

print(tenminutes)

10 minutes


In [56]:
# adding to a date: the result takes the finer of the two time units
print('Add 10 days: ', dt64 + 10)
print('Add 10 minutes: ', dt64 + tenminutes)
print('Add 10 seconds: ', dt64 + tenseconds)

Add 10 days:  2018-09-16
Add 10 minutes:  2018-09-06T00:10
Add 10 seconds:  2018-09-06T00:00:10


In [57]:
# converting an np.datetime64 object back to a string
np.datetime_as_string(dt64)

'2018-09-06'

In [58]:
# checking whether a date is a business day and moving it by a number
# of business days

print('Date: ', dt64)
print('Is it a business day?: ', dt64, np.is_busday(dt64))
print('Add 2 business days, rolling forward to nearest biz day: ',
      np.busday_offset(dt64, 2, roll='forward'))
print('Subtract 5 business days, rolling backward to nearest biz day: ',
      np.busday_offset(dt64, -5, roll='backward'))

Date:  2018-09-06
Is it a business day?:  2018-09-06 True
Add 2 business days, rolling forward to nearest biz day:  2018-09-10
Subtract 5 business days, rolling backward to nearest biz day:  2018-08-30


In [59]:
# creating date sequences (the end date itself is not included)

dates = np.arange(np.datetime64('2018-09-06'), np.datetime64('2018-09-21'))
print(dates)

# checking, element by element, whether they are business days
np.is_busday(dates)

['2018-09-06' '2018-09-07' '2018-09-08' '2018-09-09' '2018-09-10'
 '2018-09-11' '2018-09-12' '2018-09-13' '2018-09-14' '2018-09-15'
 '2018-09-16' '2018-09-17' '2018-09-18' '2018-09-19' '2018-09-20']


array([ True,  True, False, False,  True,  True,  True,  True,  True,
       False, False,  True,  True,  True,  True])

In [60]:
# converting numpy.datetime64 to a standard-library object with tolist()
# (a datetime.date here, as shown in the output below) makes it easy to
# extract its components

import datetime
dt = dt64.tolist()
dt
print(dt)

print('Year: ', dt.year)
print('Day of month: ', dt.day)
print('Month of year: ', dt.month)
print('Day of Week: ', dt.weekday()) # 3 = Thursday (Monday is 0)

datetime.date(2018, 9, 6)

2018-09-06
Year:  2018
Day of month:  6
Month of year:  9
Day of Week:  3


In [61]:
# making a scalar function work on vectors

def foo(x):
    """Return x squared when x % 2 == 1, otherwise x divided by 2.

    Handles one number at a time: the condition needs a single truth value.
    """
    return x**2 if x % 2 == 1 else x/2

print('x = 10 returns ', foo(10))
print('x = 11 returns ', foo(11))

# foo() takes a single number, so passing a list fails
#print('x = [10, 11, 12] returns ', foo([10, 11, 12])) # does not work

x = 10 returns  5.0
x = 11 returns  121


In [62]:
# vectorizing foo() makes it work on vectors

foo_v = np.vectorize(foo, otypes=[float]) # 'vectorize' makes a scalar
                                        # function work on arrays;
                                        # otypes sets the output dtype
print('x = [10,11,12], returns ', foo_v([10, 11, 12]))
print('x = [[10,11,12], [1, 2, 3]] returns ', foo_v([[10,11,12],[1,2,3]]))

x = [10,11,12], returns  [  5. 121.   6.]
x = [[10,11,12], [1, 2, 3]] returns  [[  5. 121.   6.]
 [  1.   1.   9.]]


In [63]:
# applying a function column-wise or row-wise

# creating a 4x10 array of random integers from 1 to 9 (seeded, so repeatable)
np.random.seed(100)
arr_x = np.random.randint(1, 10, size=[4,10])
arr_x

# defining a function that works on a 1D vector
def max_minus_min(x):
    """Return the spread of x: its largest value minus its smallest value."""
    largest = np.amax(x)
    smallest = np.amin(x)
    return largest - smallest

# applying the function along the rows (axis=1) and the columns (axis=0);
# the function receives one row / one column at a time
# apply_along_axis parameters: func, axis, array
print('Row wise: ', np.apply_along_axis(
    max_minus_min, axis=1, arr=arr_x))
print('Column wise: ', np.apply_along_axis(
    max_minus_min, 0, arr_x))

array([[9, 9, 4, 8, 8, 1, 5, 3, 6, 3],
       [3, 3, 2, 1, 9, 5, 1, 7, 3, 5],
       [2, 6, 4, 5, 5, 4, 8, 2, 2, 8],
       [8, 1, 3, 4, 3, 6, 9, 2, 1, 8]])

Row wise:  [8 8 6 8]
Column wise:  [7 8 2 7 6 5 8 5 5 5]


In [64]:
# finding the location at which a value has to be inserted
# so that the array remains sorted
# np.sort(np.random.randint(1,10, 20)) - sorting function

x = np.arange(10); print(x)
print('Where should 5 be inserted?: ',
      np.searchsorted(x, 5))
print('Where should 5 be inserted (right)?: ',
     np.searchsorted(x, 5, side='right')) 
                            # the position just after the existing 5

[0 1 2 3 4 5 6 7 8 9]
Where should 5 be inserted?:  5
Where should 5 be inserted (right)?:  6


In [65]:
# drawing a weighted random item from a sequence in two different ways
lst = range(10000)
probs = np.random.random(10000) # array of random numbers from [0, 1)
probs /= probs.sum() # normalized so that the probabilities sum to 1
#probs.cumsum() # array with the cumulative probabilities

# comparison of the time needed by the two approaches
%timeit lst[np.searchsorted(probs.cumsum(), np.random.random())]

%timeit np.random.choice(lst, p=probs) #p is optional
# in the run recorded below searchsorted is around 44 times faster
# than random.choice


36.8 µs ± 2.42 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
1.62 ms ± 34.6 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [66]:
# adding a new axis to a numpy array

x = np.arange(5)
print('Original array: ', x)
x_col = x[:, np.newaxis] # introducing a new column axis: 5 rows, 1 column
print('x_col shape: ', x_col.shape)
print(x_col)

x_row = x[np.newaxis, :] # introducing a new row axis: 1 row, 5 columns
print('x_row shape: ', x_row.shape)
print(x_row)

Original array:  [0 1 2 3 4]
x_col shape:  (5, 1)
[[0]
 [1]
 [2]
 [3]
 [4]]
x_row shape:  (1, 5)
[[0 1 2 3 4]]


In [67]:
# using 'np.digitize' to return the index of the bin
# each element belongs to

x = np.arange(10)
bins =np.array([0, 3, 6, 9])

np.digitize(x, bins) # returns, for each array item, the index
                    # of the bin it falls into

array([1, 1, 1, 2, 2, 2, 3, 3, 3, 4], dtype=int32)

In [68]:
# replacing numbers which are outside the given range with
# the lower or the upper limit
x
np.clip(x, 3, 8)

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

array([3, 3, 3, 3, 4, 5, 6, 7, 8, 8])

In [69]:
# difference between np.histogram and np.bincount

x = np.array([1,1,2,2,2,4,4,5,6,6,6])
np.bincount(x) # one count per integer value: 0 occurs 0 times, 1 occurs 2 times, ...

counts, bins = np.histogram(x, [0, 2, 4, 6, 8]) # one count per bin given by the edges
print('Counts: ', counts) # number of elements in each bin

array([0, 2, 3, 0, 2, 1, 3], dtype=int32)

Counts:  [2 3 3 3]


In [76]:
# Minimal class with one class-level attribute; deliberately has no docstring
# so that C.__dict__['__doc__'] stays None.
class C:
    x = 4
 
c = C()
#c.y = 5
c.__dict__ # empty: the instance has no attributes of its own
C.__dict__ # the class namespace, which is where x is stored

{}

mappingproxy({'__dict__': <attribute '__dict__' of 'C' objects>,
              '__doc__': None,
              '__module__': '__main__',
              '__weakref__': <attribute '__weakref__' of 'C' objects>,
              'x': 4})