### Filtering

In NumPy, you filter an array using a boolean index list.

A boolean index list is a list of booleans corresponding to indexes in the array.

If the value at an index is True that element is contained in the filtered array, if the value at that index is False that element is excluded from the filtered array.

In [18]:
import numpy as np

arr = np.array([41, 42, 43, 44])

x = [True, False, True, False]

newarr = arr[x]

print(newarr)

[41 43]


In [19]:
a = np.array([1,2,3,4,5])
a[a>3]

array([4, 5])

In [23]:
# Using generators:

def filter_fromiter(arr, k):
    return np.fromiter((x for x in arr if x < k), dtype=arr.dtype)
filter_fromiter(np.array([1,2,3,4,5,6,7,8]),3)

array([1, 2])

- np.fromiter(iterable, int)

In [28]:
iterable = (x*x for x in range(5) for y in range(10) if y<x)
#print(iterable)
np.fromiter(iterable, float)

array([ 1.,  4.,  4.,  9.,  9.,  9., 16., 16., 16., 16.])

In [29]:
iterable = (x*x for x in range(5) for y in range(10) if y<x)

np.fromiter(iterable, int)

array([ 1,  4,  4,  9,  9,  9, 16, 16, 16, 16])

In [32]:
for i in range(5): #0 #1
    for j in range(10):#0,1,2,3,---10 cond(false) #0 cond(true) 1 cond(false)
        if j<i:
            print(i*i)#1*1

1
4
4
9
9
9
16
16
16
16


In [34]:
# boolean
def filter_mask(arr, k):
    return arr[arr < k]
filter_mask(np.array([1,2,3]),2)

array([1])

In [38]:
#Using np.where():
def filter_where(arr, k):
    return arr[np.where(arr < k)]
filter_where(np.array([1,2,3,89,45,23,12,13,26]),35)

array([ 1,  2,  3, 23, 12, 13, 26])

In [39]:
#Using np.nonzero()
def filter_nonzero(arr, k):
    return arr[np.nonzero(arr < k)]
filter_nonzero(np.array([1,2,3,89,45,23,12,13,26]),35)

array([ 1,  2,  3, 23, 12, 13, 26])

In [1]:
import numpy
import math
def prime(upto=100000):
    return filter(lambda num: (num % numpy.arange(2,1+int(math.sqrt(num)))).all(), range(2,upto+1))

In [None]:
filter(lambda num: (num % numpy.arange(2,1+int(math.sqrt(num)))).all(), range(2,upto+1))

### Aggregate and Statistical Functions

In [40]:
arr1 = np.array([10, 20, 30, 40, 50])
print(arr1)

arr2 = np.array([[0, 10, 20], [30, 40, 50], [60, 70, 80]])
print(arr2)

arr3 = np.array([[14, 6, 9, -12, 19, 72],[-9, 8, 22, 0, 99, -11]])
print(arr3)

[10 20 30 40 50]
[[ 0 10 20]
 [30 40 50]
 [60 70 80]]
[[ 14   6   9 -12  19  72]
 [ -9   8  22   0  99 -11]]


In [42]:
print(arr1.sum())
print(arr2.sum())
print(arr3.sum())

150
360
217


In [43]:
print(arr2.sum(axis = 0))
arr3.sum(axis = 0)

[ 90 120 150]


array([  5,  14,  31, -12, 118,  61])

In [44]:
print(arr2.sum(axis = 1))
arr3.sum(axis = 1)

[ 30 120 210]


array([108, 109])

In [45]:
print(arr2.sum(0))
print(arr2.sum(1))
print(arr3.sum(1))
arr3.sum(0)

[ 90 120 150]
[ 30 120 210]
[108 109]


array([  5,  14,  31, -12, 118,  61])

In [46]:
#Python NumPy average
print(np.average(arr1))
print(np.average(arr2))
np.average(arr3)

30.0
40.0


18.083333333333332

In [47]:
print(np.average(arr2, axis = 0))
np.average(arr2, axis = 1)

[30. 40. 50.]


array([10., 40., 70.])

In [48]:
arr2

array([[ 0, 10, 20],
       [30, 40, 50],
       [60, 70, 80]])

In [49]:
print(np.average(arr2, 0))
np.average(arr2, 1)

[30. 40. 50.]


array([10., 40., 70.])

In [51]:

print(np.prod(arr1))
np.prod(arr2) # any number multiply by zero gives zero

1.0
12000000


0

In [52]:
x = np.array([[1, 2, 3], [4, 5, 6]])
print(np.prod(x))

y = np.random.randint(1, 10, size = (5, 5))
np.prod(y)

720


1746665472

In [54]:
print(np.prod(x, axis = 0))
print(np.prod(x, axis = 1))
print(np.prod(y, axis = 0))
print(np.prod(y, axis = 1))

[ 4 10 18]
[  6 120]
[ 360 5292 7776 3024  240]
[18144  4200   756   144  1296]


In [55]:
print(arr1.min())
print(arr2.min())
print(arr3.min())

10
0
-12


In [56]:
print(arr1.min(axis=0))
print(arr2.min(1))
print(arr3.min(0))

10
[ 0 30 60]
[ -9   6   9 -12  19 -11]


In [57]:
arr2

array([[ 0, 10, 20],
       [30, 40, 50],
       [60, 70, 80]])

In [58]:
print(arr1.max(axis=0))
print(arr2.max(1))
print(arr3.max(0))

50
[20 50 80]
[14  8 22  0 99 72]


In [59]:
print(arr1.mean())
print(arr2.mean())
print(arr3.mean())

30.0
40.0
18.083333333333332


In [60]:
print(arr1.mean(axis=0))
print(arr2.mean(1))
print(arr3.mean(0))

30.0
[10. 40. 70.]
[ 2.5  7.  15.5 -6.  59.  30.5]


In [61]:
np.median(arr1)
np.median(arr2)
np.median(arr3)

# Median of x and Y axis
np.median(arr2, axis = 0)
np.median(arr2, axis = 1)

array([10., 40., 70.])

In [67]:
#Formula of Python Nampy var is : (item1 – mean)2 + …(itemN – mean)2 / total items
print(arr1.var(),"var")
arr2.var()
arr3.var()

x.var()
y.var()

200.0 var


6.854400000000002

In [63]:
arr1

array([10, 20, 30, 40, 50])

In [64]:
np.mean(arr1)

30.0

In [65]:
(10-30)**2+(20-30)**2+(30-30)**2+(40-30)**2+(50-30)**2

1000

In [66]:
1000/5

200.0

In [68]:
#Python NumPy std
#The Python NumPy std function returns the standard deviation of a given array or in a given axis. The formula behind this is the square root of variance.


print(arr1.std())
print(arr2.std())
print(arr3.std())

14.142135623730951
25.81988897471611
32.44086500422303


In [69]:
#Python NumPy cumsum
#The Python Numpy cumsum function returns the cumulative sum of a given array or in a given axis.
print(arr1.cumsum())
arr2.cumsum()

[ 10  30  60 100 150]


array([  0,  10,  30,  60, 100, 150, 210, 280, 360], dtype=int32)

In [70]:
arr1

array([10, 20, 30, 40, 50])

In [71]:
print(arr2.cumsum(axis = 0))
arr2.cumsum(axis = 1)

[[  0  10  20]
 [ 30  50  70]
 [ 90 120 150]]


array([[  0,  10,  30],
       [ 30,  70, 120],
       [ 60, 130, 210]], dtype=int32)

In [72]:
print(arr1.cumprod())
arr2.cumprod()

[      10      200     6000   240000 12000000]


array([0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=int32)

In [82]:
#Python NumPy argmin
#The Python NumPy argmin returns the index position of the minimum value in a given array 
#or a given axis. To demonstrate this and the next function, 
#we declared two more arrays of random values



In [84]:
arr4 = np.random.randint(9, size = (9))
print(arr4)

arr5 = np.random.randint(25, size = (5, 5))
arr5

[1 3 3 6 4 4 0 6 2]


array([[ 1, 22,  2,  4,  2],
       [ 3, 14,  8, 14,  4],
       [15,  6,  7, 14, 19],
       [ 4,  4, 18,  2, 21],
       [ 6, 19, 24, 15, 16]])

In [85]:
arr4.argmin()# index

6

In [87]:
a = arr4.argmin()
arr4[a]

0

In [88]:
arr5.argmin(axis = 0)

array([0, 3, 0, 3, 0], dtype=int64)

In [89]:
arr4.argmax()

3

In [91]:
v = arr4.argmax()
arr4[v]

6

In [92]:
arr3.argmax(axis = 0)

array([0, 1, 1, 1, 1, 0], dtype=int64)

In [None]:
#Python NumPy corrcoef
#Python NumPy corrcoef function find and returns the correlation coefficient of an array. 



Correlation coefficients are used in statistics to measure how strong a relationship is between two variables. There are several types of correlation coefficient, but the most popular is Pearson’s. Pearson’s correlation (also called Pearson’s R) is a correlation coefficient commonly used in linear regression. If you’re starting out in statistics, you’ll probably learn about Pearson’s R first. In fact, when anyone refers to the correlation coefficient, they are usually talking about Pearson’s.





Correlation coefficient formulas are used to find how strong a relationship is between data. The formulas return a value between -1 and 1, where:

1 indicates a strong positive relationship.
-1 indicates a strong negative relationship.
A result of zero indicates no relationship at all.

In [93]:
np.corrcoef(arr1)

1.0

In [95]:
np.corrcoef(arr2)

array([[1., 1., 1.],
       [1., 1., 1.],
       [1., 1., 1.]])

In [96]:
np.corrcoef(arr3)

array([[ 1.        , -0.12768134],
       [-0.12768134,  1.        ]])

### Saving data

In [97]:
data = np.linspace(0,1,201)

In [100]:
np.savetxt('A_data.dat', data)

In [103]:
x = np.linspace(0, 1, 201)
y = np.random.random(201)

np.savetxt('AA_data.dat', [x, y])

In [104]:
data = np.column_stack((x, y))
np.savetxt('AA_data.dat', data)

In [105]:
header = "X-Column, Y-Column"
np.savetxt('AB_data.dat', data, header=header)

In [106]:
header = "X-Column, Y-Column\n"
header += "This is a second line"
np.savetxt('AB_data.dat', data, header=header)

- Loading Saved Data with Numpy

In [107]:
data = np.loadtxt('AB_data.dat')
x = data[:, 0]
y = data[:, 1]

In [108]:
print(x)

[0.    0.005 0.01  0.015 0.02  0.025 0.03  0.035 0.04  0.045 0.05  0.055
 0.06  0.065 0.07  0.075 0.08  0.085 0.09  0.095 0.1   0.105 0.11  0.115
 0.12  0.125 0.13  0.135 0.14  0.145 0.15  0.155 0.16  0.165 0.17  0.175
 0.18  0.185 0.19  0.195 0.2   0.205 0.21  0.215 0.22  0.225 0.23  0.235
 0.24  0.245 0.25  0.255 0.26  0.265 0.27  0.275 0.28  0.285 0.29  0.295
 0.3   0.305 0.31  0.315 0.32  0.325 0.33  0.335 0.34  0.345 0.35  0.355
 0.36  0.365 0.37  0.375 0.38  0.385 0.39  0.395 0.4   0.405 0.41  0.415
 0.42  0.425 0.43  0.435 0.44  0.445 0.45  0.455 0.46  0.465 0.47  0.475
 0.48  0.485 0.49  0.495 0.5   0.505 0.51  0.515 0.52  0.525 0.53  0.535
 0.54  0.545 0.55  0.555 0.56  0.565 0.57  0.575 0.58  0.585 0.59  0.595
 0.6   0.605 0.61  0.615 0.62  0.625 0.63  0.635 0.64  0.645 0.65  0.655
 0.66  0.665 0.67  0.675 0.68  0.685 0.69  0.695 0.7   0.705 0.71  0.715
 0.72  0.725 0.73  0.735 0.74  0.745 0.75  0.755 0.76  0.765 0.77  0.775
 0.78  0.785 0.79  0.795 0.8   0.805 0.81  0.815 0.