# 101 numpy exercise for ML
source: https://www.machinelearningplus.com/python/101-numpy-exercises-python/

In [1]:
import numpy as np
print(np.__version__)

1.21.2


In [3]:
np.arange(10)

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [6]:
np.full((3,3), True, dtype=bool)

array([[ True,  True,  True],
       [ True,  True,  True],
       [ True,  True,  True]])

In [6]:
arr = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
arr = arr[arr % 2 == 1]
arr

array([1, 3, 5, 7, 9])

In [7]:
# replace inplace
arr = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
arr[arr % 2 == 1] = -1
arr

array([ 0, -1,  2, -1,  4, -1,  6, -1,  8, -1])

In [9]:
# replace
arr = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
out = np.where(arr % 2 == 1, -1, arr)
print(out)
arr

[ 0 -1  2 -1  4 -1  6 -1  8 -1]


array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [12]:
arr = np.arange(10)
arr.reshape(2,-1) # -1 to automatic set cols

array([[0, 1, 2, 3, 4],
       [5, 6, 7, 8, 9]])

In [20]:
# stack vertically
a = np.arange(10).reshape(2,-1)
b = np.repeat(1, 10).reshape(2,-1)
np.concatenate([a,b], axis=0)

array([[0, 1, 2, 3, 4],
       [5, 6, 7, 8, 9],
       [1, 1, 1, 1, 1],
       [1, 1, 1, 1, 1]])

In [21]:
a = np.array([1,2,3])
np.r_[np.repeat(a, 3, axis=0), np.tile(a,3)]

array([1, 1, 1, 2, 2, 2, 3, 3, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3])

In [23]:
a = np.array([1,2,3,2,3,4,3,4,5,6])
b = np.array([7,2,10,2,7,4,9,4,9,8])
np.intersect1d(a, b)

array([2, 4])

In [30]:
# remove any items in b from a
a = np.array([1,2,3,4,5])
b = np.array([5,6,7,8,9])
#[i for i in a if i not in b]
np.setdiff1d(a, b)

array([1, 2, 3, 4])

In [31]:
# get index where a[i] = b[i]
a = np.array([1,2,3,2,3,4,3,4,5,6])
b = np.array([7,2,10,2,7,4,9,4,9,8])
np.where(a == b)

(array([1, 3, 5, 7]),)

In [40]:
a = np.array([2, 6, 1, 9, 10, 3, 27])
# 1
a[(a >= 5) & (a <= 10)]

# 2
index = np.where((a >= 5) & (a <= 10))
a[index]

array([ 6,  9, 10])

In [45]:
def maxx(x, y):
    """Get the maximum of two items"""
    if x >= y:
        return x
    else:
        return y

pair_max = np.vectorize(maxx, otypes=[float])
a = np.array([5, 7, 9, 8, 6, 4, 5])
b = np.array([6, 3, 4, 8, 9, 7, 1])
pair_max(a,b)

array([6., 7., 9., 8., 9., 7., 5.])

In [51]:
# swap colums
arr = np.arange(9).reshape(3,3)
arr[:,[0,2,1]]

array([[0, 2, 1],
       [3, 5, 4],
       [6, 8, 7]])

In [63]:
# reverse rows
arr = np.arange(9).reshape(3,3)
arr[::-1]

array([[6, 7, 8],
       [3, 4, 5],
       [0, 1, 2]])

In [65]:
# reverse cols
arr = np.arange(9).reshape(3,3)
arr[:, ::-1]

# or
np.flip(arr, axis=1)

array([[2, 1, 0],
       [5, 4, 3],
       [8, 7, 6]])

In [67]:
np.random.uniform(5,10,(5,3))

array([[6.57922207, 8.06057486, 6.32869692],
       [8.34544431, 8.09252373, 8.45244463],
       [7.67786538, 6.45797787, 6.30237645],
       [8.38927633, 5.10213048, 9.24589382],
       [6.27740447, 9.29242398, 8.38539077]])

In [74]:
rand_arr = np.random.random((5,3))
np.set_printoptions(precision=3)
rand_arr[:]

array([[0.024, 0.593, 0.385],
       [0.612, 0.453, 0.202],
       [0.185, 0.197, 0.308],
       [0.897, 0.364, 0.589],
       [0.385, 0.907, 0.014]])

In [78]:
np.random.seed(100)
rand_arr = np.random.random([3,3])/1e3

np.set_printoptions(suppress=True, precision=6)
rand_arr

array([[0.000543, 0.000278, 0.000425],
       [0.000845, 0.000005, 0.000122],
       [0.000671, 0.000826, 0.000137]])

In [84]:
a = np.arange(15)
np.set_printoptions(threshold=3)
a

array([ 0,  1,  2, ..., 12, 13, 14])

In [85]:
# object dtype to retain text
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
iris = np.genfromtxt(url, delimiter=',', dtype='object')
names = ('sepallength', 'sepalwidth', 'petallength', 'petalwidth', 'species')
iris[:3]

array([[b'5.1', b'3.5', b'1.4', b'0.2', b'Iris-setosa'],
       [b'4.9', b'3.0', b'1.4', b'0.2', b'Iris-setosa'],
       [b'4.7', b'3.2', b'1.3', b'0.2', b'Iris-setosa']], dtype=object)

In [86]:
iris[:,4]

array([b'Iris-setosa', b'Iris-setosa', b'Iris-setosa', ...,
       b'Iris-virginica', b'Iris-virginica', b'Iris-virginica'],
      dtype=object)

In [94]:
sepallength = np.genfromtxt(url, delimiter=',', dtype='float', usecols=[0])
mu, med, stdev = np.mean(sepallength), np.median(sepallength), np.std(sepallength)
print(mu, med, stdev)

5.843333333333334 5.8 0.8253012917851409


In [96]:
smin, smax = np.min(sepallength), np.max(sepallength)
(sepallength - smin) / (smax - smin)

array([0.222222, 0.166667, 0.111111, ..., 0.611111, 0.527778, 0.444444])

In [101]:
def softmax(arr):
    arr_exp = np.exp(arr)
    return arr_exp / np.sum(arr_exp)

np.set_printoptions(precision=3, threshold=1000)
smax = softmax(sepallength)
arr_sum, arr_min, arr_max = np.sum(smax), np.min(smax), np.max(smax)
print(arr_sum, arr_min, arr_max)


0.9999999999999999 0.0009973239612701972 0.03650029615086432


In [102]:
# find pecentile scores

np.percentile(sepallength, [5, 95])

array([4.6  , 7.255])

In [116]:
# randomly change 20 elements to nan 
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
iris_2d = np.genfromtxt(url, delimiter=',', dtype='object')
rows, cols = iris_2d.shape

iris_2d[np.random.randint(0,rows,20), np.random.randint(0,cols,20)] = np.nan
# iris_2d[:10]
np.isnan(iris_2d)

# possible less than 20 ?


TypeError: ufunc 'isnan' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''

In [114]:
np.nan is np.nan

True