#### Import numpy as np and print the version of numpy

In [2]:
import numpy as np
np.__version__

'1.22.3'

#### How to create a 1D array?

In [3]:
arr = np.arange(10)
arr

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

#### How to create a boolean array?

In [4]:
np.full((3,3), True, dtype=bool)

array([[ True,  True,  True],
       [ True,  True,  True],
       [ True,  True,  True]])

#### Extract all odd numbers from numpy 1D Array

In [5]:
arr = np.arange(10)
arr[arr % 2 != 0]

array([1, 3, 5, 7, 9])

#### Replace all odd numbers in arr with -1

In [6]:
arr[arr % 2 != 0] = -1
arr

array([ 0, -1,  2, -1,  4, -1,  6, -1,  8, -1])

#### Replace all odd numbers in arr with -1 without changing arr

In [7]:
arr = np.arange(10)
out = np.where(arr % 2 != 0,-1, arr)
print(out)
print(arr)


[ 0 -1  2 -1  4 -1  6 -1  8 -1]
[0 1 2 3 4 5 6 7 8 9]


#### Convert a 1D array to a 2D array with 2 rows

In [8]:
arr = np.arange(10)
arr.reshape(-1,5) # -1 meaning automatically decide the number rows/columns

array([[0, 1, 2, 3, 4],
       [5, 6, 7, 8, 9]])

#### Stack arrays a and b vertically

In [9]:
arr1 = np.arange(10).reshape(2, -1)
arr2 = np.repeat(1,10).reshape(2, -1)

# Method 1:
np.concatenate([arr1, arr2], axis=0) # axis 0 mean row and axis 1 mean column

array([[0, 1, 2, 3, 4],
       [5, 6, 7, 8, 9],
       [1, 1, 1, 1, 1],
       [1, 1, 1, 1, 1]])

In [10]:
# Method 2:
np.vstack([arr1, arr2])  # for row use vstack() and for column use hstack()

array([[0, 1, 2, 3, 4],
       [5, 6, 7, 8, 9],
       [1, 1, 1, 1, 1],
       [1, 1, 1, 1, 1]])

In [11]:
# Method 3:
np.r_[arr1, arr2]   # for row use r_ and for column use c_

array([[0, 1, 2, 3, 4],
       [5, 6, 7, 8, 9],
       [1, 1, 1, 1, 1],
       [1, 1, 1, 1, 1]])

#### Stack the arrays a and b horizontally.

In [12]:
# Method 1:
np.concatenate([arr1, arr2], axis=1) # axis 0 mean row and axis 1 mean column

array([[0, 1, 2, 3, 4, 1, 1, 1, 1, 1],
       [5, 6, 7, 8, 9, 1, 1, 1, 1, 1]])

In [13]:
# Method 2:
np.hstack([arr1, arr2])  # for row use vstack() and for column use hstack()


array([[0, 1, 2, 3, 4, 1, 1, 1, 1, 1],
       [5, 6, 7, 8, 9, 1, 1, 1, 1, 1]])

In [14]:
# Method 3:
np.c_[arr1, arr2]   # for row use r_ and for column use c_

array([[0, 1, 2, 3, 4, 1, 1, 1, 1, 1],
       [5, 6, 7, 8, 9, 1, 1, 1, 1, 1]])

#### How to generate custom sequences in numpy without hardcoding?

In [15]:
arr = np.array([1,2,3])

np.r_[np.repeat(arr, 3), np.tile(arr, 3)]

array([1, 1, 1, 2, 2, 2, 3, 3, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3])

In [16]:
tile = np.tile(arr, 3)

In [17]:
repeat_arr = np.repeat(arr, 3)

In [18]:
np.concatenate([repeat_arr, tile])

array([1, 1, 1, 2, 2, 2, 3, 3, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3])

#### Get the common items between arr_1 and arr_2

In [19]:
arr_1 = np.array([1,2,3,2,3,4,3,4,5,6])
arr_2 = np.array([7,2,10,2,7,4,9,4,9,8])

print(np.intersect1d(arr_1, arr_2))
print(np.intersect1d(arr_2, arr_1))

[2 4]
[2 4]


#### From array a remove all items present in array b

In [20]:
a = np.array([1,2,3,4,5])
b = np.array([5,6,7,8,9])

np.setdiff1d(a,b)

array([1, 2, 3, 4])

#### Get the positions where elements of a and b match

In [24]:
a = np.array([1,2,3,2,3,4,3,4,5,6])
b = np.array([7,2,10,2,7,4,9,4,9,8])

np.where(a == b)


(array([1, 3, 5, 7]),)

#### Get all items between 5 and 10 from a.

In [32]:
a = np.array([2, 6, 1, 9, 10, 3, 27])

index = np.where((a >= 5) & (a <= 10))
a[index]


array([ 6,  9, 10])

In [29]:
a[(a >= 5) & (a <= 10)]


array([ 6,  9, 10])

#### Convert the function maxx that works on two scalars, to work on two arrays.

In [35]:
def maxx(x, y):
    """
        Get the maximum of two items
    """
    if x >= y:
        return x
    else:
        return y


pair_max = np.vectorize(maxx, otypes=[float])

a = np.array([5, 7, 9, 8, 6, 4, 5])
b = np.array([6, 3, 4, 8, 9, 7, 1])

pair_max(a, b)


array([6., 7., 9., 8., 9., 7., 5.])

#### Swap columns 1 and 2 in the array arr.

In [37]:
arr = np.arange(9).reshape(3,3)

arr

array([[0, 1, 2],
       [3, 4, 5],
       [6, 7, 8]])

In [40]:
arr[:, [0,2,1]]

array([[0, 2, 1],
       [3, 5, 4],
       [6, 8, 7]])

#### Swap rows 1 and 2 in the array arr

In [41]:
arr = np.arange(9).reshape(3, 3)
arr


array([[0, 1, 2],
       [3, 4, 5],
       [6, 7, 8]])

In [42]:
arr[[0,2,1], :]

array([[0, 1, 2],
       [6, 7, 8],
       [3, 4, 5]])

#### Reverse the rows of a 2D array arr.

In [43]:
arr = np.arange(9).reshape(3,3)
arr

array([[0, 1, 2],
       [3, 4, 5],
       [6, 7, 8]])

In [44]:
arr[::-1]

array([[6, 7, 8],
       [3, 4, 5],
       [0, 1, 2]])

#### Reverse the columns of a 2D array arr.


In [45]:
arr = np.arange(9).reshape(3,3)

arr

array([[0, 1, 2],
       [3, 4, 5],
       [6, 7, 8]])

In [47]:
arr[:, ::-1]

array([[2, 1, 0],
       [5, 4, 3],
       [8, 7, 6]])

#### Create a 2D array of shape 5x3 to contain random decimal numbers between 5 and 10.

In [48]:
rand_arr = np.random.randint(low=5, high=10, size=(5,3)) + np.random.random((5,3))
rand_arr

array([[7.29182927, 8.13108993, 5.61611088],
       [6.73669875, 5.09254276, 6.29012298],
       [9.50405803, 9.31092665, 9.50594261],
       [7.20299836, 9.23147018, 8.05023206],
       [5.53319201, 5.25688414, 9.701944  ]])

In [49]:
rand_arr = np.random.uniform(5,10, size=(5,3))
rand_arr

array([[6.14335906, 9.58495942, 6.21615   ],
       [6.40344497, 5.56831258, 5.43346849],
       [8.9650373 , 6.2442279 , 6.63851233],
       [8.72733859, 7.88275154, 9.04635189],
       [8.06663288, 6.23777206, 5.19384489]])

#### Print or show only 3 decimal places of the numpy array rand_arr.

In [60]:
rand_arr = np.random.random((5,3))
np.set_printoptions(precision=3)
rand_arr


array([[0.018, 0.079, 0.143],
       [0.945, 0.601, 0.521],
       [0.342, 0.816, 0.765],
       [0.552, 0.171, 0.48 ],
       [0.192, 0.961, 0.53 ]])

#### Pretty print rand_arr by suppressing the scientific notation (like 1e10)

In [63]:
np.set_printoptions(suppress=False)

np.random.seed(100)
rand_arr = np.random.random([3,3])/1e3
rand_arr

array([[5.434e-04, 2.784e-04, 4.245e-04],
       [8.448e-04, 4.719e-06, 1.216e-04],
       [6.707e-04, 8.259e-04, 1.367e-04]])

In [64]:
np.set_printoptions(suppress=True, precision=6)  # precision is optional
rand_arr


array([[0.000543, 0.000278, 0.000425],
       [0.000845, 0.000005, 0.000122],
       [0.000671, 0.000826, 0.000137]])

#### Limit the number of items printed in python numpy array a to a maximum of 6 elements.

In [67]:
np.set_printoptions(threshold=6)
a = np.arange(15)
a

array([ 0,  1,  2, ..., 12, 13, 14])

#### Print the full numpy array a without truncating.

In [77]:
np.set_printoptions(threshold=6)
a = np.arange(15)
# np.set_printoptions(threshold=sys.maxsize)
np.set_printoptions(threshold=len(a))
a


array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14])

#### Import the iris dataset keeping the text intact.

In [78]:
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
iris = np.genfromtxt(url, delimiter=',', dtype='object')
names = ('sepallength', 'sepalwidth', 'petallength', 'petalwidth', 'species')

# Print the first 5 rows
iris[:5]


array([[b'5.1', b'3.5', b'1.4', b'0.2', b'Iris-setosa'],
       [b'4.9', b'3.0', b'1.4', b'0.2', b'Iris-setosa'],
       [b'4.7', b'3.2', b'1.3', b'0.2', b'Iris-setosa'],
       [b'4.6', b'3.1', b'1.5', b'0.2', b'Iris-setosa'],
       [b'5.0', b'3.6', b'1.4', b'0.2', b'Iris-setosa']], dtype=object)

#### Extract the text column species from the 1D iris imported in previous question.

In [81]:
iris_1d = np.genfromtxt(url, delimiter=',', dtype=None)
print(iris_1d.shape)
species = np.array([row[4] for row in iris_1d])
species[:5]


(150,)


  iris_1d = np.genfromtxt(url, delimiter=',', dtype=None)


array([b'Iris-setosa', b'Iris-setosa', b'Iris-setosa', b'Iris-setosa',
       b'Iris-setosa'], dtype='|S15')

####  Convert the 1D iris to 2D array iris_2d by omitting the species text field.



In [82]:
iris_1d = np.genfromtxt(url, delimiter=',', dtype=None)
iris_2d = np.array([row.tolist()[:4] for row in iris_1d])
iris_2d[:4]

  iris_1d = np.genfromtxt(url, delimiter=',', dtype=None)


array([[5.1, 3.5, 1.4, 0.2],
       [4.9, 3. , 1.4, 0.2],
       [4.7, 3.2, 1.3, 0.2],
       [4.6, 3.1, 1.5, 0.2]])

#### Find the mean, median, standard deviation of iris's sepallength (1st column)

In [89]:
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
iris = np.genfromtxt(url, delimiter=',', dtype='object')
sepallength = np.genfromtxt(url, delimiter=',', dtype='float', usecols=[0])

mean, median, std = np.mean(sepallength), np.median(sepallength), np.std(sepallength)
print("Mean",mean, "Median", median, "Std",  std)

Mean 5.843333333333334 Median 5.8 Std 0.8253012917851409


#### Find out if iris_2d has any missing values.


In [91]:
np.isnan(iris_2d).any()


False

#### Replace all ccurrences of nan with 0 in numpy array


In [92]:
iris_2d[np.isnan(iris_2d)] = 0
iris_2d[:4]

array([[5.1, 3.5, 1.4, 0.2],
       [4.9, 3. , 1.4, 0.2],
       [4.7, 3.2, 1.3, 0.2],
       [4.6, 3.1, 1.5, 0.2]])

#### Find the unique values and the count of unique values in iris's species

In [94]:
species = np.array([row.tolist()[4] for row in iris])
np.unique(species, return_counts=True)


(array([b'Iris-setosa', b'Iris-versicolor', b'Iris-virginica'],
       dtype='|S15'),
 array([50, 50, 50]))

#### Bin the petal length (3rd) column of iris_2d to form a text array, such that if petal length is:

- Less than 3 --> 'small'
- 3-5 --> 'medium'
- '>=5 --> 'large'

In [95]:
# Bin petallength 
petal_length_bin = np.digitize(iris[:, 2].astype('float'), [0, 3, 5, 10])

# Map it to respective category
label_map = {1: 'small', 2: 'medium', 3: 'large', 4: np.nan}
petal_length_cat = [label_map[x] for x in petal_length_bin]

# View
petal_length_cat[:4]

['small', 'small', 'small', 'small']

#### Create a new column for volume in iris_2d, where volume is (pi x petallength x sepal_length^2)/3

In [96]:
# Compute volume
sepallength = iris_2d[:, 0].astype('float')
petallength = iris_2d[:, 2].astype('float')
volume = (np.pi * petallength * (sepallength**2))/3

# Introduce new dimension to match iris_2d's
volume = volume[:, np.newaxis]

# Add the new column
out = np.hstack([iris_2d, volume])

# View
out[:4]

array([[ 5.1     ,  3.5     ,  1.4     ,  0.2     , 38.132652],
       [ 4.9     ,  3.      ,  1.4     ,  0.2     , 35.200498],
       [ 4.7     ,  3.2     ,  1.3     ,  0.2     , 30.072372],
       [ 4.6     ,  3.1     ,  1.5     ,  0.2     , 33.23805 ]])

#### Randomly sample iris's species such that setose is twice the number of versicolor and virginica

In [97]:
# Get the species column
species = iris[:, 4]

# Approach 1: Generate Probablistically
np.random.seed(100)
a = np.array(['Iris-setosa', 'Iris-versicolor', 'Iris-virginica'])
species_out = np.random.choice(a, 150, p=[0.5, 0.25, 0.25])

# Approach 2: Probablistic Sampling (preferred)
np.random.seed(100)
probs = np.r_[np.linspace(0, 0.500, num=50), np.linspace(0.501, .750, num=50), np.linspace(.751, 1.0, num=50)]
index = np.searchsorted(probs, np.random.random(150))
species_out = species[index]
print(np.unique(species_out, return_counts=True))

(array([b'Iris-setosa', b'Iris-versicolor', b'Iris-virginica'],
      dtype=object), array([77, 37, 36]))


#### What is the value of second longest petallength of species setosa

In [98]:
# Get the species and petal length columns
petal_len_setosa = iris[iris[:, 4] == b'Iris-setosa', [2]].astype('float')

# Get the second last value
np.unique(np.sort(petal_len_setosa))[-2]

1.7

#### Sort the iris dataset based on sepallength column.

In [99]:
print(iris[iris[:, 0].argsort()][:20])


[[b'4.3' b'3.0' b'1.1' b'0.1' b'Iris-setosa']
 [b'4.4' b'3.2' b'1.3' b'0.2' b'Iris-setosa']
 [b'4.4' b'3.0' b'1.3' b'0.2' b'Iris-setosa']
 ...
 [b'4.9' b'2.5' b'4.5' b'1.7' b'Iris-virginica']
 [b'4.9' b'3.1' b'1.5' b'0.1' b'Iris-setosa']
 [b'4.9' b'3.1' b'1.5' b'0.1' b'Iris-setosa']]


#### Find the most frequent value of petal length (3rd column) in iris dataset.

In [100]:
vals, counts = np.unique(iris[:, 2], return_counts=True)
print(vals[np.argmax(counts)])

b'1.5'


#### Find the position of the first occurrence of a value greater than 1.0 in petalwidth 4th column of iris dataset.

In [101]:
np.argwhere(iris[:, 3].astype(float) > 1.0)[0]

array([50])

#### From the array a, replace all values greater than 30 to 30 and less than 10 to 10.

In [103]:
np.set_printoptions(precision=2)
np.random.seed(100)
a = np.random.uniform(1, 50, 20)

# Solution 1: Using np.clip
print(np.clip(a, a_min=10, a_max=30))

# Solution 2: Using np.where
print(np.where(a < 10, 10, np.where(a > 30, 30, a)))


[27.63 14.64 21.8  ... 10.   30.   14.43]
[27.63 14.64 21.8  ... 10.   30.   14.43]


#### Get the positions of top 5 maximum values in a given array a.

In [104]:
# Solution:
print(a.argsort())
#> [18 7 3 10 15]

# Solution 2:
print(np.argpartition(-a, 5)[:5])
#> [15 10  3  7 18]

# Below methods will get you the values.
# Method 1:
print(a[a.argsort()][-5:])

# Method 2:
print(np.sort(a)[-5:])

# Method 3:
print(np.partition(a, kth=-5)[-5:])

# Method 4:
print(a[np.argpartition(-a, 5)][:5])


[ 4 13  5 ...  3 10 15]
[15 10  3  7 18]
[41.   41.47 42.39 44.67 48.95]
[41.   41.47 42.39 44.67 48.95]
[41.   41.47 42.39 44.67 48.95]
[48.95 44.67 42.39 41.47 41.  ]


#### Compute the counts of unique values row-wise.

In [105]:
np.random.seed(100)
arr = np.random.randint(1, 11, size=(6, 10))
arr

array([[ 9,  9,  4, ...,  3,  6,  3],
       [ 3,  3,  2, ..., 10,  7,  3],
       [ 5,  2,  6, ...,  8,  2,  2],
       [ 8,  8,  1, ...,  3,  6,  9],
       [ 2,  1,  8, ...,  3,  6,  2],
       [ 9,  2,  6, ...,  6,  1, 10]])

In [106]:
def counts_of_all_values_rowwise(arr2d):
    # Unique values and its counts row wise
    num_counts_array = [np.unique(row, return_counts=True) for row in arr2d]

    # Counts of all values row wise
    return([[int(b[a == i]) if i in a else 0 for i in np.unique(arr2d)] for a, b in num_counts_array])


# Print
print(np.arange(1, 11))
counts_of_all_values_rowwise(arr)

[ 1  2  3  4  5  6  7  8  9 10]


[[1, 0, 2, 1, 1, 1, 0, 2, 2, 0],
 [2, 1, 3, 0, 1, 0, 1, 0, 1, 1],
 [0, 3, 0, 2, 3, 1, 0, 1, 0, 0],
 [1, 0, 2, 1, 0, 1, 0, 2, 1, 2],
 [2, 2, 2, 0, 0, 1, 1, 1, 1, 0],
 [1, 1, 1, 1, 1, 2, 0, 0, 2, 1]]

#### Drop all nan values from a 1D numpy array

In [108]:
np.array([1,2,3,np.nan,5,6,7,np.nan])

a[~np.isnan(a)]


array([27.63, 14.64, 21.8 , ...,  9.43, 41.  , 14.43])

#### Compute the euclidean distance between two arrays a and b.

In [109]:
a = np.array([1,2,3,4,5])
b = np.array([4,5,6,7,8])

In [111]:
dist = np.linalg.norm(a-b)
dist

6.708203932499369