### 101 NumPy Exercises for Data Analysis (Python)
https://www.machinelearningplus.com/python/101-numpy-exercises-python/

In [2]:
import numpy as np

### 41.Create a new column for volume in iris_2d, where volume is (pi x petallength x sepal_length^2)/3

In [3]:
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
iris_2d = np.genfromtxt(url, delimiter=',', dtype='object')

In [4]:
# Compute volume
sepallength = iris_2d[:, 0].astype('float')
petallength = iris_2d[:, 2].astype('float')
volume = (np.pi * petallength * (sepallength**2))/3

In [5]:
# Introduce new dimension to match iris_2d's
# np.newaxis, create a new axis
volume = volume[:, np.newaxis]

In [6]:
# Add the new column
out = np.hstack([iris_2d, volume])

In [7]:
out[:4]

array([[b'5.1', b'3.5', b'1.4', b'0.2', b'Iris-setosa',
        38.13265162927291],
       [b'4.9', b'3.0', b'1.4', b'0.2', b'Iris-setosa',
        35.200498485922445],
       [b'4.7', b'3.2', b'1.3', b'0.2', b'Iris-setosa', 30.0723720777127],
       [b'4.6', b'3.1', b'1.5', b'0.2', b'Iris-setosa',
        33.238050274980004]], dtype=object)

### 42. Randomly sample iris's species such that setose is twice the number of versicolor and virginica

In [8]:
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
iris = np.genfromtxt(url, delimiter=',', dtype='object')

In [9]:
# Get the species column
species = iris[:, 4]

In [10]:
# Approach 1: Generate Probablistically
np.random.seed(100)
a = np.array(['Iris-setosa', 'Iris-versicolor', 'Iris-virginica'])
species_out = np.random.choice(a, 150, p=[0.5, 0.25, 0.25])

In [11]:
# Approach 2: Probablistic Sampling (preferred)
np.random.seed(100)
probs = np.r_[np.linspace(0, 0.500, num=50), np.linspace(0.501, .750, num=50), np.linspace(.751, 1.0, num=50)]
index = np.searchsorted(probs, np.random.random(150))
species_out = species[index]
print(np.unique(species_out, return_counts=True))

(array([b'Iris-setosa', b'Iris-versicolor', b'Iris-virginica'],
      dtype=object), array([77, 37, 36], dtype=int64))


### 43. What is the value of second last petallength of species setosa

In [12]:
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
iris = np.genfromtxt(url, delimiter=',', dtype='object')
names = ('sepallength', 'sepalwidth', 'petallength', 'petalwidth', 'species')

In [13]:
# Get the species and petal length columns
petal_len_setosa = iris[iris[:, 4] == b'Iris-setosa', [2]].astype('float')

In [14]:
# Get the second last value
np.unique(np.sort(petal_len_setosa))[-2]

1.7

### 44. Sort the iris dataset based on sepallength column.

In [15]:
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
iris = np.genfromtxt(url, delimiter=',', dtype='object')
names = ('sepallength', 'sepalwidth', 'petallength', 'petalwidth', 'species')

In [16]:
# Sort by column position 0: SepalLength
iris[iris[:,0].argsort()][:20]

array([[b'4.3', b'3.0', b'1.1', b'0.1', b'Iris-setosa'],
       [b'4.4', b'3.2', b'1.3', b'0.2', b'Iris-setosa'],
       [b'4.4', b'3.0', b'1.3', b'0.2', b'Iris-setosa'],
       [b'4.4', b'2.9', b'1.4', b'0.2', b'Iris-setosa'],
       [b'4.5', b'2.3', b'1.3', b'0.3', b'Iris-setosa'],
       [b'4.6', b'3.6', b'1.0', b'0.2', b'Iris-setosa'],
       [b'4.6', b'3.1', b'1.5', b'0.2', b'Iris-setosa'],
       [b'4.6', b'3.4', b'1.4', b'0.3', b'Iris-setosa'],
       [b'4.6', b'3.2', b'1.4', b'0.2', b'Iris-setosa'],
       [b'4.7', b'3.2', b'1.3', b'0.2', b'Iris-setosa'],
       [b'4.7', b'3.2', b'1.6', b'0.2', b'Iris-setosa'],
       [b'4.8', b'3.0', b'1.4', b'0.1', b'Iris-setosa'],
       [b'4.8', b'3.0', b'1.4', b'0.3', b'Iris-setosa'],
       [b'4.8', b'3.4', b'1.9', b'0.2', b'Iris-setosa'],
       [b'4.8', b'3.4', b'1.6', b'0.2', b'Iris-setosa'],
       [b'4.8', b'3.1', b'1.6', b'0.2', b'Iris-setosa'],
       [b'4.9', b'2.4', b'3.3', b'1.0', b'Iris-versicolor'],
       [b'4.9', b'2.5', b'4

### 45. Find the most frequent value of petal length (3rd column) in iris dataset.

In [17]:
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
iris = np.genfromtxt(url, delimiter=',', dtype='object')
names = ('sepallength', 'sepalwidth', 'petallength', 'petalwidth', 'species')

In [18]:
vals, counts = np.unique(iris[:, 2], return_counts=True)
print(vals[np.argmax(counts)])

b'1.5'


### 46. Find the position of the first occurrence of a value greater than 1.0 in petalwidth 4th column of iris dataset.

In [19]:
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
iris = np.genfromtxt(url, delimiter=',', dtype='object')

In [21]:
# Solution: (edit: changed argmax to argwhere. )
# np.where(condition)
np.argwhere(iris[:, 3].astype(float) > 1.0)[0]

array([50], dtype=int64)

### 47. From the array a, replace all values greater than 30 to 30 and less than 10 to 10.

In [22]:
np.set_printoptions(precision=2)
np.random.seed(100)
a = np.random.uniform(1,50, 20)
a

array([27.63, 14.64, 21.8 , 42.39,  1.23,  6.96, 33.87, 41.47,  7.7 ,
       29.18, 44.67, 11.25, 10.08,  6.31, 11.77, 48.95, 40.77,  9.43,
       41.  , 14.43])

In [23]:
# Solution 1: Using np.clip
np.clip(a, a_min=10, a_max=30)

array([27.63, 14.64, 21.8 , 30.  , 10.  , 10.  , 30.  , 30.  , 10.  ,
       29.18, 30.  , 11.25, 10.08, 10.  , 11.77, 30.  , 30.  , 10.  ,
       30.  , 14.43])

In [24]:
# Solution 2: Using np.where
# np.where(condition, x, y)
print(np.where(a < 10, 10, np.where(a > 30, 30, a)))

[27.63 14.64 21.8  30.   10.   10.   30.   30.   10.   29.18 30.   11.25
 10.08 10.   11.77 30.   30.   10.   30.   14.43]


### 48. Get the positions of top 5 maximum values in a given array a.

In [53]:
np.random.seed(100)
a = np.random.uniform(1,50, 20)
a

array([27.63, 14.64, 21.8 , 42.39,  1.23,  6.96, 33.87, 41.47,  7.7 ,
       29.18, 44.67, 11.25, 10.08,  6.31, 11.77, 48.95, 40.77,  9.43,
       41.  , 14.43])

In [54]:
# Solution 1:
print(a.argsort())

[ 4 13  5  8 17 12 11 14 19  1  2  0  9  6 16 18  7  3 10 15]


In [55]:
# Solution 2:
a[np.argpartition(-a, 5)[:5]]

array([48.95, 44.67, 42.39, 41.47, 41.  ])

In [56]:
a[np.argpartition(a, -5)[-5:]]

array([41.  , 41.47, 42.39, 44.67, 48.95])

In [57]:
-a

array([-27.63, -14.64, -21.8 , -42.39,  -1.23,  -6.96, -33.87, -41.47,
        -7.7 , -29.18, -44.67, -11.25, -10.08,  -6.31, -11.77, -48.95,
       -40.77,  -9.43, -41.  , -14.43])

In [58]:
a[np.argpartition(-a, 5)]

array([48.95, 44.67, 42.39, 41.47, 41.  , 40.77, 33.87, 29.18, 27.63,
       14.64,  7.7 , 11.25, 10.08,  6.31, 11.77,  6.96,  1.23,  9.43,
       21.8 , 14.43])

In [59]:
#np.argpartition(a, 4), quick sort,  return index, from smallest to biggest, top 5 small
a = np.array([1,3,2,5,4,6,7,9,8])
a[np.argpartition(a, 4)]

array([3, 1, 2, 4, 5, 6, 7, 9, 8])

### 49. Compute the counts of unique values row-wise.

In [60]:
np.random.seed(100)
arr = np.random.randint(1,11,size=(6, 10))
arr

array([[ 9,  9,  4,  8,  8,  1,  5,  3,  6,  3],
       [ 3,  3,  2,  1,  9,  5,  1, 10,  7,  3],
       [ 5,  2,  6,  4,  5,  5,  4,  8,  2,  2],
       [ 8,  8,  1,  3, 10, 10,  4,  3,  6,  9],
       [ 2,  1,  8,  7,  3,  1,  9,  3,  6,  2],
       [ 9,  2,  6,  5,  3,  9,  4,  6,  1, 10]])

In [61]:
def counts_of_all_values_rowwise(arr2d):
    # Unique values and its counts row wise
    num_counts_array = [np.unique(row, return_counts=True) for row in arr2d]

    # Counts of all values row wise
    return([[int(b[a==i]) if i in a else 0 for i in np.unique(arr2d)] for a, b in num_counts_array])

# Print
print(np.arange(1,11))
counts_of_all_values_rowwise(arr)

[ 1  2  3  4  5  6  7  8  9 10]


[[1, 0, 2, 1, 1, 1, 0, 2, 2, 0],
 [2, 1, 3, 0, 1, 0, 1, 0, 1, 1],
 [0, 3, 0, 2, 3, 1, 0, 1, 0, 0],
 [1, 0, 2, 1, 0, 1, 0, 2, 1, 2],
 [2, 2, 2, 0, 0, 1, 1, 1, 1, 0],
 [1, 1, 1, 1, 1, 2, 0, 0, 2, 1]]

In [62]:
# Example 2:
arr = np.array([np.array(list('bill clinton')), np.array(list('narendramodi')), np.array(list('jjayalalitha'))])
print(np.unique(arr))
counts_of_all_values_rowwise(arr)

[' ' 'a' 'b' 'c' 'd' 'e' 'h' 'i' 'j' 'l' 'm' 'n' 'o' 'r' 't' 'y']


[[1, 0, 1, 1, 0, 0, 0, 2, 0, 3, 0, 2, 1, 0, 1, 0],
 [0, 2, 0, 0, 2, 1, 0, 1, 0, 0, 1, 2, 1, 2, 0, 0],
 [0, 4, 0, 0, 0, 0, 1, 1, 2, 2, 0, 0, 0, 0, 1, 1]]

### 50. Convert array_of_arrays into a flat linear 1d array.

In [68]:
arr1 = np.arange(3)
arr2 = np.arange(3,7)
arr3 = np.arange(7,10)

array_of_arrays = np.array([arr1, arr2, arr3])
array_of_arrays

array([array([0, 1, 2]), array([3, 4, 5, 6]), array([7, 8, 9])],
      dtype=object)

In [69]:
# Solution 1
arr_2d = np.array([a for arr in array_of_arrays for a in arr])
arr_2d

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [70]:
# Solution 2:
arr_2d = np.concatenate(array_of_arrays)
arr_2d

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])