### 101 NumPy Exercises for Data Analysis (Python)
https://www.machinelearningplus.com/python/101-numpy-exercises-python/

In [15]:
import numpy as np

### 41.Create a new column for volume in iris_2d, where volume is (pi x petallength x sepal_length^2)/3

In [16]:
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
iris_2d = np.genfromtxt(url, delimiter=',', dtype='object')

In [17]:
# Compute volume
sepallength = iris_2d[:, 0].astype('float')
petallength = iris_2d[:, 2].astype('float')
volume = (np.pi * petallength * (sepallength**2))/3

In [18]:
# Introduce new dimension to match iris_2d's
# np.newaxis, create a new axis
volume = volume[:, np.newaxis]

In [19]:
# Add the new column
out = np.hstack([iris_2d, volume])

In [20]:
out[:4]

array([[b'5.1', b'3.5', b'1.4', b'0.2', b'Iris-setosa',
        38.13265162927291],
       [b'4.9', b'3.0', b'1.4', b'0.2', b'Iris-setosa',
        35.200498485922445],
       [b'4.7', b'3.2', b'1.3', b'0.2', b'Iris-setosa', 30.0723720777127],
       [b'4.6', b'3.1', b'1.5', b'0.2', b'Iris-setosa',
        33.238050274980004]], dtype=object)

### 42. Randomly sample iris's species such that setose is twice the number of versicolor and virginica

In [21]:
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
iris = np.genfromtxt(url, delimiter=',', dtype='object')

In [22]:
# Get the species column
species = iris[:, 4]

In [23]:
# Approach 1: Generate Probablistically
np.random.seed(100)
a = np.array(['Iris-setosa', 'Iris-versicolor', 'Iris-virginica'])
species_out = np.random.choice(a, 150, p=[0.5, 0.25, 0.25])

In [24]:
# Approach 2: Probablistic Sampling (preferred)
np.random.seed(100)
probs = np.r_[np.linspace(0, 0.500, num=50), np.linspace(0.501, .750, num=50), np.linspace(.751, 1.0, num=50)]
index = np.searchsorted(probs, np.random.random(150))
species_out = species[index]
print(np.unique(species_out, return_counts=True))

(array([b'Iris-setosa', b'Iris-versicolor', b'Iris-virginica'],
      dtype=object), array([77, 37, 36], dtype=int64))


### 43. What is the value of second last petallength of species setosa

In [25]:
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
iris = np.genfromtxt(url, delimiter=',', dtype='object')
names = ('sepallength', 'sepalwidth', 'petallength', 'petalwidth', 'species')

In [26]:
# Get the species and petal length columns
petal_len_setosa = iris[iris[:, 4] == b'Iris-setosa', [2]].astype('float')

In [27]:
# Get the second last value
np.unique(np.sort(petal_len_setosa))[-2]

1.7

### 44. Sort the iris dataset based on sepallength column.

In [28]:
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
iris = np.genfromtxt(url, delimiter=',', dtype='object')
names = ('sepallength', 'sepalwidth', 'petallength', 'petalwidth', 'species')

In [29]:
# Sort by column position 0: SepalLength
iris[iris[:,0].argsort()][:20]

array([[b'4.3', b'3.0', b'1.1', b'0.1', b'Iris-setosa'],
       [b'4.4', b'3.2', b'1.3', b'0.2', b'Iris-setosa'],
       [b'4.4', b'3.0', b'1.3', b'0.2', b'Iris-setosa'],
       [b'4.4', b'2.9', b'1.4', b'0.2', b'Iris-setosa'],
       [b'4.5', b'2.3', b'1.3', b'0.3', b'Iris-setosa'],
       [b'4.6', b'3.6', b'1.0', b'0.2', b'Iris-setosa'],
       [b'4.6', b'3.1', b'1.5', b'0.2', b'Iris-setosa'],
       [b'4.6', b'3.4', b'1.4', b'0.3', b'Iris-setosa'],
       [b'4.6', b'3.2', b'1.4', b'0.2', b'Iris-setosa'],
       [b'4.7', b'3.2', b'1.3', b'0.2', b'Iris-setosa'],
       [b'4.7', b'3.2', b'1.6', b'0.2', b'Iris-setosa'],
       [b'4.8', b'3.0', b'1.4', b'0.1', b'Iris-setosa'],
       [b'4.8', b'3.0', b'1.4', b'0.3', b'Iris-setosa'],
       [b'4.8', b'3.4', b'1.9', b'0.2', b'Iris-setosa'],
       [b'4.8', b'3.4', b'1.6', b'0.2', b'Iris-setosa'],
       [b'4.8', b'3.1', b'1.6', b'0.2', b'Iris-setosa'],
       [b'4.9', b'2.4', b'3.3', b'1.0', b'Iris-versicolor'],
       [b'4.9', b'2.5', b'4

### 45. Find the most frequent value of petal length (3rd column) in iris dataset.

In [30]:
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
iris = np.genfromtxt(url, delimiter=',', dtype='object')
names = ('sepallength', 'sepalwidth', 'petallength', 'petalwidth', 'species')

In [31]:
vals, counts = np.unique(iris[:, 2], return_counts=True)
print(vals[np.argmax(counts)])

b'1.5'


### 46. Find the position of the first occurrence of a value greater than 1.0 in petalwidth 4th column of iris dataset.

In [32]:
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
iris = np.genfromtxt(url, delimiter=',', dtype='object')

In [33]:
# Solution: (edit: changed argmax to argwhere. )
np.argwhere(iris[:, 3].astype(float) > 1.0)[0]

array([50], dtype=int64)

### 47. From the array a, replace all values greater than 30 to 30 and less than 10 to 10.

In [36]:
np.set_printoptions(precision=2)
np.random.seed(100)
a = np.random.uniform(1,50, 20)
a

array([27.63, 14.64, 21.8 , 42.39,  1.23,  6.96, 33.87, 41.47,  7.7 ,
       29.18, 44.67, 11.25, 10.08,  6.31, 11.77, 48.95, 40.77,  9.43,
       41.  , 14.43])

In [37]:
# Solution 1: Using np.clip
np.clip(a, a_min=10, a_max=30)

array([27.63, 14.64, 21.8 , 30.  , 10.  , 10.  , 30.  , 30.  , 10.  ,
       29.18, 30.  , 11.25, 10.08, 10.  , 11.77, 30.  , 30.  , 10.  ,
       30.  , 14.43])

In [38]:
# Solution 2: Using np.where
print(np.where(a < 10, 10, np.where(a > 30, 30, a)))

[27.63 14.64 21.8  30.   10.   10.   30.   30.   10.   29.18 30.   11.25
 10.08 10.   11.77 30.   30.   10.   30.   14.43]
