In [4]:
import numpy as np

# Seed NumPy's legacy global random number generator so that the random
# draws made through np.random.* in this notebook are reproducible.
np.random.seed(0)

### 1. Generate three separate 1-dimensional arrays of 10,000 elements each.
- Array 1 should contain numbers randomly drawn from the uniform distribution between 1 and 100
- Array 2 should contain numbers randomly drawn from the normal distribution, with a mean of 0 and a standard deviation of 1
- Array 3 should contain numbers randomly drawn from the binomial distribution, where n=10 and p=0.5

### Solution

In [8]:
# Re-seed inside the cell so that re-running it regenerates the same arrays.
np.random.seed(0)

N_ELEMENTS = 10_000  # length of each generated 1-D array

# The three draws consume the same seeded global stream, so their order
# determines the values each array receives.
arr1 = np.random.uniform(low=1, high=100, size=N_ELEMENTS)  # uniform between 1 and 100
arr2 = np.random.normal(loc=0, scale=1, size=N_ELEMENTS)    # mean 0, standard deviation 1
arr3 = np.random.binomial(n=10, p=.5, size=N_ELEMENTS)      # n=10 trials, p=0.5

# NumPy abbreviates long arrays when printing, so only the ends are shown.
for generated in (arr1, arr2, arr3):
    print(generated)

[55.33253689 71.80374727 60.67357423 ... 76.08452255  3.35495532
 81.54393292]
[-0.72582032  0.56347552 -0.43563209 ... -0.48621622 -2.04550028
  1.03931951]
[4 5 5 ... 5 4 4]


### 2. For each array, randomly sample 1,000 elements without replacement and print the shape of each new array (hint: `np.random.choice()`)

### Solution

In [15]:
# Re-seed so that the same elements are selected every time this cell runs.
np.random.seed(0)

SAMPLE_SIZE = 1000  # number of elements drawn from each array

# Unpacking the generator evaluates the draws in order (arr1, arr2, arr3),
# so the seeded random stream is consumed in a fixed sequence.
arr1_samp, arr2_samp, arr3_samp = (
    np.random.choice(population, size=SAMPLE_SIZE, replace=False)
    for population in (arr1, arr2, arr3)
)

for drawn in (arr1_samp, arr2_samp, arr3_samp):
    print(drawn.shape)

(1000,)
(1000,)
(1000,)


### 3. For each of the three samples:  
- Calculate the mean, median, and standard deviation 
- Find the index and the value of the largest element (hint: `np.argmax()`)  
- Find the value of the 75th percentile (hint: `np.percentile()`)  

### Solution

In [16]:
# Summarise each sample: centre, spread, largest element and upper quartile.
samples = (arr1_samp, arr2_samp, arr3_samp)

for i, samp in enumerate(samples):
    # Collect the report lines first, then emit them with a single print;
    # the text written to stdout is the same as printing line by line.
    report_lines = [
        f"\nSample #{i+1}",
        f"Mean: {np.mean(samp)}",
        f"Median: {np.median(samp)}",
        f"Standard deviation: {np.std(samp)}",
        f"Index of largest: {np.argmax(samp)}",
        f"Value of largest: {np.max(samp)}",
        f"75th Percentile: {np.percentile(samp, 75)}",
    ]
    print("\n".join(report_lines))


Sample #1
Mean: 50.5667844252836
Median: 51.154758164405166
Standard deviation: 28.698605751811936
Index of largest: 920
Value of largest: 99.99643783571764
75th Percentile: 74.66284426458829

Sample #2
Mean: 0.031445148760512014
Median: -0.046725721435603405
Standard deviation: 0.9982063664273709
Index of largest: 625
Value of largest: 3.598310102305438
75th Percentile: 0.7124560005954285

Sample #3
Mean: 5.092
Median: 5.0
Standard deviation: 1.5380299086818825
Index of largest: 253
Value of largest: 10
75th Percentile: 6.0


### 4. Sort each of the three samples in increasing order, then combine them together to create a 1,000 by 3 array. (hint: `np.sort()` and `np.stack()`)

### Solution

In [18]:
# Sort every sample in increasing order, then lay the sorted samples side by
# side as columns (axis=1), giving one row per rank and one column per sample.
sorted_samples = [np.sort(sample) for sample in (arr1_samp, arr2_samp, arr3_samp)]
arr = np.stack(sorted_samples, axis=1)
print(arr)

[[ 1.00717251 -3.1699786   1.        ]
 [ 1.01488773 -2.53176467  1.        ]
 [ 1.04860542 -2.40475651  1.        ]
 ...
 [99.80425599  3.03075787  9.        ]
 [99.98104923  3.46838338  9.        ]
 [99.99643784  3.5983101  10.        ]]


### 5. Check the min and max values of the array, then replace all negative values with 0 and all values that exceed 10 with 10. Once done, recheck the min and max.

### Solution

In [21]:
# Range before clamping.
print(arr.min(), arr.max(), sep="\n")

# Clamp in place: negatives become 0, anything above 10 becomes 10.
# The two masks cannot overlap, so computing both up front is safe.
negative_mask = arr < 0
too_large_mask = arr > 10
arr[negative_mask] = 0
arr[too_large_mask] = 10

# Range after clamping.
print(arr.min(), arr.max(), sep="\n")

-3.169978597207576
99.99643783571764
0.0
10.0


### 6. Subtract 5 from all values that exceed 5. (hint: `np.where()`)

### Solution

In [22]:
# Subtract 5 only where the value is strictly greater than 5; every other
# element is carried over unchanged. np.where builds a new array.
above_five = arr > 5
arr = np.where(above_five, arr - 5, arr)
arr

array([[1.00717251, 0.        , 1.        ],
       [1.01488773, 0.        , 1.        ],
       [1.04860542, 0.        , 1.        ],
       ...,
       [5.        , 3.03075787, 4.        ],
       [5.        , 3.46838338, 4.        ],
       [5.        , 3.5983101 , 5.        ]])

### 7. Produce a fourth column that is the sum of the first two columns. (hint: `np.hstack()`)

### Solution

In [23]:
# Sum of the first two columns. Slice indexing (0:1, 1:2) keeps the operands
# two-dimensional, so the result is a single column that np.hstack can append.
new_arr = arr[:, 0:1] + arr[:, 1:2]

# Append the sum to the first three columns only. On a first run this is the
# whole array; on a re-run it replaces the existing sum column instead of
# appending yet another one, so the cell is safe to execute repeatedly.
arr = np.hstack((arr[:, :3], new_arr))
arr

array([[1.00717251, 0.        , 1.        , 1.00717251],
       [1.01488773, 0.        , 1.        , 1.01488773],
       [1.04860542, 0.        , 1.        , 1.04860542],
       ...,
       [5.        , 3.03075787, 4.        , 8.03075787],
       [5.        , 3.46838338, 4.        , 8.46838338],
       [5.        , 3.5983101 , 5.        , 8.5983101 ]])

### 8. Use broadcasting to add 1 to each element of the first column, 2 to each element of the second column, 3 to each element of the third column, and 4 to each element of the fourth column.

### Solution

In [24]:
# One offset per column: [1, 2, 3, 4]. The 1-D vector is broadcast across the
# rows, so each column receives its own constant. The addition is in place.
# NOTE(review): re-running this cell adds the offsets again.
column_offsets = np.arange(1, 5)
arr += column_offsets
arr

array([[ 2.00717251,  2.        ,  4.        ,  5.00717251],
       [ 2.01488773,  2.        ,  4.        ,  5.01488773],
       [ 2.04860542,  2.        ,  4.        ,  5.04860542],
       ...,
       [ 6.        ,  5.03075787,  7.        , 12.03075787],
       [ 6.        ,  5.46838338,  7.        , 12.46838338],
       [ 6.        ,  5.5983101 ,  8.        , 12.5983101 ]])