# NumPy Pair Problem

For today's pair problem, we will use NumPy to complete the following tasks in order; each task builds on the result of the one before it.

In [1]:
import numpy as np

# Seed NumPy's global random state so that the np.random.* draws in this
# notebook produce the same values on every fresh top-to-bottom run.
np.random.seed(0)

### 1. Generate three separate 1-dimensional arrays of 10,000 elements each.
* Array 1 should contain numbers randomly drawn from the uniform distribution between 1 and 100
* Array 2 should contain numbers randomly drawn from the normal distribution, with a mean of 0 and a standard deviation of 1
* Array 3 should contain numbers randomly drawn from the binomial distribution, where n=10 and p=0.5

In [3]:
# Draw the three 10,000-element arrays. The order of the calls matters:
# all three consume the same global random state, so reordering them would
# change the generated values.
n_elements = 10000
uniform = np.random.uniform(low=1, high=100, size=n_elements)
normal = np.random.normal(loc=0, scale=1, size=n_elements)
binomial = np.random.binomial(n=10, p=0.5, size=n_elements)

# Show each array next to its label (NumPy abbreviates long arrays itself).
for label, values in (
    ("Array 1:", uniform),
    ("Array 2:", normal),
    ("Array 3:", binomial),
):
    print(label, values)

Array 1: [55.33253689 71.80374727 60.67357423 ... 76.08452255  3.35495532
 81.54393292]
Array 2: [-0.72582032  0.56347552 -0.43563209 ... -0.48621622 -2.04550028
  1.03931951]
Array 3: [4 5 5 ... 5 4 4]


### 2. For each array, randomly sample 1,000 elements without replacement. (hint: `np.random.choice()`)


In [4]:
# Take 1,000 draws from each array. replace=False means no position of the
# source array is picked more than once within a single sample.
sample_size = 1000
uniform_sample = np.random.choice(uniform, size=sample_size, replace=False)
normal_sample = np.random.choice(normal, size=sample_size, replace=False)
binomial_sample = np.random.choice(binomial, size=sample_size, replace=False)

# Print each sample with its label.
for label, sample in (
    ("Sample 1:", uniform_sample),
    ("Sample 2:", normal_sample),
    ("Sample 3:", binomial_sample),
):
    print(label, sample)

Sample 1: [25.78942307 22.82505784 72.23603848 73.69670482 29.80696432 40.46328307
 95.02707208 92.90497616  7.1161279  84.14966865 12.45730708 62.66974529
 32.85071757 13.11415817 47.35392793 51.24572161 35.49394315  4.13861793
 58.17212262 54.1662897  53.62897599 15.15598922 53.13181885 49.89268321
 58.43239937 50.96385197 97.16788935 45.94292407 80.17328277 58.66488379
 96.20521694 71.60202801 94.59766607 79.35391927 50.42776529 75.97667353
 55.83073823 63.61838845 56.75494285 25.74622163 22.63701812 49.74766884
 17.4477183  54.19479602 70.04544854 29.59208571 14.02808819 31.8949816
 15.44932668 33.11495625 22.35188105 36.02808422 80.25390803  7.26470853
 46.19546924 16.08311586 11.09993361 27.1910056   3.94249623 23.9293089
 20.60029719 56.22734122 76.9714287  55.14805612 62.13599895  5.69562255
 15.91326643  8.75673754 16.75547746 35.44828682 34.2143188  47.84115487
 11.84739951 46.71936694 10.97964717 81.49233246 93.8286463  42.20160539
 55.54369769 54.73784041  3.84641057  2.529

### 3. For each of the three samples:  
- Calculate the mean, median, and standard deviation 
- Find the index and the value of the largest element (hint: `np.argmax()`)  
- Find the value of the 75th percentile (hint: `np.percentile()`)  

In [6]:

# Summary statistics for each sample: mean, median and standard deviation.
# The standard deviation uses NumPy's default ddof=0, i.e. the population
# formula (divide by N rather than N - 1).
uniform_mean, uniform_median, uniform_std = (
    uniform_sample.mean(),
    np.median(uniform_sample),
    uniform_sample.std(),
)

normal_mean, normal_median, normal_std = (
    normal_sample.mean(),
    np.median(normal_sample),
    normal_sample.std(),
)

binomial_mean, binomial_median, binomial_std = (
    binomial_sample.mean(),
    np.median(binomial_sample),
    binomial_sample.std(),
)


In [7]:
# Index of the largest element in each sample. When the maximum occurs more
# than once (likely for the integer-valued binomial sample), argmax reports
# the first occurrence.
max_index_uniform = uniform_sample.argmax()
max_index_normal = normal_sample.argmax()
max_index_binomial = binomial_sample.argmax()

# Look the maxima up through the indices found above.
max_value_uniform, max_value_normal, max_value_binomial = (
    uniform_sample[max_index_uniform],
    normal_sample[max_index_normal],
    binomial_sample[max_index_binomial],
)

# First line: indices; second line: the corresponding values.
print(max_index_uniform, max_index_normal, max_index_binomial)
print(max_value_uniform, max_value_normal, max_value_binomial)

255 628 7
99.83117493278014 2.699568441630259 10


In [8]:
# 75th percentile (upper quartile) of each sample, computed in the order
# uniform, normal, binomial.
uniform_75_percentile, normal_75_percentile, binomial_75_percentile = (
    np.percentile(sample, 75)
    for sample in (uniform_sample, normal_sample, binomial_sample)
)

print(uniform_75_percentile, normal_75_percentile, binomial_75_percentile)

75.20925502271464 0.6323050377198595 6.0


### 4. Sort each of the three samples in increasing order, then combine them together to create a 1,000 by 3 array. (hint: `np.sort()` and `np.stack()`). Now, check the shape of the array.

In [17]:
# np.sort returns sorted copies in increasing order; the samples themselves
# are left untouched.
uniform_sorted, normal_sorted, binomial_sorted = (
    np.sort(sample)
    for sample in (uniform_sample, normal_sample, binomial_sample)
)

# Stacking along axis 1 turns each sorted sample into one column. Each column
# is sorted independently, so a row does not correspond to a single draw.
sorted_columns = [uniform_sorted, normal_sorted, binomial_sorted]
combined_array = np.stack(sorted_columns, axis=1)
print(combined_array)

[[ 1.1110819  -3.69428529  1.        ]
 [ 1.13557152 -3.01787832  1.        ]
 [ 1.13695165 -2.92415314  1.        ]
 ...
 [99.57570491  2.52860191  9.        ]
 [99.65852424  2.55385416 10.        ]
 [99.83117493  2.69956844 10.        ]]


In [18]:
# Shape check: (rows, columns) of the stacked array.
combined_array.shape

(1000, 3)

### 5. Replace all negative values with 0. Replace all values that exceed 10 with 10.

In [20]:
# Clamp every entry into the closed interval [0, 10] in a single pass:
# negatives become 0, values above 10 become 10, everything else is unchanged.
# np.clip replaces the two chained np.where calls and yields the same array.
combined_array = np.clip(combined_array, 0, 10)
print(combined_array)

[[ 1.1110819   0.          1.        ]
 [ 1.13557152  0.          1.        ]
 [ 1.13695165  0.          1.        ]
 ...
 [10.          2.52860191  9.        ]
 [10.          2.55385416 10.        ]
 [10.          2.69956844 10.        ]]


### 6. Subtract 5 from all values that exceed 5. (hint: `np.where()`)

In [21]:
# Boolean mask of the entries strictly greater than 5.
exceeds_five = combined_array > 5

# Where the mask is set take value - 5, otherwise keep the value as it is.
combined_array = np.where(exceeds_five, combined_array - 5, combined_array)
print(combined_array)

[[1.1110819  0.         1.        ]
 [1.13557152 0.         1.        ]
 [1.13695165 0.         1.        ]
 ...
 [5.         2.52860191 4.        ]
 [5.         2.55385416 5.        ]
 [5.         2.69956844 5.        ]]


### 7. Produce a fourth column that is the sum of the first two columns.  (hint: `np.hstack()`). Now check the new shape of the array.

In [25]:
# Element-wise sum of column 0 and column 1 (a 1-D array, one entry per row).
first_column = combined_array[:, 0]
second_column = combined_array[:, 1]
fourth_column = first_column + second_column

# hstack needs a 2-D operand, so view the sum as a single column before
# appending it on the right of the existing columns.
new_array = np.hstack([combined_array, fourth_column[:, np.newaxis]])
print(new_array)

[[1.1110819  0.         1.         1.1110819 ]
 [1.13557152 0.         1.         1.13557152]
 [1.13695165 0.         1.         1.13695165]
 ...
 [5.         2.52860191 4.         7.52860191]
 [5.         2.55385416 5.         7.55385416]
 [5.         2.69956844 5.         7.69956844]]


In [26]:
# Shape check after appending the summed column.
new_array.shape

(1000, 4)

### 8. Use broadcasting to add 1 to each element of the first column, 2 to each element of the second column, 3 to each element of the third column, and 4 to each element of the fourth column.

In [27]:
# One offset per column: 1, 2, 3, 4.
add_values = np.arange(1, 5)

# Broadcasting stretches the length-4 offset vector across every row.
# NOTE: the addition is written back into new_array in place, so running this
# cell a second time adds the offsets again.
np.add(new_array, add_values, out=new_array)
print(new_array)

[[ 2.1110819   2.          4.          5.1110819 ]
 [ 2.13557152  2.          4.          5.13557152]
 [ 2.13695165  2.          4.          5.13695165]
 ...
 [ 6.          4.52860191  7.         11.52860191]
 [ 6.          4.55385416  8.         11.55385416]
 [ 6.          4.69956844  8.         11.69956844]]


### 9. Matrix multiply this 1000x4 array by a 4x1 array of ones. How many unique values are in the resulting product?  (hint: `np.unique()`)

In [28]:
# Multiplying by a 4x1 column of ones sums the four columns of every row,
# giving a product with one column and one entry per row.
ones_array = np.ones(shape=(4, 1))
result = new_array.dot(ones_array)

# np.unique flattens its input and returns the distinct values sorted.
unique_values = np.unique(result)

print(unique_values, "\n")
print("Number of unique values is: ", len(unique_values))

[12.40598877 12.44779957 12.60215543 12.80197663 13.10944756 13.18789103
 13.2221638  13.27114305 13.2739033  13.32939749 13.36328397 13.38833382
 13.39124509 13.48477163 13.51906704 13.697767   13.76566562 13.7993105
 14.04688247 14.11577459 14.11963723 14.24324801 14.63924238 15.01919689
 15.09597644 15.24468195 15.32219202 15.47220317 15.52826745 15.57533602
 16.70377048 16.73861175 16.86897105 16.95513744 17.01051989 17.01531534
 17.05832279 17.09355034 17.11709453 17.23225581 17.25405543 17.52941705
 17.59825782 17.61214826 17.70120428 17.98525494 18.14893279 18.23841362
 18.52634811 18.53060273 18.63268897 18.69142712 18.6950651  18.70609017
 18.90427349 18.9171343  19.21503471 19.30975336 19.51701738 19.69282114
 19.74098426 19.86228632 19.88499246 19.93016433 19.98576973 20.03973659
 20.04848176 20.19847198 20.22085276 20.27723587 20.45735955 20.51347508
 20.8488449  21.00176228 21.0381574  21.17125467 21.50174397 21.5469433
 21.62878051 21.73904983 21.74697006 21.75459633 21.7

# Thank You