# NumPy Pair Problem

For today's pair, we will use NumPy to complete the following tasks consecutively.

In [1]:
import numpy as np

# Set random seed so that results are reproducible 
np.random.seed(0)

### 1. Generate three separate 1-dimensional arrays of 10,000 elements each.
* Array 1 should contain numbers randomly drawn from the uniform distribution between 1 and 100
* Array 2 should contain numbers randomly drawn from the normal distribution, with a mean of 0 and a standard deviation of 1
* Array 3 should contain numbers randomly drawn from the binomial distribution, where n=10 and p=0.5

In [2]:
array_1 = np.random.uniform(1, 100, 10000)
array_2 = np.random.normal(0, 1, 10000)
array_3 = np.random.binomial(10, 0.5, 10000)#n (number of trials) and p (probability of success)
array_1[:5], array_2[:5], array_3[:5]


(array([55.33253689, 71.80374727, 60.67357423, 54.94343512, 42.94182513]),
 array([-0.72582032,  0.56347552, -0.43563209, -0.10455255, -2.32127283]),
 array([4, 5, 5, 3, 8]))

### 2. For each array, randomly sample 1,000 rows without replacement. (hint: `np.random.choice()`)


In [5]:
array1 = np.random.choice(array_1, 1000, replace=False)
array2 = np.random.choice(array_2, 1000, replace=False)
array3 = np.random.choice(array_3, 1000, replace=False)
print("arr1",array1[1:10])
print("arr2",array2[1:10])
print("arr3",array3[1:10])


arr1 [89.52307901  4.92213214 85.41621157 11.37010628 34.19503757  5.86461792
 21.43868971 50.46250003 33.89264273]
arr2 [ 0.36858698  0.53205347  0.78811797 -0.04465737  0.87593967  0.52102031
 -0.01066147 -0.92358729 -0.93338531]
arr3 [5 5 3 5 5 3 7 7 4]


### 3. For each of the three samples:  
- Calculate the mean, median, and standard deviation 
- Find the index and the value of the largest element (hint: `np.argmax()`)  
- Find the value of the 75th percentile (hint: `np.percentile()`)  

In [14]:
mean1 = np.mean(array1)
mean2 = np.mean(array2)
mean3 = np.mean(array3)
print(mean1,mean2,mean3)

largest1 = np.argmax(array1)
largest2 = np.argmax(array2)
largest3 = np.argmax(array3)

'''to get index foreach argmax'''
ind_largest1 = np.unravel_index(largest1,array1.shape)
ind_largest2 = np.unravel_index(largest2,array1.shape)
ind_largest3 = np.unravel_index(largest3,array1.shape)
print("largest element",largest1,"index1 = ",ind_largest1)
print("largest element",largest2,"index2 = ",ind_largest2)
print("largest element",largest3,"index3 = ",ind_largest3)





50.97551836022753 -0.03311998066409303 4.997
largest element 623 index1 =  (623,)
largest element 262 index2 =  (262,)
largest element 272 index3 =  (272,)


In [17]:
std1 = np.std(array1)
std2 = np.std(array2)
std3 = np.std(array3)
print(std1,std2,std3)

28.51977633552813 0.9806292899731628 1.5598047954792291


In [16]:
median1 = np.median(array1)
median2 = np.median(array2)
median3 = np.median(array3)
print(median1,median2,median3)

51.39336398178053 -0.0006671234114336762 5.0


In [15]:
thirdpercentile1 = np.percentile(array1,75)
thirdpercentile2 = np.percentile(array2,75)
thirdpercentile3 = np.percentile(array3,75)
print(thirdpercentile1,thirdpercentile2,thirdpercentile3)

74.99266104683839 0.6285524847182982 6.0


### 4. Sort each of the three samples in increasing order, then combine them together to create a 1,000 by 3 array. (hint: `np.sort()` and `np.stack()`). Now, check the shape of the array.

In [24]:
'''axis 1 rows axis 0 columns'''
sorted_array1 = np.sort(array1)
sorted_array2 = np.sort(array2)
sorted_array3 = np.sort(array3)
combined_array = np.stack((sorted_array1,sorted_array2,sorted_array3),axis=1)
combined_array.shape

(1000, 3)

### 5. Replace all negative values with 0. Replace all values that exceed 10 with 10.

In [25]:
replaced_value_array = np.clip(combined_array,0,10)
replaced_value_array

array([[ 1.00717251,  0.        ,  0.        ],
       [ 1.17204485,  0.        ,  1.        ],
       [ 1.44199494,  0.        ,  1.        ],
       ...,
       [10.        ,  2.71078957, 10.        ],
       [10.        ,  2.71751471, 10.        ],
       [10.        ,  2.75041437, 10.        ]])

### 6. Subtract 5 from all values that exceed 5. (hint: `np.where()`)

In [None]:
subtracted_array = np.where(replaced_value_array > 5 ,replaced_value_array-5,replaced_value_array)
subtracted_array
#1000*3

array([[1.00717251, 0.        , 0.        ],
       [1.17204485, 0.        , 1.        ],
       [1.44199494, 0.        , 1.        ],
       ...,
       [5.        , 2.71078957, 5.        ],
       [5.        , 2.71751471, 5.        ],
       [5.        , 2.75041437, 5.        ]])

### 7. Produce a fourth column that is the sum of the first two columns.  (hint: `np.hstack()`). Now check the new shape of the array.

In [35]:
fourth_column = subtracted_array[:,0] + subtracted_array[:,1]
fourth_column = fourth_column.reshape(-1,1) ## Reshape to a column vector , 1000 x 1
print(fourth_column.shape)
new_shape_array = np.hstack((subtracted_array,fourth_column))
print(new_shape_array,"shape = ",new_shape_array.shape)

(1000, 1)
[[1.00717251 0.         0.         1.00717251]
 [1.17204485 0.         1.         1.17204485]
 [1.44199494 0.         1.         1.44199494]
 ...
 [5.         2.71078957 5.         7.71078957]
 [5.         2.71751471 5.         7.71751471]
 [5.         2.75041437 5.         7.75041437]] shape =  (1000, 4)


### 8. Use broadcasting to add 1 to each element of the first column, 2 to each element of the second column, 3 to each element of the third column, and 4 to each element of the fourth column.

In [36]:
add_values = np.array([1, 2, 3, 4])
new_shape_array[:,:4] = new_shape_array[:,:4]+add_values
new_shape_array

array([[ 2.00717251,  2.        ,  3.        ,  5.00717251],
       [ 2.17204485,  2.        ,  4.        ,  5.17204485],
       [ 2.44199494,  2.        ,  4.        ,  5.44199494],
       ...,
       [ 6.        ,  4.71078957,  8.        , 11.71078957],
       [ 6.        ,  4.71751471,  8.        , 11.71751471],
       [ 6.        ,  4.75041437,  8.        , 11.75041437]])

### 9. Matrix multiply this 1000x4 array by a 4x1 array of ones. How many unique values are in the resulting product?  (hint: `np.unique()`)

In [41]:
ones_array = np.ones((4, 1))
dot_product_array = np.dot(new_shape_array,ones_array)
print(dot_product_array)
num_unique = len(np.unique(dot_product_array))
print("unique values = ",num_unique)

[[12.01434503]
 [13.3440897 ]
 [13.88398989]
 [14.00027026]
 [14.40430005]
 [14.72681689]
 [15.01580283]
 [15.5621749 ]
 [15.83594359]
 [16.01531534]
 [16.05832279]
 [17.09000076]
 [17.32416607]
 [17.42335124]
 [17.5966872 ]
 [17.72038049]
 [17.74241152]
 [18.2129128 ]
 [18.85740639]
 [18.9171343 ]
 [18.9392347 ]
 [19.21503471]
 [19.69282114]
 [19.73144523]
 [19.76453197]
 [19.7796665 ]
 [19.79005341]
 [19.85562253]
 [20.00156401]
 [20.03973659]
 [20.23073592]
 [21.34550231]
 [21.62878051]
 [21.68252175]
 [21.84426428]
 [21.95069436]
 [12.15359868]
 [12.20295932]
 [12.44779957]
 [12.755656  ]
 [12.8885018 ]
 [13.36884037]
 [13.57274441]
 [13.72923584]
 [14.15102814]
 [14.52916081]
 [14.5988648 ]
 [14.74474614]
 [14.89298537]
 [15.99002394]
 [16.14433068]
 [16.14957893]
 [16.32219202]
 [16.41806527]
 [16.48977374]
 [16.51892767]
 [16.57533602]
 [16.59924059]
 [16.66308515]
 [16.84957048]
 [16.92454231]
 [17.01051989]
 [17.58364742]
 [17.66314269]
 [17.66848791]
 [17.82255416]
 [17.98976

# Thank You