In [213]:
import numpy as np

# Seed NumPy's global random generator so that every run draws the same numbers
SEED = 0
np.random.seed(SEED)

### 1. Generate three separate 1-dimensional arrays of 10,000 elements each.
- Array 1 should contain numbers randomly drawn from the uniform distribution between 1 and 100
- Array 2 should contain numbers randomly drawn from the normal distribution, with a mean of 0 and a standard deviation of 1
- Array 3 should contain numbers randomly drawn from the binomial distribution, where n=10 and p=0.5

### Solution

In [217]:
# Draw 10,000 values from each of the three requested distributions.
# The draws share NumPy's global generator, so their order is kept fixed.
array1 = np.random.uniform(low=1, high=100, size=10000)   # uniform between 1 and 100
array2 = np.random.normal(loc=0, scale=1, size=10000)     # mean 0, standard deviation 1
array3 = np.random.binomial(n=10, p=0.5, size=10000)      # 10 trials, success probability 0.5

# Print each array in turn (NumPy abbreviates long arrays with "...")
for generated in (array1, array2, array3):
    print(generated)

[55.33253689 71.80374727 60.67357423 ... 76.08452255  3.35495532
 81.54393292]
[-0.72582032  0.56347552 -0.43563209 ... -0.48621622 -2.04550028
  1.03931951]
[4 5 5 ... 5 4 4]


### 2. For each array, randomly sample 1,000 elements without replacement and print the shape of each new array (hint: `np.random.choice()`)

### Solution

In [221]:
# Draw 1,000 distinct positions' worth of values from each array (replace=False
# means no element is picked twice). The arrays are sampled in order 1, 2, 3.
sampled_array1, sampled_array2, sampled_array3 = (
    np.random.choice(population, size=1000, replace=False)
    for population in (array1, array2, array3)
)

# Report the shape of every sample
for sample in (sampled_array1, sampled_array2, sampled_array3):
    print(sample.shape)

(1000,)
(1000,)
(1000,)


### 3. For each of the three samples:  
- Calculate the mean, median, and standard deviation 
- Find the index and the value of the largest element (hint: `np.argmax()`)  
- Find the value of the 75th percentile (hint: `np.percentile()`)  

### Solution

In [225]:
# Summary statistics shared by all three samples
def calculate_stats(array):
    """Return basic descriptive statistics for a sample.

    Parameters
    ----------
    array : array-like
        The values to summarise. It is indexed directly with the result of
        ``np.argmax``, which is an index into the flattened input, so this is
        intended for 1-D data.

    Returns
    -------
    dict
        A dictionary with the keys ``"Mean"``, ``"Median"``,
        ``"Standard deviation"`` (``np.std`` with its default settings),
        ``"Index of largest"``, ``"Value of largest"`` and
        ``"75th Percentile"``, in that order.
    """
    summary = {
        "Mean": np.mean(array),
        "Median": np.median(array),
        "Standard deviation": np.std(array),
    }

    # Locate the maximum once and reuse the position to read its value
    largest_at = np.argmax(array)
    summary["Index of largest"] = largest_at
    summary["Value of largest"] = array[largest_at]

    summary["75th Percentile"] = np.percentile(array, 75)
    return summary
# Compute the summary statistics for each of the three samples
stats_sample1 = calculate_stats(sampled_array1)
stats_sample2 = calculate_stats(sampled_array2)
stats_sample3 = calculate_stats(sampled_array3)

# Print one "key: value" report per sample. Headings after the first begin
# with a newline so that consecutive reports are separated by a blank line.
report_headings = ("Sample #1", "\nSample #2", "\nSample #3")
all_stats = (stats_sample1, stats_sample2, stats_sample3)

for heading, stats in zip(report_headings, all_stats):
    print(heading)
    for key, value in stats.items():
        print(f"{key}: {value}")

Sample #1
Mean: 50.552213219585695
Median: 50.00499476743026
Standard deviation: 28.406698883921372
Index of largest: 255
Value of largest: 99.83117493278014
75th Percentile: 75.20925502271464

Sample #2
Mean: -0.03398671921971037
Median: -0.026937092247233608
Standard deviation: 0.9747233951952765
Index of largest: 628
Value of largest: 2.699568441630259
75th Percentile: 0.6323050377198595

Sample #3
Mean: 4.896
Median: 5.0
Standard deviation: 1.562428878381349
Index of largest: 7
Value of largest: 10
75th Percentile: 6.0


### 4. Sort each of the three samples in increasing order, then combine them together to create a 1,000 by 3 array. (hint: `np.sort()` and `np.stack()`)

### Solution

In [229]:
# Sort every sample in ascending order; np.sort returns a sorted copy,
# so the unsorted samples are left untouched
sorted_sample1, sorted_sample2, sorted_sample3 = (
    np.sort(sample)
    for sample in (sampled_array1, sampled_array2, sampled_array3)
)

# Place the sorted samples side by side: each one becomes a column of the result
sorted_columns = [sorted_sample1, sorted_sample2, sorted_sample3]
combined_array = np.stack(sorted_columns, axis=1)

print(combined_array)

[[ 1.1110819  -3.69428529  1.        ]
 [ 1.13557152 -3.01787832  1.        ]
 [ 1.13695165 -2.92415314  1.        ]
 ...
 [99.57570491  2.52860191  9.        ]
 [99.65852424  2.55385416 10.        ]
 [99.83117493  2.69956844 10.        ]]


### 5. Check the min and max values of the array, then replace all negative values with 0 and all values that exceed 10 with 10. Once done, recheck the min and max.

### Solution

In [233]:
# Record the extremes before any value is changed
initial_min = combined_array.min()
initial_max = combined_array.max()

print("Initial min:", initial_min)
print("Initial max:", initial_max)

# Clamp in place with boolean masks: negatives become 0, values above 10 become 10.
# The two masks cannot overlap, so they can both be computed up front.
is_negative = combined_array < 0
is_above_ten = combined_array > 10
combined_array[is_negative] = 0
combined_array[is_above_ten] = 10

# Record the extremes again to confirm the replacement
final_min = combined_array.min()
final_max = combined_array.max()

print("Final min:", final_min)
print("Final max:", final_max)

Initial min: -3.6942852922787712
Initial max: 99.83117493278014
Final min: 0.0
Final max: 10.0


### 6. Subtract 5 from all values that exceed 5. (hint: `np.where()`)

### Solution

In [237]:
# Lower only the elements greater than 5 by 5; every other element is kept as is
above_five = combined_array > 5
combined_array = np.where(above_five, combined_array - 5, combined_array)

print(combined_array)

[[1.1110819  0.         1.        ]
 [1.13557152 0.         1.        ]
 [1.13695165 0.         1.        ]
 ...
 [5.         2.52860191 4.        ]
 [5.         2.55385416 5.        ]
 [5.         2.69956844 5.        ]]


### 7. Produce a fourth column that is the sum of the first two columns.  (hint: `np.hstack()`)

### Solution

In [241]:
# The new column is the element-wise sum of columns 0 and 1
fourth_column = combined_array[:, 0] + combined_array[:, 1]

# The sums are 1-D, so give them a second axis (one column) before
# appending them to the right of the existing array
final_array = np.hstack([combined_array, fourth_column[:, np.newaxis]])

print(final_array)


[[1.1110819  0.         1.         1.1110819 ]
 [1.13557152 0.         1.         1.13557152]
 [1.13695165 0.         1.         1.13695165]
 ...
 [5.         2.52860191 4.         7.52860191]
 [5.         2.55385416 5.         7.55385416]
 [5.         2.69956844 5.         7.69956844]]


### 8. Use broadcasting to add 1 to each element of the first column, 2 to each element of the second column, 3 to each element of the third column, and 4 to each element of the fourth column.

### Solution

In [245]:

# Rebuild the four-column array from combined_array (fourth column = first
# column + second column). Because the addition below happens in place,
# starting from a fresh array keeps this cell's result the same on every run.
fourth_column = combined_array[:, 0] + combined_array[:, 1]
final_array = np.hstack([combined_array, fourth_column[:, np.newaxis]])

# A length-4 vector broadcasts across every row, adding 1, 2, 3 and 4
# to the first, second, third and fourth column respectively
column_offsets = np.array([1, 2, 3, 4])
final_array += column_offsets

print(final_array)



[[ 2.1110819   2.          4.          5.1110819 ]
 [ 2.13557152  2.          4.          5.13557152]
 [ 2.13695165  2.          4.          5.13695165]
 ...
 [ 6.          4.52860191  7.         11.52860191]
 [ 6.          4.55385416  8.         11.55385416]
 [ 6.          4.69956844  8.         11.69956844]]
