In [1]:
# task 1
import numpy as np
rng = np.random.default_rng(42)
readings = rng.normal(loc=50, scale=10, size=(360,4))
np.shape(readings)

(360, 4)

In [2]:
readings.dtype

dtype('float64')

In [3]:
readings[:3, :]

array([[53.0471708 , 39.60015894, 57.50451196, 59.40564716],
       [30.48964811, 36.97820493, 51.27840403, 46.83757408],
       [49.83198842, 41.46956072, 58.79397975, 57.77791935]])

In [4]:
# task 2
scaled_readings = (readings - readings.min()) / (readings.max() - readings.min())
scaled_readings

array([[0.57902089, 0.38206048, 0.64430823, 0.67215445],
       [0.24861746, 0.34365633, 0.55311349, 0.48806799],
       [0.53192763, 0.40944189, 0.66319526, 0.64831287],
       ...,
       [0.63027748, 0.64358286, 0.49470527, 0.39831808],
       [0.40722292, 0.29483975, 0.56909474, 0.52216565],
       [0.46994286, 0.10147458, 0.35169205, 0.69855973]], shape=(360, 4))

In [5]:
means = readings.mean(axis=0)
centered_readings = readings - means
centered_readings

array([[  3.49101095,  -9.96168725,   7.68545381,   9.20934154],
       [-19.06651173, -12.58364126,   1.45934588,  -3.35873155],
       [  0.27582858,  -8.09228546,   8.9749216 ,   7.58161373],
       ...,
       [  6.99043544,   7.89314288,  -2.52833945,  -9.4861973 ],
       [ -8.23809415, -15.91647867,   2.55042796,  -1.03079346],
       [ -3.95603689, -29.11803443, -12.29223285,  11.01210089]],
      shape=(360, 4))

In [6]:
print(np.shape(readings) == np.shape(scaled_readings) == np.shape(centered_readings))

True


In [7]:
# task 3
bool_mask = (readings < 20) | (readings > 80)
bool_mask

array([[False, False, False, False],
       [False, False, False, False],
       [False, False, False, False],
       ...,
       [False, False, False, False],
       [False, False, False, False],
       [False, False, False, False]], shape=(360, 4))

In [8]:
outlier_count = np.count_nonzero(bool_mask == True)
outlier_count

np.int64(2)

In [9]:
outlier_persentage = (outlier_count / readings.size)*100
print("the percentage of total readings that are outliers: ",outlier_persentage," %")

the percentage of total readings that are outliers:  0.1388888888888889  %


In [10]:
cleaned_readings = readings.copy()
cleaned_readings[bool_mask] = np.nan
cleaned_readings

array([[53.0471708 , 39.60015894, 57.50451196, 59.40564716],
       [30.48964811, 36.97820493, 51.27840403, 46.83757408],
       [49.83198842, 41.46956072, 58.79397975, 57.77791935],
       ...,
       [56.54659528, 57.45498907, 47.2907187 , 40.71010833],
       [41.31806569, 33.64536752, 52.36948612, 49.16551217],
       [45.60012295, 20.44381176, 37.5268253 , 61.20840651]],
      shape=(360, 4))

In [11]:
count_nan = np.isnan(cleaned_readings).sum()
count_nan

np.int64(2)

In [12]:
print(count_nan == outlier_count)

True


In [13]:
#task 4
sensor_means = readings.mean(axis=0)
sensor_means[:5]

array([49.55615985, 49.56184619, 49.81905815, 50.19630563])

In [14]:
sensor_stds = readings.std(axis=0)
sensor_stds[:5]

array([ 9.63991495, 10.36268631, 10.24277058,  9.64547273])

In [15]:
time_means = readings.mean(axis=1)

In [16]:
sensor_means.shape

(4,)

In [17]:
sensor_stds.shape

(4,)

In [18]:
time_means.shape

(360,)

In [20]:
#task 5
report ={
    "total_readings": readings.size,
    "outlier_count": outlier_count ,
    "outlier_percent": outlier_persentage,
    "sensor_means": sensor_means,
    "sensor_stds": sensor_stds
}
assert report["total_readings"] == readings.size

In [26]:
print("Total readings:", report["total_readings"])
print("Outlier count:", report["outlier_count"])
print(f"Outlier %: {report['outlier_percent']:.2f}")
print("Sensor means:", report["sensor_means"])
print("Sensor stds:", report["sensor_stds"])

Total readings: 1440
Outlier count: 2
Outlier %: 0.14
Sensor means: [49.55615985 49.56184619 49.81905815 50.19630563]
Sensor stds: [ 9.63991495 10.36268631 10.24277058  9.64547273]


In [32]:
# task 6
import numpy as np
rng = np.random.default_rng(42)
sim = rng.normal(0, 1, (1000, 6))
print("Shape: ", sim.shape)
print("Data type: ", sim.dtype)
print("First 3 row: ", sim[:3])

Shape:  (1000, 6)
Data type:  float64
First 3 row:  [[ 0.30471708 -1.03998411  0.7504512   0.94056472 -1.95103519 -1.30217951]
 [ 0.1278404  -0.31624259 -0.01680116 -0.85304393  0.87939797  0.77779194]
 [ 0.0660307   1.12724121  0.46750934 -0.85929246  0.36875078 -0.9588826 ]]


In [33]:
# task 7
scenario_shift = np.array([-0.3, -0.1, 0.0, 0.1, 0.2, 0.4])
scenario_shift.shape

(6,)

In [34]:
adjusted_sim = sim + scenario_shift
print(adjusted_sim.shape)
print(sim.shape)

(1000, 6)
(1000, 6)


In [36]:
original_means = sim.mean(axis=0)
adjusted_means = adjusted_sim.mean(axis=0)
print(original_means)
print(adjusted_means)
print(scenario_shift)

[ 0.00140029 -0.04260601 -0.04041433  0.03629828  0.00825714 -0.01317833]
[-0.29859971 -0.14260601 -0.04041433  0.13629828  0.20825714  0.38682167]
[-0.3 -0.1  0.   0.1  0.2  0.4]


In [37]:
# task 8
scenario_means = adjusted_sim.mean(axis=0)
scenario_means.shape

(6,)

In [38]:
ranking = np.argsort(scenario_means)

array([0, 1, 2, 3, 4, 5])

In [39]:
top_2_partition = np.partition(scenario_means,-2)[-2:]
top_2_partition

array([0.20825714, 0.38682167])

In [41]:
top_2_sorted = np.sort(scenario_means)[-2:]
top_2_sorted

array([0.20825714, 0.38682167])

In [42]:
np.allclose(top_2_partition, top_2_sorted)

True

In [47]:
# task 9

rng_2 = np.random.default_rng(42)
sim_2 = rng_2.normal(0, 1, (1000, 6))
print("sim == sim_2:", np.allclose(sim, sim_2))

sim == sim_2: True


In [48]:
rng_3 = np.random.default_rng(1)
sim_3 = rng_3.normal(1, 0, (1000, 6))
print("sim == sim_3:", np.allclose(sim, sim_3))

sim == sim_3: False


In [49]:
# task 10
report = {
    "shape": adjusted_sim.shape,
    "scenario_means": scenario_means,
    "top_two_indices": top_2_partition,
    "top_two_values": top_2_sorted
}
report

{'shape': (1000, 6),
 'scenario_means': array([-0.29859971, -0.14260601, -0.04041433,  0.13629828,  0.20825714,
         0.38682167]),
 'top_two_indices': array([0.20825714, 0.38682167]),
 'top_two_values': array([0.20825714, 0.38682167])}

In [52]:
assert len(report["scenario_means"]) == 6
assert len(report["top_two_indices"]) == 2

In [57]:
report_to_str = "\n".join(
    f"{key}: {value}" for key, value in report.items()
)
print(report_to_str)

shape: (1000, 6)
scenario_means: [-0.29859971 -0.14260601 -0.04041433  0.13629828  0.20825714  0.38682167]
top_two_indices: [0.20825714 0.38682167]
top_two_values: [0.20825714 0.38682167]
