In [None]:
Objective: Work with NaN values
Instructions:

Create a NumPy array with some NaN values.

Remove NaN values.

Replace NaN values with the average of remaining numbers.

Expected Learning:

np.isnan()

Boolean masking

Data cleaning

In [1]:
import numpy as np

# Create array with NaN values
arr = np.array([12, np.nan, 25, 40, np.nan, 55, 70])

print("Original array:", arr)

# Remove NaN values
clean_arr = arr[~np.isnan(arr)]

# Compute mean of remaining values
mean_val = np.mean(clean_arr)

# Replace NaN with mean
arr[np.isnan(arr)] = mean_val

print("Array without NaN:", clean_arr)
print("Mean of valid values:", mean_val)
print("NaN replaced array:", arr)

Original array: [12. nan 25. 40. nan 55. 70.]
Array without NaN: [12. 25. 40. 55. 70.]
Mean of valid values: 40.4
NaN replaced array: [12.  40.4 25.  40.  40.4 55.  70. ]


In [None]:
Objective: Time-series style analysis
Instructions:

Create an array of daily temperatures for 30 days.

Find:

Hottest day

Coldest day

Average temperature

Replace abnormal values (below 0 or above 45) with average.

Learning Outcome:

Data cleaning

Boolean masking

Real-life data simulation

In [3]:
import numpy as np

# Create array of daily temperatures for 30 days (simulated)
temps = np.random.randint(-5, 50, size=30)

print("Daily temperatures:\n", temps)

# Hottest day temperature
hottest = np.max(temps)

# Coldest day temperature
coldest = np.min(temps)

# Average temperature
avg_temp = np.mean(temps)

print("\nHottest temperature:", hottest)
print("Coldest temperature:", coldest)
print("Average temperature:", avg_temp)

# Replace abnormal values (<0 or >45) with average
temps_clean = temps.copy()
temps_clean[(temps_clean < 0) | (temps_clean > 45)] = avg_temp

print("\nCleaned temperatures:\n", temps_clean)

Daily temperatures:
 [24  3 26 21 32 33 17 15  1 20 -5  9 40 15 13 25 24 -1  9 37  7 34 28 48
 32 42 14 20 22  7]

Hottest temperature: 48
Coldest temperature: -5
Average temperature: 20.4

Cleaned temperatures:
 [24  3 26 21 32 33 17 15  1 20 20  9 40 15 13 25 24 20  9 37  7 34 28 20
 32 42 14 20 22  7]


In [None]:
Objective: Boolean & logical operations
Instructions:

Create a boolean array:

Rows → Students

Columns → Days

Calculate:

Attendance percentage

Students below 75%

Perfect attendance list

Learning Outcome:

Boolean arrays

Logical indexing



In [7]:
import numpy as np
import random

# Create boolean attendance array (5 students × 10 days)
np.random.seed(1)  # for repeatable result
attendance = np.random.choice([True, False], size=(5, 10), p=[0.8, 0.2])

print("Attendance Matrix (Students × Days):\n")
print(attendance)

# Total days
total_days = attendance.shape[1]

# Attendance percentage per student
attendance_percent = attendance.sum(axis=1) / total_days * 100
print("\nAttendance % per student:", attendance_percent)

# Students below 75% attendance
below_75 = np.where(attendance_percent < 75)[0]
print("Students below 75% attendance:", below_75)

# Perfect attendance (all True across days)
perfect_students = np.where(attendance.all(axis=1))[0]
print("Perfect attendance students: ", perfect_students)
print("Total Students: ",attendance.shape[0])

Attendance Matrix (Students × Days):

[[ True  True  True  True  True  True  True  True  True  True]
 [ True  True  True False  True  True  True  True  True  True]
 [False False  True  True False False  True  True  True False]
 [ True  True False  True  True  True  True False  True  True]
 [False  True  True  True  True  True False  True  True  True]]

Attendance % per student: [100.  90.  50.  80.  80.]
Students below 75% attendance: [2]
Perfect attendance students: [0]
Total Students:  5
