In [11]:
import numpy as np
import pandas as pd

# Task 1: Initialize Temperature and Humidity Data
np.random.seed(42)  # For reproducibility
locations = 500
days = 365

# Generate random temperature data in Celsius and humidity percentages
temperature_data = np.random.uniform(-10, 40, (locations, days))
humidity_data = np.random.uniform(0, 100, (locations, days))


In [4]:
# Task 2: Check for Missing Data
# Randomly set 5% of values in each array to NaN to simulate missing data
total_entries = locations * days
num_missing_values = int(0.05 * total_entries)

# Choose random indices to replace with NaN
temperature_missing_indices = np.random.choice(total_entries, num_missing_values, replace=False)
humidity_missing_indices = np.random.choice(total_entries, num_missing_values, replace=False)

# Flatten data to 1D for easy indexing, then reshape back
temperature_data_flat = temperature_data.flatten()
humidity_data_flat = humidity_data.flatten()

temperature_data_flat[temperature_missing_indices] = np.nan
humidity_data_flat[humidity_missing_indices] = np.nan

temperature_data = temperature_data_flat.reshape(locations, days)
humidity_data = humidity_data_flat.reshape(locations, days)

# Report the total number of missing entries
temperature_missing_total = np.isnan(temperature_data).sum()
humidity_missing_total = np.isnan(humidity_data).sum()
print(f"Missing temperature entries: {temperature_missing_total}")
print(f"Missing humidity entries: {humidity_missing_total}")

Missing temperature entries: 9125
Missing humidity entries: 9125


In [5]:
# Task 3: Convert Temperature and Calculate Discomfort Index
# Convert Celsius to Fahrenheit
temperature_data_fahrenheit = (temperature_data * 9/5) + 32

# Calculate discomfort index as a simple "feels like" measure
discomfort_index = temperature_data + (0.55 * (1 - (humidity_data / 100)) * (temperature_data - 14.5))

# Cap the "feels like" index at 80
discomfort_index[discomfort_index > 80] = 80

# Print the output arrays
print("Temperature Data in Fahrenheit:")
print(temperature_data_fahrenheit)

print("\nDiscomfort Index (capped at 80):")
print(discomfort_index)


Temperature Data in Fahrenheit:
[[ 47.7086107   99.56428758  79.87945476 ...  55.24276014  63.10551104
   98.73183279]
 [ 48.7492374  100.50715074  95.48155778 ...  85.57405807          nan
   28.72903486]
 [ 28.78392181  87.31172482  73.86774986 ...  68.69631154          nan
   34.76028305]
 ...
 [ 63.18983909  17.55575497  59.75814921 ...  37.23405737          nan
   25.17427278]
 [ 68.78020998 103.96640574  75.49790495 ...  81.29485701  20.80774931
   59.36306712]
 [ 89.15030872  88.76823241  35.00224112 ...  44.70666238  18.1911585
   31.84581927]]

Discomfort Index (capped at 80):
[[  7.75480662  46.7659152   30.17497599 ...  12.20386711  18.37812055
           nan]
 [  9.30151138  48.46543699  37.0786101  ...  35.86252567          nan
   -3.22009595]
 [ -3.16067412  30.99670635  25.01626055 ...  20.54440367          nan
   -0.8684746 ]
 ...
 [ 18.0337347  -15.94723889  15.87835749 ...          nan          nan
   -6.00739084]
 [ 22.83174724          nan  26.38031445 ...  29.45557

In [6]:
# Task 4: Analyze January Temperatures
january_temperatures = temperature_data[:, :31]
january_avg_temp = np.nanmean(january_temperatures)
print(f"Average January temperature across all locations: {january_avg_temp:.2f}°C")

Average January temperature across all locations: 14.94°C


In [7]:
# Task 5: Identify Extreme Temperatures
# Set any temperature above 35°C to NaN as potential errors
temperature_data[temperature_data > 35] = np.nan

# Count the number of null values per location
null_count_per_location = np.isnan(temperature_data).sum(axis=1)
print("Null values per location due to extreme temperatures:", null_count_per_location)


Null values per location due to extreme temperatures: [54 58 56 55 44 47 61 54 48 45 51 48 44 50 60 56 54 54 53 55 50 47 42 46
 49 56 42 54 52 47 56 53 70 48 46 67 49 54 47 58 40 52 43 52 62 54 58 52
 50 48 48 67 67 46 56 53 59 56 48 63 57 70 53 48 64 54 49 47 63 52 47 50
 55 45 67 52 47 51 50 46 44 48 54 54 52 64 50 53 53 42 54 59 47 50 42 56
 49 52 63 48 49 47 51 56 57 57 59 47 46 61 59 39 61 53 48 52 61 47 48 57
 54 46 53 59 57 49 57 47 48 51 58 60 54 59 38 51 49 57 56 55 44 55 48 52
 52 58 59 59 49 55 52 51 49 53 55 62 44 47 53 44 56 58 47 56 62 44 53 66
 59 66 61 61 51 39 55 54 52 54 49 38 50 46 39 42 39 53 48 55 67 51 47 58
 43 49 49 46 53 48 46 54 70 53 50 62 37 73 46 63 38 59 50 47 51 46 52 49
 41 45 49 48 50 56 68 62 46 50 60 51 46 68 51 53 49 60 65 52 64 55 51 47
 57 46 49 57 47 58 58 51 57 45 61 58 38 49 50 45 43 55 50 56 47 55 55 49
 60 67 58 58 48 41 41 54 50 49 48 46 64 46 57 42 56 48 43 65 51 48 39 57
 51 48 51 52 58 53 49 41 63 55 51 47 56 57 40 58 50 51 52 44 54 66 59 

In [8]:
# Task 6: Calculate Quarterly Temperature Averages
# Reshape data to calculate seasonal averages
temperature_data_quarters = temperature_data.reshape(locations, 4, 91)
quarterly_averages = np.nanmean(temperature_data_quarters, axis=2)
print("Quarterly temperature averages for each location:\n", quarterly_averages)

ValueError: cannot reshape array of size 182500 into shape (500,4,91)

In [13]:
# Task 7: Classify Humidity Levels
humidity_classification = np.where(humidity_data < 30, "Dry", np.where(humidity_data > 70, "Humid", "Moderate"))
dry_days_count = np.sum(humidity_classification == "Dry", axis=1)
humid_days_count = np.sum(humidity_classification == "Humid", axis=1)
print("Total 'Dry' days per location:", dry_days_count)
print()
print("Total 'Humid' days per location:", humid_days_count)

Total 'Dry' days per location: [113 107 113 106 118 102 120 114 107 121 110 110  90 107 110 103 114  95
 105  96 111  97  97 136  98 112 126  92 112 110 109 115 106 116 110 110
 114 109 107 107 119  98 122 117 111 118 127 121 104 125 113 122 103 126
 107 111 114 109 114 113 122  93 112 122 118 107 122 102 103 101 125 107
 113 113 112 109 119 109 109 125  98 106 112  94 114 123 123 103  93 121
 106  92 111 106 108 104 103 109 120  97 108 122  94 111 107 112 108 112
 110  93  88 124 116 116 100 110 111 107  99 108  94  99 108 109 107 112
 103 100 104 110 112 104 120 104 104  95 121 118 117 106 119 103  98 119
 116 106 100 114 107 111 119 111 114 115 105 115 106 105 102 112 115 126
 107 102 114 103 113 111 111 117 106 102 105 124  91 116 106  88 108 120
 115 100 110 117 103 113 105 121 118 127 113 108 101  89 121 114  91 109
 112 112 115 108  97 106 113 107 112 103  94 100  90 108 120 109 108  99
 107  94 115 105 111 110 115  95 111 107 109 115 107 111 109 104 112 102
 101 106 100 113 111

In [10]:
# Task 8: Apply Daily Pressure Trend to Temperature Data
# Create a sinusoidal pressure trend to simulate seasonal changes
pressure_trend = 5 * np.sin(2 * np.pi * np.arange(days) / days)
temperature_adjusted = temperature_data + pressure_trend

# Display some adjusted temperature data to verify adjustments
print("Sample adjusted temperatures:", temperature_adjusted[:5, :10])

Sample adjusted temperatures: [[ 8.72700594         nan 26.77180515 20.19102255 -1.85505584 -1.77044999
  -6.5803109          nan 20.74214445 26.17517299]
 [ 9.30513189         nan         nan  0.04765508 -6.18792282 -4.53127594
  -8.57340023 -4.67681174 24.83673253 -5.66902347]
 [-1.7867101  30.81480279 23.43196909 16.41136957  8.28553634 34.28985103
  10.13776386 31.4310122  12.64313929  9.61876557]
 [-1.1735984   1.11037723 -0.50597883 29.23732201  7.85027509 -6.67804218
          nan 34.79033447         nan         nan]
 [ 9.69322028 22.64771563 -4.49824043 23.15036357         nan -7.15957406
          nan 10.94643826 34.22406638 29.89081831]]
