In [2]:
import pandas as pd

# Load the 'all_us_airports.csv' file
df_all_airports = pd.read_csv('all_us_airports.csv')

# Filter to include only large airports
df_large_airports = df_all_airports[df_all_airports['type'] == 'large_airport']

# Select relevant columns
relevant_columns = ['name', 'latitude_deg', 'longitude_deg', 'iata_code']
df_large_airports_filtered = df_large_airports[relevant_columns]

# Display the first few rows of the filtered DataFrame
display(df_large_airports_filtered.head())

# Save the filtered data to a new CSV file named 'large_us_airports.csv'
df_large_airports_filtered.to_csv('large_us_airports.csv', index=False)

# Display success message
print("Filtered data has been successfully saved to 'large_us_airports.csv'")


Unnamed: 0,name,latitude_deg,longitude_deg,iata_code
1,Los Angeles International Airport,33.942501,-118.407997,LAX
2,Chicago O'Hare International Airport,41.9786,-87.9048,ORD
3,John F Kennedy International Airport,40.639447,-73.779317,JFK
4,Hartsfield Jackson Atlanta International Airport,33.6367,-84.428101,ATL
5,San Francisco International Airport,37.61899948120117,-122.375,SFO


Filtered data has been successfully saved to 'large_us_airports.csv'


In [3]:
import folium
from folium import plugins
import pandas as pd

# Load the 'large_us_airports.csv' file
df_large_airports = pd.read_csv('large_us_airports.csv')

# Create a folium map centered around the first airport in the DataFrame
map_airports = folium.Map(location=[df_large_airports['latitude_deg'].iloc[0], df_large_airports['longitude_deg'].iloc[0]], zoom_start=5)

# Loop over each airport and add a marker to the map
for index, row in df_large_airports.iterrows():
    # Create a popup with airport name and IATA code
    popup_text = f"{row['name']} ({row['iata_code']})"
    
    # Add a marker for each airport
    folium.Marker(
        location=[row['latitude_deg'], row['longitude_deg']],
        popup=folium.Popup(popup_text, max_width=300),
        icon=folium.Icon(color='blue', icon='plane', prefix='fa')
    ).add_to(map_airports)

# Display the map
map_airports


In [4]:
import requests
import time
import pandas as pd
import json

# Function to make OpenAQ API requests with a be nice pause
def make_openaq_request(endpoint, params=None):
    base_url = "https://api.openaq.org/v2/"
    url = base_url + endpoint

    # Make the API request
    response = requests.get(url, params=params)

    # Be nice and pause for 1-2 seconds
    time.sleep(2)

    return response

# Specify parameters for the OpenAQ API request
params = {
    "date_from": "2023-06-06T00:00:00Z",
    "date_to": "2023-06-06T23:59:59Z",
    "parameter": "pm25",
    "coordinates": "42.33143000,-83.04575000",
    "radius": 7500,  # 7.5 km radius from downtown Detroit
    "limit": 10000  # Adjust the limit based on your needs
}

# Make the OpenAQ API request
response = make_openaq_request("measurements", params)

# Check if the request was successful (status code 200)
if response.status_code == 200:
    # Convert the JSON response to a DataFrame
    data = response.json()
    df_pm25_measurements = pd.DataFrame(data['results'])
    
    # Display the DataFrame
    display(df_pm25_measurements)
    
    # Save the data to a JSON file
    json_output_file = 'openaq_pm25_data.json'
    with open(json_output_file, 'w') as json_file:
        json.dump(data, json_file, indent=2)
    
    print("PM2.5 data for June 6, 2023, within 7.5 km of downtown Detroit has been successfully fetched and saved as JSON.")
else:
    print(f"Error: Unable to fetch data. Status code: {response.status_code}")


Unnamed: 0,locationId,location,parameter,value,date,unit,coordinates,country,city,isMobile,isAnalysis,entity,sensorType
0,1385,Windsor Downtown,pm25,125.0,"{'utc': '2023-06-06T23:00:00+00:00', 'local': ...",µg/m³,"{'latitude': 42.3144, 'longitude': -83.0433}",US,,False,,Governmental Organization,reference grade
1,1385,Windsor Downtown,pm25,21.0,"{'utc': '2023-06-06T22:00:00+00:00', 'local': ...",µg/m³,"{'latitude': 42.3144, 'longitude': -83.0433}",US,,False,,Governmental Organization,reference grade
2,1385,Windsor Downtown,pm25,14.0,"{'utc': '2023-06-06T21:00:00+00:00', 'local': ...",µg/m³,"{'latitude': 42.3144, 'longitude': -83.0433}",US,,False,,Governmental Organization,reference grade
3,1385,Windsor Downtown,pm25,18.0,"{'utc': '2023-06-06T20:00:00+00:00', 'local': ...",µg/m³,"{'latitude': 42.3144, 'longitude': -83.0433}",US,,False,,Governmental Organization,reference grade
4,1385,Windsor Downtown,pm25,21.0,"{'utc': '2023-06-06T19:00:00+00:00', 'local': ...",µg/m³,"{'latitude': 42.3144, 'longitude': -83.0433}",US,,False,,Governmental Organization,reference grade
...,...,...,...,...,...,...,...,...,...,...,...,...,...
6279,7002,MILITARY PARK,pm25,33.7,"{'utc': '2023-06-06T05:00:00+00:00', 'local': ...",µg/m³,"{'latitude': 42.312078, 'longitude': -83.103469}",US,,False,,Governmental Organization,reference grade
6280,7002,MILITARY PARK,pm25,31.7,"{'utc': '2023-06-06T04:00:00+00:00', 'local': ...",µg/m³,"{'latitude': 42.312078, 'longitude': -83.103469}",US,,False,,Governmental Organization,reference grade
6281,7002,MILITARY PARK,pm25,29.9,"{'utc': '2023-06-06T03:00:00+00:00', 'local': ...",µg/m³,"{'latitude': 42.312078, 'longitude': -83.103469}",US,,False,,Governmental Organization,reference grade
6282,7002,MILITARY PARK,pm25,28.8,"{'utc': '2023-06-06T02:00:00+00:00', 'local': ...",µg/m³,"{'latitude': 42.312078, 'longitude': -83.103469}",US,,False,,Governmental Organization,reference grade


PM2.5 data for June 6, 2023, within 7.5 km of downtown Detroit has been successfully fetched and saved as JSON.


In [5]:
import pandas as pd
import json

# Load the OpenAQ data from the JSON file obtained earlier
with open('openaq_pm25_data.json', 'r') as json_file:
    data = json.load(json_file)

# Convert the data to a DataFrame
df_pm25_measurements = pd.DataFrame(data['results'])

# Display the original DataFrame
display(df_pm25_measurements.head())

# Transformation: Extract latitude and longitude from the 'coordinates' field
df_pm25_measurements[['sensor_lat', 'sensor_lon']] = df_pm25_measurements['coordinates'].apply(lambda x: pd.Series([x['latitude'], x['longitude']] if isinstance(x, dict) else [None, None]))

# Identify the correct date field based on your DataFrame's column names
date_column_names = ['date.utc', 'date']  # Add any other potential date column names
date_column_name = next((col for col in date_column_names if col in df_pm25_measurements.columns), None)

# Check if a valid date column was found
if date_column_name is not None:
    # Transformation: Extract the desired timestamp (e.g., UTC) and convert it to datetime64 format
    df_pm25_measurements['local_time'] = pd.to_datetime(df_pm25_measurements[date_column_name].apply(lambda x: x['local']))

    # Filter: Keep only the relevant columns
    relevant_columns = ['locationId', 'location', 'entity', 'parameter', 'value', 'sensor_lat', 'sensor_lon', 'local_time']
    df_transformed = df_pm25_measurements[relevant_columns]

    # Display the transformed DataFrame
    display(df_transformed.head())
else:
    print("No valid date column found in the DataFrame.")



Unnamed: 0,locationId,location,parameter,value,date,unit,coordinates,country,city,isMobile,isAnalysis,entity,sensorType
0,1385,Windsor Downtown,pm25,125.0,"{'utc': '2023-06-06T23:00:00+00:00', 'local': ...",µg/m³,"{'latitude': 42.3144, 'longitude': -83.0433}",US,,False,,Governmental Organization,reference grade
1,1385,Windsor Downtown,pm25,21.0,"{'utc': '2023-06-06T22:00:00+00:00', 'local': ...",µg/m³,"{'latitude': 42.3144, 'longitude': -83.0433}",US,,False,,Governmental Organization,reference grade
2,1385,Windsor Downtown,pm25,14.0,"{'utc': '2023-06-06T21:00:00+00:00', 'local': ...",µg/m³,"{'latitude': 42.3144, 'longitude': -83.0433}",US,,False,,Governmental Organization,reference grade
3,1385,Windsor Downtown,pm25,18.0,"{'utc': '2023-06-06T20:00:00+00:00', 'local': ...",µg/m³,"{'latitude': 42.3144, 'longitude': -83.0433}",US,,False,,Governmental Organization,reference grade
4,1385,Windsor Downtown,pm25,21.0,"{'utc': '2023-06-06T19:00:00+00:00', 'local': ...",µg/m³,"{'latitude': 42.3144, 'longitude': -83.0433}",US,,False,,Governmental Organization,reference grade


Unnamed: 0,locationId,location,entity,parameter,value,sensor_lat,sensor_lon,local_time
0,1385,Windsor Downtown,Governmental Organization,pm25,125.0,42.3144,-83.0433,2023-06-06 19:00:00-04:00
1,1385,Windsor Downtown,Governmental Organization,pm25,21.0,42.3144,-83.0433,2023-06-06 18:00:00-04:00
2,1385,Windsor Downtown,Governmental Organization,pm25,14.0,42.3144,-83.0433,2023-06-06 17:00:00-04:00
3,1385,Windsor Downtown,Governmental Organization,pm25,18.0,42.3144,-83.0433,2023-06-06 16:00:00-04:00
4,1385,Windsor Downtown,Governmental Organization,pm25,21.0,42.3144,-83.0433,2023-06-06 15:00:00-04:00


In [6]:
# Load the OpenAQ data from the JSON file obtained earlier
with open('openaq_pm25_data.json', 'r') as json_file:
    data = json.load(json_file)

# Convert the data to a DataFrame
df_pm25_measurements = pd.DataFrame(data['results'])

# Transformation: Extract latitude and longitude from the 'coordinates' field
df_pm25_measurements[['sensor_lat', 'sensor_lon']] = df_pm25_measurements['coordinates'].apply(
    lambda x: pd.Series([x['latitude'], x['longitude']] if isinstance(x, dict) else [None, None])
)

# Identify the correct date field based on your DataFrame's column names
date_column_names = ['date.utc', 'date']  # Add any other potential date column names
date_column_name = next((col for col in date_column_names if col in df_pm25_measurements.columns), None)

# Check if a valid date column was found
if date_column_name is not None:
    # Transformation: Extract the desired timestamp (e.g., UTC) and convert it to datetime64 format
    df_pm25_measurements['local_time'] = pd.to_datetime(
        df_pm25_measurements[date_column_name].apply(lambda x: x['local'])
    )

    # Filter: Keep only the relevant columns
    relevant_columns = ['locationId', 'location', 'entity', 'parameter', 'value', 'sensor_lat', 'sensor_lon', 'local_time']
    df_transformed = df_pm25_measurements[relevant_columns]

    # Filter: Restrict 'entity' to the subset 'Governmental Organization' and 'Community Organization'
    entity_subset = ['Governmental Organization', 'Community Organization']
    df_transformed = df_transformed[df_transformed['entity'].isin(entity_subset)]

    # Display the transformed and filtered DataFrame
    display(df_transformed.head())
else:
    print("No valid date column found in the DataFrame.")


Unnamed: 0,locationId,location,entity,parameter,value,sensor_lat,sensor_lon,local_time
0,1385,Windsor Downtown,Governmental Organization,pm25,125.0,42.3144,-83.0433,2023-06-06 19:00:00-04:00
1,1385,Windsor Downtown,Governmental Organization,pm25,21.0,42.3144,-83.0433,2023-06-06 18:00:00-04:00
2,1385,Windsor Downtown,Governmental Organization,pm25,14.0,42.3144,-83.0433,2023-06-06 17:00:00-04:00
3,1385,Windsor Downtown,Governmental Organization,pm25,18.0,42.3144,-83.0433,2023-06-06 16:00:00-04:00
4,1385,Windsor Downtown,Governmental Organization,pm25,21.0,42.3144,-83.0433,2023-06-06 15:00:00-04:00


In [7]:
# Specify the CSV file path
csv_output_file_path = '20230606_detroit_downtown_7_5km_aq.csv'

# Save the final transformed and filtered data to a new CSV file
df_transformed.to_csv(csv_output_file_path, index=False)

print(f"Final transformed and filtered data saved to '{csv_output_file_path}'.")


Final transformed and filtered data saved to '20230606_detroit_downtown_7_5km_aq.csv'.


In [10]:
import pandas as pd

# Load your Detroit data (assuming the data is in a CSV file)
df = pd.read_csv('20230606_detroit_downtown_7_5km_aq.csv')

# 1. What is the mean and median PM2.5 reading over all sensors?
mean_pm25 = df['value'].mean()
median_pm25 = df['value'].median()

# 2. What is the standard deviation?
std_dev_pm25 = df['value'].std()

# 3. Which location_id recorded the highest PM2.5? What was the reading?
max_pm25_location = df.loc[df['value'].idxmax()]
max_pm25_location_id = max_pm25_location['locationId']
max_pm25_reading = max_pm25_location['value']

# 4. What is the ratio of Community Organization to Governmental Organization entity type?
entity_counts = df['entity'].value_counts()
community_count = entity_counts.get('Community Organization', 0)
government_count = entity_counts.get('Governmental Organization', 0)

# Avoid division by zero
if government_count != 0:
    entity_ratio = community_count / government_count
else:
    entity_ratio = None

# 5. How many unique sensor stations are in the data (use locationId)?
unique_sensor_count = df['locationId'].nunique()

# 6. What is the station density per km?
# Calculate station density as the number of unique sensors per square kilometer
# Assuming your data is for an area of 7.5 km radius
station_density = unique_sensor_count / (3.14 * (7.5**2))

# 7. What is the daily mean, median, min, max, 75%, and standard deviation for each entity type?
daily_stats_community = df[df['entity'] == 'Community Organization'].groupby('local_time')['value'].describe()
daily_stats_government = df[df['entity'] == 'Governmental Organization'].groupby('local_time')['value'].describe()

# 8. What is your opinion of the differences in the statistics? Comment specifically about the mean and 75%.

# Display the results
print(f"1. Mean PM2.5 Reading: {mean_pm25}")
print(f"2. Median PM2.5 Reading: {median_pm25}")
print(f"3. Location with Highest PM2.5: Location ID {max_pm25_location_id}, Reading: {max_pm25_reading}")
print(f"4. Entity Ratio:\n{entity_ratio}")
print(f"5. Unique Sensor Stations: {unique_sensor_count}")
print(f"6. Station Density per km^2: {station_density}")
print("7. Daily Stats - Community Organization:")
print(daily_stats_community)
print("\nDaily Stats - Governmental Organization:")
print(daily_stats_government)


1. Mean PM2.5 Reading: 66.0975970719287
2. Median PM2.5 Reading: 37.05
3. Location with Highest PM2.5: Location ID 65949, Reading: 1759.5
4. Entity Ratio:
67.30434782608695
5. Unique Sensor Stations: 15
6. Station Density per km^2: 0.08492569002123142
7. Daily Stats - Community Organization:
                           count    mean  std     min     25%     50%     75%  \
local_time                                                                      
2023-06-05 20:00:30-04:00    1.0    33.9  NaN    33.9    33.9    33.9    33.9   
2023-06-05 20:00:33-04:00    1.0    26.8  NaN    26.8    26.8    26.8    26.8   
2023-06-05 20:00:42-04:00    1.0    39.3  NaN    39.3    39.3    39.3    39.3   
2023-06-05 20:00:43-04:00    1.0    22.4  NaN    22.4    22.4    22.4    22.4   
2023-06-05 20:00:47-04:00    1.0    34.7  NaN    34.7    34.7    34.7    34.7   
...                          ...     ...  ...     ...     ...     ...     ...   
2023-06-06 19:58:46-04:00    1.0   128.3  NaN   128.3   128

In [40]:
import folium
from folium import plugins

# Coordinates for downtown Detroit
detroit_coords = [42.33143000, -83.04575000]

# Create a folium map centered on downtown Detroit
map_detroit = folium.Map(location=detroit_coords, zoom_start=13)

# Add a marker for downtown Detroit with a city icon
folium.Marker(location=detroit_coords, 
              icon=folium.Icon(color='blue', icon='building', prefix='fa')).add_to(map_detroit)

# Assuming df_transformed is the DataFrame with your filtered data
for index, row in df_transformed.iterrows():
    sensor_coords = [row['sensor_lat'], row['sensor_lon']]
    entity_type = row['entity']
    
    # Choose icon and color based on entity type
    if entity_type == 'Community Organization':
        icon_color = 'green'
        icon_symbol = 'info-sign'
    elif entity_type == 'Governmental Organization':
        icon_color = 'red'
        icon_symbol = 'asterisk'
    else:
        icon_color = 'black'
        icon_symbol = 'question-mark'
    
    # Add markers for each sensor
    folium.Marker(location=sensor_coords, 
                  popup=f"{row['location']} ({row['parameter']}): {row['value']}",
                  icon=folium.Icon(color=icon_color, icon=icon_symbol)).add_to(map_detroit)

# Save the map as an HTML file
map_detroit.save('detroit_map.html')


ModuleNotFoundError: No module named 'folium'

import pandas as pd
import matplotlib.pyplot as plt

# Load your Detroit data (assuming the data is in a CSV file)
df = pd.read_csv('20230606_detroit_downtown_7_5km_aq.csv')

# Convert 'local_time' to datetime
df['local_time'] = pd.to_datetime(df['local_time'])

# 1. Calculate average readings for each 6-hour block
hourly_blocks = [(0, 6), (6, 12), (12, 18), (18, 24)]

for start_hour, end_hour in hourly_blocks:
    block_df = df[(df['local_time'].dt.hour >= start_hour) & (df['local_time'].dt.hour < end_hour)]
    average_reading = block_df['value'].mean()
    print(f"Average PM2.5 reading for {start_hour}-{end_hour} hours: {average_reading}")

# 2. Compare and contrast these readings – make a comment about their differences.
# You can observe the variations in air quality across different time blocks.

# 3. Plot the hourly averages for the day using line plots
df['hour'] = df['local_time'].dt.hour
hourly_avg = df.groupby('hour')['value'].mean()

plt.figure(figsize=(10, 6))
hourly_avg.plot(marker='o', linestyle='-', color='blue')
plt.title('Hourly PM2.5 Averages for June 6, 2023 (Detroit, MI)')
plt.xlabel('Hour of the Day')
plt.ylabel('PM2.5 Value')
plt.grid(True)
plt.show()

# 4. Plot the hourly averages of the government and community sensors on the same plot
government_avg = df[df['entity'] == 'Governmental Organization'].groupby('hour')['value'].mean()
community_avg = df[df['entity'] == 'Community Organization'].groupby('hour')['value'].mean()

plt.figure(figsize=(10, 6))
government_avg.plot(marker='o', linestyle='-', color='blue', label='Governmental Organization')
community_avg.plot(marker='o', linestyle='-', color='orange', label='Community Organization')
plt.title('Hourly PM2.5 Averages for June 6, 2023 (Detroit, MI)')
plt.xlabel('Hour of the Day')
plt.ylabel('PM2.5 Value')
plt.legend()
plt.grid(True)
plt.show()


In [39]:
import pandas as pd
from scipy.stats import ttest_ind

# Load your Detroit data (assuming the data is in a CSV file)
df = pd.read_csv('20230606_detroit_downtown_7_5km_aq.csv')

# Separate data for government and community sensors
government_data = df[df['entity'] == 'Governmental Organization']['value']
community_data = df[df['entity'] == 'Community Organization']['value']

# Perform an independent t-test
t_statistic, p_value = ttest_ind(government_data, community_data, equal_var=False)

# Output the results
print(f"T-statistic: {t_statistic}")
print(f"P-value: {p_value}")

# Interpret the results
alpha = 0.05  # significance level
if p_value < alpha:
    print("Reject the null hypothesis: There is a significant difference between government and community sensor data.")
else:
    print("Fail to reject the null hypothesis: There is no significant difference between government and community sensor data.")


T-statistic: -9.863660975291328
P-value: 2.92328590824239e-22
Reject the null hypothesis: There is a significant difference between government and community sensor data.


In [None]:
import pandas as pd
from scipy.stats import normaltest, bartlett, ttest_ind

# Load your Detroit data (assuming the data is in a CSV file)
df = pd.read_csv('20230606_detroit_downtown_7_5km_aq.csv')

# Separate data for government and community sensors
government_data = df[df['entity'] == 'Governmental Organization']['value']
community_data = df[df['entity'] == 'Community Organization']['value']

# 1. Build a dataset with 95 data points sampled from 100 random draws of data from the community sensors
sample_means = []
for _ in range(100):
    community_sample = community_data.sample(95)
    sample_means.append(community_sample.mean())

# Create a DataFrame from the sample means
sample_means_df = pd.DataFrame(sample_means, columns=['Community Sample Means'])

# 2. Descriptive statistics of the sample and government data
print("Descriptive Statistics - Sample Means:")
print(sample_means_df.describe())

print("\nDescriptive Statistics - Government Data:")
print(government_data.describe())

# 3. Compare and contrast descriptive statistics
# You can visually inspect the statistics and discuss differences in means, 75%, and standard deviations.

# 4. Run a test for normality on the two samples
_, p_value_normaltest_community = normaltest(sample_means_df['Community Sample Means'])
_, p_value_normaltest_government = normaltest(government_data)

print(f"\nNormality Test - Community Sample Means p-value: {p_value_normaltest_community}")
print(f"Normality Test - Government Data p-value: {p_value_normaltest_government}")

# 5. Run a Barlett test for equal variances
_, p_value_bartlett = bartlett(sample_means_df['Community Sample Means'], government_data)

print(f"\nBartlett Test - p-value: {p_value_bartlett}")

# 6. Run the independent t-test
equal_var = p_value_bartlett > 0.05  # Use Bartlett's result to determine equal_var parameter
t_statistic, p_value_ttest = ttest_ind(sample_means_df['Community Sample Means'], government_data, equal_var=equal_var)

print(f"\nIndependent T-test - p-value: {p_value_ttest}")


import pandas as pd
from scipy.stats import normaltest, bartlett, ttest_ind

# Load your Detroit data (assuming the data is in a CSV file)
df = pd.read_csv('20230606_detroit_downtown_7_5km_aq.csv')

# Separate data for government and community sensors
government_data = df[df['entity'] == 'Governmental Organization']['value']
community_data = df[df['entity'] == 'Community Organization']['value']

# 1. Build a dataset with 95 data points sampled from 100 random draws of data from the community sensors
sample_means = []
for _ in range(100):
    community_sample = community_data.sample(95)
    sample_means.append(community_sample.mean())

# Create a DataFrame from the sample means
sample_means_df = pd.DataFrame(sample_means, columns=['Community Sample Means'])

# 2. Descriptive statistics of the sample and government data
print("Descriptive Statistics - Sample Means:")
print(sample_means_df.describe())

print("\nDescriptive Statistics - Government Data:")
print(government_data.describe())

# 3. Compare and contrast descriptive statistics
# You can visually inspect the statistics and discuss differences in means, 75%, and standard deviations.

# 4. Run a test for normality on the two samples
_, p_value_normaltest_community = normaltest(sample_means_df['Community Sample Means'])
_, p_value_normaltest_government = normaltest(government_data)

print(f"\nNormality Test - Community Sample Means p-value: {p_value_normaltest_community}")
print(f"Normality Test - Government Data p-value: {p_value_normaltest_government}")

# 5. Run a Barlett test for equal variances
_, p_value_bartlett = bartlett(sample_means_df['Community Sample Means'], government_data)

print(f"\nBartlett Test - p-value: {p_value_bartlett}")

# 6. Run the independent t-test
alpha = 0.001
equal_var = p_value_bartlett > 0.05  # Use Bartlett's result to determine equal_var parameter
t_statistic, p_value_ttest = ttest_ind(sample_means_df['Community Sample Means'], government_data, equal_var=equal_var)

print(f"\nIndependent T-test - p-value: {p_value_ttest}")

# Check if we fail to reject the null hypothesis at 𝛼 = 0.001
if p_value_ttest > alpha:
    print(f"\nFail to reject the null hypothesis at 𝛼 = 0.001")
else:
    print(f"\nReject the null hypothesis at 𝛼 = 0.001")


Based on the analysis conducted using statistical tests and descriptive statistics, we can address the two statements:

1. Community sensors detected the poor air quality of the 2023 Canadian wildfires better than Government sensors:

Evidence: The independent t-test was conducted on the means of the community sensor sample means and the government sensor data. The p-value from the test is used to determine if there is a significant difference between the two groups. If the p-value is below the significance level (𝛼), it suggests a significant difference.
Reaction: If the p-value is below 𝛼 (typically 0.05), we would reject the null hypothesis, indicating a significant difference. This would support the statement that community sensors detected poor air quality better. However, it's essential to consider other factors, such as sensor density and the representativeness of the samples.
2. There need to be more community sensors deployed in downtown Detroit:

Evidence: The analysis includes the calculation of sensor density for community sensors in downtown Detroit. Sensor density is determined by the number of unique sensors per unit area. If the calculated sensor density is low, it might suggest a need for more sensors to provide better coverage and more accurate representation of air quality.
Reaction: If the sensor density is found to be relatively low, it supports the statement that more community sensors are needed in downtown Detroit. However, it's crucial to consider other data points, such as the distribution of pollution sources, variations in air quality across different locations, and potential sources of bias in the existing sensor network.
In summary, the evidence from statistical tests and sensor density calculations can provide support for or against the statements. However, it's essential to interpret the findings cautiously, considering additional contextual information and potential limitations of the data and analysis.

In [11]:
import requests
import time
import pandas as pd
import json

# Function to make OpenAQ API requests with a be nice pause
def make_openaq_request(endpoint, params=None):
    base_url = "https://api.openaq.org/v2/"
    url = base_url + endpoint

    # Make the API request
    response = requests.get(url, params=params)

    # Be nice and pause for 1-2 seconds
    time.sleep(2)

    return response

# Specify parameters for the OpenAQ API request
params = {
    "date_from": "2023-05-01T00:00:00Z",  # Start date - May 1, 2023
    "date_to": "2023-08-31T23:59:59Z",    # End date - August 31, 2023
    "coordinates": "42.3643,-71.005203",  # Boston Logan International Airport coordinates
    "radius": 7500,  # 7.5 km radius
    "limit": 10000  # Adjust the limit based on your needs
}

# Make the OpenAQ API request
response = make_openaq_request("measurements", params)

# Check if the request was successful (status code 200)
if response.status_code == 200:
    # Convert the JSON response to a DataFrame
    data = response.json()
    df_measurements = pd.DataFrame(data['results'])
    
    # Display the DataFrame
    display(df_measurements)
    
    # Save the data to a JSON file
    json_output_file = 'openaq_data_boston.json'
    with open(json_output_file, 'w') as json_file:
        json.dump(data, json_file, indent=2)
    
    print("Air quality data for Boston Logan International Airport from May 1 to August 31, 2023, within 7.5 km has been successfully fetched and saved as JSON.")
else:
    print(f"Error: Unable to fetch data. Status code: {response.status_code}")


Unnamed: 0,locationId,location,parameter,value,date,unit,coordinates,country,city,isMobile,isAnalysis,entity,sensorType
0,448,Boston - Roxbury,co,0.3000,"{'utc': '2023-08-31T23:00:00+00:00', 'local': ...",ppm,"{'latitude': 42.329399, 'longitude': -71.082497}",US,,False,,Governmental Organization,reference grade
1,448,Boston - Roxbury,co,0.2000,"{'utc': '2023-08-31T22:00:00+00:00', 'local': ...",ppm,"{'latitude': 42.329399, 'longitude': -71.082497}",US,,False,,Governmental Organization,reference grade
2,448,Boston - Roxbury,co,0.2000,"{'utc': '2023-08-31T21:00:00+00:00', 'local': ...",ppm,"{'latitude': 42.329399, 'longitude': -71.082497}",US,,False,,Governmental Organization,reference grade
3,448,Boston - Roxbury,co,0.2000,"{'utc': '2023-08-31T20:00:00+00:00', 'local': ...",ppm,"{'latitude': 42.329399, 'longitude': -71.082497}",US,,False,,Governmental Organization,reference grade
4,448,Boston - Roxbury,co,0.2000,"{'utc': '2023-08-31T19:00:00+00:00', 'local': ...",ppm,"{'latitude': 42.329399, 'longitude': -71.082497}",US,,False,,Governmental Organization,reference grade
...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,448,Boston - Roxbury,so2,0.0002,"{'utc': '2023-06-21T17:00:00+00:00', 'local': ...",ppm,"{'latitude': 42.329399, 'longitude': -71.082497}",US,,False,,Governmental Organization,reference grade
9996,448,Boston - Roxbury,so2,0.0002,"{'utc': '2023-06-21T16:00:00+00:00', 'local': ...",ppm,"{'latitude': 42.329399, 'longitude': -71.082497}",US,,False,,Governmental Organization,reference grade
9997,448,Boston - Roxbury,so2,0.0002,"{'utc': '2023-06-21T15:00:00+00:00', 'local': ...",ppm,"{'latitude': 42.329399, 'longitude': -71.082497}",US,,False,,Governmental Organization,reference grade
9998,448,Boston - Roxbury,so2,0.0002,"{'utc': '2023-06-21T14:00:00+00:00', 'local': ...",ppm,"{'latitude': 42.329399, 'longitude': -71.082497}",US,,False,,Governmental Organization,reference grade


Air quality data for Boston Logan International Airport from May 1 to August 31, 2023, within 7.5 km has been successfully fetched and saved as JSON.


In [17]:
import requests
import time
import pandas as pd
import json

# Function to make OpenAQ API requests with a be nice pause
def make_openaq_request(endpoint, params=None):
    base_url = "https://api.openaq.org/v2/"
    url = base_url + endpoint

    # Make the API request
    response = requests.get(url, params=params)

    # Be nice and pause for 1-2 seconds
    time.sleep(2)

    return response

# Specify parameters for the OpenAQ API request
params = {
    "date_from": "2023-05-01T00:00:00Z",  # Start date - May 1, 2023
    "date_to": "2023-08-31T23:59:59Z",    # End date - August 31, 2023
    "coordinates": "39.1754,-76.668297",  # Baltimore/Washington International Airport coordinates
    "radius": 7500,  # 7.5 km radius
    "limit": 10000  # Adjust the limit based on your needs
}

# Make the OpenAQ API request
response = make_openaq_request("measurements", params)

# Check if the request was successful (status code 200)
if response.status_code == 200:
    # Convert the JSON response to a DataFrame
    data = response.json()
    df_measurements = pd.DataFrame(data['results'])
    
    # Display the DataFrame
    display(df_measurements)
    
    # Save the data to a JSON file
    json_output_file = 'openaq_data_baltimore.json'
    with open(json_output_file, 'w') as json_file:
        json.dump(data, json_file, indent=2)
    
    print("Air quality data for Baltimore/Washington International Airport from May 1 to August 31, 2023, within 7.5 km has been successfully fetched and saved as JSON.")
else:
    print(f"Error: Unable to fetch data. Status code: {response.status_code}")


Unnamed: 0,locationId,location,parameter,value,date,unit,coordinates,country,city,isMobile,isAnalysis,entity,sensorType
0,300,Glen Burnie,o3,0.040,"{'utc': '2023-08-31T23:00:00+00:00', 'local': ...",ppm,"{'latitude': 39.1694, 'longitude': -76.6281}",US,,False,,Governmental Organization,reference grade
1,300,Glen Burnie,o3,0.043,"{'utc': '2023-08-31T22:00:00+00:00', 'local': ...",ppm,"{'latitude': 39.1694, 'longitude': -76.6281}",US,,False,,Governmental Organization,reference grade
2,300,Glen Burnie,o3,0.043,"{'utc': '2023-08-31T21:00:00+00:00', 'local': ...",ppm,"{'latitude': 39.1694, 'longitude': -76.6281}",US,,False,,Governmental Organization,reference grade
3,300,Glen Burnie,o3,0.044,"{'utc': '2023-08-31T20:00:00+00:00', 'local': ...",ppm,"{'latitude': 39.1694, 'longitude': -76.6281}",US,,False,,Governmental Organization,reference grade
4,300,Glen Burnie,o3,0.047,"{'utc': '2023-08-31T19:00:00+00:00', 'local': ...",ppm,"{'latitude': 39.1694, 'longitude': -76.6281}",US,,False,,Governmental Organization,reference grade
...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,62569,ex,pm25,27.300,"{'utc': '2023-08-20T04:08:34+00:00', 'local': ...",µg/m³,"{'latitude': 39.197975, 'longitude': -76.601036}",US,,False,,Community Organization,low-cost sensor
9996,62569,ex,pm25,28.600,"{'utc': '2023-08-20T04:06:34+00:00', 'local': ...",µg/m³,"{'latitude': 39.197975, 'longitude': -76.601036}",US,,False,,Community Organization,low-cost sensor
9997,62569,ex,pm25,27.800,"{'utc': '2023-08-20T04:04:34+00:00', 'local': ...",µg/m³,"{'latitude': 39.197975, 'longitude': -76.601036}",US,,False,,Community Organization,low-cost sensor
9998,62569,ex,pm25,27.300,"{'utc': '2023-08-20T04:02:34+00:00', 'local': ...",µg/m³,"{'latitude': 39.197975, 'longitude': -76.601036}",US,,False,,Community Organization,low-cost sensor


Air quality data for Baltimore/Washington International Airport from May 1 to August 31, 2023, within 7.5 km has been successfully fetched and saved as JSON.


In [132]:
import requests
import time
import pandas as pd
import json

# Function to make OpenAQ API requests with a be nice pause
def make_openaq_request(endpoint, params=None):
    base_url = "https://api.openaq.org/v2/"
    url = base_url + endpoint

    # Make the API request with your OpenAQ API key
    api_key = "YOUR_API_KEY"
    headers = {"apikey": api_key}
    response = requests.get(url, params=params, headers=headers)

    # Be nice and pause for 1-2 seconds
    time.sleep(2)

    return response

# Specify parameters for the OpenAQ API request
params = {
    "date_from": "2023-05-01T00:00:00Z",  # Start date - May 1, 2023
    "date_to": "2023-08-31T23:59:59Z",    # End date - August 31, 2023
    "coordinates": "40.692501,-74.168701",  # Newark Liberty International Airport coordinates
    "radius": 7500,  # 7.5 km radius
    "limit": 10000,  # Adjust the limit based on your needs
}

# Make the OpenAQ API request
response = make_openaq_request("measurements", params)

# Check if the request was successful (status code 200)
if response.status_code == 200:
    # Convert the JSON response to a DataFrame
    data = response.json()
    df_airport_measurements = pd.DataFrame(data['results'])
    
    # Display the DataFrame
    display(df_airport_measurements)
    
    # Save the data to a JSON file
    json_output_file = 'openaq_data_Newark.json'
    with open(json_output_file, 'w') as json_file:
        json.dump(data, json_file, indent=2)
    
    print("Air quality data for Newark Liberty International Airport from May 1 to August 31, 2023, within 7.5 km radius has been successfully fetched and saved as JSON.")
else:
    print(f"Error: Unable to fetch data. Status code: {response.status_code}")


Unnamed: 0,locationId,location,parameter,value,date,unit,coordinates,country,city,isMobile,isAnalysis,entity,sensorType
0,971,Elizabeth Trailer,pm25,5.0,"{'utc': '2023-08-31T23:00:00+00:00', 'local': ...",µg/m³,"{'latitude': 40.64144099999999, 'longitude': -...",US,,False,,Governmental Organization,reference grade
1,971,Elizabeth Trailer,pm25,4.7,"{'utc': '2023-08-31T22:00:00+00:00', 'local': ...",µg/m³,"{'latitude': 40.64144099999999, 'longitude': -...",US,,False,,Governmental Organization,reference grade
2,971,Elizabeth Trailer,pm25,4.4,"{'utc': '2023-08-31T20:00:00+00:00', 'local': ...",µg/m³,"{'latitude': 40.64144099999999, 'longitude': -...",US,,False,,Governmental Organization,reference grade
3,971,Elizabeth Trailer,pm25,5.4,"{'utc': '2023-08-31T19:00:00+00:00', 'local': ...",µg/m³,"{'latitude': 40.64144099999999, 'longitude': -...",US,,False,,Governmental Organization,reference grade
4,971,Elizabeth Trailer,pm25,4.8,"{'utc': '2023-08-31T18:00:00+00:00', 'local': ...",µg/m³,"{'latitude': 40.64144099999999, 'longitude': -...",US,,False,,Governmental Organization,reference grade
...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,1496,Bayonne,so2,0.0,"{'utc': '2023-05-27T21:00:00+00:00', 'local': ...",ppm,"{'latitude': 40.67025, 'longitude': -74.126083}",US,,False,,Governmental Organization,reference grade
9996,1496,Bayonne,so2,0.0,"{'utc': '2023-05-27T20:00:00+00:00', 'local': ...",ppm,"{'latitude': 40.67025, 'longitude': -74.126083}",US,,False,,Governmental Organization,reference grade
9997,1496,Bayonne,so2,0.0,"{'utc': '2023-05-27T19:00:00+00:00', 'local': ...",ppm,"{'latitude': 40.67025, 'longitude': -74.126083}",US,,False,,Governmental Organization,reference grade
9998,1496,Bayonne,so2,0.0,"{'utc': '2023-05-27T18:00:00+00:00', 'local': ...",ppm,"{'latitude': 40.67025, 'longitude': -74.126083}",US,,False,,Governmental Organization,reference grade


Air quality data for Newark Liberty International Airport from May 1 to August 31, 2023, within 7.5 km radius has been successfully fetched and saved as JSON.


In [133]:
import requests
import time
import pandas as pd
import json

# Function to make OpenAQ API requests with a be nice pause
def make_openaq_request(endpoint, params=None):
    base_url = "https://api.openaq.org/v2/"
    url = base_url + endpoint

    # Make the API request with your OpenAQ API key
    api_key = "YOUR_API_KEY"
    headers = {"apikey": api_key}
    response = requests.get(url, params=params, headers=headers)

    # Be nice and pause for 1-2 seconds
    time.sleep(2)

    return response

# Specify parameters for the OpenAQ API request
params = {
    "date_from": "2023-05-01T00:00:00Z",  # Start date - May 1, 2023
    "date_to": "2023-08-31T23:59:59Z",    # End date - August 31, 2023
    "coordinates": "39.8719,-75.2411",  # Philadelphia International Airport coordinates
    "radius": 7500,  # 7.5 km radius
    "limit": 10000,  # Adjust the limit based on your needs
}

# Make the OpenAQ API request
response = make_openaq_request("measurements", params)

# Check if the request was successful (status code 200)
if response.status_code == 200:
    # Convert the JSON response to a DataFrame
    data = response.json()
    df_airport_measurements = pd.DataFrame(data['results'])
    
    # Display the DataFrame
    display(df_airport_measurements)
    
    # Save the data to a JSON file
    json_output_file = 'openaq_airport_philadelphia.json'
    with open(json_output_file, 'w') as json_file:
        json.dump(data, json_file, indent=2)
    
    print("Air quality data for Philadelphia International Airport from May 1 to August 31, 2023, within 7.5 km radius has been successfully fetched and saved as JSON.")
else:
    print(f"Error: Unable to fetch data. Status code: {response.status_code}")


Unnamed: 0,locationId,location,parameter,value,date,unit,coordinates,country,city,isMobile,isAnalysis,entity,sensorType
0,8753,RIT,so2,0.0004,"{'utc': '2023-08-31T22:00:00+00:00', 'local': ...",ppm,"{'latitude': 39.922516, 'longitude': -75.186783}",US,,False,,Governmental Organization,reference grade
1,8753,RIT,so2,0.0003,"{'utc': '2023-08-31T21:00:00+00:00', 'local': ...",ppm,"{'latitude': 39.922516, 'longitude': -75.186783}",US,,False,,Governmental Organization,reference grade
2,8753,RIT,so2,0.0004,"{'utc': '2023-08-31T20:00:00+00:00', 'local': ...",ppm,"{'latitude': 39.922516, 'longitude': -75.186783}",US,,False,,Governmental Organization,reference grade
3,8753,RIT,so2,0.0004,"{'utc': '2023-08-31T19:00:00+00:00', 'local': ...",ppm,"{'latitude': 39.922516, 'longitude': -75.186783}",US,,False,,Governmental Organization,reference grade
4,8753,RIT,so2,0.0003,"{'utc': '2023-08-31T18:00:00+00:00', 'local': ...",ppm,"{'latitude': 39.922516, 'longitude': -75.186783}",US,,False,,Governmental Organization,reference grade
...,...,...,...,...,...,...,...,...,...,...,...,...,...
5538,8753,RIT,pm25,3.5000,"{'utc': '2023-05-01T05:00:00+00:00', 'local': ...",µg/m³,"{'latitude': 39.922516, 'longitude': -75.186783}",US,,False,,Governmental Organization,reference grade
5539,8753,RIT,pm25,3.6000,"{'utc': '2023-05-01T04:00:00+00:00', 'local': ...",µg/m³,"{'latitude': 39.922516, 'longitude': -75.186783}",US,,False,,Governmental Organization,reference grade
5540,8753,RIT,pm25,1.9000,"{'utc': '2023-05-01T03:00:00+00:00', 'local': ...",µg/m³,"{'latitude': 39.922516, 'longitude': -75.186783}",US,,False,,Governmental Organization,reference grade
5541,8753,RIT,pm25,2.7000,"{'utc': '2023-05-01T02:00:00+00:00', 'local': ...",µg/m³,"{'latitude': 39.922516, 'longitude': -75.186783}",US,,False,,Governmental Organization,reference grade


Air quality data for Philadelphia International Airport from May 1 to August 31, 2023, within 7.5 km radius has been successfully fetched and saved as JSON.


In [137]:
import pandas as pd
import json

# Function to load and normalize JSON file
def load_and_normalize_json(file_path):
    with open(file_path, 'r') as json_file:
        data = json.load(json_file)
    return pd.json_normalize(data['results'])

# List of file paths
file_paths = [
    'openaq_data_boston.json',
    'openaq_data_baltimore.json',
    'openaq_data_Newark.json',
    'openaq_jfk_data.json',
    'openaq_airport_philadelphia.json'
]

# Load and normalize each JSON file
dfs = [load_and_normalize_json(file_path) for file_path in file_paths]

# Combine the DataFrames into one
combined_df = pd.concat(dfs, ignore_index=True)

# Save the combined DataFrame to a CSV file
combined_df.to_csv('combined_openaq_data.csv', index=False)

print("Combined data has been successfully saved as 'combined_openaq_data.csv'.")


Combined data has been successfully saved as 'combined_openaq_data.csv'.


In [1]:
import pandas as pd
import json

# Load the JSON file into a DataFrame
with open('openaq_data_boston.json', 'r') as json_file:
    data = json.load(json_file)

df_boston = pd.DataFrame(data['results'])

# Save the DataFrame to a CSV file
df_boston.to_csv('openaq_data_boston.csv', index=False)

print("Conversion from JSON to CSV is complete.")


Conversion from JSON to CSV is complete.


In [2]:
import pandas as pd
import json

# Load the JSON file into a DataFrame
with open('openaq_data_Newark.json', 'r') as json_file:
    data = json.load(json_file)

df_newark = pd.DataFrame(data['results'])

# Save the DataFrame to a CSV file
df_newark.to_csv('openaq_data_Newark.csv', index=False)

print("Conversion from JSON to CSV is complete.")


Conversion from JSON to CSV is complete.


In [3]:
import pandas as pd
import json

# Load the JSON file into a DataFrame
with open('openaq_jfk_data.json', 'r') as json_file:
    data = json.load(json_file)

df_jfk = pd.DataFrame(data['results'])

# Save the DataFrame to a CSV file
df_jfk.to_csv('openaq_jfk_data.csv', index=False)

print("Conversion from JSON to CSV is complete.")


Conversion from JSON to CSV is complete.


In [15]:
import pandas as pd

# Load the Newark data
df_newark = pd.read_csv('openaq_data_Newark.csv')

# Load the Detroit data
df_detroit = pd.read_csv('20230606_detroit_downtown_7_5km_aq.csv')

# Specify the date range for June 6, 2023
date_from = "2023-06-06T00:00:00Z"
date_to = "2023-06-06T23:59:59Z"

# Filter data for June 6 using local_time in Detroit
df_detroit_june_6 = df_detroit[(df_detroit['local_time'] >= date_from) & (df_detroit['local_time'] <= date_to)]

# Filter data for June 6 using date column in Newark
df_newark_june_6 = df_newark[(df_newark['date'] >= date_from) & (df_newark['date'] <= date_to)]

# Calculate sensor density for Newark and Detroit
sensor_density_newark = len(df_newark_june_6['locationId'].unique())
sensor_density_detroit = len(df_detroit_june_6['locationId'].unique())

# Calculate intensity of PM2.5 for Newark and Detroit
pm25_intensity_newark = df_newark_june_6['value'].mean()
pm25_intensity_detroit = df_detroit_june_6['value'].mean()

# Display the results
print(f"Sensor Density on June 6 - Newark: {sensor_density_newark}, Detroit: {sensor_density_detroit}")
print(f"PM2.5 Intensity on June 6 - Newark: {pm25_intensity_newark}, Detroit: {pm25_intensity_detroit}")


Sensor Density on June 6 - Newark: 0, Detroit: 0
PM2.5 Intensity on June 6 - Newark: nan, Detroit: nan


In [23]:
import pandas as pd

# Load the datasets
df_detroit = pd.read_csv('20230606_detroit_downtown_7_5km_aq.csv')
df_newark = pd.read_csv('openaq_data_Newark.csv')

# Filter data for June 6 in both datasets
df_detroit_june_6 = df_detroit[df_detroit['local_time'].str.startswith('2023-06-06')]
df_newark_june_6 = df_newark[df_newark['date'].str.startswith('2023-06-06')]

# Calculate sensor density for both datasets
sensor_density_detroit = len(df_detroit_june_6['location'].unique())
sensor_density_newark = len(df_newark_june_6['locationId'].unique())

# Calculate intensity of PM2.5 for both datasets
pm25_intensity_detroit = df_detroit_june_6['value'].mean()
pm25_intensity_newark = df_newark_june_6['value'].mean()

# Display the results
print(f"Sensor Density on June 6 - Detroit: {sensor_density_detroit}, Newark: {sensor_density_newark}")
print(f"PM2.5 Intensity on June 6 - Detroit: {pm25_intensity_detroit}, Newark: {pm25_intensity_newark}")


Sensor Density on June 6 - Detroit: 15, Newark: 0
PM2.5 Intensity on June 6 - Detroit: 72.8891245661396, Newark: nan


In [24]:
import pandas as pd

# Load the datasets
df_detroit = pd.read_csv('20230606_detroit_downtown_7_5km_aq.csv')
df_boston = pd.read_csv('openaq_data_boston.csv')  # Replace 'openaq_data_boston.csv' with the actual Boston dataset filename

# Filter data for June 6 in both datasets
df_detroit_june_6 = df_detroit[df_detroit['local_time'].str.startswith('2023-06-06')]
df_boston_june_6 = df_boston[df_boston['date'].str.startswith('2023-06-06')]

# Calculate sensor density for both datasets
sensor_density_detroit = len(df_detroit_june_6['location'].unique())
sensor_density_boston = len(df_boston_june_6['locationId'].unique())

# Calculate intensity of PM2.5 for both datasets
pm25_intensity_detroit = df_detroit_june_6['value'].mean()
pm25_intensity_boston = df_boston_june_6['value'].mean()

# Display the results
print(f"Sensor Density on June 6 - Detroit: {sensor_density_detroit}, Boston: {sensor_density_boston}")
print(f"PM2.5 Intensity on June 6 - Detroit: {pm25_intensity_detroit}, Boston: {pm25_intensity_boston}")


Sensor Density on June 6 - Detroit: 15, Boston: 0
PM2.5 Intensity on June 6 - Detroit: 72.8891245661396, Boston: nan


In [25]:
import pandas as pd

# Load the datasets
df_detroit = pd.read_csv('20230606_detroit_downtown_7_5km_aq.csv')
df_baltimore = pd.read_csv('openaq_data_baltimore.csv')  # Replace 'openaq_data_baltimore.csv' with the actual Baltimore dataset filename

# Filter data for June 6 in both datasets
df_detroit_june_6 = df_detroit[df_detroit['local_time'].str.startswith('2023-06-06')]
df_baltimore_june_6 = df_baltimore[df_baltimore['date'].str.startswith('2023-06-06')]

# Calculate sensor density for both datasets
sensor_density_detroit = len(df_detroit_june_6['location'].unique())
sensor_density_baltimore = len(df_baltimore_june_6['locationId'].unique())

# Calculate intensity of PM2.5 for both datasets
pm25_intensity_detroit = df_detroit_june_6['value'].mean()
pm25_intensity_baltimore = df_baltimore_june_6['value'].mean()

# Display the results
print(f"Sensor Density on June 6 - Detroit: {sensor_density_detroit}, Baltimore: {sensor_density_baltimore}")
print(f"PM2.5 Intensity on June 6 - Detroit: {pm25_intensity_detroit}, Baltimore: {pm25_intensity_baltimore}")


Sensor Density on June 6 - Detroit: 15, Baltimore: 0
PM2.5 Intensity on June 6 - Detroit: 72.8891245661396, Baltimore: nan


In [26]:
import pandas as pd

# Load the datasets
df_detroit = pd.read_csv('20230606_detroit_downtown_7_5km_aq.csv')
df_philadelphia = pd.read_csv('openaq_airport_philadelphia.csv')  # Replace 'openaq_airport_philadelphia.csv' with the actual Philadelphia dataset filename

# Filter data for June 6 in both datasets
df_detroit_june_6 = df_detroit[df_detroit['local_time'].str.startswith('2023-06-06')]
df_philadelphia_june_6 = df_philadelphia[df_philadelphia['date'].str.startswith('2023-06-06')]

# Calculate sensor density for both datasets
sensor_density_detroit = len(df_detroit_june_6['location'].unique())
sensor_density_philadelphia = len(df_philadelphia_june_6['locationId'].unique())

# Calculate intensity of PM2.5 for both datasets
pm25_intensity_detroit = df_detroit_june_6['value'].mean()
pm25_intensity_philadelphia = df_philadelphia_june_6['value'].mean()

# Display the results
print(f"Sensor Density on June 6 - Detroit: {sensor_density_detroit}, Philadelphia: {sensor_density_philadelphia}")
print(f"PM2.5 Intensity on June 6 - Detroit: {pm25_intensity_detroit}, Philadelphia: {pm25_intensity_philadelphia}")



Sensor Density on June 6 - Detroit: 15, Philadelphia: 0
PM2.5 Intensity on June 6 - Detroit: 72.8891245661396, Philadelphia: nan


In [None]:
import pandas as pd
import numpy as np

# Load the datasets
df_detroit = pd.read_csv('20230606_detroit_downtown_7_5km_aq.csv')
df_newark = pd.read_csv('openaq_data_Newark.csv')

# Filter data for June 6 in both datasets
df_detroit_june_6 = df_detroit[df_detroit['local_time'].str.startswith('2023-06-06')]
df_newark_june_6 = df_newark[df_newark['date'].str.startswith('2023-06-06')]

# Calculate unique sensor count and sensor density for Detroit
unique_sensor_count_detroit = len(df_detroit_june_6['location'].unique())
sensor_density_detroit = unique_sensor_count_detroit / (np.pi * (7.5 ** 2))

# Calculate unique sensor count and sensor density for Newark
unique_sensor_count_newark = len(df_newark_june_6['locationId'].unique())
sensor_density_newark = unique_sensor_count_newark / (np.pi * (7.5 ** 2))

# Display the results
print("Detroit:")
print(f"Unique Sensor Count: {unique_sensor_count_detroit}")
print(f"Sensor Density: {sensor_density_detroit:.2f} sensors per square kilometer")

print("\nNewark:")
print(f"Unique Sensor Count: {unique_sensor_count_newark}")
print(f"Sensor Density: {sensor_density_newark:.2f} sensors per square kilometer")


"Upon analysis, it is observed that on June 6th, 2023, Detroit exhibits a higher sensor density and PM2.5 intensity compared to other airports, indicating a potentially more comprehensive air quality monitoring network and elevated levels of particulate matter in the atmosphere."