In [None]:
from functools import partial
from io import BytesIO
from zipfile import ZipFile

import geopandas as gpd
import matplotlib as plt
import matplotlib.colors as mcolors
import matplotlib.pyplot as plt2
import numpy as np
import pandas as pd
import rasterio as rio
import requests
from geocube.api.core import make_geocube
from geocube.rasterize import rasterize_image
from rasterio.enums import MergeAlg
from rasterio.plot import show
from rasterio.transform import from_origin
from scipy import stats
from shapely.geometry import LineString

In [None]:
# Location of the NYC air-quality CSV (raw file served from GitHub).
url = 'https://raw.githubusercontent.com/IsamAljawarneh/datasets/master/data/NYC_AQ.csv'

# Fetch the file and parse it into a DataFrame.
NYC_AQ = pd.read_csv(filepath_or_buffer=url)

# Preview the first five rows.
print(NYC_AQ.head(n=5))

In [None]:
# Keep only the coordinates and the PM2.5 reading; these are the columns the
# following cells work with.
columns_to_keep = ['latitude', 'longitude', 'pm25']

# Take an explicit copy: the coordinate columns are reassigned in a later cell,
# and writing into a bare column-subset of another frame makes pandas emit
# SettingWithCopyWarning.
NYC_AQ = NYC_AQ[columns_to_keep].copy()

print(NYC_AQ)

In [None]:
# Make sure both coordinate columns hold numeric values.
for _coord_col in ('longitude', 'latitude'):
    NYC_AQ[_coord_col] = pd.to_numeric(NYC_AQ[_coord_col])

In [None]:
# One point geometry per row, built from its longitude (x) and latitude (y).
AQ_points = gpd.GeoDataFrame(
    geometry=gpd.points_from_xy(NYC_AQ['longitude'], NYC_AQ['latitude'])
)

In [None]:
# The coordinates are lon/lat, so label them EPSG:4326 and then reproject to
# Web Mercator (EPSG:3857).
AQ_points = AQ_points.set_crs('EPSG:4326').to_crs('EPSG:3857')

# Attach the PM2.5 readings (pandas aligns them on the row index).
AQ_points['pm25'] = NYC_AQ['pm25']

# Keep only rows whose geometry is both valid and non-empty.
AQ_points = AQ_points[AQ_points.is_valid & ~AQ_points.is_empty]
print(AQ_points)

In [None]:
# Distinct PM2.5 values found in AQ_points, in order of first appearance.
unique_pm25_values = pd.unique(AQ_points['pm25'])
print(unique_pm25_values)


In [None]:
# How many times each PM2.5 value occurs, most frequent first.
pm25_value_counts = AQ_points.loc[:, 'pm25'].value_counts()
print(pm25_value_counts)


In [None]:
# Step 2: Rasterize the Data
# Extent of all points, [minx, miny, maxx, maxy], in the CRS of AQ_points.
bounds = AQ_points.total_bounds
# Cell size in CRS units (metres when AQ_points is in EPSG:3857).
resolution = 50

# Number of cells along each axis. floor(...) + 1 (rather than plain
# truncation) guarantees that points lying exactly on the max-x / min-y edge
# fall inside the grid, and that the grid is never 0 cells wide or tall.
width = int(np.floor((bounds[2] - bounds[0]) / resolution)) + 1
height = int(np.floor((bounds[3] - bounds[1]) / resolution)) + 1

# Affine transform anchored at the top-left (west, north) corner of the extent.
# from_origin is provided by rasterio.transform.
transform = from_origin(bounds[0], bounds[3], resolution, resolution)

# Raster initialised to the nodata value; cells that never receive a reading
# keep it.
pm25_raster = np.full((height, width), -9999, dtype='float32')

# Function to convert coordinates to raster indices
def coords_to_indices(x, y, transform):
    """Map a map coordinate to the (row, col) of the raster cell containing it.

    Parameters
    ----------
    x, y : float
        Coordinate expressed in the same CRS as ``transform``.
    transform : affine transform
        Pixel-to-map transform; its inverse (``~transform``) is applied to
        ``(x, y)`` to obtain fractional (col, row) pixel coordinates.

    Returns
    -------
    tuple of int
        ``(row, col)``. The indices are not clipped: coordinates outside the
        raster yield negative or too-large values, so callers must
        bounds-check before indexing.
    """
    col, row = ~transform * (x, y)
    # Floor rather than truncate: int() rounds toward zero, which would turn a
    # fractional index in (-1, 0) into 0 and make a point just outside the
    # left/top edge look like it belongs to the first row/column.
    return int(np.floor(row)), int(np.floor(col))

# Burn each PM2.5 value into the raster cell that contains its point. When
# several points share a cell, the one processed last is the value that stays.
for point, value in zip(AQ_points.geometry, AQ_points['pm25']):
    row, col = coords_to_indices(point.x, point.y, transform)
    # Skip anything that maps outside the grid.
    if row not in range(height) or col not in range(width):
        continue
    pm25_raster[row, col] = value


file_path = r'/content/drive/MyDrive/AQ_NYC_raster.tiff'

# Create and write the GeoTIFF.
# - rasterio is imported as `rio` in this notebook, so the bare name `rasterio`
#   would raise NameError.
# - The grid (bounds, resolution, transform) was derived from AQ_points after
#   it was reprojected to EPSG:3857, so the file is tagged with that CRS
#   rather than a lon/lat one.
with rio.open(
    file_path, 'w', driver='GTiff',
    height=pm25_raster.shape[0], width=pm25_raster.shape[1],
    count=1, dtype=pm25_raster.dtype,
    crs='EPSG:3857', transform=transform,
    nodata=-9999
) as dst:
    # Single-band file: write the array as band 1.
    dst.write(pm25_raster, 1)

# Plotting the data
# matplotlib.pyplot is imported as `plt2` in this notebook; `plt` is bound to
# the bare matplotlib package, which has no subplots()/show().
fig, ax = plt2.subplots(figsize=(10, 10))
# Mask the -9999 nodata fill so the colour scale spans the actual PM2.5 values
# instead of being stretched down to the sentinel. `show` is rasterio.plot.show.
show(
    np.ma.masked_equal(pm25_raster, -9999),
    transform=transform, ax=ax, cmap='viridis', title='PM2.5 Air Quality',
)
plt2.show()