In [None]:
import pandas as pd
import geopandas as gpd
from geocube.api.core import make_geocube
from geocube.rasterize import rasterize_image
from functools import partial
from rasterio.enums import MergeAlg
from shapely.geometry import LineString
import matplotlib as plt
import matplotlib.colors as mcolors
import matplotlib.pyplot as plt2
import rasterio as rio
import numpy as np
from scipy import stats
import requests
from io import BytesIO
from zipfile import ZipFile
from functools import partial
import pygeohash as gh
from shapely.geometry import Polygon

In [None]:
# Download NYC_AQ.csv into a DataFrame (later cells use its 'latitude',
# 'longitude' and 'pm25' columns).
url = 'https://raw.githubusercontent.com/IsamAljawarneh/datasets/master/data/NYC_AQ.csv'
NYC_AQ = pd.read_csv(url)

# `url` is rebound to the second file: a GeoJSON document read with geopandas.
url = 'https://raw.githubusercontent.com/IsamAljawarneh/datasets/master/data/nyc_polygon.geojson'
NYC_geojson = gpd.read_file(url)

In [None]:
# Text preview of the first rows of both inputs, one frame after the other.
print(NYC_AQ.head(), NYC_geojson.head(), sep='\n')

In [None]:
def Data_pre_processing(NYC_AQ):
    """Keep the coordinate and PM2.5 columns and drop rows with a zero coordinate.

    Parameters
    ----------
    NYC_AQ : pandas.DataFrame
        Frame containing at least 'latitude', 'longitude' and 'pm25' columns.
        The frame passed in is not modified.

    Returns
    -------
    pandas.DataFrame
        New frame holding only those three columns, with 'latitude' and
        'longitude' converted by ``pd.to_numeric`` and every row whose
        latitude or longitude equals 0 removed. Surviving rows keep their
        original index labels.

    Raises
    ------
    KeyError
        If one of the three columns is missing.
    ValueError
        If a coordinate cannot be parsed as a number (``pd.to_numeric``
        default behaviour).
    """
    columns_to_keep = ['latitude', 'longitude', 'pm25']
    # Explicit copy: the column assignments below must never reach the
    # caller's frame.
    cleaned = NYC_AQ[columns_to_keep].copy()

    # Convert BEFORE filtering: a coordinate stored as the string '0' compares
    # unequal to the integer 0 and would otherwise slip through the filter.
    cleaned['longitude'] = pd.to_numeric(cleaned['longitude'])
    cleaned['latitude'] = pd.to_numeric(cleaned['latitude'])

    has_position = (cleaned['latitude'] != 0) & (cleaned['longitude'] != 0)
    return cleaned[has_position]

In [None]:
# Rebind NYC_AQ to the result of Data_pre_processing (defined in the cell above).
NYC_AQ = Data_pre_processing(NYC_AQ)
# Plain-text dump of the resulting frame.
print(NYC_AQ)

In [None]:
# Number of geohash characters used for every reading; reused by later cells.
geohash_precision = 7

# Encode each (latitude, longitude) pair by walking the two columns in
# lockstep. This avoids DataFrame.apply(axis=1), which materialises a Series
# for every row just to read two scalars from it.
NYC_AQ['geohash'] = [
    gh.encode(lat, lon, precision=geohash_precision)
    for lat, lon in zip(NYC_AQ['latitude'], NYC_AQ['longitude'])
]
NYC_AQ.head()

In [None]:
# Promote the readings to a GeoDataFrame whose geometry is one point per row
# (x = longitude, y = latitude), then tag it with the EPSG:4326 CRS.
NYC_AQ = gpd.GeoDataFrame(
    NYC_AQ,
    geometry=gpd.points_from_xy(x=NYC_AQ['longitude'], y=NYC_AQ['latitude']),
).set_crs('EPSG:4326')
NYC_AQ

In [None]:
def decode_geohash(geohash):
    """Return the bounding box of a geohash as a dict with keys 'w', 'e', 's', 'n'.

    Every character is expanded to five bits. Bits at even positions of the
    resulting stream refine the longitude interval (starting from -180..180),
    bits at odd positions refine the latitude interval (starting from -90..90).
    Each bit halves its interval: '1' keeps the upper half, anything else the
    lower half.

    An empty string yields the whole-world box. A character outside the
    geohash alphabet raises KeyError.

    NOTE(review): a later cell of this notebook defines another function with
    this same name; once that cell has run, this definition is replaced.
    """
    alphabet = '0123456789bcdefghjkmnpqrstuvwxyz'
    # Five-character binary string for each symbol of the alphabet.
    symbol_bits = {symbol: format(index, '05b') for index, symbol in enumerate(alphabet)}

    def bisect(bit_string, low, high):
        # Narrow [low, high] once per bit.
        for flag in bit_string:
            middle = (low + high) / 2
            low, high = (middle, high) if flag == '1' else (low, middle)
        return low, high

    bit_stream = ''.join(symbol_bits[symbol] for symbol in geohash)

    west, east = bisect(bit_stream[0::2], -180, 180)
    south, north = bisect(bit_stream[1::2], -90, 90)

    return {'w': west, 'e': east, 's': south, 'n': north}

In [None]:
def bbox_to_polygon(temp):
    """Add a 'geometry' column of rectangles built from the 'bbox' column.

    Each entry of ``temp['bbox']`` must be a mapping with keys 'w', 'e', 's'
    and 'n'. The rectangle's corners are listed as (w, s), (w, n), (e, n),
    (e, s).

    The frame is modified in place and the same object is returned.
    """
    def _rectangle(box):
        corners = [
            (box['w'], box['s']),
            (box['w'], box['n']),
            (box['e'], box['n']),
            (box['e'], box['s']),
        ]
        return Polygon(corners)

    temp['geometry'] = temp['bbox'].apply(_rectangle)
    return temp

In [None]:
def geohash_to_polygons(NYC_AQ):
    """Return a GeoDataFrame of NYC_AQ whose geometry is each row's geohash cell.

    The 'geohash' column is decoded row by row with ``decode_geohash``, the
    resulting boxes are turned into polygons by ``bbox_to_polygon``, and those
    polygons are passed as the geometry of a new GeoDataFrame built from
    NYC_AQ.

    NOTE(review): ``bbox_to_polygon`` reads the keys 'w', 'e', 's', 'n', so
    this needs the ``decode_geohash`` variant that returns such a dict to be
    the one currently bound to that name.
    """
    cells = NYC_AQ['geohash'].apply(decode_geohash).to_frame('bbox')
    cells = bbox_to_polygon(cells)
    return gpd.GeoDataFrame(NYC_AQ, geometry=cells['geometry'])

In [None]:
# Rebind NYC_AQ to the GeoDataFrame returned by geohash_to_polygons, whose
# geometry is built from each row's geohash bounding box.
NYC_AQ = geohash_to_polygons(NYC_AQ)
NYC_AQ.head()

In [None]:
def decode_geohash(geohash):
    """Return the first two values of ``gh.decode_exactly(geohash)`` as a Series.

    The first two items of the 4-tuple are treated as latitude and longitude;
    the remaining two are discarded. Because a ``pd.Series`` is returned,
    applying this function over a Series yields a two-column DataFrame.

    NOTE(review): this reuses the name of the bounding-box decoder defined in
    an earlier cell and replaces it as soon as this cell runs.
    """
    center_lat, center_lon, _unused_a, _unused_b = gh.decode_exactly(geohash)
    return pd.Series([center_lat, center_lon])

In [None]:
# decode_geohash (as defined in the cell directly above) returns a two-element
# Series per geohash, so .apply yields two columns; they are stored as the
# centre latitude / longitude columns.
NYC_AQ[['geohash_center_latitude', 'geohash_center_longitude']] = NYC_AQ['geohash'].apply(decode_geohash)
NYC_AQ

In [None]:
# Store the geohash precision on every row as a constant column.
NYC_AQ['geohash_encoding_key'] = geohash_precision
NYC_AQ.head()

In [None]:
def calc_pm25_avg(sample_NYC_AQ):
    """Attach the per-geohash mean of 'pm25' to every row as 'avg_pm25'.

    Parameters
    ----------
    sample_NYC_AQ : DataFrame
        Must contain 'geohash' and 'pm25' columns. It is not modified.

    Returns
    -------
    DataFrame
        Result of left-merging the input with the per-geohash means on
        'geohash'; it carries every input column plus 'avg_pm25'.
    """
    cell_means = (
        sample_NYC_AQ
        .groupby('geohash', as_index=False)
        .agg({'pm25': 'mean'})
        .rename(columns={'pm25': 'avg_pm25'})
    )
    return sample_NYC_AQ.merge(cell_means, how='left', on='geohash')

In [None]:
import os  # only needed to create the output folder below

# Fraction of rows drawn from every geohash group for each sample.
sampling_fraction = 1
# How many sample rasters to write.
samples_number = 1
# Folder that receives the generated GeoTIFF files.
sampels_path = r'./generated_samples/'

# Create the output folder up front so the raster write does not fail on a
# fresh checkout where it does not exist yet.
os.makedirs(sampels_path, exist_ok=True)

for i in range(samples_number):
    # Random per-geohash sample. GroupBy.sample keeps every column, including
    # 'geohash', which calc_pm25_avg needs. The previous
    # groupby(...).apply(lambda x: x.sample(...)) form depended on apply
    # operating on the grouping column, which pandas deprecates.
    NYC_AQ_sample = NYC_AQ.groupby('geohash').sample(frac=sampling_fraction)

    NYC_AQ_sample = calc_pm25_avg(NYC_AQ_sample)
    # Keep a single row per geohash so each cell polygon is rasterised once.
    NYC_AQ_sample = NYC_AQ_sample.drop_duplicates(subset='geohash')
    AQ_NYC_raster = make_geocube(
        vector_data = NYC_AQ_sample,
        measurements = ['avg_pm25', 'geohash_center_latitude', 'geohash_center_longitude', 'geohash_encoding_key'],
        resolution = (-10, 10),
        rasterize_function=partial(rasterize_image, merge_alg=MergeAlg.add),
        fill= 0,
        output_crs="EPSG:32618")

    # File names are 1-based: AQ_NYC_sample_1.tiff, AQ_NYC_sample_2.tiff, ...
    imagename = f'AQ_NYC_sample_{i+1}.tiff'
    # full_path keeps the last written file; the plotting cell below reads it.
    full_path = f'{sampels_path}{imagename}'
    AQ_NYC_raster.rio.to_raster(full_path)




In [None]:
# Load the raster file
# Read band 1 of the raster at `full_path` (set by the sample-generation cell).
with rio.open(full_path) as src:
    data = src.read(1)

# One 10x10 inch figure; the colour scale is fixed to the range 0..10.
fig, ax = plt2.subplots(figsize=(10, 10))
image = ax.imshow(data, cmap='viridis', vmin=0, vmax=10)
fig.colorbar(image, ax=ax, label='PM2.5 Levels')

# Title and axis captions.
ax.set_title('PM2.5 Distribution in NYC')
ax.set_xlabel('Longitude Index')
ax.set_ylabel('Latitude Index')

plt2.show()

In [None]:
# Load the raster file
# Load sample raster number 3 from the configured output folder rather than a
# machine-specific absolute path, so the cell works on any checkout.
# NOTE(review): this assumes the notebook runs from the folder that contains
# `generated_samples`, and the file only exists when the generation cell was
# run with samples_number >= 3 — confirm which sample should be shown here.
sample_raster_path = f'{sampels_path}AQ_NYC_sample_3.tiff'
with rio.open(sample_raster_path) as src:
    data = src.read(1)  # read the first band

# Set up the figure
plt2.figure(figsize=(10, 10))

# Display the raster data with a colormap
plt2.imshow(data, cmap='viridis', vmin=0, vmax=10)  # set the range of the colormap
plt2.colorbar(label='PM2.5 Levels')  # add a color bar

# Add titles and labels if necessary
plt2.title('PM2.5 Distribution in NYC')
plt2.xlabel('Longitude Index')
plt2.ylabel('Latitude Index')

# Show the plot
plt2.show()