<a href="https://colab.research.google.com/github/SM24-Industrial-Software-Dev/ML-forecasting-NOx-levels/blob/ES-8-CBSA-Ingester/Demos/Simple_MSA_Map.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## This is the setup code

In [1]:
# Imports and Installations
from google.colab import userdata
import ee
import requests
import zipfile
import io
import geemap

!pip install pycrs

Collecting pycrs
  Downloading PyCRS-1.0.2.tar.gz (36 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: pycrs
  Building wheel for pycrs (setup.py) ... [?25l[?25hdone
  Created wheel for pycrs: filename=PyCRS-1.0.2-py3-none-any.whl size=32687 sha256=19162185e8e043159aa22f0cd3075943390358ab0a2395168ea18760dabce70d
  Stored in directory: /root/.cache/pip/wheels/47/1d/70/7a5bdf33347e7c75e95b06b1fa38f076a59a9506653cc24aff
Successfully built pycrs
Installing collected packages: pycrs
Successfully installed pycrs-1.0.2


In [2]:
# Authenticate GEE
# Build service-account credentials; the key material is read from the Colab secret named
# 'GCP_CREDENTIALS' (via google.colab.userdata) rather than being stored in the notebook
credentials = ee.ServiceAccountCredentials("yeshiva-summer-2024-1@yu-summer-2024.iam.gserviceaccount.com", key_data=userdata.get('GCP_CREDENTIALS'))
# Initialize the Earth Engine client for the 'yu-summer-2024' project, pointing it at the
# earthengine-highvolume endpoint instead of the library's default URL
ee.Initialize(credentials = credentials, project='yu-summer-2024', opt_url='https://earthengine-highvolume.googleapis.com')

## This is the ingester code

In [3]:
# The method to retrieve a CBSA shapefile from the Census Bureau website and ingest it as a FeatureCollection of MSAs
def ingest_cbsa_shapefile(year: int = 2023, resolution: str = '5m'):
  """
  Downloads a CBSA shapefile from the US Census Bureau, uploads it to Google Earth Engine (GEE), and returns a FeatureCollection of MSAs

  Out-of-range or unknown arguments are not rejected: `year` is clamped into 2014-2023 and an
  unrecognized `resolution` falls back to '5m'.

  Side effects:
      The downloaded archive is extracted into the current working directory, so the
      `cb_{year}_us_cbsa_{resolution}.*` files remain on disk after the call.

  Args:
      year (int): The year of the shapefile (2023 by default, can range from 2014-2023)
      resolution (str): The resolution of the shapefile (5m by default, can be 500k, 5m, or 20m)

  Returns:
      FeatureCollection: A FeatureCollection of Metropolitan Statistical Areas (MSAs)

  Raises:
      requests.HTTPError: If the Census Bureau server answers with a 4xx/5xx status
      requests.Timeout: If the server does not respond within the timeout
  """
  # Validate the year and resolution inputs, setting to the nearest bound / default if invalid
  year = min(max(year, 2014), 2023)
  if resolution not in ('5m', '500k', '20m'):
    resolution = '5m'

  filename = f'cb_{year}_us_cbsa_{resolution}'

  # Download the shapefile archive. The timeout keeps a stalled connection from hanging the
  # notebook forever, and raise_for_status() turns an HTTP error page into a clear HTTPError
  # instead of a confusing zipfile.BadZipFile further down.
  response = requests.get(
    f'https://www2.census.gov/geo/tiger/GENZ{year}/shp/{filename}.zip',
    timeout=60,
  )
  response.raise_for_status()

  # Extract the shapefile (and its sidecar files) into the current working directory
  with zipfile.ZipFile(io.BytesIO(response.content)) as zip_ref:
    zip_ref.extractall()

  # Upload the shapefile to GEE by reading it with Latin-1 encoding, which is commonly used for shapefiles
  cbsas = geemap.shp_to_ee(f'{filename}.shp', encoding='latin1')

  # Then keep only the features whose LSAD property is 'M1', i.e. the Metropolitan Statistical Areas (MSAs)
  return cbsas.filter(ee.Filter.eq('LSAD', 'M1'))

# The actual retrieval: called with the defaults (year=2023, resolution='5m').
# The resulting FeatureCollection is kept in msa_geometries for the map cell below.
msa_geometries = ingest_cbsa_shapefile()

## This is sample code that displays every US MSA on top of Google Maps

In [6]:
# Create an interactive geemap map with center=[39, -99] and zoom level 4
Map = geemap.Map(center=[39, -99], zoom=4)
# Add the MSA FeatureCollection as a layer named 'MSAs'; the empty dict means no custom visualization parameters
Map.addLayer(msa_geometries, {}, 'MSAs')

# Leaving the map as the cell's last expression renders it as the cell output
Map

Map(center=[39, -99], controls=(WidgetControl(options=['position', 'transparent_bg'], widget=SearchDataGUI(chi…