<a href="https://colab.research.google.com/github/SM24-Industrial-Software-Dev/ML-forecasting-NOx-levels/blob/ES-12-MSA-Class/Demos/Selective_Request_Demo.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### This is the setup code

In [None]:
# Imports and Installations
import io
import os
import tempfile
import warnings
import zipfile

import ee
import geemap
import requests
from google.colab import userdata

!pip install pycrs

Collecting pycrs
  Downloading PyCRS-1.0.2.tar.gz (36 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: pycrs
  Building wheel for pycrs (setup.py) ... [?25l[?25hdone
  Created wheel for pycrs: filename=PyCRS-1.0.2-py3-none-any.whl size=32687 sha256=c927e2bf50d255d4b1846f3cd5f4d4599e88e5ea08799ac73ed1e8ee8d8c745d
  Stored in directory: /root/.cache/pip/wheels/47/1d/70/7a5bdf33347e7c75e95b06b1fa38f076a59a9506653cc24aff
Successfully built pycrs
Installing collected packages: pycrs
Successfully installed pycrs-1.0.2


In [None]:
# Authenticate GEE
# Connection settings for the Earth Engine service account used by this notebook
GEE_SERVICE_ACCOUNT = "yeshiva-summer-2024-1@yu-summer-2024.iam.gserviceaccount.com"
GEE_PROJECT = 'yu-summer-2024'
GEE_HIGH_VOLUME_URL = 'https://earthengine-highvolume.googleapis.com'

# The key material is fetched from Colab's userdata store (entry 'GCP_CREDENTIALS'), not hard-coded here
credentials = ee.ServiceAccountCredentials(
    GEE_SERVICE_ACCOUNT,
    key_data=userdata.get('GCP_CREDENTIALS'),
)
ee.Initialize(
    credentials=credentials,
    project=GEE_PROJECT,
    opt_url=GEE_HIGH_VOLUME_URL,
)

### This is the code for a class used to obtain a list of all Metropolitan Statistical Areas (MSAs) and their boundaries

In [None]:
# The class definition
class MSA:
    def __init__(self, year: int = 2023):
        """
        Initializes an object representing a collection of Metropolitan Statistical Areas (MSAs)

        Args:
            year (int): The year of the shapefile (2023 by default, can range from 2014-2023)
        """
        self._year = year
        self._year_checker()
        self._msa_medium_res = self._retrieve_msas()
        self._names = self._msa_medium_res.aggregate_array('NAME').getInfo()

    @property
    def all_areas(self) -> ee.FeatureCollection:
      """
      Returns:
        A FeatureCollection containing all the MSAs.
      """
      return self._msa_medium_res

    @property
    def names(self) -> list[str]:
      """
      Returns a list of all the MSA names.
      """
      return self._names

    def areas_by_name(self, names: str | list[str]) -> ee.FeatureCollection:
      """
      Filters a FeatureCollection of MSAs by the selected name(s).

      Args:
        names (str or list[str]): The name(s) to filter by.

      Returns:
        A FeatureCollection containing the MSAs matching the provided name(s).
      """
      if isinstance(names, str):
        names = [names]
      return self._msa_medium_res.filter(ee.Filter.inList('NAME', names))

    # The method to retrieve a shapefile conaining all Core-Based Statistical Areas (CBSAs)
    # Only used so far to obtain the medium resolution shapefile, at a resolution of 1:5,000,000
    def _retrieve_msas(self, resolution='5m'):
      """
      Downloads a CBSA shapefile from the US Census Bureau, uploads it to Google Earth Engine (GEE), and returns a FeatureCollection of MSAs

      Args:
          resolution (str): The resolution of the shapefile (5m by default, can be 500k, 5m, or 20m)

      Returns:
          FeatureCollection: A FeatureCollection of Metropolitan Statistical Areas (MSAs)
      """
      filename = f'cb_{self._year}_us_cbsa_{resolution}'
      # Download the shapefile
      response = requests.get(f'https://www2.census.gov/geo/tiger/GENZ{self._year}/shp/{filename}.zip')

      # Check if the request was successful
      if response.status_code != 200:
          raise Exception(f"Failed to download shapefile. Status code: {response.status_code}")

      # Extract the shapefile
      with zipfile.ZipFile(io.BytesIO(response.content)) as zip_ref:
          zip_ref.extractall()

      # Upload the shapefile to GEE by reading it with Latin-1 encoding, which is commonly used for shapefiles
      cbsas = geemap.shp_to_ee(f'{filename}.shp', encoding='latin1')

      # Then filter for Metropolitan Statistical Areas (MSAs), which are CBSAs with a population > 50k
      return cbsas.filter(ee.Filter.eq('LSAD', 'M1'))

    def _year_checker(self):
      """
      Checks if the year is within the valid range.
      """
      if self._year < 2014:
        self._year = 2014
      elif self._year > 2023:
        self._year = 2023

### This is the demo code

In [None]:
# The MSAs to display, identified by the value of their NAME attribute
msa_names = [
    'New York-Newark-Jersey City, NY-NJ',
    'Los Angeles-Long Beach-Anaheim, CA',
]

# Build the MSA collection for the default year, then keep only the requested areas
msa = MSA()
msa_geometries_by_names = msa.areas_by_name(msa_names)

# Draw the selected boundaries on an interactive map
m = geemap.Map(center=(39, -70), zoom=4)
m.addLayer(msa_geometries_by_names)

# The last expression of the cell renders the map widget
m

Map(center=[39, -70], controls=(WidgetControl(options=['position', 'transparent_bg'], widget=SearchDataGUI(chi…