<a href="https://colab.research.google.com/github/SM24-Industrial-Software-Dev/ML-forecasting-NOx-levels/blob/get_Data-API/get_Data_API_2_0.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from google.colab import auth
from google.api_core import retry
from IPython.display import HTML, Image
from matplotlib import pyplot as plt
import seaborn as sns
from numpy.lib import recfunctions as rfn

import concurrent
import ee
import geemap
import google
import io
import multiprocessing
import numpy as np
import requests
import tensorflow as tf
import pandas as pd
import math


# REPLACE WITH YOUR PROJECT!
PROJECT = 'yu-summer-2024'

auth.authenticate_user()


credentials, _ = google.auth.default()
ee.Initialize(credentials, project=PROJECT, opt_url='https://earthengine-highvolume.googleapis.com')


In [2]:
import geopy
# importing geopy library
from geopy.geocoders import Nominatim
loc = Nominatim(user_agent="GetLoc")


In [4]:
import geopy
# importing geopy library
from geopy.geocoders import Nominatim

def get_Data(locations, sdate, edate, cloudmasking):
  loc = Nominatim(user_agent="GetLoc")

  adminUnits=ee.FeatureCollection(
    'FAO/GAUL_SIMPLIFIED_500m/2015/level2')

  geoCollection = []
  for location in locations:
          getloc = loc.geocode(location)
          coordinates = ee.Geometry.Point(getloc.longitude, getloc.latitude)
          geoCollection.append(ee.Feature(coordinates, {'region': location}))


  geometries = ee.FeatureCollection(geoCollection)

  adminSelect=adminUnits.filterBounds(geometries)

  def copyGeometryProps(index):
      source = ee.Feature(geometries.toList(geometries.size()).get(index))
      dest = ee.Feature(adminSelect.toList(adminSelect.size()).get(index))
      ftr = dest.copyProperties(source)
      return ftr

  seq = ee.List.sequence(0, adminSelect.size().subtract(1))
  adminSelect = ee.FeatureCollection(seq.map(copyGeometryProps))

  no2Raw = ee.ImageCollection('COPERNICUS/S5P/OFFL/L3_NO2')

  CLOUD_MASK_FRACTION = cloudmasking # You can play around with this value.
  def maskClouds(image):
    cf = image.select('cloud_fraction')
    mask=cf.lte(CLOUD_MASK_FRACTION)
    return image.updateMask(mask).copyProperties(image)

  # get start and end dates for sentinel 5P data
  try:
    startDate = ee.Date(sdate)
    endDate = ee.Date(edate)
  except:
    print("Invalid dates")
    return None

  ndays = endDate.difference(startDate, 'days')

  def create_date_list_fn(startDate):
    def date_list_fn(days):
      return startDate.advance(days, 'days')
    return date_list_fn

  # generate the list of dates

  date_list_fn = create_date_list_fn(startDate)
  list_of_dates = ee.List.sequence(0, ndays, 1).map(date_list_fn)

  def image_mediancomposite_by_date(date):
    return ee.ImageCollection('COPERNICUS/S5P/OFFL/L3_NO2')\
    .filterDate(ee.Date(date), ee.Date(date).advance(1, 'day'))\
    .map(maskClouds) \
    .select('tropospheric_NO2_column_number_density')\
    .median()\
    .set('system:time_start', ee.Date(date).millis())

  no2 = ee.ImageCollection(
        ee.List.sequence(0, ndays, 1)\
        .map(date_list_fn)\
        .map(image_mediancomposite_by_date)
      )


  def createConc(img, collectionLabel):

        def getConc(img):
            no2Mean=img.reduceRegion(
                  reducer = ee.Reducer.mean(),
                  geometry = adminSelect.geometry(),
                  scale =  7000
                ).get('tropospheric_NO2_column_number_density')

            region = img.get('region')

            doy=img.date().getRelative('day', 'year')
            dow=img.date().format('E')
            dt=img.date().format("YYYY-MM-dd")


            # Handle potential missing values
            feature_dict = {
                'DOY': doy,
                'DOW': dow,
                'DATE': dt,
                'type': collectionLabel,
            }
            if no2Mean:
                feature_dict['conc'] = no2Mean
                return adminSelect.map(lambda f: f.set(feature_dict)).first()
            else:
                return None

        return getConc(img)

  no2AggChange=no2.filterDate(startDate, endDate) \
                              .map(lambda img:createConc(img, 'lockdown')) \
                              .filter(ee.Filter.notNull(['conc']))


  def fc_to_dict(fc):
    prop_names = fc.first().propertyNames()
    prop_lists = fc.reduceColumns(
        reducer=ee.Reducer.toList().repeat(prop_names.size()),
        selectors=prop_names).get('list')

    return ee.Dictionary.fromLists(prop_names, prop_lists)

  NO2_stat_dict = fc_to_dict(no2AggChange).getInfo()
  NO2_stat_df = pd.DataFrame(NO2_stat_dict)
  NO2_stat_df['region'] = [f['properties']['region'] for f in no2AggChange.getInfo()['features']]

  print(NO2_stat_df)
  return NO2_stat_df

In [5]:
get_Data(["Allentown", "New York", "Boston"], "2020-03-01", "2020-05-01", 0.3)

    ADM0_CODE                 ADM0_NAME  ADM1_CODE      ADM1_NAME  ADM2_CODE  \
0         259  United States of America       3235  Massachusetts      29907   
1         259  United States of America       3235  Massachusetts      29907   
2         259  United States of America       3235  Massachusetts      29907   
3         259  United States of America       3235  Massachusetts      29907   
4         259  United States of America       3235  Massachusetts      29907   
5         259  United States of America       3235  Massachusetts      29907   
6         259  United States of America       3235  Massachusetts      29907   
7         259  United States of America       3235  Massachusetts      29907   
8         259  United States of America       3235  Massachusetts      29907   
9         259  United States of America       3235  Massachusetts      29907   
10        259  United States of America       3235  Massachusetts      29907   
11        259  United States of America 

Unnamed: 0,ADM0_CODE,ADM0_NAME,ADM1_CODE,ADM1_NAME,ADM2_CODE,ADM2_NAME,DATE,DISP_AREA,DOW,DOY,EXP2_YEAR,STATUS,STR2_YEAR,Shape_Area,Shape_Leng,conc,region,system:index,type
0,259,United States of America,3235,Massachusetts,29907,Suffolk,2020-03-01,NO,Sun,60,3000,Member State,1000,0.017067,2.345468,3.3e-05,Allentown,0,lockdown
1,259,United States of America,3235,Massachusetts,29907,Suffolk,2020-03-02,NO,Mon,61,3000,Member State,1000,0.017067,2.345468,0.000102,Allentown,1,lockdown
2,259,United States of America,3235,Massachusetts,29907,Suffolk,2020-03-03,NO,Tue,62,3000,Member State,1000,0.017067,2.345468,0.000131,Allentown,2,lockdown
3,259,United States of America,3235,Massachusetts,29907,Suffolk,2020-03-04,NO,Wed,63,3000,Member State,1000,0.017067,2.345468,4.5e-05,Allentown,3,lockdown
4,259,United States of America,3235,Massachusetts,29907,Suffolk,2020-03-05,NO,Thu,64,3000,Member State,1000,0.017067,2.345468,6.4e-05,Allentown,4,lockdown
5,259,United States of America,3235,Massachusetts,29907,Suffolk,2020-03-07,NO,Sat,66,3000,Member State,1000,0.017067,2.345468,2.7e-05,Allentown,6,lockdown
6,259,United States of America,3235,Massachusetts,29907,Suffolk,2020-03-08,NO,Sun,67,3000,Member State,1000,0.017067,2.345468,6.1e-05,Allentown,7,lockdown
7,259,United States of America,3235,Massachusetts,29907,Suffolk,2020-03-09,NO,Mon,68,3000,Member State,1000,0.017067,2.345468,8.7e-05,Allentown,8,lockdown
8,259,United States of America,3235,Massachusetts,29907,Suffolk,2020-03-10,NO,Tue,69,3000,Member State,1000,0.017067,2.345468,0.000193,Allentown,9,lockdown
9,259,United States of America,3235,Massachusetts,29907,Suffolk,2020-03-11,NO,Wed,70,3000,Member State,1000,0.017067,2.345468,6.2e-05,Allentown,10,lockdown
