In [1]:
import requests
import pandas as pd

In [2]:
import socket

# Get the host name
host_name = socket.gethostname()

# To get the fully qualified domain name (if available)
full_domain_name = socket.getfqdn()

print("Host Name:", host_name)
print("Fully Qualified Domain Name:", full_domain_name)


Host Name: macbook-pro-5.lan
Fully Qualified Domain Name: macbook-pro-5.lan


In [3]:
from dotenv import load_dotenv
from pathlib import Path

def load_secets():
    load_dotenv()
    env_path = Path(".") / ".env"
    load_dotenv(dotenv_path=env_path)

    google_maps_key = os.getenv("AIzaSyCer577h-dY5GJ9_Mqmptlu5DO2XrMuerc")

    return {
        "GOOGLE_MAPS_API_KEY": google_maps_key,
    }

In [4]:
import requests
import io


class Client(object):
    DEFAULT_BASE_URL = "https://airquality.googleapis.com"

    def __init__(self, key):
        self.session = requests.Session()
        self.key = key

    def request_post(self, url, params):
        request_url = self.compose_url(url)
        request_header = self.compose_header()
        request_body = params

        response = self.session.post(
            request_url,
            headers=request_header,
            json=request_body,
        )

        return self.get_body(response)

    def compose_url(self, path):
        return self.DEFAULT_BASE_URL + path + "?" + "key=" + self.key

    @staticmethod
    def get_body(response):
        body = response.json()

        if "error" in body:
            return body["error"]

        return body

    @staticmethod
    def compose_header():
        return {
            "Content-Type": "application/json",
            "Referer": "macbook-pro-5.ds.usfca.edu"
        }


In [5]:
def current_conditions(
    client,
    location,
    include_local_AQI=True,
    include_health_suggestion=False,
    include_all_pollutants=True,
    include_additional_pollutant_info=False,
    include_dominent_pollutant_conc=True,
    language=None,
):
    """
    See documentation for this API here
    https://developers.google.com/maps/documentation/air-quality/reference/rest/v1/currentConditions/lookup
    """
    params = {}

    if isinstance(location, dict):
        params["location"] = location
    else:
        raise ValueError(
            "Location argument must be a dictionary containing latitude and longitude"
        )

    extra_computations = []
    if include_local_AQI:
        extra_computations.append("LOCAL_AQI")

    if include_health_suggestion:
        extra_computations.append("HEALTH_RECOMMENDATIONS")

    if include_additional_pollutant_info:
        extra_computations.append("POLLUTANT_ADDITIONAL_INFO")

    if include_all_pollutants:
        extra_computations.append("POLLUTANT_CONCENTRATION")

    if include_dominent_pollutant_conc:
        extra_computations.append("DOMINANT_POLLUTANT_CONCENTRATION")

    if language:
        params["language"] = language

    params["extraComputations"] = extra_computations

    return client.request_post("/v1/currentConditions:lookup", params)

In [6]:
from IPython.display import display
# set up client

def current_conditions_to_df(data):
    return pd.DataFrame([data])
client = Client(key = "AIzaSyCer577h-dY5GJ9_Mqmptlu5DO2XrMuerc")
# a location in Oakland, CA
location = {"longitude":-122.3,"latitude":37.8}
# a JSON response
current_conditions_data = current_conditions(
  client,
  location,
  include_health_suggestion=True,
  include_additional_pollutant_info=True
)
# Print results
print(current_conditions_data)

{'dateTime': '2023-11-24T20:00:00Z', 'regionCode': 'us', 'indexes': [{'code': 'uaqi', 'displayName': 'Universal AQI', 'aqi': 73, 'aqiDisplay': '73', 'color': {'red': 0.4627451, 'green': 0.7921569, 'blue': 0.2}, 'category': 'Good air quality', 'dominantPollutant': 'pm10'}, {'code': 'usa_epa', 'displayName': 'AQI (US)', 'aqi': 39, 'aqiDisplay': '39', 'color': {'green': 0.89411765}, 'category': 'Good air quality', 'dominantPollutant': 'pm25'}], 'pollutants': [{'code': 'co', 'displayName': 'CO', 'fullName': 'Carbon monoxide', 'concentration': {'value': 402.67, 'units': 'PARTS_PER_BILLION'}, 'additionalInfo': {'sources': 'Typically originates from incomplete combustion of carbon fuels, such as that which occurs in car engines and power plants.', 'effects': 'When inhaled, carbon monoxide can prevent the blood from carrying oxygen. Exposure may cause dizziness, nausea and headaches. Exposure to extreme concentrations can lead to loss of consciousness.'}}, {'code': 'no2', 'displayName': 'NO2',

In [7]:
df = current_conditions_to_df(current_conditions_data)

# Display the DataFrame
display(df)

Unnamed: 0,dateTime,regionCode,indexes,pollutants,healthRecommendations
0,2023-11-24T20:00:00Z,us,"[{'code': 'uaqi', 'displayName': 'Universal AQ...","[{'code': 'co', 'displayName': 'CO', 'fullName...",{'generalPopulation': 'With this level of air ...


# Historical Data 

In [8]:
import requests
import pandas as pd

In [9]:
def request_post(self,url,params):

    request_url = self.compose_url(url)
    request_header = self.compose_header()
    request_body = params

    response = self.session.post(
      request_url,
      headers=request_header,
      json=request_body,
    )

    response_body = self.get_body(response)

    # put the first page in the response dictionary
    page = 1
    final_response = {
        "page_{}".format(page) : response_body
    }
    # fetch all the pages if needed 
    while "nextPageToken" in response_body:
      # call again with the next page's token
      request_body.update({
          "pageToken":response_body["nextPageToken"]
      })
      response = self.session.post(
          request_url,
          headers=request_header,
          json=request_body,
      )
      response_body = self.get_body(response)
      page += 1
      final_response["page_{}".format(page)] = response_body

    return final_response

In [10]:
def historical_conditions(
    client,
    location,
    specific_time=None,
    lag_time=None,
    specific_period=None,
    include_local_AQI=True,
    include_health_suggestion=False,
    include_all_pollutants=True,
    include_additional_pollutant_info=False,
    include_dominant_pollutant_conc=True,
    language=None,
):
    """
    See documentation for this API here https://developers.google.com/maps/documentation/air-quality/reference/rest/v1/history/lookup
    """
    params = {}

    if isinstance(location, dict):
        params["location"] = location
    else:
        raise ValueError(
            "Location argument must be a dictionary containing latitude and longitude"
        )

    if isinstance(specific_period, dict) and not specific_time and not lag_time:
        assert "startTime" in specific_period
        assert "endTime" in specific_period

        params["period"] = specific_period

    elif specific_time and not lag_time and not isinstance(specific_period, dict):
        # note that time must be in the "Zulu" format
        # e.g. datetime.datetime.strftime(datetime.datetime.now(),"%Y-%m-%dT%H:%M:%SZ")
        params["dateTime"] = specific_time

    # lag periods in hours
    elif lag_time and not specific_time and not isinstance(specific_period, dict):
        params["hours"] = lag_time

    else:
        raise ValueError(
            "Must provide specific_time, specific_period or lag_time arguments"
        )

    extra_computations = []
    if include_local_AQI:
        extra_computations.append("LOCAL_AQI")

    if include_health_suggestion:
        extra_computations.append("HEALTH_RECOMMENDATIONS")

    if include_additional_pollutant_info:
        extra_computations.append("POLLUTANT_ADDITIONAL_INFO")

    if include_all_pollutants:
        extra_computations.append("POLLUTANT_CONCENTRATION")

    if include_dominant_pollutant_conc:
        extra_computations.append("DOMINANT_POLLUTANT_CONCENTRATION")

    if language:
        params["language"] = language

    params["extraComputations"] = extra_computations
    # page size default set to 100 here
    params["pageSize"] = 100
    # page token will get filled in if needed by the request_post method
    params["pageToken"] = ""

    return client.request_post("/v1/history:lookup", params)

In [11]:
# set up client
client = Client(key="AIzaSyCer577h-dY5GJ9_Mqmptlu5DO2XrMuerc")
# a location in Oakland, CA
location = {"longitude":-122.3,"latitude":37.8}
# a JSON response
history_conditions_data = historical_conditions(
    client,
    location,
    lag_time=720
)

In [12]:
import pandas as pd

def historical_conditions_to_df(response_dict):
  
    # Directly accessing 'hoursInfo' since it's a list
    chained_pages = response_dict['hoursInfo']

    all_indexes = []
    all_pollutants = []
    for this_element in chained_pages:
        # Need this check in case one of the timestamps is missing data, which can sometimes happen
        if "indexes" in this_element:
            # Fetch the time
            time = this_element["dateTime"]
            # Fetch all the index values and add metadata
            all_indexes += [(time, x["code"], x["displayName"], "index", x["aqi"], None) for x in this_element['indexes']]
            # Fetch all the pollutant values and add metadata
            all_pollutants += [(time, x["code"], x["fullName"], "pollutant", x["concentration"]["value"], x["concentration"]["units"]) for x in this_element['pollutants']]

    all_results = all_indexes + all_pollutants
    # Generate "long format" dataframe
    res = pd.DataFrame(all_results, columns=["time", "code", "name", "type", "value", "unit"])
    res["time"] = pd.to_datetime(res["time"])
    return res


In [13]:
df = historical_conditions_to_df(history_conditions_data)
df

Unnamed: 0,time,code,name,type,value,unit
0,2023-11-24 19:00:00+00:00,uaqi,Universal AQI,index,74.00,
1,2023-11-24 19:00:00+00:00,usa_epa,AQI (US),index,39.00,
2,2023-11-24 18:00:00+00:00,uaqi,Universal AQI,index,79.00,
3,2023-11-24 18:00:00+00:00,usa_epa,AQI (US),index,40.00,
4,2023-11-24 17:00:00+00:00,uaqi,Universal AQI,index,78.00,
...,...,...,...,...,...,...
795,2023-11-20 16:00:00+00:00,no2,Nitrogen dioxide,pollutant,30.09,PARTS_PER_BILLION
796,2023-11-20 16:00:00+00:00,o3,Ozone,pollutant,5.19,PARTS_PER_BILLION
797,2023-11-20 16:00:00+00:00,pm10,Inhalable particulate matter (<10µm),pollutant,15.48,MICROGRAMS_PER_CUBIC_METER
798,2023-11-20 16:00:00+00:00,pm25,Fine particulate matter (<2.5µm),pollutant,6.63,MICROGRAMS_PER_CUBIC_METER


In [14]:
df.to_csv('airqualityoakland.csv')

In [15]:
df['time'] = pd.to_datetime(df['time'])

In [16]:
df.set_index('time', inplace=True)

In [17]:
df = pd.read_csv('airqualityoakland.csv')
df

Unnamed: 0.1,Unnamed: 0,time,code,name,type,value,unit
0,0,2023-11-24 19:00:00+00:00,uaqi,Universal AQI,index,74.00,
1,1,2023-11-24 19:00:00+00:00,usa_epa,AQI (US),index,39.00,
2,2,2023-11-24 18:00:00+00:00,uaqi,Universal AQI,index,79.00,
3,3,2023-11-24 18:00:00+00:00,usa_epa,AQI (US),index,40.00,
4,4,2023-11-24 17:00:00+00:00,uaqi,Universal AQI,index,78.00,
...,...,...,...,...,...,...,...
795,795,2023-11-20 16:00:00+00:00,no2,Nitrogen dioxide,pollutant,30.09,PARTS_PER_BILLION
796,796,2023-11-20 16:00:00+00:00,o3,Ozone,pollutant,5.19,PARTS_PER_BILLION
797,797,2023-11-20 16:00:00+00:00,pm10,Inhalable particulate matter (<10µm),pollutant,15.48,MICROGRAMS_PER_CUBIC_METER
798,798,2023-11-20 16:00:00+00:00,pm25,Fine particulate matter (<2.5µm),pollutant,6.63,MICROGRAMS_PER_CUBIC_METER


In [18]:
df['time'] = pd.to_datetime(df['time'])

In [19]:
one_year_ago = df['time'].max() - pd.DateOffset(years=1)

In [20]:
one_year_ago = df['time'].max() - pd.DateOffset(years=1)
last_year_df = df[df['time'] >= one_year_ago]
last_year_df

Unnamed: 0.1,Unnamed: 0,time,code,name,type,value,unit
0,0,2023-11-24 19:00:00+00:00,uaqi,Universal AQI,index,74.00,
1,1,2023-11-24 19:00:00+00:00,usa_epa,AQI (US),index,39.00,
2,2,2023-11-24 18:00:00+00:00,uaqi,Universal AQI,index,79.00,
3,3,2023-11-24 18:00:00+00:00,usa_epa,AQI (US),index,40.00,
4,4,2023-11-24 17:00:00+00:00,uaqi,Universal AQI,index,78.00,
...,...,...,...,...,...,...,...
795,795,2023-11-20 16:00:00+00:00,no2,Nitrogen dioxide,pollutant,30.09,PARTS_PER_BILLION
796,796,2023-11-20 16:00:00+00:00,o3,Ozone,pollutant,5.19,PARTS_PER_BILLION
797,797,2023-11-20 16:00:00+00:00,pm10,Inhalable particulate matter (<10µm),pollutant,15.48,MICROGRAMS_PER_CUBIC_METER
798,798,2023-11-20 16:00:00+00:00,pm25,Fine particulate matter (<2.5µm),pollutant,6.63,MICROGRAMS_PER_CUBIC_METER


In [21]:
# set up client
client = Client(key="AIzaSyCer577h-dY5GJ9_Mqmptlu5DO2XrMuerc")
# a location in Oakland, CA 94601
location = {"longitude":-122.22,"latitude":37.77}
# a JSON response
history_conditions_data = historical_conditions(
    client,
    location,
    lag_time=720
)

In [22]:
import pandas as pd

def historical_conditions_to_df(response_dict):
  
    # Directly accessing 'hoursInfo' since it's a list
    chained_pages = response_dict['hoursInfo']

    all_indexes = []
    all_pollutants = []
    for this_element in chained_pages:
        # Need this check in case one of the timestamps is missing data, which can sometimes happen
        if "indexes" in this_element:
            # Fetch the time
            time = this_element["dateTime"]
            # Fetch all the index values and add metadata
            all_indexes += [(time, x["code"], x["displayName"], "index", x["aqi"], None) for x in this_element['indexes']]
            # Fetch all the pollutant values and add metadata
            all_pollutants += [(time, x["code"], x["fullName"], "pollutant", x["concentration"]["value"], x["concentration"]["units"]) for x in this_element['pollutants']]

    all_results = all_indexes + all_pollutants
    # Generate "long format" dataframe
    res = pd.DataFrame(all_results, columns=["time", "code", "name", "type", "value", "unit"])
    res["time"] = pd.to_datetime(res["time"])
    return res


In [23]:
df_94601 = historical_conditions_to_df(history_conditions_data)
df_94601

Unnamed: 0,time,code,name,type,value,unit
0,2023-11-24 19:00:00+00:00,uaqi,Universal AQI,index,72.00,
1,2023-11-24 19:00:00+00:00,usa_epa,AQI (US),index,53.00,
2,2023-11-24 18:00:00+00:00,uaqi,Universal AQI,index,70.00,
3,2023-11-24 18:00:00+00:00,usa_epa,AQI (US),index,53.00,
4,2023-11-24 17:00:00+00:00,uaqi,Universal AQI,index,77.00,
...,...,...,...,...,...,...
795,2023-11-20 16:00:00+00:00,no2,Nitrogen dioxide,pollutant,28.43,PARTS_PER_BILLION
796,2023-11-20 16:00:00+00:00,o3,Ozone,pollutant,5.81,PARTS_PER_BILLION
797,2023-11-20 16:00:00+00:00,pm10,Inhalable particulate matter (<10µm),pollutant,26.55,MICROGRAMS_PER_CUBIC_METER
798,2023-11-20 16:00:00+00:00,pm25,Fine particulate matter (<2.5µm),pollutant,8.13,MICROGRAMS_PER_CUBIC_METER


In [24]:
df.to_csv('airqualityoakland_94601.csv')

In [25]:
# set up client
client = Client(key="AIzaSyCer577h-dY5GJ9_Mqmptlu5DO2XrMuerc")
# a location in Oakland, CA 94603
location = {"longitude":-122.17,"latitude":37.74}
# a JSON response
history_conditions_data = historical_conditions(
    client,
    location,
    lag_time=720
)

In [26]:
def historical_conditions_to_df(response_dict):
  
    # Directly accessing 'hoursInfo' since it's a list
    chained_pages = response_dict['hoursInfo']

    all_indexes = []
    all_pollutants = []
    for this_element in chained_pages:
        # Need this check in case one of the timestamps is missing data, which can sometimes happen
        if "indexes" in this_element:
            # Fetch the time
            time = this_element["dateTime"]
            # Fetch all the index values and add metadata
            all_indexes += [(time, x["code"], x["displayName"], "index", x["aqi"], None) for x in this_element['indexes']]
            # Fetch all the pollutant values and add metadata
            all_pollutants += [(time, x["code"], x["fullName"], "pollutant", x["concentration"]["value"], x["concentration"]["units"]) for x in this_element['pollutants']]

    all_results = all_indexes + all_pollutants
    # Generate "long format" dataframe
    res = pd.DataFrame(all_results, columns=["time", "code", "name", "type", "value", "unit"])
    res["time"] = pd.to_datetime(res["time"])
    return res

In [27]:
df_94603 = historical_conditions_to_df(history_conditions_data)
df_94603

Unnamed: 0,time,code,name,type,value,unit
0,2023-11-24 19:00:00+00:00,uaqi,Universal AQI,index,78.00,
1,2023-11-24 19:00:00+00:00,usa_epa,AQI (US),index,54.00,
2,2023-11-24 18:00:00+00:00,uaqi,Universal AQI,index,72.00,
3,2023-11-24 18:00:00+00:00,usa_epa,AQI (US),index,54.00,
4,2023-11-24 17:00:00+00:00,uaqi,Universal AQI,index,70.00,
...,...,...,...,...,...,...
795,2023-11-20 16:00:00+00:00,no2,Nitrogen dioxide,pollutant,23.43,PARTS_PER_BILLION
796,2023-11-20 16:00:00+00:00,o3,Ozone,pollutant,6.02,PARTS_PER_BILLION
797,2023-11-20 16:00:00+00:00,pm10,Inhalable particulate matter (<10µm),pollutant,29.79,MICROGRAMS_PER_CUBIC_METER
798,2023-11-20 16:00:00+00:00,pm25,Fine particulate matter (<2.5µm),pollutant,7.90,MICROGRAMS_PER_CUBIC_METER


In [28]:
df.to_csv('airqualityoakland_94603.csv') 

In [29]:
# set up client
client = Client(key="AIzaSyCer577h-dY5GJ9_Mqmptlu5DO2XrMuerc")
# a location in Oakland, CA 94621
location = {"longitude":-122.20,"latitude":37.75}
# a JSON response
history_conditions_data = historical_conditions(
    client,
    location,
    lag_time=720
)

In [30]:
def historical_conditions_to_df(response_dict):
  
    # Directly accessing 'hoursInfo' since it's a list
    chained_pages = response_dict['hoursInfo']

    all_indexes = []
    all_pollutants = []
    for this_element in chained_pages:
        # Need this check in case one of the timestamps is missing data, which can sometimes happen
        if "indexes" in this_element:
            # Fetch the time
            time = this_element["dateTime"]
            # Fetch all the index values and add metadata
            all_indexes += [(time, x["code"], x["displayName"], "index", x["aqi"], None) for x in this_element['indexes']]
            # Fetch all the pollutant values and add metadata
            all_pollutants += [(time, x["code"], x["fullName"], "pollutant", x["concentration"]["value"], x["concentration"]["units"]) for x in this_element['pollutants']]

    all_results = all_indexes + all_pollutants
    # Generate "long format" dataframe
    res = pd.DataFrame(all_results, columns=["time", "code", "name", "type", "value", "unit"])
    res["time"] = pd.to_datetime(res["time"])
    return res

In [31]:
df_94621 = historical_conditions_to_df(history_conditions_data)
df_94621

Unnamed: 0,time,code,name,type,value,unit
0,2023-11-24 19:00:00+00:00,uaqi,Universal AQI,index,74.00,
1,2023-11-24 19:00:00+00:00,usa_epa,AQI (US),index,57.00,
2,2023-11-24 18:00:00+00:00,uaqi,Universal AQI,index,71.00,
3,2023-11-24 18:00:00+00:00,usa_epa,AQI (US),index,58.00,
4,2023-11-24 17:00:00+00:00,uaqi,Universal AQI,index,78.00,
...,...,...,...,...,...,...
795,2023-11-20 16:00:00+00:00,no2,Nitrogen dioxide,pollutant,26.03,PARTS_PER_BILLION
796,2023-11-20 16:00:00+00:00,o3,Ozone,pollutant,6.01,PARTS_PER_BILLION
797,2023-11-20 16:00:00+00:00,pm10,Inhalable particulate matter (<10µm),pollutant,22.22,MICROGRAMS_PER_CUBIC_METER
798,2023-11-20 16:00:00+00:00,pm25,Fine particulate matter (<2.5µm),pollutant,10.63,MICROGRAMS_PER_CUBIC_METER


In [32]:
df.to_csv('airqualityoakland_94621.csv') 

In [33]:
def request_post(self,url,params):

    request_url = self.compose_url(url)
    request_header = self.compose_header()
    request_body = params

    response = self.session.post(
      request_url,
      headers=request_header,
      json=request_body,
    )

    response_body = self.get_body(response)

    # put the first page in the response dictionary
    page = 1
    final_response = {
        "page_{}".format(page) : response_body
    }
    # fetch all the pages if needed 
    while "nextPageToken" in response_body:
      # call again with the next page's token
      request_body.update({
          "pageToken":response_body["nextPageToken"]
      })
      response = self.session.post(
          request_url,
          headers=request_header,
          json=request_body,
      )
      response_body = self.get_body(response)
      page += 1
      final_response["page_{}".format(page)] = response_body

    return final_response

In [34]:
def hours_info(
    client,
    location,
    specific_time=None,
    lag_time=None,
    specific_period=None,
    include_local_AQI=True,
    include_health_suggestion=False,
    include_all_pollutants=True,
    include_additional_pollutant_info=False,
    include_dominant_pollutant_conc=True,
    language=None,
):
    """
    See documentation for this API here https://developers.google.com/maps/documentation/air-quality/reference/rest/v1/history/lookup
    """
    params = {}

    if isinstance(location, dict):
        params["location"] = location
    else:
        raise ValueError(
            "Location argument must be a dictionary containing latitude and longitude"
        )

    if isinstance(specific_period, dict) and not specific_time and not lag_time:
        assert "startTime" in specific_period
        assert "endTime" in specific_period

        params["period"] = specific_period

    elif specific_time and not lag_time and not isinstance(specific_period, dict):
        # note that time must be in the "Zulu" format
        # e.g. datetime.datetime.strftime(datetime.datetime.now(),"%Y-%m-%dT%H:%M:%SZ")
        params["dateTime"] = specific_time

    # lag periods in hours
    elif lag_time and not specific_time and not isinstance(specific_period, dict):
        params["hours"] = lag_time

    else:
        raise ValueError(
            "Must provide specific_time, specific_period or lag_time arguments"
        )

    extra_computations = []
    if include_local_AQI:
        extra_computations.append("LOCAL_AQI")

    if include_health_suggestion:
        extra_computations.append("HEALTH_RECOMMENDATIONS")

    if include_additional_pollutant_info:
        extra_computations.append("POLLUTANT_ADDITIONAL_INFO")

    if include_all_pollutants:
        extra_computations.append("POLLUTANT_CONCENTRATION")

    if include_dominant_pollutant_conc:
        extra_computations.append("DOMINANT_POLLUTANT_CONCENTRATION")

    if language:
        params["language"] = language

    params["extraComputations"] = extra_computations
    # page size default set to 100 here
    params["pageSize"] = 100
    # page token will get filled in if needed by the request_post method
    params["pageToken"] = ""

    return client.request_post("/v1/history:lookup", params)

In [35]:
# set up client
client = Client(key="AIzaSyCer577h-dY5GJ9_Mqmptlu5DO2XrMuerc")
# a location in Oakland, CA
location = {"longitude":-122.3,"latitude":37.8}
# a JSON response
hours_info_data = historical_conditions(
    client,
    location,
    lag_time=720
)

In [36]:
import pandas as pd

def hours_info_to_df(response_dict):
  
    # Directly accessing 'hoursInfo' since it's a list
    chained_pages = response_dict['hoursInfo']

    all_indexes = []
    all_pollutants = []
    for this_element in chained_pages:
        # Need this check in case one of the timestamps is missing data, which can sometimes happen
        if "indexes" in this_element:
            # Fetch the time
            time = this_element["dateTime"]
            # Fetch all the index values and add metadata
            all_indexes += [(time, x["code"], x["displayName"], "index", x["aqi"], None) for x in this_element['indexes']]
            # Fetch all the pollutant values and add metadata
            all_pollutants += [(time, x["code"], x["fullName"], "pollutant", x["concentration"]["value"], x["concentration"]["units"]) for x in this_element['pollutants']]

    all_results = all_indexes + all_pollutants
    # Generate "long format" dataframe
    res = pd.DataFrame(all_results, columns=["time", "code", "name", "type", "value", "unit"])
    res["time"] = pd.to_datetime(res["time"])
    return res

In [37]:
df = hours_info_to_df(hours_info_data)
df

Unnamed: 0,time,code,name,type,value,unit
0,2023-11-24 19:00:00+00:00,uaqi,Universal AQI,index,74.00,
1,2023-11-24 19:00:00+00:00,usa_epa,AQI (US),index,39.00,
2,2023-11-24 18:00:00+00:00,uaqi,Universal AQI,index,79.00,
3,2023-11-24 18:00:00+00:00,usa_epa,AQI (US),index,40.00,
4,2023-11-24 17:00:00+00:00,uaqi,Universal AQI,index,78.00,
...,...,...,...,...,...,...
795,2023-11-20 16:00:00+00:00,no2,Nitrogen dioxide,pollutant,30.09,PARTS_PER_BILLION
796,2023-11-20 16:00:00+00:00,o3,Ozone,pollutant,5.19,PARTS_PER_BILLION
797,2023-11-20 16:00:00+00:00,pm10,Inhalable particulate matter (<10µm),pollutant,15.48,MICROGRAMS_PER_CUBIC_METER
798,2023-11-20 16:00:00+00:00,pm25,Fine particulate matter (<2.5µm),pollutant,6.63,MICROGRAMS_PER_CUBIC_METER


In [38]:
df.to_csv('airqualityoakland_hoursinfo.csv')

!jupyter nbextension enable --py gmaps
!pip install google

!pip show gmaps

install google.maps

!pip install googlemaps

!pip install --upgrade googlemaps

!pip install google-api-python-client

## San Francisco Code Air Quality Info

In [39]:
def request_post(self,url,params):

    request_url = self.compose_url(url)
    request_header = self.compose_header()
    request_body = params

    response = self.session.post(
      request_url,
      headers=request_header,
      json=request_body,
    )

    response_body = self.get_body(response)

    # put the first page in the response dictionary
    page = 1
    final_response = {
        "page_{}".format(page) : response_body
    }
    # fetch all the pages if needed 
    while "nextPageToken" in response_body:
      # call again with the next page's token
      request_body.update({
          "pageToken":response_body["nextPageToken"]
      })
      response = self.session.post(
          request_url,
          headers=request_header,
          json=request_body,
      )
      response_body = self.get_body(response)
      page += 1
      final_response["page_{}".format(page)] = response_body

    return final_response

In [40]:
def hours_info(
    client,
    location,
    specific_time=None,
    lag_time=None,
    specific_period=None,
    include_local_AQI=True,
    include_health_suggestion=False,
    include_all_pollutants=True,
    include_additional_pollutant_info=False,
    include_dominant_pollutant_conc=True,
    language=None,
):
    """
    See documentation for this API here https://developers.google.com/maps/documentation/air-quality/reference/rest/v1/history/lookup
    """
    params = {}

    if isinstance(location, dict):
        params["location"] = location
    else:
        raise ValueError(
            "Location argument must be a dictionary containing latitude and longitude"
        )

    if isinstance(specific_period, dict) and not specific_time and not lag_time:
        assert "startTime" in specific_period
        assert "endTime" in specific_period

        params["period"] = specific_period

    elif specific_time and not lag_time and not isinstance(specific_period, dict):
        # note that time must be in the "Zulu" format
        # e.g. datetime.datetime.strftime(datetime.datetime.now(),"%Y-%m-%dT%H:%M:%SZ")
        params["dateTime"] = specific_time

    # lag periods in hours
    elif lag_time and not specific_time and not isinstance(specific_period, dict):
        params["hours"] = lag_time

    else:
        raise ValueError(
            "Must provide specific_time, specific_period or lag_time arguments"
        )

    extra_computations = []
    if include_local_AQI:
        extra_computations.append("LOCAL_AQI")

    if include_health_suggestion:
        extra_computations.append("HEALTH_RECOMMENDATIONS")

    if include_additional_pollutant_info:
        extra_computations.append("POLLUTANT_ADDITIONAL_INFO")

    if include_all_pollutants:
        extra_computations.append("POLLUTANT_CONCENTRATION")

    if include_dominant_pollutant_conc:
        extra_computations.append("DOMINANT_POLLUTANT_CONCENTRATION")

    if language:
        params["language"] = language

    params["extraComputations"] = extra_computations
    # page size default set to 100 here
    params["pageSize"] = 100
    # page token will get filled in if needed by the request_post method
    params["pageToken"] = ""

    return client.request_post("/v1/history:lookup", params)

In [41]:
# set up client
client = Client(key="AIzaSyCer577h-dY5GJ9_Mqmptlu5DO2XrMuerc")
# a location in San Francisco, CA
location = {"longitude":-122.4194,"latitude":37.7749}
# a JSON response
hours_info_data = historical_conditions(
    client,
    location,
    lag_time=720
)

In [None]:
import pandas as pd

def hours_info_to_df(response_dict):
  
    # Directly accessing 'hoursInfo' since it's a list
    chained_pages = response_dict['hoursInfo']

    all_indexes = []
    all_pollutants = []
    for this_element in chained_pages:
        # Need this check in case one of the timestamps is missing data, which can sometimes happen
        if "indexes" in this_element:
            # Fetch the time
            time = this_element["dateTime"]
            # Fetch all the index values and add metadata
            all_indexes += [(time, x["code"], x["displayName"], "index", x["aqi"], None) for x in this_element['indexes']]
            # Fetch all the pollutant values and add metadata
            all_pollutants += [(time, x["code"], x["fullName"], "pollutant", x["concentration"]["value"], x["concentration"]["units"]) for x in this_element['pollutants']]

    all_results = all_indexes + all_pollutants
    # Generate "long format" dataframe
    res = pd.DataFrame(all_results, columns=["time", "code", "name", "type", "value", "unit"])
    res["time"] = pd.to_datetime(res["time"])
    return res

In [42]:
df = hours_info_to_df(hours_info_data)
df

Unnamed: 0,time,code,name,type,value,unit
0,2023-11-24 19:00:00+00:00,uaqi,Universal AQI,index,63.00,
1,2023-11-24 19:00:00+00:00,usa_epa,AQI (US),index,32.00,
2,2023-11-24 18:00:00+00:00,uaqi,Universal AQI,index,78.00,
3,2023-11-24 18:00:00+00:00,usa_epa,AQI (US),index,33.00,
4,2023-11-24 17:00:00+00:00,uaqi,Universal AQI,index,70.00,
...,...,...,...,...,...,...
795,2023-11-20 16:00:00+00:00,no2,Nitrogen dioxide,pollutant,18.27,PARTS_PER_BILLION
796,2023-11-20 16:00:00+00:00,o3,Ozone,pollutant,23.90,PARTS_PER_BILLION
797,2023-11-20 16:00:00+00:00,pm10,Inhalable particulate matter (<10µm),pollutant,31.73,MICROGRAMS_PER_CUBIC_METER
798,2023-11-20 16:00:00+00:00,pm25,Fine particulate matter (<2.5µm),pollutant,5.11,MICROGRAMS_PER_CUBIC_METER


In [43]:
df.to_csv('airqualitySanFrancisco_hoursinfo.csv')