# Google - Environmental Insights Explorer

In [1]:
import requests
import pandas as pd

In [2]:
API = "https://alkalienvironmentalinsights-pa.googleapis.com/v1/releases/public/features/ChIJWdeZQOjKwoARqo8qxPo6AKE?key=AIzaSyBofhq7e63zkJXbp-r6SZ8V9MLjQuP01a8&language_code=en-US&alt=protojson"

headers = {
    "Authority": "alkalienvironmentalinsights-pa.googleapis.com",
    "Method": "GET",
    "Path": "/v1/releases/preview/features?locale=en-US&key=AIzaSyBofhq7e63zkJXbp-r6SZ8V9MLjQuP01a8&alt=protojson",
    "Scheme": "https",
    "Accept": "application/json, text/plain, */*",
    "Accept-Encoding": "gzip, deflate, br",
    "Accept-Language": "en-US,en;q=0.9",
    "Origin": "https://insights.sustainability.google",
    "Referer": "https://insights.sustainability.google/places/ChIJWdeZQOjKwoARqo8qxPo6AKE/download?hl=en-US",
    "Sec-Ch-Ua": '"Not.A/Brand";v="8", "Chromium";v="114", "Google Chrome";v="114"',
    "Sec-Ch-Ua-Mobile": "?0",
    "Sec-Ch-Ua-Platform": '"Windows"',
    "Sec-Fetch-Dest": "empty",
    "Sec-Fetch-Mode": "cors",
    "Sec-Fetch-Site": "cross-site",
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36",
    "X-Client-Data": "CJK2yQEIo7bJAQipncoBCIuEywEIlKHLAQiFoM0B"
}



### Tests

In [3]:
r = requests.get(API, headers=headers)
print(r.status_code)

200


In [4]:
content = r.json()
city = content[2]
city

'Long Beach'

In [5]:
buildings_raw = content[8][10]
transportation_raw = content[11]

### Buildings Emissions

In [6]:
def get_buildings_data(content):
    """
    Gets buildings data from response 
    and parses it into a dataframe
    """

    # NOTE: A lot of things are hard coded because the data is formatted
    # in a very specific way. If the data format changes, this function
    # will need to be updated.
    
    buildings_raw = content[8][10]
    residential_data = buildings_raw[0]
    non_residential_data = buildings_raw[1]

    buildings_dict_list = []

    for list_content in [residential_data, non_residential_data]:
        buildings_dict = {}
        buildings_dict['Type'] = list_content[0]
        buildings_dict['co2e tons'] = list_content[1]
        buildings_dict['number of buildings'] = list_content[2]
        buildings_dict['energy intensity'] = list_content[3][0][2]
        buildings_dict['floor space'] = list_content[3][0][-1]
        additonal_data = list_content[3][0][7]
        buildings_dict['electricity intensity'] = additonal_data[0][1]
        buildings_dict['electricity fraction'] = additonal_data[0][2]
        buildings_dict['natural gas intensity'] = additonal_data[1][1]
        buildings_dict['natural gas fraction'] = additonal_data[1][2]
        buildings_dict['diesel oil intensity'] = additonal_data[2][1]
        buildings_dict['diesel oil fraction'] = additonal_data[2][2]
        buildings_dict['propane intensity'] = additonal_data[3][1]
        buildings_dict['propane fraction'] = 0

        buildings_dict_list.append(buildings_dict)


    buildings_df = pd.DataFrame.from_dict(buildings_dict_list)

    return buildings_df

In [111]:
def get_transportation_df():
    """
    Gets transportation data from response
    """
    trip_types_dict = {0: "INBOUND", 1: "OUTBOUND", 2: "IN-BOUNDARY"}
    mode_dict = {1: "AUTOMOBILE", 5: "TRAM", 7: "BUS", 9: "CYCLING", 10: "ON FOOT"}
    transportation_raw = content[11]
    dfs = []
    for year_data in transportation_raw:
        year = year_data[9]
        trip_type_data = year_data[19]
        counter = 0
        for trip_type_sub in trip_type_data:
            transport_dicts = []
            data_list = trip_type_sub[3]
            for row in data_list:
                transport_dict = {}
                transport_dict["mode"] = row[0] 
                transport_dict["travel bound"] = counter
                transport_dict["trips"] = row[1]
                transport_dict["full distance km"] = row[4]
                transport_dict["factor 1"] = row[2]
                transport_dict["factor 2"] = row[3]
                if transport_dict["factor 1"]:
                    transport_dict["full co2e tons"] = (row[4] * row[3]) / row[2]
                else:
                    transport_dict["factor 1"] = 0
                    transport_dict["factor 2"] = 0
                    transport_dict["full co2e tons"] = 0
                transport_dicts.append(transport_dict)
            trip_type_df = pd.DataFrame.from_dict(transport_dicts)
            trip_type_df.loc[:, "year"] = year
            counter += 1

            dfs.append(trip_type_df)
    final_df = pd.concat(dfs, ignore_index=True)
    
    # Replace the travel bound column with the actual travel bound
    final_df.loc[:, "travel bound"] = final_df.loc[:, "travel bound"].map(trip_types_dict)
    final_df.loc[:, "mode"] = final_df.loc[:, "mode"].map(mode_dict)

    return final_df

In [112]:
def get_gpc_metrics(df):

    # If travel bound is in-boundary, then the gpc distance is the same as the full distance
    # otherwise, it is half of the full distance
    df.loc[:, "gpc distance km"] = df.loc[:, "full distance km"]
    df.loc[df.loc[:, "travel bound"] != "IN-BOUNDARY", "gpc distance km"] = df.loc[df.loc[:, "travel bound"] != "IN-BOUNDARY", "full distance km"] / 2

    df.loc[:, "gpc co2e tons"] = df.loc[:, "full co2e tons"]
    df.loc[df.loc[:, "travel bound"] != "IN-BOUNDARY", "gpc co2e tons"] = df.loc[df.loc[:, "travel bound"] != "IN-BOUNDARY", "full co2e tons"] / 2

    return df

### Pipeline

In [113]:
buildings_df = get_buildings_data(content)
buildings_df

Unnamed: 0,Type,co2e tons,number of buildings,energy intensity,floor space,electricity intensity,electricity fraction,natural gas intensity,natural gas fraction,diesel oil intensity,diesel oil fraction,propane intensity,propane fraction
0,Residential,900000,130617,147.50464,25662000,0.00024,0.486217,0.000203,0.256892,0.000268,0.256892,0.00021,0
1,Non-residential,505000,7710,275.39346,7660000,0.00024,0.857036,0.000203,0.071482,0.000268,0.071482,0.00021,0


In [114]:
transportation_df = get_transportation_df()
transportation_df = get_gpc_metrics(transportation_df)

In [115]:
transportation_df.loc[transportation_df.loc[:, "mode"] == "AUTOMOBILE", :] 

Unnamed: 0,mode,travel bound,trips,full distance km,factor 1,factor 2,full co2e tons,year,gpc distance km,gpc co2e tons
0,AUTOMOBILE,INBOUND,135033076,3288331340,8.471735,0.002332,905249.653208,2018,1644166000.0,452624.826604
5,AUTOMOBILE,OUTBOUND,134658200,3267620482,8.471735,0.002332,899548.13013,2018,1633810000.0,449774.065065
10,AUTOMOBILE,IN-BOUNDARY,187362321,1186110325,8.471735,0.002332,326526.085529,2018,1186110000.0,326526.085529
14,AUTOMOBILE,INBOUND,139663291,3237191225,8.490479,0.002337,891171.266386,2019,1618596000.0,445585.633193
17,AUTOMOBILE,OUTBOUND,139462789,3231666160,8.490479,0.002337,889650.262889,2019,1615833000.0,444825.131445
20,AUTOMOBILE,IN-BOUNDARY,197470496,1046386775,8.490479,0.002337,288061.39724,2019,1046387000.0,288061.39724
24,AUTOMOBILE,INBOUND,113959480,2291137895,8.490479,0.002337,630730.814906,2020,1145569000.0,315365.407453
26,AUTOMOBILE,OUTBOUND,113788807,2282192464,8.490479,0.002337,628268.213683,2020,1141096000.0,314134.106841
28,AUTOMOBILE,IN-BOUNDARY,162164053,679230027,8.490479,0.002337,186986.261007,2020,679230000.0,186986.261007
32,AUTOMOBILE,INBOUND,133852888,2871757330,8.490479,0.002337,790570.416961,2021,1435879000.0,395285.20848


In [117]:
summed_data = transportation_df.groupby(by=["year", "mode","factor 1", "factor 2"]).sum().reset_index()
summed_data.loc[:, 'travel bound'] = 'TOTAL'
summed_data.loc[:, 'full co2e tons'] = (summed_data.loc[:, 'full distance km'] * summed_data.loc[:, 'factor 2'] / summed_data.loc[:, 'factor 1'])
summed_data.loc[:, 'gpc co2e tons'] = (summed_data.loc[:, 'gpc distance km'] * summed_data.loc[:, 'factor 2'] / summed_data.loc[:, 'factor 1'])
summed_data

Unnamed: 0,year,mode,factor 1,factor 2,travel bound,trips,full distance km,full co2e tons,gpc distance km,gpc co2e tons
0,2018,AUTOMOBILE,8.471735,0.002332,TOTAL,457053597,7742062147,2131324.0,4464086000.0,1228925.0
1,2018,BUS,4.069062,0.002532,TOTAL,2512103,41792402,26010.54,20896200.0,13005.27
2,2018,BUS,4.069063,0.002532,TOTAL,4457588,20408473,12701.72,20408470.0,12701.72
3,2018,CYCLING,0.0,0.0,TOTAL,9337615,33325487,,27210970.0,
4,2018,ON FOOT,0.0,0.0,TOTAL,83537581,65738436,,62204690.0,
5,2018,TRAM,0.0,0.0,TOTAL,2142773,52932679,,26466340.0,
6,2019,AUTOMOBILE,8.490479,0.002337,TOTAL,476596576,7515244160,2068883.0,4280815000.0,1178472.0
7,2019,BUS,3.999031,0.002489,TOTAL,9371405,113043933,70355.68,69818270.0,43453.12
8,2019,CYCLING,0.0,0.0,TOTAL,8875649,21539554,,21539550.0,
9,2019,ON FOOT,0.0,0.0,TOTAL,82133995,62464473,,59258470.0,


In [118]:
pd.concat([transportation_df, summed_data], ignore_index=True)


Unnamed: 0,mode,travel bound,trips,full distance km,factor 1,factor 2,full co2e tons,year,gpc distance km,gpc co2e tons
0,AUTOMOBILE,INBOUND,135033076,3288331340,8.471735,0.002332,9.052497e+05,2018,1.644166e+09,4.526248e+05
1,TRAM,INBOUND,998990,24327363,0.000000,0.000000,0.000000e+00,2018,1.216368e+07,0.000000e+00
2,BUS,INBOUND,1200055,19235551,4.069062,0.002532,1.197172e+04,2018,9.617776e+06,5.985861e+03
3,CYCLING,INBOUND,704298,6052966,0.000000,0.000000,0.000000e+00,2018,3.026483e+06,0.000000e+00
4,ON FOOT,INBOUND,2128363,3502711,0.000000,0.000000,0.000000e+00,2018,1.751356e+06,0.000000e+00
...,...,...,...,...,...,...,...,...,...,...
65,ON FOOT,TOTAL,48477570,42505363,0.000000,0.000000,,2021,4.250536e+07,
66,AUTOMOBILE,TOTAL,464577284,6897849155,8.490479,0.002337,1.898919e+06,2022,3.876720e+09,1.067228e+06
67,BUS,TOTAL,8077871,83004831,3.999031,0.002489,5.166010e+04,2022,5.193797e+07,3.232488e+04
68,CYCLING,TOTAL,8022887,24383095,0.000000,0.000000,,2022,2.438310e+07,


### Class tests

In [119]:
from GoogleScraper import GoogleScraper

In [None]:
google_scraper = GoogleScraper()
