# Comparison between ClimateTRACE and C40 inventories

This analysis compares city aggregated estimates from [climateTRACE](https://climatetrace.org/) to estimates [GPC](https://ghgprotocol.org/ghg-protocol-cities) compliant C40 city inventories downloaded from [here](https://www.c40knowledgehub.org/s/article/C40-cities-greenhouse-gas-emissions-interactive-dashboard?language=en_US).

I am not sure if the C40 inventories are high quality. Comparing to downscaled observations would not be a fair comparison. 

In [2]:
import json
import os
import fnmatch
import pandas as pd
import tarfile
import os
import requests
from sqlalchemy import create_engine, MetaData, text
from sqlalchemy.orm import sessionmaker
from tqdm import tqdm

In [3]:
from utils import (
    get_c40_data, 
    filter_out_notation_keys,
    climatetrace_file_names,
    load_climatetrace_file,
    point_to_lat_lon,
    lat_lon_to_locode
)

## Read raw C40 data

**Units**: metric tonnes CO2-eq. (I am assuming these are units since they should be following the GPC)

In [4]:
df_c40_raw = get_c40_data()

### filter C40

In [5]:
refnos = ['II.4.3']
columns = ['city', 'locode', 'year'] + refnos

df_tmp = filter_out_notation_keys(df_c40_raw, refnos)
df_c40 = (
    df_tmp
    .loc[:, columns]
    .rename(columns = {'II.4.3': 'emissions_c40'})
)

### Read ClimateTRACE

**Units**: Units are tonnes 

In [6]:
asset_file = './transportation/asset_international-aviation_emissions.csv'
df_ct_raw = load_climatetrace_file(asset_file)
filt = (df_ct_raw['gas'] == 'co2e_100yr')
df_tmp = df_ct_raw.loc[filt]

In [7]:
points = set(df_tmp['st_astext'])
df_points = pd.DataFrame([point_to_lat_lon(point) for point in points])

df_merged = df_tmp.merge(df_points, on='st_astext')
df_unique = df_merged[['lon','lat']].drop_duplicates()
print(f"number unique assets: {len(df_unique)}")

db_uri = "postgresql://ccglobal:@localhost/ccglobal"
engine = create_engine(db_uri)
metadata_obj = MetaData()
Session = sessionmaker(bind=engine)
session = Session()

output_list = []
for _, row in tqdm(df_unique.iterrows()):
    lat = row['lat']
    lon = row['lon']
    locode = lat_lon_to_locode(session, lat, lon)
    output_list.append({'lon':lon, 'lat': lat, 'locode': locode})

session.close()

df_locodes = pd.DataFrame(output_list)
df_merged_locodes = df_merged.merge(df_locodes, on =['lat','lon'])
filt = df_merged_locodes['locode'].notnull()
df_data = df_merged_locodes[filt]

number unique assets: 4815


4815it [00:32, 149.46it/s]


In [8]:
df_ct = (
    df_data
    .assign(year = lambda row: pd.to_datetime(row['start_time']).dt.year)
    .loc[:, ['locode', 'year', 'emissions_quantity', 'emissions_factor_units']]
    .rename(columns = {'emissions_quantity': 'emissions_ct'})
)

In [9]:
# check the units
set(df_ct['emissions_factor_units'])

{'tonnes_gas_per_tonnes_fuel'}

### Comparison

In [10]:
df_int = pd.merge(df_ct, df_c40, on = ['year', 'locode'], how='inner')
df_int['diff'] = df_int['emissions_ct'] - df_int['emissions_c40']
df_int['percent_error'] = (df_int['diff'] / df_int['emissions_c40']) * 100

In [11]:
df_int.loc[df_int['diff'].notnull()]

Unnamed: 0,locode,year,emissions_ct,emissions_factor_units,city,emissions_c40,diff,percent_error
2,US NYC,2020,4649397.0,tonnes_gas_per_tonnes_fuel,New York City,1597.586478,4647799.762522,290926.333349
3,US NYC,2020,26885.2,tonnes_gas_per_tonnes_fuel,New York City,1597.586478,25287.617392,1582.86376
4,US CHI,2015,5350988.0,tonnes_gas_per_tonnes_fuel,Chicago,1583779.174578,3767208.392422,237.861973
5,US CHI,2015,81143.45,tonnes_gas_per_tonnes_fuel,Chicago,1583779.174578,-1502635.726628,-94.876593
6,US CHI,2019,5570747.0,tonnes_gas_per_tonnes_fuel,Chicago,1616240.43479,3954506.82821,244.673178
7,US CHI,2019,91065.14,tonnes_gas_per_tonnes_fuel,Chicago,1616240.43479,-1525175.29162,-94.365619
10,BR RIO,2015,1542908.0,tonnes_gas_per_tonnes_fuel,Rio de Janeiro,2933672.945316,-1390764.945316,-47.406953
13,BR RIO,2016,1492186.0,tonnes_gas_per_tonnes_fuel,Rio de Janeiro,2839557.660322,-1347372.145322,-47.450072
16,BR RIO,2017,1323313.0,tonnes_gas_per_tonnes_fuel,Rio de Janeiro,2670365.360836,-1347052.458836,-50.4445
17,BR SSA,2015,110337.8,tonnes_gas_per_tonnes_fuel,Salvador,256624.0,-146286.205,-57.004101
