In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input tree and print the full path of every file found,
# in the order os.walk yields them.
input_file_paths = (
    os.path.join(folder, fname)
    for folder, _subdirs, fnames in os.walk('/kaggle/input')
    for fname in fnames
)
for input_file_path in input_file_paths:
    print(input_file_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# Dataset description and relation to competition
This dataset is available at http://citycarbonfootprints.info/ and derives from a 2018 publication on the carbon footprints of world cities, "Carbon footprints of 13 000 cities": https://iopscience.iop.org/article/10.1088/1748-9326/aac72a. The underlying data appear to predate 2018, but considerable effort clearly went into producing the output, which includes carbon footprints not only for 13,000 cities around the world but also on a 250 m grid spanning the globe.

The study appears to be one of the most extensive efforts yet to characterize carbon footprints with as much spatial coverage as possible. This opens up numerous possibilities for analysis and incorporation into KPIs for the CDP: Unlocking Climate Solutions competition. The global extent should enable actual carbon footprint estimates to be spatially joined to the competition data, which include:
- City-level information about commitments to improving carbon budgets, as well as
- Fine-grained spatial analysis within cities at the zip code and census tract level

The data presented here could be used both for large, multi-city analyses and for within-city analyses, thanks to the 250 m spatial resolution.

Here I load the data and visualize it for a few example cities from the CDP dataset, starting with Warsaw.

### Load Carbon Footprint Data

In [None]:
import rasterio
import geopandas as gpd
# Open the raw GGMCF raster and show its coordinate reference system as WKT.
# The dataset handle stays open in `co2_df`; it is not closed in this cell.
ggmcf_tif_path = '../input/global-gridded-model-of-carbon-footprints-ggmcf/GGMCF_v1.0.tif'
co2_df = rasterio.open(ggmcf_tif_path)
co2_df.crs.wkt

In [None]:
## Cities polygons
# NOTE(review): pd.read_pickle can execute arbitrary code while unpickling,
# so only load this file from a dataset you trust.
cities_pkl_path = '../input/cdp-cities-with-polygons/CDP/CDP_cities_with_polygons.pkl'
cities_poly_df = pd.read_pickle(cities_pkl_path)
# Show the CRS attached to the loaded frame (needed later to match the raster's CRS).
cities_poly_df.crs

In [None]:
# create an output directory
!mkdir /kaggle/working/global-gridded-model-of-carbon-footprints-ggmcf

In [None]:
%%time

# Reproject the carbon-footprint raster to EPSG:4326 so that its coordinate
# reference system can match the other data it will be spatially joined with.
from rasterio.warp import calculate_default_transform, reproject, Resampling

# Target CRS and the input/output raster locations
dst_crs = 'EPSG:4326'
ggmcf_src_path = '../input/global-gridded-model-of-carbon-footprints-ggmcf/GGMCF_v1.0.tif'
ggmcf_dst_path = '/kaggle/working/global-gridded-model-of-carbon-footprints-ggmcf/GGMCF_v1.0.EPSG4326.tif'

with rasterio.open(ggmcf_src_path) as src:
    # Grid (affine transform + pixel dimensions) of the source extent in the target CRS
    transform, width, height = calculate_default_transform(
        src.crs, dst_crs, src.width, src.height, *src.bounds
    )

    # Output metadata: a copy of the source metadata with the new CRS and grid
    kwargs = {
        **src.meta,
        'crs': dst_crs,
        'transform': transform,
        'width': width,
        'height': height,
    }

    with rasterio.open(ggmcf_dst_path, 'w', **kwargs) as dst:
        # Warp each band (rasterio bands are 1-indexed) with nearest-neighbour resampling
        for i in range(1, src.count + 1):
            reproject(
                source=rasterio.band(src, i),
                destination=rasterio.band(dst, i),
                src_transform=src.transform,
                src_crs=src.crs,
                dst_transform=transform,
                dst_crs=dst_crs,
                resampling=Resampling.nearest,
            )

## Now we can combine an example city mask with the carbon footprint

In [None]:
import matplotlib.pyplot as plt
from rasterio.mask import mask
from rasterio.plot import show

city_name = 'Warsaw'
reprojected_tif_path = '/kaggle/working/global-gridded-model-of-carbon-footprints-ggmcf/GGMCF_v1.0.EPSG4326.tif'

# Geometry of every row whose 'name_conve' equals the chosen city
city_geometry = cities_poly_df.loc[cities_poly_df['name_conve'] == city_name, 'geometry']

# Crop the reprojected raster to the city polygon and keep the raster metadata
with rasterio.open(reprojected_tif_path) as src:
    carbon_data, carbon_transform = mask(src, city_geometry, crop=True)
    carbon_meta = src.meta

# let's visualize this: polygon on the left, cropped raster on the right
fig, axs = plt.subplots(1, 2)
city_geometry.plot(ax=axs[0])
show(carbon_data, ax=axs[1])

## A couple more examples

In [None]:
# Imported here as well so the cell does not depend on an earlier cell having run.
import matplotlib.pyplot as plt
from rasterio.mask import mask
from rasterio.plot import show

# First five accounts that have a polygon; each one gets a polygon/raster figure.
accounts = cities_poly_df.loc[cities_poly_df['geometry'].notnull(), 'Account Number'].values[:5]

# NOTE: this cell only plots. It deliberately does not reset the
# 'co2_f_mean' / 'co2_f_sum' columns, so re-running it cannot wipe statistics
# computed in the cell that fills those columns.

with rasterio.open('/kaggle/working/global-gridded-model-of-carbon-footprints-ggmcf/GGMCF_v1.0.EPSG4326.tif') as src:
    # The CRS comparison does not depend on the account, so check it once up front.
    # A real exception type is required: raising a plain string is a TypeError in Python 3.
    if not (cities_poly_df['geometry'].crs == src.crs):
        raise ValueError("Error. Different CRS")

    for acc in accounts:
        # Select the account's geometry once and reuse it for masking and plotting
        acc_geometry = cities_poly_df.loc[cities_poly_df['Account Number'] == acc, 'geometry']

        # Crop the raster to the account's polygon(s)
        carbon_data, carbon_transform = mask(src, acc_geometry, crop=True)

        # Polygon on the left, cropped carbon-footprint raster on the right
        fig, axs = plt.subplots(1, 2)
        acc_geometry.plot(ax=axs[0])
        show(carbon_data, ax=axs[1])
        axs[0].set_title(f'Account {acc}: polygon')
        axs[1].set_title(f'Account {acc}: carbon footprint')

In [None]:
from rasterio.mask import mask

# First five accounts that have a polygon; only these get statistics.
accounts = cities_poly_df.loc[cities_poly_df['geometry'].notnull(), 'Account Number'].values[:5]

# Result columns; rows that are not processed below keep None.
cities_poly_df['co2_f_mean'] = None
cities_poly_df['co2_f_sum'] = None

with rasterio.open('/kaggle/working/global-gridded-model-of-carbon-footprints-ggmcf/GGMCF_v1.0.EPSG4326.tif') as src:
    # The CRS comparison does not depend on the account, so check it once up front.
    # A real exception type is required: raising a plain string is a TypeError in Python 3.
    if not (cities_poly_df['geometry'].crs == src.crs):
        raise ValueError("Error. Different CRS")

    for acc in accounts:
        is_account = cities_poly_df['Account Number'] == acc

        # filled=False returns a masked array: pixels outside the polygon (and
        # nodata pixels) are masked instead of being overwritten with the
        # nodata/0 fill value. With the default filled=True, those fill values
        # inside the cropped bounding box would be included in mean() and sum().
        carbon_data, carbon_transform = mask(
            src, cities_poly_df.loc[is_account, 'geometry'], crop=True, filled=False
        )

        # No valid pixel inside the polygon: leave the None placeholders in place
        if carbon_data.count() == 0:
            continue

        # Mean and sum of the carbon footprint over valid pixels inside the polygon
        cities_poly_df.loc[is_account, 'co2_f_mean'] = carbon_data.mean()
        cities_poly_df.loc[is_account, 'co2_f_sum'] = carbon_data.sum()

In [None]:
# Display the cities frame; 'co2_f_mean' / 'co2_f_sum' are filled only for the
# accounts processed in the loop above (the first five with a geometry).
cities_poly_df

In [None]:
# Save the cities frame, including the 'co2_f_mean' / 'co2_f_sum' columns,
# as a pickle in the Kaggle working directory.
cities_poly_df.to_pickle('/kaggle/working/CDP_cities_with_co2_footprint.pkl')