In [2]:
# Install the necessary packages and read the proper items
! pip install kaggle
! pip install pyspark
! apt-get install p7zip-full

Collecting pyspark
  Downloading pyspark-3.4.1.tar.gz (310.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m310.8/310.8 MB[0m [31m4.6 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: pyspark
  Building wheel for pyspark (setup.py) ... [?25l[?25hdone
  Created wheel for pyspark: filename=pyspark-3.4.1-py2.py3-none-any.whl size=311285388 sha256=cee7a2045525bf4c28a6a78e4a2ad86a4794dc1d7dd7abb8bb4c1fa456362865
  Stored in directory: /root/.cache/pip/wheels/0d/77/a3/ff2f74cc9ab41f8f594dabf0579c2a7c6de920d584206e0834
Successfully built pyspark
Installing collected packages: pyspark
Successfully installed pyspark-3.4.1
Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
p7zip-full is already the newest version (16.02+dfsg-8).
0 upgraded, 0 newly installed, 0 to remove and 16 not upgraded.


In [3]:
! mkdir ~/.kaggle
! cp kaggle.json ~/.kaggle/
! chmod 600 ~/.kaggle/kaggle.json

In [4]:
# Download the dataset
!kaggle datasets download michaelbryantds/greenhouse-gas-emissions-dataset
#Unzip greenhouse-gas-emissions-dataset.zip
!7za x greenhouse-gas-emissions-dataset.zip


Downloading greenhouse-gas-emissions-dataset.zip to /content
100% 1.88G/1.88G [01:17<00:00, 30.4MB/s]
100% 1.88G/1.88G [01:17<00:00, 26.2MB/s]

7-Zip (a) [64] 16.02 : Copyright (c) 1999-2016 Igor Pavlov : 2016-05-21
p7zip Version 16.02 (locale=en_US.UTF-8,Utf16=on,HugeFiles=on,64 bits,2 CPUs Intel(R) Xeon(R) CPU @ 2.20GHz (406F0),ASM,AES-NI)

Scanning the drive for archives:
  0M Scan         1 file, 2023032318 bytes (1930 MiB)

Extracting archive: greenhouse-gas-emissions-dataset.zip
--
Path = greenhouse-gas-emissions-dataset.zip
Type = zip
Physical Size = 2023032318

  0%      0% - agriculture/asset_cropland-fires_emissions.csv                                                       1% - agriculture/asset_cropland-fires_emissions.csv                                                     

In [5]:
# Import the libraries
from pyspark.sql import SparkSession
from pyspark.sql import SQLContext
from pyspark import SparkContext, SparkConf
import pyspark as spark
#from pyspark.sql.functions import isnull, col, lit
#from pyspark.sql.functions import regexp_replace
from pyspark.sql.functions import *
import os

spark = SparkSession.builder.getOrCreate()
categories = ['agriculture', 'buildings', 'fluorinated_gases', 'fossil_fuel_operations', 'manufacturing', 'mineral_extraction', 'power', 'waste']

dataset = {'agriculture': [], 'buildings': [], 'fluorinated_gases': [], 'fossil_fuel_operations': [], 'manufacturing': [], 'mineral_extraction': [],
           'power': [], 'waste': []}
name_dataset = {'agriculture': [], 'buildings': [], 'fluorinated_gases': [], 'fossil_fuel_operations': [], 'manufacturing': [], 'mineral_extraction': [],
           'power': [], 'waste': []}

def show_schemas(df):
  """Shows all the schemas in the path."""

  df.printSchema()

def get_uniquness(df, column: str):
  """Show all of the unique values in a column."""

  try:
    return df.select(column).distinct()
  except:
    print('No Column Present')

In [6]:
# Create the dataset
for item in categories:
  print(f'=========================================================')
  print(f'{item} is processing')
  print(f'=========================================================')
  for files in os.listdir(item):
    print(f'{files} is processing...')
    path = f'{item}/{files}'
    df = spark.read.csv(path, header = True, inferSchema = True)
    data_tuple = (files, df)
    dataset[item].append(data_tuple)
print(dataset)

agriculture is processing
country_manure-management_emissions.csv is processing...
country_cropland-fires_emissions.csv is processing...
asset_cropland-fires_emissions.csv is processing...
asset_enteric-fermentation_emissions.csv is processing...
country_rice-cultivation_emissions.csv is processing...
asset_synthetic-fertilizer-application-top500_emissions.csv is processing...
country_synthetic-fertilizer-application_emissions.csv is processing...
country_other-agricultural-soil-emissions_emissions.csv is processing...
asset_enteric-fermentation_ownership.csv is processing...
asset_manure-management_emissions.csv is processing...
asset_rice-cultivation-top500_emissions.csv is processing...
country_enteric-fermentation_emissions.csv is processing...
buildings is processing
country_residential-and-commercial-onsite-fuel-usage_emissions.csv is processing...
country_other-onsite-fuel-usage_emissions.csv is processing...
fluorinated_gases is processing
country_fluorinated-gases_emissions.cs

In [7]:
# Print the Schemas for Dataset (Data tuple = (name, dataframe) )
for category in dataset:
  print(f'=========================================================')
  print(f'{category} is processing')
  print(f'=========================================================')
  for data_tuple in dataset[category]:
    print(data_tuple)
    names, df = data_tuple
    print(f'Schema for: {names}')
    show_schemas(df)
    print(f'=========================================================')

agriculture is processing
('country_manure-management_emissions.csv', DataFrame[iso3_country: string, start_time: timestamp, end_time: timestamp, original_inventory_sector: string, gas: string, emissions_quantity: double, emissions_quantity_units: string, temporal_granularity: string, created_date: timestamp, modified_date: timestamp])
Schema for: country_manure-management_emissions.csv
root
 |-- iso3_country: string (nullable = true)
 |-- start_time: timestamp (nullable = true)
 |-- end_time: timestamp (nullable = true)
 |-- original_inventory_sector: string (nullable = true)
 |-- gas: string (nullable = true)
 |-- emissions_quantity: double (nullable = true)
 |-- emissions_quantity_units: string (nullable = true)
 |-- temporal_granularity: string (nullable = true)
 |-- created_date: timestamp (nullable = true)
 |-- modified_date: timestamp (nullable = true)

('country_cropland-fires_emissions.csv', DataFrame[iso3_country: string, start_time: timestamp, end_time: timestamp, original_i

In [8]:
# Filter Country to South east asian countries

country_list = ['IDN', 'MYR', 'BRN', 'KHM', 'VNM', 'THA', 'MMR', 'SGP', 'PHL', 'TLS', 'LAO']
for category in dataset:
  print(f'=========================================================')
  print(f'{category} is processing')
  print(f'=========================================================')

  removal= []

  for data_tuple in dataset[category]:
    names, df = data_tuple
    try:
      filtered_dataset = df.filter(col('iso3_country').isin(country_list))
      new_tuple = (names, filtered_dataset)
      index = dataset[category].index(data_tuple)
      dataset[category][index] = new_tuple
    except:
      print(f'Error for the dataset: {names}...')
      removal.append(data_tuple)
      continue

  for tuples in removal:
    names, df = tuples
    print(f'removing: {names}')
    dataset[category].remove(tuples)

agriculture is processing
Error for the dataset: asset_enteric-fermentation_ownership.csv...
removing: asset_enteric-fermentation_ownership.csv
buildings is processing
fluorinated_gases is processing
fossil_fuel_operations is processing
Error for the dataset: asset_oil-and-gas-refining_ownership.csv...
Error for the dataset: asset_oil-and-gas-production-and-transport_ownership.csv...
removing: asset_oil-and-gas-refining_ownership.csv
removing: asset_oil-and-gas-production-and-transport_ownership.csv
manufacturing is processing
Error for the dataset: asset_cement_ownership.csv...
Error for the dataset: asset_steel_ownership.csv...
removing: asset_cement_ownership.csv
removing: asset_steel_ownership.csv
mineral_extraction is processing
power is processing
Error for the dataset: asset_electricity-generation_ownership.csv...
removing: asset_electricity-generation_ownership.csv
waste is processing
Error for the dataset: asset_solid-waste-disposal_ownership.csv...
removing: asset_solid-waste

In [9]:
# Checking and removing values without any countries

for category in dataset:
  print(f'=========================================================')
  print(f'{category} is processing')
  print(f'=========================================================')

  removal = []

  for data_tuple in dataset[category]:
    name, df = data_tuple

    try:
      country = get_uniquness(df, 'iso3_country')
      if country.count() == 0:
        print('Column is empty..., Removing')
        removal.append(data_tuple)
      else:
        country.show()
    except:
      print(f'Countries not shown for {name}')
      removal.append(data_tuple)

  for item in removal:
    names, df = item
    print(f'Removing {names}')
    dataset[category].remove(item)


agriculture is processing
+------------+
|iso3_country|
+------------+
|         BRN|
|         THA|
|         TLS|
|         VNM|
|         SGP|
|         MMR|
|         KHM|
|         IDN|
|         LAO|
|         PHL|
+------------+

+------------+
|iso3_country|
+------------+
|         BRN|
|         THA|
|         TLS|
|         VNM|
|         SGP|
|         KHM|
|         MMR|
|         IDN|
|         LAO|
|         PHL|
+------------+

+------------+
|iso3_country|
+------------+
|         BRN|
|         THA|
|         TLS|
|         VNM|
|         MMR|
|         KHM|
|         IDN|
|         LAO|
|         PHL|
|         SGP|
+------------+

Column is empty..., Removing
+------------+
|iso3_country|
+------------+
|         BRN|
|         THA|
|         TLS|
|         VNM|
|         SGP|
|         KHM|
|         MMR|
|         IDN|
|         LAO|
|         PHL|
+------------+

+------------+
|iso3_country|
+------------+
|         VNM|
|         MMR|
|         IDN|
+----------

In [10]:
for category in dataset:
  print(f'=========================================================')
  print(f'{category} is processing')
  print(f'=========================================================')
  for data_tuple in dataset[category]:
    names, df = data_tuple
    print(f'Schema for: {names}')
    show_schemas(df)
    print(f'=========================================================')

agriculture is processing
Schema for: country_manure-management_emissions.csv
root
 |-- iso3_country: string (nullable = true)
 |-- start_time: timestamp (nullable = true)
 |-- end_time: timestamp (nullable = true)
 |-- original_inventory_sector: string (nullable = true)
 |-- gas: string (nullable = true)
 |-- emissions_quantity: double (nullable = true)
 |-- emissions_quantity_units: string (nullable = true)
 |-- temporal_granularity: string (nullable = true)
 |-- created_date: timestamp (nullable = true)
 |-- modified_date: timestamp (nullable = true)

Schema for: country_cropland-fires_emissions.csv
root
 |-- iso3_country: string (nullable = true)
 |-- start_time: timestamp (nullable = true)
 |-- end_time: timestamp (nullable = true)
 |-- original_inventory_sector: string (nullable = true)
 |-- gas: string (nullable = true)
 |-- emissions_quantity: double (nullable = true)
 |-- emissions_quantity_units: string (nullable = true)
 |-- temporal_granularity: string (nullable = true)
 |-

In [11]:
# Modify the columns.

columns = ['start_time', 'end_time', 'gas', 'emissions_quantity', 'iso3_country', 'temporal_granularity']
for category in dataset:
  print(f'=========================================================')
  print(f'{category} is processing')
  print(f'=========================================================')

  removal = []
  for data_tuple in dataset[category]:
    names, df = data_tuple
    try:
      filtered_dataset = df.select(*columns)
      index = dataset[category].index(data_tuple)
      dataset[category][index] = (names, filtered_dataset)
    except:
      print(f'Error for the dataset: {names}')
      removal.append(data_tuple)
      continue

  for item in removal:
    print(f'Removing {names}')
    dataset[category].remove(item)

agriculture is processing
buildings is processing
fluorinated_gases is processing
fossil_fuel_operations is processing
manufacturing is processing
mineral_extraction is processing
power is processing
waste is processing


In [12]:
# Add a column on each dataset showing which industry (agriculture, buildings etc.)

columns = ['start_time', 'end_time', 'gas', 'emissions_quantity', 'iso3_country', 'temporal_granularity']
for category in dataset:
  print(f'=========================================================')
  print(f'{category} is processing')
  print(f'=========================================================')

  removal = []
  for data_tuple in dataset[category]:
    names, df = data_tuple
    index = dataset[category].index(data_tuple)
    df = df.withColumn('industry', lit(category))
    new_tuple = (names, df)
    dataset[category][index] = new_tuple


agriculture is processing
buildings is processing
fluorinated_gases is processing
fossil_fuel_operations is processing
manufacturing is processing
mineral_extraction is processing
power is processing
waste is processing


In [13]:
for category in dataset:
  print(f'=========================================================')
  print(f'{category} is processing')
  print(f'=========================================================')
  for data_tuple in dataset[category]:
    names, df = data_tuple
    print(f'Schema for: {names}')
    show_schemas(df)
    print(f'=========================================================')

agriculture is processing
Schema for: country_manure-management_emissions.csv
root
 |-- start_time: timestamp (nullable = true)
 |-- end_time: timestamp (nullable = true)
 |-- gas: string (nullable = true)
 |-- emissions_quantity: double (nullable = true)
 |-- iso3_country: string (nullable = true)
 |-- temporal_granularity: string (nullable = true)
 |-- industry: string (nullable = false)

Schema for: country_cropland-fires_emissions.csv
root
 |-- start_time: timestamp (nullable = true)
 |-- end_time: timestamp (nullable = true)
 |-- gas: string (nullable = true)
 |-- emissions_quantity: double (nullable = true)
 |-- iso3_country: string (nullable = true)
 |-- temporal_granularity: string (nullable = true)
 |-- industry: string (nullable = false)

Schema for: asset_cropland-fires_emissions.csv
root
 |-- start_time: timestamp (nullable = true)
 |-- end_time: timestamp (nullable = true)
 |-- gas: string (nullable = true)
 |-- emissions_quantity: double (nullable = true)
 |-- iso3_countr

In [None]:
# Print the gasses (Month, Annual, Null or None at all)
"""
for category in dataset:
  print(f'=========================================================')
  print(f'{category} is processing')
  print(f'=========================================================')
  for data_tuple in dataset[category]:
    names, df = data_tuple
    print(f'Gas for: {names}')
    gran = get_uniquness(df, 'gas')
    if gran is None:
      print(f'Error for {name}')
    else:
      gran.show()
    print(f'=========================================================')
    """

agriculture is processing
Gas for: country_enteric-fermentation_emissions.csv
+----------+
|       gas|
+----------+
| co2e_20yr|
|co2e_100yr|
|       ch4|
|       co2|
|       n2o|
+----------+

Gas for: country_synthetic-fertilizer-application_emissions.csv
+----------+
|       gas|
+----------+
| co2e_20yr|
|co2e_100yr|
|       ch4|
|       co2|
|       n2o|
+----------+

Gas for: asset_rice-cultivation-top500_emissions.csv
+----------+
|       gas|
+----------+
| co2e_20yr|
|co2e_100yr|
|       ch4|
|       co2|
|       n2o|
+----------+

Gas for: country_other-agricultural-soil-emissions_emissions.csv
+----------+
|       gas|
+----------+
| co2e_20yr|
|co2e_100yr|
|       ch4|
|       co2|
|       n2o|
+----------+

Gas for: asset_synthetic-fertilizer-application-top500_emissions.csv
+----------+
|       gas|
+----------+
| co2e_20yr|
|co2e_100yr|
|       ch4|
|       co2|
|       n2o|
+----------+

Gas for: country_manure-management_emissions.csv
+----------+
|       gas|
+-----

In [14]:
# Print the temporal granularities (Month, Annual, Null or None at all)
for category in dataset:
  print(f'=========================================================')
  print(f'{category} is processing')
  print(f'=========================================================')
  for data_tuple in dataset[category]:
    names, df = data_tuple
    print(f'Printing the data for {names}')
    df.show(20)
    print(f'=========================================================')

agriculture is processing
Printing the data for country_manure-management_emissions.csv
+-------------------+-------------------+----------+--------------------+------------+--------------------+-----------+
|         start_time|           end_time|       gas|  emissions_quantity|iso3_country|temporal_granularity|   industry|
+-------------------+-------------------+----------+--------------------+------------+--------------------+-----------+
|2020-01-01 00:00:00|2020-12-31 00:00:00|       co2|                 0.0|         MMR|                null|agriculture|
|2020-01-01 00:00:00|2020-12-31 00:00:00|       co2|                 0.0|         PHL|                null|agriculture|
|2020-01-01 00:00:00|2020-12-31 00:00:00|       co2|                 0.0|         SGP|                null|agriculture|
|2020-01-01 00:00:00|2020-12-31 00:00:00|       co2|                 0.0|         THA|                null|agriculture|
|2020-01-01 00:00:00|2020-12-31 00:00:00|       co2|                 0.0

In [15]:
# Replace the nulls, co2e things
for category in dataset:
  print(f'=========================================================')
  print(f'{category} is processing')
  print(f'=========================================================')
  for data_tuple in dataset[category]:
    names, df = data_tuple
    index = dataset[category].index(data_tuple)
    df = df.na.fill(value = 0, subset = ['emissions_quantity'])
    df = df.na.fill(value = 'annual', subset = ['temporal_granularity'])
    #df = df.withColumn('gas', regexp_replace('gas', 'co2e_100yr', 'co2'))
    #df = df.withColumn('gas', regexp_replace('gas', 'co2e_20yr', 'co2'))
    dataset[category][index] = (names, df)

agriculture is processing
buildings is processing
fluorinated_gases is processing
fossil_fuel_operations is processing
manufacturing is processing
mineral_extraction is processing
power is processing
waste is processing


In [16]:
# Create Rate Column
for category in dataset:
  print(f'=========================================================')
  print(f'{category} is processing')
  print(f'=========================================================')
  for data_tuple in dataset[category]:
    names, df = data_tuple
    index = dataset[category].index(data_tuple)

    df = df.withColumn('rate',
                       when(df.temporal_granularity == 'month', df.emissions_quantity / (datediff(df.end_time, df.start_time))).
                       when(df.temporal_granularity == 'annual', df.emissions_quantity / 365))

    new_tuple = (names, df)
    dataset[category][index] = new_tuple

agriculture is processing
buildings is processing
fluorinated_gases is processing
fossil_fuel_operations is processing
manufacturing is processing
mineral_extraction is processing
power is processing
waste is processing


In [17]:
for category in dataset:
  print(f'=========================================================')
  print(f'{category} is processing')
  print(f'=========================================================')
  for data_tuple in dataset[category]:
    names, df = data_tuple
    print(f'Printing the data for {names}')
    df.show(20)
    print(f'=========================================================')

agriculture is processing
Printing the data for country_manure-management_emissions.csv
+-------------------+-------------------+----------+--------------------+------------+--------------------+-----------+-------------------+
|         start_time|           end_time|       gas|  emissions_quantity|iso3_country|temporal_granularity|   industry|               rate|
+-------------------+-------------------+----------+--------------------+------------+--------------------+-----------+-------------------+
|2020-01-01 00:00:00|2020-12-31 00:00:00|       co2|                 0.0|         MMR|              annual|agriculture|                0.0|
|2020-01-01 00:00:00|2020-12-31 00:00:00|       co2|                 0.0|         PHL|              annual|agriculture|                0.0|
|2020-01-01 00:00:00|2020-12-31 00:00:00|       co2|                 0.0|         SGP|              annual|agriculture|                0.0|
|2020-01-01 00:00:00|2020-12-31 00:00:00|       co2|                 0.0

In [19]:
# Append all data into one dataframe

d_name, dataframe = dataset['agriculture'][0]

for category in dataset:
  print(f'=========================================================')
  print(f'{category} is processing')
  print(f'=========================================================')
  for data_tuple in dataset[category]:
    name, df = data_tuple
    if name == d_name:
      continue
    else:
      print(f'Appending the data for {name}')
      dataframe = dataframe.union(df)

#print((dataframe.count(), len(dataframe.columns)))

agriculture is processing
Appending the data for country_cropland-fires_emissions.csv
Appending the data for asset_cropland-fires_emissions.csv
Appending the data for country_rice-cultivation_emissions.csv
Appending the data for asset_synthetic-fertilizer-application-top500_emissions.csv
Appending the data for country_synthetic-fertilizer-application_emissions.csv
Appending the data for country_other-agricultural-soil-emissions_emissions.csv
Appending the data for asset_rice-cultivation-top500_emissions.csv
Appending the data for country_enteric-fermentation_emissions.csv
buildings is processing
Appending the data for country_residential-and-commercial-onsite-fuel-usage_emissions.csv
Appending the data for country_other-onsite-fuel-usage_emissions.csv
fluorinated_gases is processing
Appending the data for country_fluorinated-gases_emissions.csv
fossil_fuel_operations is processing
Appending the data for country_other-fossil-fuel-operations_emissions.csv
Appending the data for country_c

In [None]:
# Find the descriptions of the rate and the emissions quantity
# Note: This is for Indonesia only since this is the focus of the project.

description = dataframe.filter(col('iso3_country').isin(['IDN']))
co2 = description.filter(col('gas').isin(['co2'])).describe(['emissions_quantity', 'rate'])
co2e_100yr = description.filter(col('gas').isin(['co2e_100yr'])).describe(['emissions_quantity', 'rate'])
co2e_20yr = description.filter(col('gas').isin(['co2e_20yr'])).describe(['emissions_quantity', 'rate'])
n2o = description.filter(col('gas').isin(['n2o'])).describe(['emissions_quantity', 'rate'])
ch4 = description.filter(col('gas').isin(['ch4'])).describe(['emissions_quantity', 'rate'])

gas_list = [('co2', co2), ('co2e_100yr', co2e_100yr),
            ('co2e_20yr', co2e_20yr), ('n2o', n2o), ('ch4', ch4)]

for gas_data in gas_list:
  name, gas = gas_data
  print(f'=============================={name}==============================')
  gas.show()
  print(f'=================================================================')



In [20]:
# Create csv files from Pyspark
dataframe.write.options(header='True', delimiter=',').csv('environment.csv')

In [32]:
import os
import pandas as pd

final_dataframe = []

for item in os.listdir('environment.csv'):
  try:
    print(item)
    p = pd.DataFrame(pd.read_csv(f'environment.csv/{item}'))
    print(item)
    final_dataframe.append(p)
  except:
    continue

#final_dataframe.to_csv('environment_processed.csv')

.part-00288-8badf1e4-6569-4213-9378-5d6aab1a3b88-c000.csv.crc
.part-00370-8badf1e4-6569-4213-9378-5d6aab1a3b88-c000.csv.crc
part-00399-8badf1e4-6569-4213-9378-5d6aab1a3b88-c000.csv
part-00399-8badf1e4-6569-4213-9378-5d6aab1a3b88-c000.csv
.part-00347-8badf1e4-6569-4213-9378-5d6aab1a3b88-c000.csv.crc
part-00246-8badf1e4-6569-4213-9378-5d6aab1a3b88-c000.csv
part-00246-8badf1e4-6569-4213-9378-5d6aab1a3b88-c000.csv
part-00328-8badf1e4-6569-4213-9378-5d6aab1a3b88-c000.csv
part-00328-8badf1e4-6569-4213-9378-5d6aab1a3b88-c000.csv
.part-00441-8badf1e4-6569-4213-9378-5d6aab1a3b88-c000.csv.crc
part-00125-8badf1e4-6569-4213-9378-5d6aab1a3b88-c000.csv
part-00125-8badf1e4-6569-4213-9378-5d6aab1a3b88-c000.csv
part-00413-8badf1e4-6569-4213-9378-5d6aab1a3b88-c000.csv
part-00413-8badf1e4-6569-4213-9378-5d6aab1a3b88-c000.csv
.part-00335-8badf1e4-6569-4213-9378-5d6aab1a3b88-c000.csv.crc
part-00009-8badf1e4-6569-4213-9378-5d6aab1a3b88-c000.csv
part-00009-8badf1e4-6569-4213-9378-5d6aab1a3b88-c000.csv
part-0

In [42]:
# Concat everything
p = pd.concat(final_dataframe, ignore_index = True)
p.head()

Unnamed: 0,start_time,end_time,gas,emissions_quantity,iso3_country,temporal_granularity,industry,rate,crc
0,2015-01-01T00:00:00.000Z,2015-12-31T00:00:00.000Z,co2,0.0,VNM,annual,agriculture,0.0,
1,2015-01-01T00:00:00.000Z,2015-12-31T00:00:00.000Z,ch4,14.82,VNM,annual,agriculture,0.040603,
2,2015-01-01T00:00:00.000Z,2015-12-31T00:00:00.000Z,n2o,0.0,VNM,annual,agriculture,0.0,
3,2015-01-01T00:00:00.000Z,2015-12-31T00:00:00.000Z,co2e_100yr,414.96,VNM,annual,agriculture,1.136877,
4,2015-01-01T00:00:00.000Z,2015-12-31T00:00:00.000Z,co2e_20yr,1244.88,VNM,annual,agriculture,3.41063,


In [44]:
# Create csv and then download
p.to_csv('environment_processed.csv')

from google.colab import files
files.download('environment_processed.csv')

In [45]:
from google.colab import files
files.download('environment_processed.csv')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>