# The Impact of Agriculture on CO2-Emissions

## Setup

In [14]:
# For EDA and getting Data
import pandas as pd

# For getting Data into SQL
import requests
from configdef import *
from sqlalchemy import exc #SQLAlchemy provides a nice “Pythonic” way of interacting with databases.
from sqlalchemy import event

In [15]:
# Establish db connection

# Read the connection settings from the 'postgres' section of the config.
# They are unpacked as keyword arguments below, so `params` must be a mapping (not a list).
# `config` is presumably provided by the star-import from configdef - TODO confirm.
params = config(section='postgres')

# Create the SQLAlchemy engine object that the later to_sql cells use to reach the database.
# `pg_engine_connection` presumably also comes from configdef - TODO confirm.
engine = pg_engine_connection(**params)

# Release the connections currently held by the engine.
# NOTE(review): `engine` is still passed to the upload cells further down, so this relies on
# SQLAlchemy opening fresh connections on demand after dispose() - confirm.
engine.dispose()

Postgres Database connection successful


## ifeu environment foodprint

In [33]:
# Load the ifeu foodprint data; read_table treats the file as tab-separated by default.
df_foodprint = pd.read_table("data/ifeu_environment_foodprint.tsv")

### Getting the data into SQL

In [34]:
# Check the dtype and non-null count of every column before uploading.
df_foodprint.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 187 entries, 0 to 186
Data columns (total 8 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   food                      187 non-null    object 
 1   co2_footprint             186 non-null    float64
 2   category                  187 non-null    object 
 3   phosphate_rock_footprint  35 non-null     float64
 4   land_use_footprint        35 non-null     float64
 5   water_footprint           35 non-null     object 
 6   energy_demand             34 non-null     float64
 7   comments                  6 non-null      object 
dtypes: float64(4), object(4)
memory usage: 11.8+ KB


In [37]:
# Push the foodprint data to the 'agri_ifeu_foodprint' table.
# If the table doesn't exist yet, it will be created.
# With if_exists="append" the rows are added to whatever the table already holds, so
# re-running this cell inserts the same rows again (unless table constraints reject them).
# method='multi' sends several rows per INSERT statement; chunksize caps the rows per batch.

df_foodprint.to_sql(
    'agri_ifeu_foodprint',
    engine,
    index=False,
    if_exists="append",
    method='multi',
    chunksize=5000,
)
print('done uploading')

done uploading


## FAO - Emissions

In [22]:
# Load the FAO agriculture emissions export (the "Normalized" CSV file).
df_emissions = pd.read_csv("data/Emissions_Agriculture_Agriculture_total_E_All_Data_(Normalized).csv")

In [23]:
# Show the raw column names; they contain spaces and capital letters.
df_emissions.columns

Index(['Area Code', 'Area', 'Item Code', 'Item', 'Element Code', 'Element',
       'Year Code', 'Year', 'Unit', 'Value', 'Flag', 'Note'],
      dtype='object')

### Clean up the headers

In [24]:
# Normalise the header names: lower-case each one and swap spaces for
# underscores (e.g. 'Area Code' -> 'area_code'), then write them back.
cols = [name.lower().replace(' ', '_') for name in df_emissions.columns]
df_emissions.columns = cols

### Getting the data into SQL

In [25]:
# Check the dtype and non-null count of every column before uploading.
df_emissions.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 523661 entries, 0 to 523660
Data columns (total 12 columns):
 #   Column        Non-Null Count   Dtype  
---  ------        --------------   -----  
 0   area_code     523661 non-null  int64  
 1   area          523661 non-null  object 
 2   item_code     523661 non-null  int64  
 3   item          523661 non-null  object 
 4   element_code  523661 non-null  int64  
 5   element       523661 non-null  object 
 6   year_code     523661 non-null  int64  
 7   year          523661 non-null  int64  
 8   unit          523661 non-null  object 
 9   value         523661 non-null  float64
 10  flag          523661 non-null  object 
 11  note          0 non-null       float64
dtypes: float64(2), int64(5), object(5)
memory usage: 47.9+ MB


In [26]:
# Push the emissions data to the 'agri_fao_emissions' table.
# If the table doesn't exist yet, it will be created.
# With if_exists="append" the rows are added to whatever the table already holds, so
# re-running this cell inserts the same rows again (unless table constraints reject them).
# method='multi' sends several rows per INSERT statement; chunksize caps the rows per batch.

df_emissions.to_sql(
    'agri_fao_emissions',
    engine,
    index=False,
    if_exists="append",
    method='multi',
    chunksize=5000,
)
print('done uploading')

done uploading


## FAO - Definitions and standards

In [7]:
# Load the FAOSTAT definitions-and-standards export.
df_def_and_stand = pd.read_csv("data/FAOSTAT_data_6-28-2021.csv")

In [9]:
# Show the raw column names; they contain spaces and capital letters.
df_def_and_stand.columns

Index(['Domain Code', 'Domain', 'Item Code', 'Item', 'Description', 'HS Code',
       'HS07 Code', 'HS12 Code', 'CPC Code'],
      dtype='object')

### Clean up the headers

In [10]:
# Bring the headers into snake_case form: everything lower-case, with each
# space turned into an underscore (e.g. 'Domain Code' -> 'domain_code').
cols = [header.lower().replace(' ', '_') for header in df_def_and_stand.columns]
df_def_and_stand.columns = cols

### Getting the data into SQL

In [27]:
# Check the dtype and non-null count of every column before uploading.
df_def_and_stand.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4348 entries, 0 to 4347
Data columns (total 9 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   domain_code  4348 non-null   object 
 1   domain       4348 non-null   object 
 2   item_code    4348 non-null   int64  
 3   item         4348 non-null   object 
 4   description  3558 non-null   object 
 5   hs_code      11 non-null     float64
 6   hs07_code    2917 non-null   object 
 7   hs12_code    2917 non-null   object 
 8   cpc_code     3115 non-null   object 
dtypes: float64(1), int64(1), object(7)
memory usage: 305.8+ KB


In [31]:
# Push the definitions data to the 'agri_fao_definition' table.
# NOTE(review): the table name is singular here; confirm it matches what downstream queries expect.
# If the table doesn't exist yet, it will be created.
# With if_exists="append" the rows are added to whatever the table already holds, so
# re-running this cell inserts the same rows again (unless table constraints reject them).
# method='multi' sends several rows per INSERT statement; chunksize caps the rows per batch.

df_def_and_stand.to_sql(
    'agri_fao_definition',
    engine,
    index=False,
    if_exists="append",
    method='multi',
    chunksize=5000,
)
print('done uploading')

done uploading
