# The Impact of Agriculture on CO2-Emissions

## Setup

In [1]:
# For EDA and getting Data
import pandas as pd

# For getting Data into SQL
import requests
from configdef import *
from sqlalchemy import exc #SQLAlchemy provides a nice “Pythonic” way of interacting with databases.
from sqlalchemy import event

In [2]:
# Establish db connection

# Read the 'postgres' section of the connection settings via config().
# The result is unpacked with ** below, so it has to be a mapping of keyword
# arguments rather than a list.
# (config and pg_engine_connection are not imported by name; presumably they
# come from the `from configdef import *` line -- TODO confirm)
params = config(section='postgres')

# Use sql alchemy to create connection to database, which is contained within the engine object.
# This engine is what the to_sql() upload cells further down pass as their connection.
engine = pg_engine_connection(**params)

# Cleans up unnecessary database connections.
# NOTE(review): if this is a standard SQLAlchemy Engine, dispose() only closes the
# pooled connections; the engine itself stays usable and reconnects on demand -- confirm.
engine.dispose()

Postgres Database connection successful


## ifeu environment foodprint

In [3]:
df_foodprint = pd.read_table("data/ifeu_environment_foodprint.tsv")

FileNotFoundError: [Errno 2] No such file or directory: 'data/ifeu_environment_foodprint.tsv'

### Getting the data into SQL

In [None]:
df_foodprint.info()

In [None]:
# Target table for this upload: 'agri_ifeu_foodprint'.
# to_sql creates the table when it does not exist yet; with if_exists="append"
# the rows are added to whatever the table already holds, so running this cell
# again inserts the same rows a second time.
df_foodprint.to_sql(
    'agri_ifeu_foodprint',
    engine,
    index=False,           # do not write the DataFrame index as a column
    if_exists="append",
    method='multi',        # several rows per INSERT statement
    chunksize=5000,        # rows written per batch
)
print('done uploading')

## Recipes for foodprint

In [3]:
df_foodprint_recipes = pd.read_table("data/recipes_foodprint.tsv")

### Getting the data into SQL

In [4]:
df_foodprint_recipes.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 188 entries, 0 to 187
Data columns (total 8 columns):
 #   Column                      Non-Null Count  Dtype  
---  ------                      --------------  -----  
 0   food                        188 non-null    object 
 1   recipe_zwiebelkuchen        7 non-null      float64
 2   recipe_wiener_schnitzel     0 non-null      float64
 3   recipe_bratkartoffeln       4 non-null      float64
 4   recipe_spaghetti_bolognese  7 non-null      float64
 5   recipe_pizza                8 non-null      float64
 6   recipe_kohlroulade          0 non-null      float64
 7   recipe_wurststulle          3 non-null      float64
dtypes: float64(7), object(1)
memory usage: 11.9+ KB


In [5]:
# Target table for this upload: 'agri_foodprint_recipes'.
# to_sql creates the table when it does not exist yet; with if_exists="append"
# the rows are added to whatever the table already holds, so running this cell
# again inserts the same rows a second time.
df_foodprint_recipes.to_sql(
    'agri_foodprint_recipes',
    engine,
    index=False,           # do not write the DataFrame index as a column
    if_exists="append",
    method='multi',        # several rows per INSERT statement
    chunksize=5000,        # rows written per batch
)
print('done uploading')

done uploading


## FAO - Emissions

In [None]:
df_emissions = pd.read_csv("data/Emissions_Agriculture_Agriculture_total_E_All_Data_(Normalized).csv")

In [None]:
df_emissions.columns

### Clean up the headers

In [None]:
# Normalise the headers so they are easy to use as SQL column names:
# lower-case every name and turn spaces into underscores.
cols = [header.lower().replace(' ', '_') for header in df_emissions.columns]
# Write the cleaned names back onto the frame
df_emissions.columns = cols

### Getting the data into SQL

In [None]:
df_emissions.info()

In [None]:
# Target table for this upload: 'agri_fao_emissions'.
# to_sql creates the table when it does not exist yet; with if_exists="append"
# the rows are added to whatever the table already holds, so running this cell
# again inserts the same rows a second time.
df_emissions.to_sql(
    'agri_fao_emissions',
    engine,
    index=False,           # do not write the DataFrame index as a column
    if_exists="append",
    method='multi',        # several rows per INSERT statement
    chunksize=5000,        # rows written per batch
)
print('done uploading')

## FAO - Definitions and standards

In [None]:
df_def_and_stand = pd.read_csv("data/FAOSTAT_data_6-28-2021.csv")

In [None]:
df_def_and_stand.columns

### Clean up the headers

In [None]:
# Normalise the headers so they are easy to use as SQL column names:
# lower-case every name and turn spaces into underscores.
cols = [header.lower().replace(' ', '_') for header in df_def_and_stand.columns]
# Write the cleaned names back onto the frame
df_def_and_stand.columns = cols

### Getting the data into SQL

In [None]:
df_def_and_stand.info()

In [None]:
# Target table for this upload: 'agri_fao_definition'.
# NOTE(review): the table name is singular ('agri_fao_definition', not
# 'agri_fao_definitions') -- confirm this is the name downstream queries expect.
# to_sql creates the table when it does not exist yet; with if_exists="append"
# the rows are added to whatever the table already holds, so running this cell
# again inserts the same rows a second time.
df_def_and_stand.to_sql(
    'agri_fao_definition',
    engine,
    index=False,           # do not write the DataFrame index as a column
    if_exists="append",
    method='multi',        # several rows per INSERT statement
    chunksize=5000,        # rows written per batch
)
print('done uploading')

## FAO Food Consumption

In [4]:
df_consumption = pd.read_csv("data/fao_food_balance_sheets.csv")

In [5]:
df_consumption.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1439391 entries, 0 to 1439390
Data columns (total 11 columns):
 #   Column        Non-Null Count    Dtype  
---  ------        --------------    -----  
 0   Area Code     1439391 non-null  int64  
 1   Area          1439391 non-null  object 
 2   Item Code     1439391 non-null  int64  
 3   Item          1439391 non-null  object 
 4   Element Code  1439391 non-null  int64  
 5   Element       1439391 non-null  object 
 6   Year Code     1439391 non-null  int64  
 7   Year          1439391 non-null  int64  
 8   Unit          1439391 non-null  object 
 9   Value         1439391 non-null  float64
 10  Flag          1439391 non-null  object 
dtypes: float64(1), int64(5), object(5)
memory usage: 120.8+ MB


In [6]:
df_consumption.columns

Index(['Area Code', 'Area', 'Item Code', 'Item', 'Element Code', 'Element',
       'Year Code', 'Year', 'Unit', 'Value', 'Flag'],
      dtype='object')

In [7]:
df_consumption['Item'].unique()

array(['Population', 'Grand Total', 'Vegetal Products', 'Animal Products',
       'Cereals - Excluding Beer', 'Wheat and products',
       'Rice and products', 'Barley and products', 'Maize and products',
       'Rye and products', 'Oats', 'Millet and products',
       'Sorghum and products', 'Cereals, Other', 'Starchy Roots',
       'Cassava and products', 'Potatoes and products', 'Sweet potatoes',
       'Yams', 'Roots, Other', 'Sugar Crops', 'Sugar cane', 'Sugar beet',
       'Sugar & Sweeteners', 'Sugar non-centrifugal',
       'Sugar (Raw Equivalent)', 'Sweeteners, Other', 'Honey', 'Pulses',
       'Beans', 'Peas', 'Pulses, Other and products', 'Treenuts',
       'Nuts and products', 'Oilcrops', 'Soyabeans', 'Groundnuts',
       'Sunflower seed', 'Rape and Mustardseed', 'Cottonseed',
       'Coconuts - Incl Copra', 'Sesame seed', 'Palm kernels',
       'Olives (including preserved)', 'Oilcrops, Other',
       'Vegetable Oils', 'Soyabean Oil', 'Groundnut Oil',
       'Sunflowerseed

In [8]:
df_consumption['Item Code'].unique()

array([2501, 2901, 2903, 2941, 2905, 2511, 2807, 2513, 2514, 2515, 2516,
       2517, 2518, 2520, 2907, 2532, 2531, 2533, 2535, 2534, 2908, 2536,
       2537, 2909, 2541, 2542, 2543, 2745, 2911, 2546, 2547, 2549, 2912,
       2551, 2913, 2555, 2552, 2557, 2558, 2559, 2560, 2561, 2562, 2563,
       2570, 2914, 2571, 2572, 2573, 2574, 2575, 2576, 2577, 2578, 2579,
       2580, 2581, 2582, 2586, 2918, 2601, 2602, 2605, 2919, 2611, 2612,
       2613, 2614, 2615, 2616, 2617, 2618, 2619, 2620, 2625, 2922, 2630,
       2633, 2635, 2923, 2640, 2641, 2642, 2645, 2924, 2655, 2656, 2657,
       2658, 2659, 2943, 2731, 2732, 2733, 2734, 2735, 2945, 2736, 2946,
       2740, 2743, 2737, 2949, 2744, 2948, 2848, 2960, 2761, 2928, 2680,
       2899, 2781, 2782, 2762, 2763, 2764, 2765, 2766, 2767, 2961, 2769,
       2775, 2768])

### Cleaning up the headers

In [10]:
# Normalise the headers so they are easy to use as SQL column names:
# lower-case every name and turn spaces into underscores
# (e.g. 'Area Code' becomes 'area_code').
cols = [header.lower().replace(' ', '_') for header in df_consumption.columns]
# Write the cleaned names back onto the frame
df_consumption.columns = cols

### Uploading the data to the database

In [None]:
# Target table for this upload: 'agri_fao_consumption'.
# to_sql creates the table when it does not exist yet; with if_exists="append"
# the rows are added to whatever the table already holds, so running this cell
# again inserts the same rows a second time.
df_consumption.to_sql(
    'agri_fao_consumption',
    engine,
    index=False,           # do not write the DataFrame index as a column
    if_exists="append",
    method='multi',        # several rows per INSERT statement
    chunksize=5000,        # rows written per batch
)
print('done uploading')

## UN World Population

In [14]:
df_population = pd.read_csv("data/un_world_population.csv")

In [15]:
df_population.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 280932 entries, 0 to 280931
Data columns (total 10 columns):
 #   Column      Non-Null Count   Dtype  
---  ------      --------------   -----  
 0   LocID       280932 non-null  int64  
 1   Location    280932 non-null  object 
 2   VarID       280932 non-null  int64  
 3   Variant     280932 non-null  object 
 4   Time        280932 non-null  int64  
 5   MidPeriod   280932 non-null  float64
 6   PopMale     250876 non-null  float64
 7   PopFemale   250876 non-null  float64
 8   PopTotal    280932 non-null  float64
 9   PopDensity  280932 non-null  float64
dtypes: float64(5), int64(3), object(2)
memory usage: 21.4+ MB


### Cleaning up the headers

In [16]:
# Normalise the headers so they are easy to use as SQL column names:
# lower-case every name (e.g. 'PopTotal' becomes 'poptotal') and turn any
# spaces into underscores.
cols = [header.lower().replace(' ', '_') for header in df_population.columns]
# Write the cleaned names back onto the frame
df_population.columns = cols

### Uploading the data to the database

In [None]:
# Push the UN population frame to its own table, 'agri_un_population'.
# It must not be written to 'agri_fao_consumption': that table receives the FAO
# food balance sheets, whose columns (area_code, item, element, ...) are different
# from the population columns (locid, location, poptotal, ...).
# NOTE(review): 'agri_un_population' follows the agri_<source>_<topic> pattern of
# the other tables in this notebook -- confirm the name before running.
# to_sql creates the table when it does not exist yet; with if_exists="append"
# the rows are added to whatever the table already holds, so running this cell
# again inserts the same rows a second time.
df_population.to_sql(
    'agri_un_population',
    engine,
    index=False,           # do not write the DataFrame index as a column
    if_exists="append",
    method='multi',        # several rows per INSERT statement
    chunksize=5000,        # rows written per batch
)
print('done uploading')