In [107]:
# loading required packages
import os

import pandas as pd
from sqlalchemy import create_engine

In [None]:
# creating a connection to the database
# The SQLAlchemy connection URL embeds the database password, so it is read from
# the FAOSTAT_DB_URL environment variable instead of being stored in the notebook.
# Expected form:
#   postgresql+psycopg2://<user>:<password>@<host>:<port>/faostat_ms_dsci_project
db_url = os.environ.get('FAOSTAT_DB_URL')
if not db_url:
    # fail early with an actionable message rather than an opaque driver error
    raise RuntimeError(
        "Set the FAOSTAT_DB_URL environment variable to the SQLAlchemy "
        "connection URL before running this cell."
    )
engine = create_engine(db_url)

# list of table names to be read
table_names = [
    'producer_prices', 'production_value', 'production_index', 'crop_production',
    'landuse', 'trade', 'trade_indicators', 'trade_indices', 'economic_indicators', 
    'foreign_investment', 'government_investment', 'credit_to_agri', 'emission_indicators', 
    'employment_indicators', 'annual_population', 'agri_trade_indicators', 
    'nitrogen_fertilizer', 'phosphorus_fertilizer', 'potassium_fertilizer', 
    'cropland_nutrient_balance', 'pesticide', 'pesticide_trade', 'temperature_change'
] 

# dictionary to hold tables: maps each table name to the DataFrame read from it
dfs = {name: pd.read_sql_table(name, engine) for name in table_names}

In [None]:
# each loaded table is looked up in `dfs` by its name; preview the first rows of one
dfs['trade'].head(5)

Unnamed: 0,area_code,area,year_code,year,item_code,item,export_quantity,export_value,import_quantity,import_value
0,1,Armenia,1992,1992,10,Total Merchandise Trade,,12000.0,,200000.0
1,1,Armenia,1993,1993,10,Total Merchandise Trade,,30000.0,,200000.0
2,1,Armenia,1994,1994,10,Total Merchandise Trade,,232495.0,,393841.0
3,1,Armenia,1995,1995,10,Total Merchandise Trade,,270943.0,,673917.0
4,1,Armenia,1996,1996,10,Total Merchandise Trade,,290314.0,,855801.0


In [None]:
# join keys: key_cols_2 holds the area/year columns,
# key_cols_1 extends them with the item columns
key_cols_2 = ['area_code', 'area', 'year_code', 'year']
key_cols_1 = key_cols_2 + ['item_code', 'item']

# tables merged onto the base frame, in merge order
# ('producer_prices' and 'crop_production' are excluded: they form the base frame)
table_names = [
    'production_value',
    'production_index',
    'landuse',
    'trade',
    'trade_indicators',
    'trade_indices',
    'economic_indicators',
    'foreign_investment',
    'government_investment',
    'credit_to_agri',
    'emission_indicators',
    'employment_indicators',
    'annual_population',
    'agri_trade_indicators',
    'nitrogen_fertilizer',
    'phosphorus_fertilizer',
    'potassium_fertilizer',
    'cropland_nutrient_balance',
    'pesticide',
    'pesticide_trade',
    'temperature_change',
]

# subset of the tables above that is merged on key_cols_1;
# every other table is merged on key_cols_2
table_group_1 = [
    'production_value',
    'production_index',
    'trade',
    'trade_indicators',
    'trade_indices',
]

# base frame: producer prices left-joined with crop production on key_cols_1
df_merged = dfs['producer_prices'].merge(
    dfs['crop_production'], how='left', on=key_cols_1
)

# left-join each remaining table in turn, picking the key set by group membership
for table_name in table_names:
    join_keys = key_cols_1 if table_name in table_group_1 else key_cols_2
    df_merged = df_merged.merge(dfs[table_name], how='left', on=join_keys)

df_merged.head()

Unnamed: 0,area_code,area,year_code,year,item_code,item,producer_price,producer_price_index,area_harvested,laying,milk_animals,producing_animals_or_slaughtered,production,yield,yield_or_carcass_weight,gross_production_value,gross_production_index,area_agri_land,area_arable_land,area_cropland,area_with_irrigation,area_permanent_crops,area_temporary_crops,value_per_unit_agri_land,cropland_area_per_capita,export_quantity,export_value,import_quantity,import_value,export_market_concentration_index,import_dependency_ratio,import_market_concentration_index,revealed_comparative_advantage_index,self_sufficiency_ratio,terms_of_trade,export_quantity_index,export_value_index,import_quantity_index,import_value_index,gross_domestic_product,gross_fixed_capital_formation,value_added_ag_forest_fish,gdp_annual_growth,gfcf_annual_growth,value_added_ag_forest_fish_annual_growth,gfcf_share_in_total_gdp,ag_forest_fish_share_in_total_gdp,fdi_ag_forest_fish,fdi_food_industry,total_fdi_inflows,fdi_ag_forest_fish_share,fdi_food_industry_share,agri_orientation_index_govt_expenditure,govt_expenditure_on_ag,govt_expenditure_on_ag_forest_fish,ag_forest_fish_as_share_of_total_expenditure,credit_to_ag_forest_fish_2015_usd,...,emission_share_land_use_change,emission_share_pre_and_post_production,emission_share_end_to_end_agrifood,emission_share_crops,emission_share_livestock,emission_share_ipcc_agriculture,emission_share_energy_use,emission_share_agri_waste_mgt,value_added_per_worker,employment_in_agri,agri_employment_share_in_total_employment,total_employment_afs,afs_employment_share_in_total_employment,rural_population,urban_population,total_population,agri_trade_openness_index,share_of_agri_exports_to_gdp,nitrogen_agri_use,nitrogen_export_quantity,nitrogen_import_quantity,nitrogen_production,nitrogen_use_per_area_of_cropland,nitrogen_use_per_capita,nitrogen_use_per_value_of_ag_production,phosphorus_agri_use,phosphorus_export_quantity,phosphorus_import_quantity,phosphorus_production,phosphorus_us
e_per_area_of_cropland,phosphorus_use_per_capita,phosphorus_use_per_value_of_ag_production,potassium_agri_use,potassium_export_quantity,potassium_import_quantity,potassium_use_per_area_of_cropland,potassium_use_per_capita,potassium_use_per_value_of_ag_production,cropland_nitrogen_per_unit_area,cropland_nitrogen_use_efficiency,cropland_phosphorus_per_unit_area,cropland_phosphorus_use_efficiency,cropland_potassium_per_unit_area,cropland_potassium_use_efficiency,total_pesticide_use_for_agriculture,total_pesticide_use_per_area_of_cropland,total_pesticide_use_per_capita,total_pesticide_use_per_value_of_agri_production,total_pesticide_export_quantity,total_pesticide_export_value,total_pesticide_import_quantity,total_pesticide_import_value,temp_change_dec_jan_feb,temp_change_mar_apr_may,temp_change_jun_jul_aug,temp_change_sep_oct_nov,temp_change_meteorological_year
0,1,Armenia,1997,1997,15,Wheat,225.3,89.4,108000.0,,,,183700.0,1700.9,,50189.0,52.42,1332.0,455.0,498.0,285.0,43.0,,637.93,0.1522,,,210742.0,50366.0,,,,,,,,,69.0,65.0,3321.410346,584.593684,636.26429,3.321007,2.099717,-4.499913,17.600767,19.156449,,,98.719641,,,,,,,,...,1.04,11.44,37.2,1.4,19.88,21.3,60.86,11.12,873.68,728.26,56.1,,,1080.782,2052.304,3246.289,,,8000.0,0.0,8000.0,,16.06,2.45,9.41,,2300.0,3.3,,,,,,365.03,6.52,,,,24.0845,39.6858,0.6367,83.1393,-0.3189,103.1832,41.46,0.08,0.01,0.05,3.604,3.687,117.861,298.379,2.166,-0.319,0.87,0.184,0.725
1,1,Armenia,1998,1998,15,Wheat,205.6,83.91,118300.0,,,,244300.0,2065.1,,66745.0,69.72,1328.0,455.0,493.0,285.0,38.0,,694.61,0.1521,,,282127.0,40100.0,,,,,,,,,93.0,52.0,3565.209041,654.999152,718.342383,7.340216,12.043487,12.899999,18.37197,20.148675,,,410.567815,,,,,,,,...,1.02,11.6,35.42,1.63,17.86,19.53,62.03,10.96,976.11,735.92,56.1,,,1081.011,2027.673,3208.401,,,9620.0,0.0,9620.0,,19.51,2.97,10.43,,0.0,0.0,,,,,,0.0,0.0,,,,23.453,45.4276,-0.4984,114.7784,-2.885,131.0245,50.92,0.1,0.02,0.06,3.604,3.687,207.171,340.124,-0.259,1.483,2.145,2.027,1.349
2,1,Armenia,1999,1999,15,Wheat,177.5,76.79,110129.0,,,,214380.0,1946.6,,58571.0,61.18,1323.0,450.0,488.0,274.0,38.0,,691.05,0.1522,,,239694.0,37686.0,,,,,,,,,79.0,49.0,3681.264873,658.736569,728.720081,3.255232,0.570598,1.444672,17.8943,19.795372,,,228.265384,,,,,,,,...,1.1,13.46,39.23,1.31,19.64,20.97,59.99,11.73,1014.6,718.23,56.0,,,1082.796,2006.221,3167.286,,,6158.0,0.09,6158.0,,12.62,1.92,6.74,,0.0,1.5,,,,,,0.0,6.22,,,,18.1205,50.6764,-0.4698,114.6871,-2.7453,130.0706,60.38,0.12,0.02,0.07,3.604,3.687,296.481,381.869,3.535,0.814,1.611,0.264,1.556
3,1,Armenia,2000,2000,15,Wheat,163.5,71.3,106581.0,,,,177762.0,1667.9,,48566.0,50.73,1323.0,450.0,488.0,274.0,38.0,,670.41,0.154,,,375221.0,53695.0,,,,,,,,,123.0,70.0,3898.298463,765.591218,720.704274,5.895625,16.22115,-1.099985,19.639112,18.487663,,-19.505502,199.262087,,-9.788868,,,,,36.830029,...,1.0,15.17,39.64,1.18,18.03,19.25,63.38,10.97,1006.95,715.73,55.9,852.09,66.56,1084.598,1984.99,3125.628,,,7000.0,0.14,7000.0,,14.34,2.21,7.89,,0.0,0.0,,,,,,0.0,10.71,,,,22.7442,40.0838,-0.3237,112.3245,-1.9773,125.2821,69.85,0.14,0.02,0.08,3.604,3.687,160.902,482.955,1.148,0.723,2.166,0.364,1.1
4,1,Armenia,2001,2001,15,Wheat,166.0,74.51,108554.0,,,,241679.0,2226.3,,66029.0,68.97,1328.0,455.0,493.0,274.0,38.0,318.5,693.99,0.1574,,,294960.0,42391.0,,,,,,,,,97.0,55.0,4270.847611,806.204236,805.026484,9.556711,5.304791,11.699973,18.876914,18.849338,0.3781,5.293404,132.14606,0.286123,4.005722,,,,,32.00798,...,3.76,16.42,43.63,1.13,18.33,19.51,63.13,10.93,1131.27,711.61,55.7,850.33,66.54,1086.476,1964.179,3084.349,,,5000.0,0.01,5000.0,,10.14,1.6,5.43,,0.0,19.26,,,,,,0.0,49.71,,,,13.3452,61.1667,-1.7765,176.5419,-4.5395,159.8944,79.31,0.16,0.03,0.09,1.145,9.068,200.884,698.022,1.818,2.072,1.578,0.409,1.469


In [120]:
# (number of rows, number of columns) of the integrated dataframe
df_merged.shape

(392856, 117)

In [121]:
# exporting integrated dataset as csv file
# The output directory comes from the FAOSTAT_OUTPUT_DIR environment variable and
# defaults to the current working directory, so the notebook does not depend on a
# user-specific absolute path.
output_dir = os.environ.get('FAOSTAT_OUTPUT_DIR', os.getcwd())
os.makedirs(output_dir, exist_ok=True)  # create the target directory if it is missing
df_merged.to_csv(os.path.join(output_dir, 'integrated_data.csv'), index=False)