# Import all necessary libraries

In [2]:
# Import all necessary libraries
import pandas as pd
import numpy as np
import psycopg2 # needed to get database exception errors when uploading dataframe
import requests # package for getting data from the web
from zipfile import * # package for unzipping zip files
import sqlalchemy
import os

# agricultural land

In [48]:
df_land = pd.read_csv('kaggle/Agricultural Land.csv')
df_crops_units = pd.read_csv('kaggle/Crops_Units.csv')
df_groundwater = pd.read_csv('kaggle/Fresh groundwater abstracted.csv')
df_freshwater = pd.read_csv('kaggle/Freshwater abstracted.csv')
df_ghg = pd.read_csv('kaggle/GHG_Emissions_by_Sector.csv')

# agricultural land

In [11]:
df_land.describe()

Unnamed: 0,Unnamed: 8
count,0.0
mean,
std,
min,
25%,
50%,
75%,
max,


In [25]:
df_land.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 225 entries, 0 to 224
Data columns (total 9 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   Country                225 non-null    object 
 1   agri_area_2013         225 non-null    object 
 2   change_agri_area       225 non-null    object 
 3   total_agri_area        225 non-null    object 
 4   arable_land            225 non-null    object 
 5   perm_crops             225 non-null    object 
 6   perm_meadows_pastures  225 non-null    object 
 7   irrigated_agri_area    225 non-null    object 
 8   Unnamed: 8             0 non-null      float64
dtypes: float64(1), object(8)
memory usage: 15.9+ KB


In [14]:
df_land.shape

(225, 9)

In [63]:
land_column_names = {
    'Agricultural area in 2013 (km2)': 'agri_area_2013',
    '% change of agricultural area since 1990': 'change_agri_area',
    '% of total land area covered by agricultural area in 2013': 'total_agri_area',
    'Arable land in 2013 (km2)': 'arable_land',
    'Permanent crops in 2013 (km2)': 'perm_crops',
    'Permanent meadows and pastures in 2013 (km2)': 'perm_meadows_pastures',
    'Agricultural area actually irrigated in 2013 (km2)': 'irrigated_agri_area'
}

df_land.rename(columns=land_column_names, inplace=True)

In [64]:
df_land

Unnamed: 0,Country,agri_area_2013,change_agri_area,total_agri_area,arable_land,perm_crops,perm_meadows_pastures,irrigated_agri_area,Unnamed: 8
0,Afghanistan,379 100,-0.3,58.1,77 850,1 250,300 000,20 920,
1,Albania,11 873,5.9,43.3,6 171,792,4 910,2 053,
2,Algeria,414 316,7.1,17.4,74 962,9 390,329 964,10 895,
3,American Samoa,49,63.3,24.5,30,19,...,...,
4,Andorra,208,9.5,44.3,29,...,179,...,
...,...,...,...,...,...,...,...,...,...
220,Wallis and Futuna Islands,60,0.0,42.9,10,50,...,...,
221,Western Sahara,50 040,0.0,18.8,40,...,50 000,...,
222,Yemen,235 460,-0.3,44.6,12 480,2 980,220 000,...,
223,Zambia,237 360,14.1,31.9,37 000,360,200 000,...,


In [67]:
df_land.replace('...', np.nan, inplace=True)

# Drop rows with missing values
df_cleaned = df_land.dropna()

# Display the cleaned DataFrame
df_land

Unnamed: 0,Country,agri_area_2013,change_agri_area,total_agri_area,arable_land,perm_crops,perm_meadows_pastures,irrigated_agri_area,Unnamed: 8
0,Afghanistan,379 100,-0.3,58.1,77 850,1 250,300 000,20 920,
1,Albania,11 873,5.9,43.3,6 171,792,4 910,2 053,
2,Algeria,414 316,7.1,17.4,74 962,9 390,329 964,10 895,
3,American Samoa,49,63.3,24.5,30,19,,,
4,Andorra,208,9.5,44.3,29,,179,,
...,...,...,...,...,...,...,...,...,...
220,Wallis and Futuna Islands,60,0.0,42.9,10,50,,,
221,Western Sahara,50 040,0.0,18.8,40,,50 000,,
222,Yemen,235 460,-0.3,44.6,12 480,2 980,220 000,,
223,Zambia,237 360,14.1,31.9,37 000,360,200 000,,


In [68]:
null_values = df_land.isnull()

null_counts = null_values.sum()

null_counts

Country                    0
agri_area_2013             0
change_agri_area          33
total_agri_area            2
arable_land                6
perm_crops                19
perm_meadows_pastures     18
irrigated_agri_area      184
Unnamed: 8               225
dtype: int64

# Crops units description

In [31]:
df_crops_units.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 60 entries, 0 to 59
Data columns (total 2 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   Unit Name    60 non-null     object
 1   Description  60 non-null     object
dtypes: object(2)
memory usage: 1.1+ KB


In [29]:
df_crops_units

Unnamed: 0,Unit Name,Description
0,%,percent
1,% of total LSU,percent of total Livestock Units
2,(LCU/1000 kcal),local currency units per 1000 kilocalories
3,°C,degrees celsius
4,0.1g/An,0.1 grams per animal
5,1000 $,1000 dollars
6,1000 ha,thousand hectares
7,1000 Head,thousand head
8,1000 I$,1000 international dollars
9,1000 No,thousand number


# fresh water abstracted

In [49]:
df_freshwater.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 126 entries, 0 to 125
Data columns (total 58 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   CountryID        124 non-null    float64
 1   Country          124 non-null    object 
 2   1990             124 non-null    object 
 3   1991             124 non-null    object 
 4   1992             124 non-null    object 
 5   1993             124 non-null    object 
 6   1994             124 non-null    object 
 7   1995             124 non-null    object 
 8   1996             124 non-null    object 
 9   1997             124 non-null    object 
 10  1998             124 non-null    object 
 11  1999             124 non-null    object 
 12  2000             124 non-null    object 
 13  2001             124 non-null    object 
 14  2002             124 non-null    object 
 15  2003             124 non-null    object 
 16  2004             124 non-null    object 
 17  2005            

In [50]:
df_freshwater

Unnamed: 0,CountryID,Country,1990,1991,1992,1993,1994,1995,1996,1997,...,Footnotes\r\n2008,Footnotes\r\n2009,Footnotes\r\n2010,Footnotes\r\n2011,Footnotes\r\n2012,Footnotes\r\n2013,Footnotes\r\n2014,Footnotes\r\n2015,Footnotes\r\n2016,Footnotes\r\n2017
0,8.0,Albania,...,...,...,...,...,...,...,...,...,,,,,,,,,,1.0
1,12.0,Algeria,...,...,...,...,...,...,...,...,...,,,,,,,,,,
2,20.0,Andorra,...,...,...,...,...,...,...,...,...,2,2,2.0,2.0,2.0,2,2.0,2.0,,
3,28.0,Antigua and Barbuda,3.380000114,...,...,...,...,0.970000029,...,...,...,,,,,,,,,,
4,51.0,Armenia,3942,...,...,...,...,2331,2077,2250.699951,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
121,887.0,Yemen,...,...,...,...,...,3942,...,...,...,,,,,,,,,,
122,894.0,Zambia,...,...,...,...,...,2431,...,...,...,,,,,,,,,,
123,716.0,Zimbabwe,33107.64063,...,...,...,...,27045.39453,38103.42188,41901.375,...,39,39,39.0,39.0,39.0,39,39.0,39.0,39.0,39.0
124,,,,,,,,,,,...,,,,,,,,,,


In [51]:
columns_to_drop = ['Footnotes\r\n1990',	'Footnotes\r\n1991', 'Footnotes\r\n1992',
                   'Footnotes\r\n1993',	'Footnotes\r\n1994', 'Footnotes\r\n1995',
                   'Footnotes\r\n1996',	'Footnotes\r\n1997','Footnotes\r\n1998',	
                   'Footnotes\r\n1999', 'Footnotes\r\n2000','Footnotes\r\n2001',
                   'Footnotes\r\n2002','Footnotes\r\n2003', 'Footnotes\r\n2004', 
                   'Footnotes\r\n2005',	'Footnotes\r\n2006', 'Footnotes\r\n2007', 
                   'Footnotes\r\n2008',	'Footnotes\r\n2009', 'Footnotes\r\n2010', 
                   'Footnotes\r\n2011', 'Footnotes\r\n2012', 'Footnotes\r\n2013', 
                   'Footnotes\r\n2014', 'Footnotes\r\n2015', 'Footnotes\r\n2016', 
                   'Footnotes\r\n2017'
                    ]
df_freshwater.drop(columns=columns_to_drop, inplace=True)

#df_freshwater = df_freshwater.drop(columns=columns_to_drop)


KeyError: "['Footnotes\\r\\n1990', 'Footnotes\\r\\n1991', 'Footnotes\\r\\n1992', 'Footnotes\\r\\n1993', 'Footnotes\\r\\n1994', 'Footnotes\\r\\n1995', 'Footnotes\\r\\n1996', 'Footnotes\\r\\n1997', 'Footnotes\\r\\n1998', 'Footnotes\\r\\n1999', 'Footnotes\\r\\n2000', 'Footnotes\\r\\n2001', 'Footnotes\\r\\n2002', 'Footnotes\\r\\n2003', 'Footnotes\\r\\n2004', 'Footnotes\\r\\n2005', 'Footnotes\\r\\n2006', 'Footnotes\\r\\n2007', 'Footnotes\\r\\n2008', 'Footnotes\\r\\n2009', 'Footnotes\\r\\n2010', 'Footnotes\\r\\n2011', 'Footnotes\\r\\n2012', 'Footnotes\\r\\n2013', 'Footnotes\\r\\n2014', 'Footnotes\\r\\n2015', 'Footnotes\\r\\n2016', 'Footnotes\\r\\n2017'] not found in axis"

In [52]:
df_freshwater

Unnamed: 0,CountryID,Country,1990,1991,1992,1993,1994,1995,1996,1997,...,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017
0,8.0,Albania,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,1123,1194,...,1188
1,12.0,Algeria,...,...,...,...,...,...,...,...,...,6950,7150,7203,7515,7800,4642.759766,4819.819824,5512.660156,7800,7730
2,20.0,Andorra,...,...,...,...,...,...,...,...,...,16.39779282,15.4166193,15.82946682,16.04419327,16.58762169,17.15185165,15.44569111,15.66994095,...,...
3,28.0,Antigua and Barbuda,3.380000114,...,...,...,...,0.970000029,...,...,...,3.529999971,3.950000048,3.789999962,3.75999999,3.809999943,2.769999981,1.080000043,1.25,...,...
4,51.0,Armenia,3942,...,...,...,...,2331,2077,2250.699951,...,2873.5,2504.699951,2326.399902,2438.300049,2941.199951,2955.100098,2860.199951,3271.699951,3181.899902,2865.399902
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
121,887.0,Yemen,...,...,...,...,...,3942,...,...,...,...,...,5305,...,...,...,...,5911,...,...
122,894.0,Zambia,...,...,...,...,...,2431,...,...,...,...,...,...,...,...,...,...,...,...,...
123,716.0,Zimbabwe,33107.64063,...,...,...,...,27045.39453,38103.42188,41901.375,...,52445.32813,59317.16797,60038.69531,60379.23047,49003.97656,52851.39844,49350.78125,37037.32813,44669.73047,48978.59766
124,,,,,,,,,,,...,,,,,,,,,,


In [69]:
df_freshwater.replace('...', np.nan, inplace=True)

# Drop rows with missing values
df_cleaned = df_freshwater.dropna()

# Display the cleaned DataFrame
df_freshwater

Unnamed: 0,CountryID,Country,1990,1991,1992,1993,1994,1995,1996,1997,...,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017
0,8.0,Albania,,,,,,,,,...,,,,,,,1123,1194,,1188
1,12.0,Algeria,,,,,,,,,...,6950,7150,7203,7515,7800,4642.759766,4819.819824,5512.660156,7800,7730
2,20.0,Andorra,,,,,,,,,...,16.39779282,15.4166193,15.82946682,16.04419327,16.58762169,17.15185165,15.44569111,15.66994095,,
3,28.0,Antigua and Barbuda,3.380000114,,,,,0.970000029,,,...,3.529999971,3.950000048,3.789999962,3.75999999,3.809999943,2.769999981,1.080000043,1.25,,
4,51.0,Armenia,3942,,,,,2331,2077,2250.699951,...,2873.5,2504.699951,2326.399902,2438.300049,2941.199951,2955.100098,2860.199951,3271.699951,3181.899902,2865.399902
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
121,887.0,Yemen,,,,,,3942,,,...,,,5305,,,,,5911,,
122,894.0,Zambia,,,,,,2431,,,...,,,,,,,,,,
123,716.0,Zimbabwe,33107.64063,,,,,27045.39453,38103.42188,41901.375,...,52445.32813,59317.16797,60038.69531,60379.23047,49003.97656,52851.39844,49350.78125,37037.32813,44669.73047,48978.59766
124,,,,,,,,,,,...,,,,,,,,,,


In [70]:
null_values = df_freshwater.isnull()

null_counts = null_values.sum()

null_counts

CountryID      2
Country        2
1990          77
1991         107
1992         111
1993         109
1994         106
1995          68
1996          81
1997          79
1998          77
1999          75
2000          62
2001          63
2002          65
2003          62
2004          54
2005          42
2006          46
2007          43
2008          47
2009          41
2010          36
2011          43
2012          37
2013          43
2014          40
2015          47
2016          66
2017          81
dtype: int64

# fresh groundwater abstracted

In [54]:
df_groundwater.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 115 entries, 0 to 114
Data columns (total 58 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   CountryID       113 non-null    float64
 1   Country         113 non-null    object 
 2   1990            113 non-null    object 
 3   1991            113 non-null    object 
 4   1992            113 non-null    object 
 5   1993            113 non-null    object 
 6   1994            113 non-null    object 
 7   1995            113 non-null    object 
 8   1996            113 non-null    object 
 9   1997            113 non-null    object 
 10  1998            113 non-null    object 
 11  1999            113 non-null    object 
 12  2000            113 non-null    object 
 13  2001            113 non-null    object 
 14  2002            113 non-null    object 
 15  2003            113 non-null    object 
 16  2004            113 non-null    object 
 17  2005            113 non-null    obj

In [55]:
df_groundwater

Unnamed: 0,CountryID,Country,1990,1991,1992,1993,1994,1995,1996,1997,...,Footnotes 2008,Footnotes 2009,Footnotes 2010,Footnotes 2011,Footnotes 2012,Footnotes 2013,Footnotes 2014,Footnotes 2015,Footnotes 2016,Footnotes 2017
0,8.0,Albania,...,...,...,...,...,...,...,...,...,,,,,,,,,,1.0
1,12.0,Algeria,...,...,...,...,...,...,...,...,...,,,,,,2,2.0,2.0,2.0,2.0
2,28.0,Antigua and Barbuda,1.139999986,...,...,...,...,0.550000012,...,...,...,,,,,,,,,,
3,51.0,Armenia,1325.400024,...,...,...,...,851,616,520,...,,,,,,,,,,
4,31.0,Azerbaijan,1706,...,...,...,...,1150,987,1098,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
110,887.0,Yemen,...,...,...,...,...,2442,...,...,...,,,,,,,,,,
111,894.0,Zambia,...,...,...,...,...,286.5,...,...,...,,,,,,,,,,
112,716.0,Zimbabwe,9270.139648,...,...,...,...,7572.710449,10668.95801,11732.38574,...,,,,,,,,,,
113,,,,,,,,,,,...,,,,,,,,,,


In [56]:
columns_to_drop = ['Footnotes 1990',	'Footnotes 1991', 'Footnotes 1992',
                   'Footnotes 1993',	'Footnotes 1994', 'Footnotes 1995',
                   'Footnotes 1996',	'Footnotes 1997','Footnotes 1998',	
                   'Footnotes 1999', 'Footnotes 2000','Footnotes 2001',
                   'Footnotes 2002','Footnotes 2003', 'Footnotes 2004', 
                   'Footnotes 2005',	'Footnotes 2006', 'Footnotes 2007', 
                   'Footnotes 2008',	'Footnotes 2009', 'Footnotes 2010', 
                   'Footnotes 2011', 'Footnotes 2012', 'Footnotes 2013', 
                   'Footnotes 2014', 'Footnotes 2015', 'Footnotes 2016', 
                   'Footnotes 2017'
                    ]
df_groundwater.drop(columns=columns_to_drop, inplace=True)




In [57]:
df_groundwater

Unnamed: 0,CountryID,Country,1990,1991,1992,1993,1994,1995,1996,1997,...,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017
0,8.0,Albania,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,235,262,...,107
1,12.0,Algeria,...,...,...,...,...,...,...,...,...,4150,4200,2603,2915,3000,3046.909912,3117.449951,3654.939941,3900,3860
2,28.0,Antigua and Barbuda,1.139999986,...,...,...,...,0.550000012,...,...,...,1.399999976,1.600000024,1.600000024,1.200000048,1.200000048,0.720000029,0.680000007,0.74000001,...,...
3,51.0,Armenia,1325.400024,...,...,...,...,851,616,520,...,852,854.4000244,875.7999878,1002.799988,1314.400024,1348.699951,1312,1304.400024,1136.300049,1154.5
4,31.0,Azerbaijan,1706,...,...,...,...,1150,987,1098,...,1127,1262,1272,1506,1696,1738,1819,2016,2075,2066
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
110,887.0,Yemen,...,...,...,...,...,2442,...,...,...,...,...,3805,...,...,...,...,4411,...,...
111,894.0,Zambia,...,...,...,...,...,286.5,...,...,...,...,...,...,...,...,...,...,...,...,...
112,716.0,Zimbabwe,9270.139648,...,...,...,...,7572.710449,10668.95801,11732.38574,...,14684.69141,16608.80664,16810.83594,16906.18359,13721.11328,14798.3916,13818.21973,10370.45215,12507.52441,13714.00684
113,,,,,,,,,,,...,,,,,,,,,,


In [71]:
df_groundwater.replace('...', np.nan, inplace=True)

# Drop rows with missing values
df_cleaned = df_groundwater.dropna()

# Display the cleaned DataFrame
df_groundwater

Unnamed: 0,CountryID,Country,1990,1991,1992,1993,1994,1995,1996,1997,...,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017
0,8.0,Albania,,,,,,,,,...,,,,,,,235,262,,107
1,12.0,Algeria,,,,,,,,,...,4150,4200,2603,2915,3000,3046.909912,3117.449951,3654.939941,3900,3860
2,28.0,Antigua and Barbuda,1.139999986,,,,,0.550000012,,,...,1.399999976,1.600000024,1.600000024,1.200000048,1.200000048,0.720000029,0.680000007,0.74000001,,
3,51.0,Armenia,1325.400024,,,,,851,616,520,...,852,854.4000244,875.7999878,1002.799988,1314.400024,1348.699951,1312,1304.400024,1136.300049,1154.5
4,31.0,Azerbaijan,1706,,,,,1150,987,1098,...,1127,1262,1272,1506,1696,1738,1819,2016,2075,2066
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
110,887.0,Yemen,,,,,,2442,,,...,,,3805,,,,,4411,,
111,894.0,Zambia,,,,,,286.5,,,...,,,,,,,,,,
112,716.0,Zimbabwe,9270.139648,,,,,7572.710449,10668.95801,11732.38574,...,14684.69141,16608.80664,16810.83594,16906.18359,13721.11328,14798.3916,13818.21973,10370.45215,12507.52441,13714.00684
113,,,,,,,,,,,...,,,,,,,,,,


In [72]:
null_values = df_groundwater.isnull()

null_counts = null_values.sum()

null_counts

CountryID      2
Country        2
1990          76
1991          98
1992         100
1993          98
1994          98
1995          69
1996          77
1997          79
1998          76
1999          71
2000          60
2001          58
2002          61
2003          60
2004          50
2005          44
2006          46
2007          39
2008          43
2009          39
2010          35
2011          37
2012          33
2013          37
2014          33
2015          38
2016          58
2017          69
dtype: int64

# GHG emissions by sector

In [59]:
df_ghg.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 192 entries, 0 to 191
Data columns (total 14 columns):
 #   Column                                                                         Non-Null Count  Dtype 
---  ------                                                                         --------------  ----- 
 0   Country ID                                                                     192 non-null    int64 
 1   Country                                                                        192 non-null    object
 2   Latest Year Available                                                          192 non-null    int64 
 3   Total GHG emissions without LULUCF (1000 tonnes of CO₂ equivalent)             192 non-null    object
 4   GHG from energy (1000 tonnes of CO₂ equivalent)                                192 non-null    object
 5   GHG from energy, as percentage to total                                        192 non-null    object
 6   GHG from energy, of which: from tr

In [78]:
ghg_column_names = {
    'Total GHG emissions without LULUCF (1000 tonnes of CO₂ equivalent)': 'ghg_total',
    'GHG from energy (1000 tonnes of CO₂ equivalent)': 'ghg_energy',
    'GHG from energy, as percentage to total': 'ghg_energy_perc',
    'GHG from energy, of which: from transport (1000 tonnes of CO₂ equivalent)': 'ghg_transport',
    'GHG from energy, of which: from transport, as percentage to total': 'ghg_transport_perc',
    'GHG from industrial processes and product use (1000 tonnes of CO₂ equivalent)': 'ghg_industrial',
    'GHG from industrial processes and product use, as percentage to total': 'ghg_industrial_perc',
    'GHG from agriculture (1000 tonnes of CO₂ equivalent)': 'ghg_agri',
    'GHG from agriculture, as percentage to total': 'ghg_agri_perc',
    'GHG from waste (1000 tonnes of CO₂ equivalent)' : 'ghg_waste',
    'GHG from waste, as percentage to total' : 'ghg_waste_perc'
}

df_ghg.rename(columns=ghg_column_names, inplace=True)

df_ghg

Unnamed: 0,Country ID,Country,Latest Year Available,ghg_total,ghg_energy,ghg_energy_perc,ghg_transport,ghg_transport_perc,ghg_industrial,ghg_industrial_perc,ghg_agri,ghg_agri_perc,ghg_waste,ghg_waste_perc
0,4,Afghanistan,2013,43377.00,10281.00,23.7,5217.00,12.0,212.00,0.5,32744.00,75.5,140.00,0.3
1,8,Albania,2009,8125.70,4466.04,55.0,2306.46,28.4,1701.12,20.9,1130.86,13.9,827.68,10.2
2,12,Algeria,2000,111022.59,87595.60,78.9,12789.98,11.5,5463.83,4.9,6534.62,5.9,11428.54,10.3
3,24,Angola,2005,61610.76,37732.06,61.2,,,352.00,0.6,22575.40,36.6,951.30,1.5
4,28,Antigua and Barbuda,2000,597.75,372.72,62.4,182.63,30.6,,,104.33,17.5,120.70,20.2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
187,862,Venezuela (Bolivarian Republic of),1999,192192.24,143561.20,74.7,33991.00,17.7,9206.20,4.8,32959.70,17.1,6465.14,3.4
188,704,Viet Nam,2013,278441.86,147703.33,53.0,29680.72,10.7,31767.39,11.4,81166.04,29.2,17805.10,6.4
189,887,Yemen,2012,37942.87,23549.08,62.1,5739.01,15.1,1398.00,3.7,10879.79,28.7,2116.00,5.6
190,894,Zambia,2000,14404.70,2628.21,18.2,584.63,4.1,1005.53,7.0,10359.37,71.9,411.59,2.9


In [79]:
df_ghg.replace('...', np.nan, inplace=True)

# Drop rows with missing values
df_cleaned = df_ghg.dropna()

# Display the cleaned DataFrame
df_ghg

Unnamed: 0,Country ID,Country,Latest Year Available,ghg_total,ghg_energy,ghg_energy_perc,ghg_transport,ghg_transport_perc,ghg_industrial,ghg_industrial_perc,ghg_agri,ghg_agri_perc,ghg_waste,ghg_waste_perc
0,4,Afghanistan,2013,43377.00,10281.00,23.7,5217.00,12.0,212.00,0.5,32744.00,75.5,140.00,0.3
1,8,Albania,2009,8125.70,4466.04,55.0,2306.46,28.4,1701.12,20.9,1130.86,13.9,827.68,10.2
2,12,Algeria,2000,111022.59,87595.60,78.9,12789.98,11.5,5463.83,4.9,6534.62,5.9,11428.54,10.3
3,24,Angola,2005,61610.76,37732.06,61.2,,,352.00,0.6,22575.40,36.6,951.30,1.5
4,28,Antigua and Barbuda,2000,597.75,372.72,62.4,182.63,30.6,,,104.33,17.5,120.70,20.2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
187,862,Venezuela (Bolivarian Republic of),1999,192192.24,143561.20,74.7,33991.00,17.7,9206.20,4.8,32959.70,17.1,6465.14,3.4
188,704,Viet Nam,2013,278441.86,147703.33,53.0,29680.72,10.7,31767.39,11.4,81166.04,29.2,17805.10,6.4
189,887,Yemen,2012,37942.87,23549.08,62.1,5739.01,15.1,1398.00,3.7,10879.79,28.7,2116.00,5.6
190,894,Zambia,2000,14404.70,2628.21,18.2,584.63,4.1,1005.53,7.0,10359.37,71.9,411.59,2.9


In [80]:
null_values = df_ghg.isnull()

null_counts = null_values.sum()

null_counts

Country ID                0
Country                   0
Latest Year Available     0
ghg_total                 0
ghg_energy                1
ghg_energy_perc           1
ghg_transport            12
ghg_transport_perc       10
ghg_industrial           31
ghg_industrial_perc      30
ghg_agri                  6
ghg_agri_perc             6
ghg_waste                 3
ghg_waste_perc            3
dtype: int64