In [1]:
#set the columns name to lower case, strip and replace empty spaces
def column_names(df_name):
    new_column_names = [column.strip().lower().replace(" ", "_") for column in df_name.columns]
    df_name.columns = new_column_names

#change the name of a column
def change_column_name(df, old_name, new_name):
    df.rename(columns={old_name: new_name}, inplace=True)
    return df 

#drop one column
def drop_column(df, column):
    df = df.drop(column,axis='columns')
    return df
#drop one row by the index
def drop_row_index(df, df_index):
    df = df.drop(index=df_index)
    return df
#change to numeric 
def to_numeric(df, columns_to_proces):
    for column in columns_to_proces: 
        df[column] = pd.to_numeric(df[column], errors='coerce')

# **Data merge**

In [2]:
# We have two cleaned data sets, one about the population and CO2 
# emission and one with the temperature, all of them from 1999 to 
# 2019. We will reshaped them and check all the values are ready 
# to be analyzed.

## Import all the libraries we will use

In [3]:
import pandas as pd
import numpy as np

## Import the data sets

In [4]:

df1= pd.read_csv('../Data/Clean Data/1. co2_population_cleaned.csv')
df2= pd.read_csv('../Data/Clean Data/2. Temperature_cleaned.csv')

## Chekout the two data sets

In [5]:
#df1: Population and CO2 emissions

df1

Unnamed: 0,country,year,population,co2_emission
0,Aruba,1999,86867.0,810.407000
1,Africa Eastern and Southern,1999,391486231.0,351070.000000
2,Afghanistan,1999,19262847.0,810.000000
3,Africa Western and Central,1999,262397030.0,133013.187487
4,Angola,1999,15870753.0,17610.000000
...,...,...,...,...
5265,Samoa,2019,211905.0,300.000012
5266,"Yemen, Rep.",2019,31546691.0,11100.000381
5267,South Africa,2019,58087055.0,439640.014648
5268,Zambia,2019,18380477.0,6800.000191


In [6]:
# df2: Earth temperature dataset
df2

Unnamed: 0,country,year,temperature_change
0,Afghanistan,1999,1.301
1,Albania,1999,0.722
2,Algeria,1999,1.456
3,American Samoa,1999,0.243
4,Andorra,1999,1.020
...,...,...,...
4741,Palestine,2019,1.298
4742,Western Sahara,2019,1.430
4743,World,2019,1.449
4744,Zambia,2019,1.295


## Standardize the country names: Ensure consistency in country names.

## Check the name of the countries

In [7]:
country_name_mapping = {
 'Afghanistan': 'Afghanistan',
 'Afghanistan, Islamic Rep. of': 'Afghanistan',
 'Africa Eastern and Southern': 'Africa Eastern',
 'Africa Western and Central': 'Africa Western',
 'Albania': 'Albania',
 'Algeria': 'Algeria',
 'American Samoa': 'American Samoa',
 'Andorra': 'Andorra',
 'Andorra, Principality of': 'Andorra',
 'Angola': 'Angola',
 'Anguilla': 'Anguilla',
 'Antigua and Barbuda': 'Antigua and Barbuda',
 'Arab World': 'Arab World',
 'Argentina': 'Argentina',
 'Armenia': 'Armenia',
 'Armenia, Rep. of': 'Armenia',
 'Aruba': 'Aruba',
 'Aruba, Kingdom of the Netherlands': 'Aruba',
 'Australia': 'Australia',
 'Austria': 'Austria',
 'Azerbaijan': 'Azerbaijan',
 'Azerbaijan, Rep. of': 'Azerbaijan',
 'Bahamas, The': 'Bahamas',
 'Bahrain': 'Bahrain',
 'Bahrain, Kingdom of': 'Bahrain',
 'Bangladesh': 'Bangladesh',
 'Barbados': 'Barbados',
 'Belarus': 'Belarus',
 'Belarus, Rep. of': 'Belarus',
 'Belgium': 'Belgium',
 'Belize': 'Belize',
 'Benin': 'Benin',
 'Bermuda': 'Bermuda',
 'Bhutan': 'Bhutan',
 'Bolivia': 'Bolivia',
 'Bosnia and Herzegovina': 'Bosnia and Herzegovina',
 'Botswana': 'Botswana',
 'Brazil': 'Brazil',
 'British Virgin Islands': 'British Virgin Islands',
 'Brunei Darussalam': 'Brunei Darussalam',
 'Bulgaria': 'Bulgaria',
 'Burkina Faso': 'Burkina Faso',
 'Burundi': 'Burundi',
 'Cabo Verde': 'Cabo Verde',
 'Cambodia': 'Cambodia',
 'Cameroon': 'Cameroon',
 'Canada': 'Canada',
 'Caribbean small states': 'Caribbean small states',
 'Cayman Islands': 'Cayman Islands',
 'Central African Rep.': 'Central African Rep.',
 'Central African Republic': 'Central African Rep.',
 'Central Europe and the Baltics': 'Central Europe and the Baltics',
 'Chad': 'Chad',
 'Chile': 'Chile',
 'China': 'China',
 'China, P.R.: Hong Kong':  'China',
 'China, P.R.: Macao':  'China',
 'China, P.R.: Mainland':  'China',
 'Colombia': 'Colombia',
 'Comoros': 'Comoros',
 'Comoros, Union of the': 'Comoros, Union of the',
 'Congo, Dem. Rep.': 'Congo, Dem. Rep.',
 'Congo, Dem. Rep. of the': 'Congo, Dem. Rep.',
 'Congo, Rep.': 'Congo, Dem. Rep.',
 'Congo, Rep. of': 'Congo, Dem. Rep.',
 'Cook Islands': 'Cook Islands',
 'Costa Rica': 'Costa Rica',
 "Cote d'Ivoire": "Cote d'Ivoire",
 'Croatia': 'Croatia',
 'Croatia, Rep. of': 'Croatia',
 'Cuba': 'Cuba',
 'Curacao': 'Curacao',
 'Cyprus': 'Cyprus',
 'Czech Rep.': 'Czech Rep.',
 'Czechia': 'Czechia',
 'Denmark': 'Denmark',
 'Djibouti': 'Djibouti',
 'Dominica': 'Dominica',
 'Dominican Rep.': 'Dominican Rep.',
 'Dominican Republic': 'Dominican Rep.',
 'Early-demographic dividend': 'Early-demographic dividend',
 'East Asia & Pacific': 'East Asia & Pacific',
 'East Asia & Pacific (IDA & IBRD countries)': 'East Asia & Pacific (IDA & IBRD countries)',
 'East Asia & Pacific (excluding high income)': 'East Asia & Pacific (excluding high income)',
 'Ecuador': 'Ecuador',
 'Egypt, Arab Rep.': 'Egypt, Arab Rep.',
 'Egypt, Arab Rep. of': 'Egypt, Arab Rep.',
 'El Salvador': 'El Salvador',
 'Equatorial Guinea': 'Equatorial Guinea',
 'Equatorial Guinea, Rep. of': 'Equatorial Guinea',
 'Eritrea': 'Eritrea',
 'Eritrea, The State of': 'Eritrea',
 'Estonia': 'Estonia',
 'Estonia, Rep. of': 'Estonia',
 'Eswatini': 'Eswatini',
 'Eswatini, Kingdom of': 'Eswatini',
 'Ethiopia': 'Ethiopia',
 'Ethiopia, The Federal Dem. Rep. of': 'Ethiopia',
 'Euro area': 'Euro area',
 'Europe & Central Asia': 'Europe & Central Asia',
 'Europe & Central Asia (IDA & IBRD countries)': 'Europe & Central Asia (IDA & IBRD countries)',
 'Europe & Central Asia (excluding high income)': 'Europe & Central Asia (excluding high income)',
 'European Union': 'European Union',
 'Falkland Islands (Malvinas)': 'Falkland Islands (Malvinas)',
 'Faroe Islands': 'Faroe Islands',
 'Fiji': 'Fiji',
 'Fiji, Rep. of': 'Fiji',
 'Finland': 'Finland',
 'Fragile and conflict affected situations': 'Fragile and conflict affected situations',
 'France': 'France',
 'French Polynesia': 'French Polynesia',
 'Gabon': 'Gabon',
 'Gambia, The': 'Gambia, The',
 'Georgia': 'Georgia',
 'Germany': 'Germany',
 'Ghana': 'Ghana',
 'Gibraltar': 'Gibraltar',
 'Greece': 'Greece',
 'Greenland': 'Greenland',
 'Grenada': 'Grenada',
 'Guadeloupe': 'Guadeloupe',
 'Guatemala': 'Guatemala',
 'Guiana, French': 'Guiana, French',
 'Guinea': 'Guinea',
 'Guinea-Bissau': 'Guinea-Bissau',
 'Guyana': 'Guyana',
 'Haiti': 'Haiti',
 'Heavily indebted poor countries (HIPC)': 'Heavily indebted poor countries (HIPC)',
 'High income': 'High income',
 'Holy See': 'Holy See',
 'Honduras': 'Honduras',
 'Hong Kong SAR, China': 'China',
 'Hungary': 'Hungary',
 'IBRD only': 'IBRD only',
 'IDA & IBRD total': 'IDA & IBRD total',
 'IDA blend': 'IDA blend',
 'IDA only': 'IDA only',
 'IDA total': 'IDA total',
 'Iceland': 'Iceland',
 'India': 'India',
 'Indonesia': 'Indonesia',
 'Iran, Islamic Rep.': 'Iran, Islamic Rep.',
 'Iran, Islamic Rep. of': 'Iran, Islamic Rep.',
 'Iraq': 'Iraq',
 'Ireland': 'Ireland',
 'Isle of Man': 'Isle of Man',
 'Israel': 'Israel',
 'Italy': 'Italy',
 'Jamaica': 'Jamaica',
 'Japan': 'Japan',
 'Jordan': 'Jordan',
 'Kazakhstan': 'Kazakhstan',
 'Kazakhstan, Rep. of': 'Kazakhstan',
 'Kenya': 'Kenya',
 'Kiribati': 'Kiribati',
 "Korea, Dem. People's Rep.": "Korea, Dem. People's Rep.",
 "Korea, Dem. People's Rep. of": "Korea, Dem. People's Rep.",
 'Korea, Rep.': 'Korea, Rep.',
 'Korea, Rep. of': 'Korea, Rep.',
 'Kosovo': 'Kosovo',
 'Kuwait': 'Kuwait',
 'Kyrgyz Rep.': 'Kyrgyz Rep.',
 'Kyrgyz Republic': 'Kyrgyz Rep.',
 'Lao PDR': 'Lao PDR',
 "Lao People's Dem. Rep.": 'Lao PDR',
 'Late-demographic dividend': 'Late-demographic dividend',
 'Latin America & Caribbean': 'Latin America & Caribbean',
 'Latin America & Caribbean (excluding high income)': 'Latin America & Caribbean (excluding high income)',
 'Latin America & the Caribbean (IDA & IBRD countries)': 'Latin America & the Caribbean (IDA & IBRD countries)',
 'Latvia': 'Latvia',
 'Least developed countries: UN classification': 'Least developed countries: UN classification',
 'Lebanon': 'Lebanon',
 'Lesotho': 'Lesotho',
 'Lesotho, Kingdom of': 'Lesotho',
 'Liberia': 'Liberia',
 'Libya': 'Libya',
 'Liechtenstein': 'Liechtenstein',
 'Lithuania': 'Lithuania',
 'Low & middle income': 'Low & middle income',
 'Low income': 'Low income',
 'Lower middle income': 'Lower middle income',
 'Luxembourg': 'Luxembourg',
 'Macao SAR, China': 'China',
 'Madagascar': 'Madagascar',
 'Madagascar, Rep. of': 'Madagascar',
 'Malawi': 'Malawi',
 'Malaysia': 'Malaysia',
 'Maldives': 'Maldives',
 'Mali': 'Mali',
 'Malta': 'Malta',
 'Marshall Islands': 'Marshall Islands',
 'Marshall Islands, Rep. of the': 'Marshall Islands',
 'Martinique': 'Martinique',
 'Mauritania': 'Mauritania',
 'Mauritania, Islamic Rep. of': 'Mauritania',
 'Mauritius': 'Mauritius',
 'Mayotte': 'Mayotte',
 'Mexico': 'Mexico',
 'Micronesia, Fed. Sts.': 'Micronesia, Fed. Sts.',
 'Micronesia, Federated States of': 'Micronesia, Fed. Sts.',
 'Middle East & North Africa': 'Middle East & North Africa',
 'Middle East & North Africa (IDA & IBRD countries)': 'Middle East & North Africa (IDA & IBRD countries)',
 'Middle East & North Africa (excluding high income)': 'Middle East & North Africa (excluding high income)',
 'Middle income': 'Middle income',
 'Moldova': 'Moldova, Rep.',
 'Moldova, Rep. of': 'Moldova, Rep.',
 'Monaco': 'Monaco',
 'Mongolia': 'Mongolia',
 'Montenegro': 'Montenegro',
 'Montserrat': 'Montserrat',
 'Morocco': 'Morocco',
 'Mozambique': 'Mozambique, Rep.',
 'Mozambique, Rep. of': 'Mozambique, Rep.',
 'Myanmar': 'Myanmar',
 'Namibia': 'Namibia',
 'Nauru': 'Nauru, Rep.',
 'Nauru, Rep. of': 'Nauru, Rep.',
 'Nepal': 'Nepal',
 'Netherlands': 'Netherlands',
 'Netherlands, The': 'Netherlands',
 'New Caledonia': 'New Caledonia',
 'New Zealand': 'New Zealand',
 'Nicaragua': 'Nicaragua',
 'Niger': 'Niger',
 'Nigeria': 'Niger',
 'Niue': 'Niue',
 'Norfolk Island': 'Norfolk Island',
 'North America': 'North America',
 'North Macedonia': 'North Macedonia',
 'North Macedonia, Republic of ': 'North Macedonia',
 'Norway': 'Norway',
 'OECD members': 'OECD members',
 'Oman': 'Oman',
 'Other small states': 'Other small states',
 'Pacific island small states': 'Pacific island small states',
 'Pakistan': 'Pakistan',
 'Palau': 'Palau',
 'Palau, Rep. of': 'Palau',
 'Panama': 'Panama',
 'Papua New Guinea': 'Papua New Guinea',
 'Paraguay': 'Paraguay',
 'Peru': 'Peru',
 'Philippines': 'Philippines',
 'Pitcairn Islands': 'Pitcairn Islands',
 'Poland': 'Poland',
 'Poland, Rep. of': 'Poland',
 'Portugal': 'Portugal',
 'Post-demographic dividend': 'Post-demographic dividend',
 'Pre-demographic dividend': 'Pre-demographic dividend',
 'Puerto Rico': 'Puerto Rico',
 'Qatar': 'Qatar',
 'Romania': 'Romania',
 'Russian Federation': 'Russian Federation',
 'Rwanda': 'Rwanda',
 'Saint Helena': 'Saint Helena',
 'Saint Pierre and Miquelon': 'Saint Pierre and Miquelon',
 'Samoa': 'Samoa',
 'San Marino, Rep. of': 'San Marino, Rep. of',
 'Sao Tome and Principe': 'Sao Tome and Principe',
 'Saudi Arabia': 'Saudi Arabia',
 'Senegal': 'Senegal',
 'Serbia': 'Serbia',
 'Serbia, Rep. of': 'Serbia',
 'Seychelles': 'Seychelles',
 'Sierra Leone': 'Sierra Leone',
 'Singapore': 'Singapore',
 'Sint Maarten (Dutch part)': 'Sint Maarten (Dutch part)',
 'Slovak Rep.': 'Slovak Rep.',
 'Slovak Republic': 'Slovak Rep.',
 'Slovenia': 'Slovenia',
 'Slovenia, Rep. of': 'Slovenia',
 'Small states': 'Small states',
 'Solomon Islands': 'Solomon Islands',
 'Somalia': 'Somalia',
 'South Africa': 'South Africa',
 'South Asia': 'South Asia',
 'South Asia (IDA & IBRD)': 'South Asia (IDA & IBRD)',
 'South Sudan': 'South Sudan',
 'South Sudan, Rep. of': 'South Sudan',
 'Spain': 'Spain',
 'Sri Lanka': 'Sri Lanka',
 'St. Kitts and Nevis': 'St. Kitts and Nevis',
 'St. Lucia': 'St. Lucia',
 'St. Vincent and the Grenadines': 'St. Vincent and the Grenadines',
 'Sub-Saharan Africa': 'Sub-Saharan Africa',
 'Sub-Saharan Africa (IDA & IBRD countries)': 'Sub-Saharan Africa (IDA & IBRD countries)',
 'Sub-Saharan Africa (excluding high income)': 'Sub-Saharan Africa (excluding high income)',
 'Sudan': 'Sudan',
 'Suriname': 'Suriname',
 'Sweden': 'Sweden',
 'Switzerland': 'Switzerland',
 'Syrian Arab Rep.': 'Syrian Arab Rep.',
 'Syrian Arab Republic': 'Syrian Arab Rep.',
 'São Tomé and Príncipe, Dem. Rep. of': 'São Tomé and Príncipe, Dem. Rep. of',
 'Taiwan Province of China': 'Taiwan Province of China',
 'Tajikistan': 'Tajikistan',
 'Tajikistan, Rep. of': 'Tajikistan',
 'Tanzania': 'Tanzania',
 'Tanzania, United Rep. of': 'Tanzania',
 'Thailand': 'Thailand',
 'Timor-Leste': 'Timor-Leste',
 'Timor-Leste, Dem. Rep. of': 'Timor-Leste',
 'Togo': 'Togo',
 'Tokelau': 'Tokelau',
 'Tonga': 'Tonga',
 'Trinidad and Tobago': 'Trinidad and Tobago',
 'Tunisia': 'Tunisia',
 'Turkey': 'Turkey',
 'Turkiye': 'Turkey',
 'Turkmenistan': 'Turkmenistan',
 'Turks and Caicos Islands': 'Turks and Caicos Islands',
 'Tuvalu': 'Tuvalu',
 'Uganda': 'Uganda',
 'Ukraine': 'Ukraine',
 'United Arab Emirates': 'United Arab Emirates',
 'United Kingdom': 'United Kingdom',
 'United States': 'United States',
 'United States Virgin Islands': 'United States Virgin Islands',
 'Upper middle income': 'Upper middle income',
 'Uruguay': 'Uruguay',
 'Uzbekistan': 'Uzbekistan',
 'Uzbekistan, Rep. of': 'Uzbekistan',
 'Vanuatu': 'Vanuatu',
 'Venezuela, RB': 'Venezuela',
 'Venezuela, Rep. Bolivariana de': 'Venezuela',
 'Vietnam': 'Vietnam',
 'Wallis and Futuna Islands': 'Wallis and Futuna Islands',
 'West Bank and Gaza': 'Palestine',
 'Western Sahara': 'Western Sahara',
 'World': 'World',
 'Yemen, Rep.': 'Yemen, Rep.',
 'Yemen, Rep. of': 'Yemen, Rep.',
 'Zambia': 'Zambia',
 'Zimbabwe': 'Zimbabwe'}

In [30]:
df1['country'] = df1['country'].replace(country_name_mapping)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df1['country'] = df1['country'].replace(country_name_mapping)


In [31]:
df2['country'] = df2['country'].replace(country_name_mapping)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['country'] = df2['country'].replace(country_name_mapping)


In [11]:
len(df1['country'])

5270

In [12]:
len(df2['country'])

4746

In [13]:
#droping duplicates
df1.drop_duplicates(subset=['country', 'year'], keep='first', inplace=True)
df2.drop_duplicates(subset=['country', 'year'], keep='first', inplace=True)

# Merge both DataFrames to a new dataset called df3 

In [33]:
df2.shape

(4725, 3)

In [34]:
df1.shape

(5249, 4)

In [32]:
df3 = pd.merge(df1, df2, on=['country', 'year'], how='inner')
df3

Unnamed: 0,country,year,population,co2_emission,temperature_change
0,Aruba,1999,86867.0,810.407000,0.216
1,Afghanistan,1999,19262847.0,810.000000,1.301
2,Angola,1999,15870753.0,17610.000000,0.591
3,Albania,1999,3108778.0,2970.000000,0.722
4,Andorra,1999,65655.0,510.000000,1.020
...,...,...,...,...,...
4294,Vanuatu,2019,304404.0,209.999993,0.803
4295,Samoa,2019,211905.0,300.000012,0.828
4296,South Africa,2019,58087055.0,439640.014648,1.752
4297,Zambia,2019,18380477.0,6800.000191,1.295


Drop world

In [35]:
df2 = df2[~df2['country'].isin(['World'])]
df1 = df1[~df1['country'].isin(['World'])]
df3.country.tail(20)

4279                       Timor-Leste
4280                             Tonga
4281               Trinidad and Tobago
4282                           Tunisia
4283                            Turkey
4284                            Tuvalu
4285                          Tanzania
4286                            Uganda
4287                           Ukraine
4288                           Uruguay
4289                     United States
4290                        Uzbekistan
4291    St. Vincent and the Grenadines
4292                         Venezuela
4293                           Vietnam
4294                           Vanuatu
4295                             Samoa
4296                      South Africa
4297                            Zambia
4298                          Zimbabwe
Name: country, dtype: object

## Recheck the data

### Check the data types 

In [36]:
df3.dtypes

country                object
year                    int64
population            float64
co2_emission          float64
temperature_change    float64
dtype: object

In [37]:
len(df3)

4299

In [40]:
#check for duplicates
df3.duplicated().any()

True

In [41]:
# Remove rows with NaN values
df3.dropna(inplace=True)

In [43]:
# Remove rows where the country name includes the word "world"
df3 = df3[~df3['country'].str.contains(r'world', case=False, regex=True)]

## Setup Data Frame

### Categorize countries: Add a column to categorize countries into Global North and Global South.

In [44]:
# Categorize countries into Global North and Global South

global_north = [
    'United States', 'Canada', 'United Kingdom', 'Germany', 'France', 
    'Italy', 'Japan', 'Australia', 'New Zealand', 'Belgium', 'Netherlands',
    'Sweden', 'Norway', 'Denmark', 'Finland', 'Switzerland', 'Austria', 
    'Ireland', 'Luxembourg'
]

df3['region'] = np.where(df3['country'].isin(global_north), 'Global_North', 'Global_South')

In [45]:
# Categorize countries into  industrial and not industrial countries 
industrial_countries = [
    "United States",
    "Canada",
    "United Kingdom",
    "Germany",
    "France",
    "Italy",
    "Spain",
    "Netherlands",
    "Sweden",
    "Switzerland",
    "Austria",
    "Belgium",
    "Denmark",
    "Norway",
    "Finland",
    "Japan",
    "South Korea",
    "Australia",
    "New Zealand",
    "Ireland",
    "Singapore",
    "Luxembourg",
    "Hong Kong",
    "Israel",
    "Russia",
    "China",
    "Taiwan",
    "Portugal",
    "Greece",
    "Iceland",
    "Czech Republic",
    "Poland",
    "Hungary",
    "Slovakia",
    "Slovenia",
    "Estonia",
    "Lithuania",
    "Latvia",
    "Turkey",
    "United Arab Emirates"
]


df3['industrial_level'] = np.where(df3['country'].isin(industrial_countries), 'industrial', 'non_industrial')

In [46]:
# Create a Decade Column
df3['decade'] = (df3['year'] // 10) * 10

# Save the code

In [47]:
# save the cleaned data to a new CSV file
df3.to_csv('../Data/Clean Data/3. Complete_Data_Merged.csv', index=False)

In [48]:
df3

Unnamed: 0,country,year,population,co2_emission,temperature_change,region,industrial_level,decade
0,Aruba,1999,86867.0,810.407000,0.216,Global_South,non_industrial,1990
1,Afghanistan,1999,19262847.0,810.000000,1.301,Global_South,non_industrial,1990
2,Angola,1999,15870753.0,17610.000000,0.591,Global_South,non_industrial,1990
3,Albania,1999,3108778.0,2970.000000,0.722,Global_South,non_industrial,1990
4,Andorra,1999,65655.0,510.000000,1.020,Global_South,non_industrial,1990
...,...,...,...,...,...,...,...,...
4294,Vanuatu,2019,304404.0,209.999993,0.803,Global_South,non_industrial,2010
4295,Samoa,2019,211905.0,300.000012,0.828,Global_South,non_industrial,2010
4296,South Africa,2019,58087055.0,439640.014648,1.752,Global_South,non_industrial,2010
4297,Zambia,2019,18380477.0,6800.000191,1.295,Global_South,non_industrial,2010


In [49]:
#Define the mapping of countries to continents
country_to_continent = {
    'Afghanistan': 'Asia',
    'Albania': 'Europe',
    'Algeria': 'Africa',
    'Andorra': 'Europe',
    'Angola': 'Africa',
    'Antigua and Barbuda': 'North America',
    'Argentina': 'South America',
    'Armenia': 'Asia',
    'Australia': 'Australia',
    'Austria': 'Europe',
    'Azerbaijan': 'Asia',
    'Bahamas': 'North America',
    'Bahrain': 'Asia',
    'Bangladesh': 'Asia',
    'Barbados': 'North America',
    'Belarus': 'Europe',
    'Belgium': 'Europe',
    'Belize': 'North America',
    'Benin': 'Africa',
    'Bhutan': 'Asia',
    'Bolivia': 'South America',
    'Bosnia and Herzegovina': 'Europe',
    'Botswana': 'Africa',
    'Brazil': 'South America',
    'Brunei': 'Asia',
    'Bulgaria': 'Europe',
    'Burkina Faso': 'Africa',
    'Burundi': 'Africa',
    'Cabo Verde': 'Africa',
    'Cambodia': 'Asia',
    'Cameroon': 'Africa',
    'Canada': 'North America',
    'Central African Republic': 'Africa',
    'Chad': 'Africa',
    'Chile': 'South America',
    'China': 'Asia',
    'Colombia': 'South America',
    'Comoros': 'Africa',
    'Congo': 'Africa',
    'Costa Rica': 'North America',
    'Croatia': 'Europe',
    'Cuba': 'North America',
    'Cyprus': 'Asia',
    'Czech Republic': 'Europe',
    'Denmark': 'Europe',
    'Djibouti': 'Africa',
    'Dominica': 'North America',
    'Dominican Republic': 'North America',
    'Ecuador': 'South America',
    'Egypt': 'Africa',
    'El Salvador': 'North America',
    'Equatorial Guinea': 'Africa',
    'Eritrea': 'Africa',
    'Estonia': 'Europe',
    'Eswatini': 'Africa',
    'Ethiopia': 'Africa',
    'Fiji': 'Oceania',
    'Finland': 'Europe',
    'France': 'Europe',
    'Gabon': 'Africa',
    'Gambia': 'Africa',
    'Georgia': 'Asia',
    'Germany': 'Europe',
    'Ghana': 'Africa',
    'Greece': 'Europe',
    'Grenada': 'North America',
    'Guatemala': 'North America',
    'Guinea': 'Africa',
    'Guinea-Bissau': 'Africa',
    'Guyana': 'South America',
    'Haiti': 'North America',
    'Honduras': 'North America',
    'Hungary': 'Europe',
    'Iceland': 'Europe',
    'India': 'Asia',
    'Indonesia': 'Asia',
    'Iran': 'Asia',
    'Iraq': 'Asia',
    'Ireland': 'Europe',
    'Israel': 'Asia',
    'Italy': 'Europe',
    'Jamaica': 'North America',
    'Japan': 'Asia',
    'Jordan': 'Asia',
    'Kazakhstan': 'Asia',
    'Kenya': 'Africa',
    'Kiribati': 'Oceania',
    'Kuwait': 'Asia',
    'Kyrgyzstan': 'Asia',
    'Laos': 'Asia',
    'Latvia': 'Europe',
    'Lebanon': 'Asia',
    'Lesotho': 'Africa',
    'Liberia': 'Africa',
    'Libya': 'Africa',
    'Liechtenstein': 'Europe',
    'Lithuania': 'Europe',
    'Luxembourg': 'Europe',
    'Madagascar': 'Africa',
    'Malawi': 'Africa',
    'Malaysia': 'Asia',
    'Maldives': 'Asia',
    'Mali': 'Africa',
    'Malta': 'Europe',
    'Marshall Islands': 'Oceania',
    'Mauritania': 'Africa',
    'Mauritius': 'Africa',
    'Mexico': 'North America',
    'Micronesia': 'Oceania',
    'Moldova': 'Europe',
    'Monaco': 'Europe',
    'Mongolia': 'Asia',
    'Montenegro': 'Europe',
    'Morocco': 'Africa',
    'Mozambique': 'Africa',
    'Myanmar': 'Asia',
    'Namibia': 'Africa',
    'Nauru': 'Oceania',
    'Nepal': 'Asia',
    'Netherlands': 'Europe',
    'New Zealand': 'Oceania',
    'Nicaragua': 'North America',
    'Niger': 'Africa',
    'Nigeria': 'Africa',
    'North Korea': 'Asia',
    'North Macedonia': 'Europe',
    'Norway': 'Europe',
    'Oman': 'Asia',
    'Pakistan': 'Asia',
    'Palau': 'Oceania',
    'Panama': 'North America',
    'Papua New Guinea': 'Oceania',
    'Paraguay': 'South America',
    'Peru': 'South America',
    'Philippines': 'Asia',
    'Poland': 'Europe',
    'Portugal': 'Europe',
    'Qatar': 'Asia',
    'Romania': 'Europe',
    'Russia': 'Europe',
    'Rwanda': 'Africa',
    'Saint Kitts and Nevis': 'North America',
    'Saint Lucia': 'North America',
    'Saint Vincent and the Grenadines': 'North America',
    'Samoa': 'Oceania',
    'San Marino': 'Europe',
    'Sao Tome and Principe': 'Africa',
    'Saudi Arabia': 'Asia',
    'Senegal': 'Africa',
    'Serbia': 'Europe',
    'Seychelles': 'Africa',
    'Sierra Leone': 'Africa',
    'Singapore': 'Asia',
    'Slovakia': 'Europe',
    'Slovenia': 'Europe',
    'Solomon Islands': 'Oceania',
    'Somalia': 'Africa',
    'South Africa': 'Africa',
    'South Korea': 'Asia',
    'South Sudan': 'Africa',
    'Spain': 'Europe',
    'Sri Lanka': 'Asia',
    'Sudan': 'Africa',
    'Suriname': 'South America',
    'Sweden': 'Europe',
    'Switzerland': 'Europe',
    'Syria': 'Asia',
    'Taiwan': 'Asia',
    'Tajikistan': 'Asia',
    'Tanzania': 'Africa',
    'Thailand': 'Asia',
    'Timor-Leste': 'Asia',
    'Togo': 'Africa',
    'Tonga': 'Oceania',
    'Trinidad and Tobago': 'North America',
    'Tunisia': 'Africa',
    'Turkey': 'Asia/Europe',
    'Turkmenistan': 'Asia',
    'Tuvalu': 'Oceania',
    'Uganda': 'Africa',
    'Ukraine': 'Europe',
    'United Arab Emirates': 'Asia',
    'United Kingdom': 'Europe',
    'United States': 'North America',
    'Uruguay': 'South America',
    'Uzbekistan': 'Asia',
    'Vanuatu': 'Oceania',
    'Vatican City': 'Europe',
    'Venezuela': 'South America',
    'Vietnam': 'Asia',
    'Yemen': 'Asia',
    'Zambia': 'Africa',
    'Zimbabwe': 'Africa'
}

# Create the 'continent' column using the mapping
df3['continent'] = df3['country'].map(country_to_continent)
df3

Unnamed: 0,country,year,population,co2_emission,temperature_change,region,industrial_level,decade,continent
0,Aruba,1999,86867.0,810.407000,0.216,Global_South,non_industrial,1990,
1,Afghanistan,1999,19262847.0,810.000000,1.301,Global_South,non_industrial,1990,Asia
2,Angola,1999,15870753.0,17610.000000,0.591,Global_South,non_industrial,1990,Africa
3,Albania,1999,3108778.0,2970.000000,0.722,Global_South,non_industrial,1990,Europe
4,Andorra,1999,65655.0,510.000000,1.020,Global_South,non_industrial,1990,Europe
...,...,...,...,...,...,...,...,...,...
4294,Vanuatu,2019,304404.0,209.999993,0.803,Global_South,non_industrial,2010,Oceania
4295,Samoa,2019,211905.0,300.000012,0.828,Global_South,non_industrial,2010,Oceania
4296,South Africa,2019,58087055.0,439640.014648,1.752,Global_South,non_industrial,2010,Africa
4297,Zambia,2019,18380477.0,6800.000191,1.295,Global_South,non_industrial,2010,Africa


In [50]:

# Define a list of officially recognized countries (UN member states and observer states)
recognized_countries = [
    'Afghanistan', 'Albania', 'Algeria', 'Andorra', 'Angola', 'Antigua and Barbuda', 'Argentina',
    'Armenia', 'Australia', 'Austria', 'Azerbaijan', 'Bahamas', 'Bahrain', 'Bangladesh', 'Barbados',
    'Belarus', 'Belgium', 'Belize', 'Benin', 'Bhutan', 'Bolivia', 'Bosnia and Herzegovina', 'Botswana',
    'Brazil', 'Brunei', 'Bulgaria', 'Burkina Faso', 'Burundi', 'Cabo Verde', 'Cambodia', 'Cameroon',
    'Canada', 'Central African Republic', 'Chad', 'Chile', 'China', 'Colombia', 'Comoros', 'Congo',
    'Costa Rica', 'Croatia', 'Cuba', 'Cyprus', 'Czech Republic', 'Denmark', 'Djibouti', 'Dominica',
    'Dominican Republic', 'Ecuador', 'Egypt', 'El Salvador', 'Equatorial Guinea', 'Eritrea', 'Estonia',
    'Eswatini', 'Ethiopia', 'Fiji', 'Finland', 'France', 'Gabon', 'Gambia', 'Georgia', 'Germany',
    'Ghana', 'Greece', 'Grenada', 'Guatemala', 'Guinea', 'Guinea-Bissau', 'Guyana', 'Haiti', 'Honduras',
    'Hungary', 'Iceland', 'India', 'Indonesia', 'Iran', 'Iraq', 'Ireland', 'Israel', 'Italy', 'Jamaica',
    'Japan', 'Jordan', 'Kazakhstan', 'Kenya', 'Kiribati', 'Kuwait', 'Kyrgyzstan', 'Laos', 'Latvia',
    'Lebanon', 'Lesotho', 'Liberia', 'Libya', 'Liechtenstein', 'Lithuania', 'Luxembourg', 'Madagascar',
    'Malawi', 'Malaysia', 'Maldives', 'Mali', 'Malta', 'Marshall Islands', 'Mauritania', 'Mauritius',
    'Mexico', 'Micronesia', 'Moldova', 'Monaco', 'Mongolia', 'Montenegro', 'Morocco', 'Mozambique',
    'Myanmar', 'Namibia', 'Nauru', 'Nepal', 'Netherlands', 'New Zealand', 'Nicaragua', 'Niger',
    'Nigeria', 'North Korea', 'North Macedonia', 'Norway', 'Oman', 'Pakistan', 'Palau', 'Panama',
    'Papua New Guinea', 'Paraguay', 'Peru', 'Philippines', 'Poland', 'Portugal', 'Qatar', 'Romania',
    'Russia', 'Rwanda', 'Saint Kitts and Nevis', 'Saint Lucia', 'Saint Vincent and the Grenadines',
    'Samoa', 'San Marino', 'Sao Tome and Principe', 'Saudi Arabia', 'Senegal', 'Serbia', 'Seychelles',
    'Sierra Leone', 'Singapore', 'Slovakia', 'Slovenia', 'Solomon Islands', 'Somalia', 'South Africa',
    'South Korea', 'South Sudan', 'Spain', 'Sri Lanka', 'Sudan', 'Suriname', 'Sweden', 'Switzerland',
    'Syria', 'Taiwan', 'Tajikistan', 'Tanzania', 'Thailand', 'Timor-Leste', 'Togo', 'Tonga',
    'Trinidad and Tobago', 'Tunisia', 'Turkey', 'Turkmenistan', 'Tuvalu', 'Uganda', 'Ukraine',
    'United Arab Emirates', 'United Kingdom', 'United States', 'Uruguay', 'Uzbekistan', 'Vanuatu',
    'Vatican City', 'Venezuela', 'Vietnam', 'Yemen', 'Zambia', 'Zimbabwe'
]

# Filter the DataFrame to include only recognized countries
df3_filtered = df3[df3['country'].isin(recognized_countries)]

# Verify the changes
df3 = df3_filtered

In [51]:
df3

Unnamed: 0,country,year,population,co2_emission,temperature_change,region,industrial_level,decade,continent
1,Afghanistan,1999,19262847.0,810.000000,1.301,Global_South,non_industrial,1990,Asia
2,Angola,1999,15870753.0,17610.000000,0.591,Global_South,non_industrial,1990,Africa
3,Albania,1999,3108778.0,2970.000000,0.722,Global_South,non_industrial,1990,Europe
4,Andorra,1999,65655.0,510.000000,1.020,Global_South,non_industrial,1990,Europe
5,United Arab Emirates,1999,3097563.0,85680.000000,1.351,Global_South,industrial,1990,Asia
...,...,...,...,...,...,...,...,...,...
4294,Vanuatu,2019,304404.0,209.999993,0.803,Global_South,non_industrial,2010,Oceania
4295,Samoa,2019,211905.0,300.000012,0.828,Global_South,non_industrial,2010,Oceania
4296,South Africa,2019,58087055.0,439640.014648,1.752,Global_South,non_industrial,2010,Africa
4297,Zambia,2019,18380477.0,6800.000191,1.295,Global_South,non_industrial,2010,Africa
