In [115]:
#set the columns name to lower case, strip and replace empty spaces
def column_names(df_name):
    new_column_names = [column.strip().lower().replace(" ", "_") for column in df_name.columns]
    df_name.columns = new_column_names

#change the name of a column
def change_column_name(df, old_name, new_name):
    df.rename(columns={old_name: new_name}, inplace=True)
    return df 

#drop one column
def drop_column(df, column):
    df = df.drop(column,axis='columns')
    return df
#drop one row by the index
def drop_row_index(df, df_index):
    df = df.drop(index=df_index)
    return df
#change to numeric 
def to_numeric(df, columns_to_proces):
    for column in columns_to_proces: 
        df[column] = pd.to_numeric(df[column], errors='coerce')

# **Data merge**

In [116]:
# We have two cleaned data sets, one about the population and CO2 
# emission and one with the temperature, all of them from 1999 to 
# 2019. We will reshaped them and check all the values are ready 
# to be analyzed.

## Import all the libraries we will use

In [117]:
import pandas as pd
import numpy as np

## Import the data sets

In [118]:

df1= pd.read_csv('../Data/Clean Data/1. co2_population_cleaned.csv')
df2= pd.read_csv('../Data/Clean Data/2. Temperature_cleaned.csv')

## Chekout the two data sets

In [119]:
#df1: Population and CO2 emissions

df1

Unnamed: 0,country,year,population,co2_emission
0,Aruba,1999,86867.0,810.407000
1,Africa Eastern and Southern,1999,391486231.0,351070.000000
2,Afghanistan,1999,19262847.0,810.000000
3,Africa Western and Central,1999,262397030.0,133013.187487
4,Angola,1999,15870753.0,17610.000000
...,...,...,...,...
5265,Samoa,2019,211905.0,300.000012
5266,"Yemen, Rep.",2019,31546691.0,11100.000381
5267,South Africa,2019,58087055.0,439640.014648
5268,Zambia,2019,18380477.0,6800.000191


In [120]:
# df2: Earth temperature dataset
df2

Unnamed: 0,country,year,temperature_change
0,Afghanistan,1999,1.301
1,Albania,1999,0.722
2,Algeria,1999,1.456
3,American Samoa,1999,0.243
4,Andorra,1999,1.020
...,...,...,...
4741,Palestine,2019,1.298
4742,Western Sahara,2019,1.430
4743,World,2019,1.449
4744,Zambia,2019,1.295


## Standardize the country names: Ensure consistency in country names.

## Check the name of the countries

In [121]:
country_name_mapping =  {
    'Afghanistan, Islamic Rep. of' : 'Afghanistan',
    'Afghanistan' : 'Afghanistan',
    "Islamic Rep. of', Albania" : 'Albania',
    'Algeria' : 'Algeria',
    'American Samoa' :  'American Samoa', 
    'Andorra' : 'Andorra',
    'Andorra, Principality of': 'Andorra',
    'Angola' : 'Angola',
    'Antigua and Barbuda' : 'Antigua and Barbuda',
    'Argentina' : 'Argentina' ,
    'Armenia' : 'Armenia',
    'Armenia, Rep. of' : 'Armenia',
    'Aruba, Kingdom of the Netherlands' :'Aruba',
    'Australia' : 'Australia',
    'Austria' : 'Austria',
    'Azerbaijan' : 'Azerbaijan',
    'Azerbaijan, Rep. of' : 'Azerbaijan',
    'Bahamas, The' : 'Bahamas',
    'Bahamas, The' : 'Bahamas',
    'Bahrain' : 'Bahrain',
    'Bahrain, Kingdom of' : 'Bahrain',
    'Bangladesh' : 'Bangladesh',
    'Barbados' : 'Barbados',
    'Barbados' : 'Barbados',
    'Belarus' : 'Belarus',
    'Belarus, Rep. of' : 'Belarus',
    'Belgium' : 'Belgium',
    'Belize' :'Belize',
    'Benin' : 'Benin',
    'Bhutan' : 'Bhutan',
    'Bolivia' : 'Bolivia',
    'Bosnia and Herzegovina' : 'Bosnia and Herzegovina',
    'Botswana' : 'Botswana',
    'Brazil' : 'Brazil',
    'British Virgin Islands' : 'British Virgin Islands',
    'Central African Republic' :'Central African Republic',
    'Central African Rep.' : 'Central African Republic',
    'Comoros' : 'Comoros',
    'Comoros, Union of the' : 'Comoros',
    'Congo, Rep.' : 'Congo, Rep.',
    'Congo, Rep. of' : 'Congo, Rep.',
    'Croatia, Rep. of' : 'Croatia',
    'Dominican Republic' : 'Dominican Rep.',
    'Egypt, Arab Rep.' : 'Egypt, Arab Rep. of',
    'Equatorial Guinea, Rep. of' : 'Equatorial Guinea',
    'Eritrea, The State of' : 'Eritrea',
    'Estonia, Rep. of' : 'Estonia',
    'Eswatini, Kingdom of' : 'Eswatini',
    'Ethiopia, The Federal Dem. Rep. of' : 'Ethiopia',
    'Fiji, Rep. of' : 'Fiji',
    'Kazakhstan, Rep. of' : 'Kazakhstan',
    "Korea, Dem. People's Rep. of" :"Korea, Dem. People's Rep.",
    'Korea, Rep. of' : 'Korea, Rep.',
    'Kyrgyz Republic' : 'Kyrgyz Rep.',
    "Lao People's Dem. Rep." : 'Lao PDR',
    'Lesotho, Kingdom of' : 'Lesotho',
    'Marshall Islands, Rep. of the' : 'Marshall Islands',
    'Mauritania, Islamic Rep. of' : 'Mauritania',
    'Micronesia, Federated States of' : 'Micronesia',
    'Micronesia, Fed. Sts.' : 'Micronesia',
    'Moldova, Rep. of' : 'Moldova',
    'Mozambique, Rep. of' : 'Mozambique',
    'Nauru, Rep. of' : 'Nauru',
    'Netherlands, The' : 'Netherlands',
    'North Macedonia, Republic of' : 'North Macedonia',
    'Palau, Rep. of' : 'Palau',
    'Poland, Rep. of' : 'Poland',
    'San Marino, Rep. of' : 'San Marino', 
    'Serbia, Rep. of' : 'Serbia',
    'Slovak Rep.': 'Slovak',
    'South Sudan, Rep. of' : 'South Sudan',
    'Syrian Arab Republic' : 'Syrian Arab Rep',
    'Tajikistan, Rep. of' : 'Tajikistan',
    'Tanzania, United Rep. of' : 'Tanzania',
    'Timor-Leste, Dem. Rep. of' : 'Timor-Leste',
    'Turkiye' : 'Turkey',
    "Viet Nam": "Vietnam", 
    "Türkiye": "Turkey",
    'Virgin Islands (U.S.)' : 'United States Virgin Islands',
    'Uzbekistan, Rep. of' : 'Uzbekistan',
    'Venezuela, Rep. Bolivariana de' : 'Venezuela',
    'West Bank and Gaza' : 'Palestine',
    'Yemen, Rep. of' : 'Yemen, Rep.',
    }

In [122]:
df1['country'] = df2['country'].replace(country_name_mapping)

In [123]:
df1.country.unique()

array(['Afghanistan', 'Albania', 'Algeria', 'American Samoa', 'Andorra',
       'Angola', 'Anguilla', 'Antigua and Barbuda', 'Argentina',
       'Armenia', 'Aruba', 'Australia', 'Austria', 'Azerbaijan',
       'Bahamas', 'Bahrain', 'Bangladesh', 'Barbados', 'Belarus',
       'Belgium', 'Belize', 'Benin', 'Bhutan', 'Bolivia',
       'Bosnia and Herzegovina', 'Botswana', 'Brazil',
       'British Virgin Islands', 'Brunei Darussalam', 'Bulgaria',
       'Burkina Faso', 'Burundi', 'Cabo Verde', 'Cambodia', 'Cameroon',
       'Canada', 'Cayman Islands', 'Central African Republic', 'Chad',
       'Chile', 'China, P.R.: Hong Kong', 'China, P.R.: Macao',
       'China, P.R.: Mainland', 'Colombia', 'Comoros',
       'Congo, Dem. Rep. of the', 'Congo, Rep.', 'Cook Islands',
       'Costa Rica', 'Croatia', 'Cuba', 'Cyprus', 'Czech Rep.', 'Denmark',
       'Djibouti', 'Dominica', 'Dominican Rep.', 'Ecuador',
       'Egypt, Arab Rep. of', 'El Salvador', 'Equatorial Guinea',
       'Eritrea', 'Eston

In [29]:
df2

Unnamed: 0,country,year,temperature_change
0,Afghanistan,1999,1.301
1,Albania,1999,0.722
2,Algeria,1999,1.456
3,American Samoa,1999,0.243
4,Andorra,1999,1.020
...,...,...,...
4740,Wallis and Futuna Islands,2019,1.110
4741,Palestine,2019,1.298
4742,Western Sahara,2019,1.430
4744,Zambia,2019,1.295


In [11]:
len(df1['country'])

5270

In [10]:
df2['country'] = df1['country'].replace(country_name_mapping)

In [11]:
len(df2['country'])

4746

In [12]:
#droping duplicates
df1.drop_duplicates(subset=['country', 'year'], keep='first', inplace=True)
df2.drop_duplicates(subset=['country', 'year'], keep='first', inplace=True)

# Merge both DataFrames to a new dataset called df3 

In [13]:
df3 = pd.merge(df1, df2, on=['country', 'year'], how='inner')
df3

Unnamed: 0,country,year,population,co2_emission,temperature_change
0,Afghanistan,1999,86867.0,810.407000,1.301
1,Albania,1999,391486231.0,351070.000000,0.722
2,Algeria,1999,19262847.0,810.000000,1.456
3,American Samoa,1999,262397030.0,133013.187487,0.243
4,Andorra,1999,15870753.0,17610.000000,1.020
...,...,...,...,...,...
4256,Palestine,2017,643634.0,349.999994,0.821
4257,Western Sahara,2017,7677565.0,939.999998,2.078
4258,World,2017,6266654.0,6380.000114,1.424
4259,Zambia,2017,14864221.0,660.000026,0.721


Drop world

In [17]:
df2 = df2[~df2['country'].isin(['World'])]
df1 = df1[~df1['country'].isin(['World'])]
df3.country.tail(20)

4241                    Turkmenistan
4242        Turks and Caicos Islands
4243                          Tuvalu
4244                          Uganda
4245                         Ukraine
4246            United Arab Emirates
4247                  United Kingdom
4248                   United States
4249    United States Virgin Islands
4250                         Uruguay
4251                      Uzbekistan
4252                         Vanuatu
4253                       Venezuela
4254                         Vietnam
4255       Wallis and Futuna Islands
4256                       Palestine
4257                  Western Sahara
4258                           World
4259                          Zambia
4260                        Zimbabwe
Name: country, dtype: object

## Recheck the data

### Check the data types 

In [14]:
df3.dtypes

country                object
year                    int64
population            float64
co2_emission          float64
temperature_change    float64
dtype: object

In [15]:
len(df3)

4261

In [16]:
#check for duplicates
df3.duplicated().any()

False

In [17]:
# Remove rows with NaN values
df3.dropna(inplace=True)

In [18]:
# Remove rows where the country name includes the word "world"
df3 = df3[~df3['country'].str.contains(r'world', case=False, regex=True)]

## Setup Data Frame

### Categorize countries: Add a column to categorize countries into Global North and Global South.

In [19]:
# Categorize countries into Global North and Global South

global_north = [
    'United States', 'Canada', 'United Kingdom', 'Germany', 'France', 
    'Italy', 'Japan', 'Australia', 'New Zealand', 'Belgium', 'Netherlands',
    'Sweden', 'Norway', 'Denmark', 'Finland', 'Switzerland', 'Austria', 
    'Ireland', 'Luxembourg'
]

df3['region'] = np.where(df3['country'].isin(global_north), 'Global_North', 'Global_South')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df3['region'] = np.where(df3['country'].isin(global_north), 'Global_North', 'Global_South')


In [20]:
# Categorize countries into  industrial and not industrial countries 
industrial_countries = [
    "United States",
    "Canada",
    "United Kingdom",
    "Germany",
    "France",
    "Italy",
    "Spain",
    "Netherlands",
    "Sweden",
    "Switzerland",
    "Austria",
    "Belgium",
    "Denmark",
    "Norway",
    "Finland",
    "Japan",
    "South Korea",
    "Australia",
    "New Zealand",
    "Ireland",
    "Singapore",
    "Luxembourg",
    "Hong Kong",
    "Israel",
    "Russia",
    "China",
    "Taiwan",
    "Portugal",
    "Greece",
    "Iceland",
    "Czech Republic",
    "Poland",
    "Hungary",
    "Slovakia",
    "Slovenia",
    "Estonia",
    "Lithuania",
    "Latvia",
    "Turkey",
    "United Arab Emirates"
]


df3['industrial_level'] = np.where(df3['country'].isin(industrial_countries), 'industrial', 'non_industrial')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df3['industrial_level'] = np.where(df3['country'].isin(industrial_countries), 'industrial', 'non_industrial')


In [21]:
# Create a Decade Column
df3['decade'] = (df3['year'] // 10) * 10

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df3['decade'] = (df3['year'] // 10) * 10


# Save the code

In [22]:
# save the cleaned data to a new CSV file
df3.to_csv('../Data/Clean Data/3. Complete_Data_Merged.csv', index=False)

In [23]:
df3

Unnamed: 0,country,year,population,co2_emission,temperature_change,region,industrial_level,decade
0,Afghanistan,1999,86867.0,810.407000,1.301,Global_South,non_industrial,1990
1,Albania,1999,391486231.0,351070.000000,0.722,Global_South,non_industrial,1990
2,Algeria,1999,19262847.0,810.000000,1.456,Global_South,non_industrial,1990
3,American Samoa,1999,262397030.0,133013.187487,0.243,Global_South,non_industrial,1990
4,Andorra,1999,15870753.0,17610.000000,1.020,Global_South,non_industrial,1990
...,...,...,...,...,...,...,...,...
4255,Wallis and Futuna Islands,2017,5612253.0,49139.999390,0.960,Global_South,non_industrial,2010
4256,Palestine,2017,643634.0,349.999994,0.821,Global_South,non_industrial,2010
4257,Western Sahara,2017,7677565.0,939.999998,2.078,Global_South,non_industrial,2010
4259,Zambia,2017,14864221.0,660.000026,0.721,Global_South,non_industrial,2010


In [24]:
#Define the mapping of countries to continents
country_to_continent = {
    'Afghanistan': 'Asia',
    'Albania': 'Europe',
    'Algeria': 'Africa',
    'Andorra': 'Europe',
    'Angola': 'Africa',
    'Antigua and Barbuda': 'North America',
    'Argentina': 'South America',
    'Armenia': 'Asia',
    'Australia': 'Australia',
    'Austria': 'Europe',
    'Azerbaijan': 'Asia',
    'Bahamas': 'North America',
    'Bahrain': 'Asia',
    'Bangladesh': 'Asia',
    'Barbados': 'North America',
    'Belarus': 'Europe',
    'Belgium': 'Europe',
    'Belize': 'North America',
    'Benin': 'Africa',
    'Bhutan': 'Asia',
    'Bolivia': 'South America',
    'Bosnia and Herzegovina': 'Europe',
    'Botswana': 'Africa',
    'Brazil': 'South America',
    'Brunei': 'Asia',
    'Bulgaria': 'Europe',
    'Burkina Faso': 'Africa',
    'Burundi': 'Africa',
    'Cabo Verde': 'Africa',
    'Cambodia': 'Asia',
    'Cameroon': 'Africa',
    'Canada': 'North America',
    'Central African Republic': 'Africa',
    'Chad': 'Africa',
    'Chile': 'South America',
    'China': 'Asia',
    'Colombia': 'South America',
    'Comoros': 'Africa',
    'Congo': 'Africa',
    'Costa Rica': 'North America',
    'Croatia': 'Europe',
    'Cuba': 'North America',
    'Cyprus': 'Asia',
    'Czech Republic': 'Europe',
    'Denmark': 'Europe',
    'Djibouti': 'Africa',
    'Dominica': 'North America',
    'Dominican Republic': 'North America',
    'Ecuador': 'South America',
    'Egypt': 'Africa',
    'El Salvador': 'North America',
    'Equatorial Guinea': 'Africa',
    'Eritrea': 'Africa',
    'Estonia': 'Europe',
    'Eswatini': 'Africa',
    'Ethiopia': 'Africa',
    'Fiji': 'Oceania',
    'Finland': 'Europe',
    'France': 'Europe',
    'Gabon': 'Africa',
    'Gambia': 'Africa',
    'Georgia': 'Asia',
    'Germany': 'Europe',
    'Ghana': 'Africa',
    'Greece': 'Europe',
    'Grenada': 'North America',
    'Guatemala': 'North America',
    'Guinea': 'Africa',
    'Guinea-Bissau': 'Africa',
    'Guyana': 'South America',
    'Haiti': 'North America',
    'Honduras': 'North America',
    'Hungary': 'Europe',
    'Iceland': 'Europe',
    'India': 'Asia',
    'Indonesia': 'Asia',
    'Iran': 'Asia',
    'Iraq': 'Asia',
    'Ireland': 'Europe',
    'Israel': 'Asia',
    'Italy': 'Europe',
    'Jamaica': 'North America',
    'Japan': 'Asia',
    'Jordan': 'Asia',
    'Kazakhstan': 'Asia',
    'Kenya': 'Africa',
    'Kiribati': 'Oceania',
    'Kuwait': 'Asia',
    'Kyrgyzstan': 'Asia',
    'Laos': 'Asia',
    'Latvia': 'Europe',
    'Lebanon': 'Asia',
    'Lesotho': 'Africa',
    'Liberia': 'Africa',
    'Libya': 'Africa',
    'Liechtenstein': 'Europe',
    'Lithuania': 'Europe',
    'Luxembourg': 'Europe',
    'Madagascar': 'Africa',
    'Malawi': 'Africa',
    'Malaysia': 'Asia',
    'Maldives': 'Asia',
    'Mali': 'Africa',
    'Malta': 'Europe',
    'Marshall Islands': 'Oceania',
    'Mauritania': 'Africa',
    'Mauritius': 'Africa',
    'Mexico': 'North America',
    'Micronesia': 'Oceania',
    'Moldova': 'Europe',
    'Monaco': 'Europe',
    'Mongolia': 'Asia',
    'Montenegro': 'Europe',
    'Morocco': 'Africa',
    'Mozambique': 'Africa',
    'Myanmar': 'Asia',
    'Namibia': 'Africa',
    'Nauru': 'Oceania',
    'Nepal': 'Asia',
    'Netherlands': 'Europe',
    'New Zealand': 'Oceania',
    'Nicaragua': 'North America',
    'Niger': 'Africa',
    'Nigeria': 'Africa',
    'North Korea': 'Asia',
    'North Macedonia': 'Europe',
    'Norway': 'Europe',
    'Oman': 'Asia',
    'Pakistan': 'Asia',
    'Palau': 'Oceania',
    'Panama': 'North America',
    'Papua New Guinea': 'Oceania',
    'Paraguay': 'South America',
    'Peru': 'South America',
    'Philippines': 'Asia',
    'Poland': 'Europe',
    'Portugal': 'Europe',
    'Qatar': 'Asia',
    'Romania': 'Europe',
    'Russia': 'Europe',
    'Rwanda': 'Africa',
    'Saint Kitts and Nevis': 'North America',
    'Saint Lucia': 'North America',
    'Saint Vincent and the Grenadines': 'North America',
    'Samoa': 'Oceania',
    'San Marino': 'Europe',
    'Sao Tome and Principe': 'Africa',
    'Saudi Arabia': 'Asia',
    'Senegal': 'Africa',
    'Serbia': 'Europe',
    'Seychelles': 'Africa',
    'Sierra Leone': 'Africa',
    'Singapore': 'Asia',
    'Slovakia': 'Europe',
    'Slovenia': 'Europe',
    'Solomon Islands': 'Oceania',
    'Somalia': 'Africa',
    'South Africa': 'Africa',
    'South Korea': 'Asia',
    'South Sudan': 'Africa',
    'Spain': 'Europe',
    'Sri Lanka': 'Asia',
    'Sudan': 'Africa',
    'Suriname': 'South America',
    'Sweden': 'Europe',
    'Switzerland': 'Europe',
    'Syria': 'Asia',
    'Taiwan': 'Asia',
    'Tajikistan': 'Asia',
    'Tanzania': 'Africa',
    'Thailand': 'Asia',
    'Timor-Leste': 'Asia',
    'Togo': 'Africa',
    'Tonga': 'Oceania',
    'Trinidad and Tobago': 'North America',
    'Tunisia': 'Africa',
    'Turkey': 'Asia/Europe',
    'Turkmenistan': 'Asia',
    'Tuvalu': 'Oceania',
    'Uganda': 'Africa',
    'Ukraine': 'Europe',
    'United Arab Emirates': 'Asia',
    'United Kingdom': 'Europe',
    'United States': 'North America',
    'Uruguay': 'South America',
    'Uzbekistan': 'Asia',
    'Vanuatu': 'Oceania',
    'Vatican City': 'Europe',
    'Venezuela': 'South America',
    'Vietnam': 'Asia',
    'Yemen': 'Asia',
    'Zambia': 'Africa',
    'Zimbabwe': 'Africa'
}

# Create the 'continent' column using the mapping
df3['continent'] = df3['country'].map(country_to_continent)
df3

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df3['continent'] = df3['country'].map(country_to_continent)


Unnamed: 0,country,year,population,co2_emission,temperature_change,region,industrial_level,decade,continent
0,"Afghanistan, Islamic Rep. of",1999,86867.0,810.407000,1.301,Global_South,non_industrial,1990,
1,Albania,1999,391486231.0,351070.000000,0.722,Global_South,non_industrial,1990,Europe
2,Algeria,1999,19262847.0,810.000000,1.456,Global_South,non_industrial,1990,Africa
3,American Samoa,1999,262397030.0,133013.187487,0.243,Global_South,non_industrial,1990,
4,Andorra,1999,15870753.0,17610.000000,1.020,Global_South,non_industrial,1990,Europe
...,...,...,...,...,...,...,...,...,...
4255,Wallis and Futuna Islands,2017,5612253.0,49139.999390,0.960,Global_South,non_industrial,2010,
4256,Palestine,2017,643634.0,349.999994,0.821,Global_South,non_industrial,2010,
4257,Western Sahara,2017,7677565.0,939.999998,2.078,Global_South,non_industrial,2010,
4259,Zambia,2017,14864221.0,660.000026,0.721,Global_South,non_industrial,2010,Africa


In [139]:

# Define a list of officially recognized countries (UN member states and observer states)
recognized_countries = [
    'Afghanistan', 'Albania', 'Algeria', 'Andorra', 'Angola', 'Antigua and Barbuda', 'Argentina',
    'Armenia', 'Australia', 'Austria', 'Azerbaijan', 'Bahamas', 'Bahrain', 'Bangladesh', 'Barbados',
    'Belarus', 'Belgium', 'Belize', 'Benin', 'Bhutan', 'Bolivia', 'Bosnia and Herzegovina', 'Botswana',
    'Brazil', 'Brunei', 'Bulgaria', 'Burkina Faso', 'Burundi', 'Cabo Verde', 'Cambodia', 'Cameroon',
    'Canada', 'Central African Republic', 'Chad', 'Chile', 'China', 'Colombia', 'Comoros', 'Congo',
    'Costa Rica', 'Croatia', 'Cuba', 'Cyprus', 'Czech Republic', 'Denmark', 'Djibouti', 'Dominica',
    'Dominican Republic', 'Ecuador', 'Egypt', 'El Salvador', 'Equatorial Guinea', 'Eritrea', 'Estonia',
    'Eswatini', 'Ethiopia', 'Fiji', 'Finland', 'France', 'Gabon', 'Gambia', 'Georgia', 'Germany',
    'Ghana', 'Greece', 'Grenada', 'Guatemala', 'Guinea', 'Guinea-Bissau', 'Guyana', 'Haiti', 'Honduras',
    'Hungary', 'Iceland', 'India', 'Indonesia', 'Iran', 'Iraq', 'Ireland', 'Israel', 'Italy', 'Jamaica',
    'Japan', 'Jordan', 'Kazakhstan', 'Kenya', 'Kiribati', 'Kuwait', 'Kyrgyzstan', 'Laos', 'Latvia',
    'Lebanon', 'Lesotho', 'Liberia', 'Libya', 'Liechtenstein', 'Lithuania', 'Luxembourg', 'Madagascar',
    'Malawi', 'Malaysia', 'Maldives', 'Mali', 'Malta', 'Marshall Islands', 'Mauritania', 'Mauritius',
    'Mexico', 'Micronesia', 'Moldova', 'Monaco', 'Mongolia', 'Montenegro', 'Morocco', 'Mozambique',
    'Myanmar', 'Namibia', 'Nauru', 'Nepal', 'Netherlands', 'New Zealand', 'Nicaragua', 'Niger',
    'Nigeria', 'North Korea', 'North Macedonia', 'Norway', 'Oman', 'Pakistan', 'Palau', 'Panama',
    'Papua New Guinea', 'Paraguay', 'Peru', 'Philippines', 'Poland', 'Portugal', 'Qatar', 'Romania',
    'Russia', 'Rwanda', 'Saint Kitts and Nevis', 'Saint Lucia', 'Saint Vincent and the Grenadines',
    'Samoa', 'San Marino', 'Sao Tome and Principe', 'Saudi Arabia', 'Senegal', 'Serbia', 'Seychelles',
    'Sierra Leone', 'Singapore', 'Slovakia', 'Slovenia', 'Solomon Islands', 'Somalia', 'South Africa',
    'South Korea', 'South Sudan', 'Spain', 'Sri Lanka', 'Sudan', 'Suriname', 'Sweden', 'Switzerland',
    'Syria', 'Taiwan', 'Tajikistan', 'Tanzania', 'Thailand', 'Timor-Leste', 'Togo', 'Tonga',
    'Trinidad and Tobago', 'Tunisia', 'Turkey', 'Turkmenistan', 'Tuvalu', 'Uganda', 'Ukraine',
    'United Arab Emirates', 'United Kingdom', 'United States', 'Uruguay', 'Uzbekistan', 'Vanuatu',
    'Vatican City', 'Venezuela', 'Vietnam', 'Yemen', 'Zambia', 'Zimbabwe'
]

# Filter the DataFrame to include only recognized countries
df3_filtered = df3[df3['country'].isin(recognized_countries)]

# Verify the changes
df3 = df3_filtered

Unnamed: 0,country,year,population,co2_emission,temperature_change,region,industrial_level,decade,continent
1,Albania,1999,391486231.0,351070.000000,0.722,Global_South,non_industrial,1990,Europe
2,Algeria,1999,19262847.0,810.000000,1.456,Global_South,non_industrial,1990,Africa
4,Andorra,1999,15870753.0,17610.000000,1.020,Global_South,non_industrial,1990,Europe
5,Angola,1999,3108778.0,2970.000000,0.591,Global_South,non_industrial,1990,Africa
7,Antigua and Barbuda,1999,280650496.0,901624.461624,0.611,Global_South,non_industrial,1990,North America
...,...,...,...,...,...,...,...,...,...
4252,Vanuatu,2017,34193122.0,545070.007324,1.032,Global_South,non_industrial,2010,Oceania
4253,Venezuela,2017,40679828.0,20959.999084,0.752,Global_South,non_industrial,2010,South America
4254,Vietnam,2017,15157793.0,9800.000191,1.213,Global_South,non_industrial,2010,Asia
4259,Zambia,2017,14864221.0,660.000026,0.721,Global_South,non_industrial,2010,Africa
