<h1>Dataset cleaning</h1>

In [232]:
import pandas as pd

In [233]:
emissions = pd.read_csv('co-emissions-per-capita.csv')
emissions.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 26600 entries, 0 to 26599
Data columns (total 4 columns):
 #   Column                             Non-Null Count  Dtype  
---  ------                             --------------  -----  
 0   Entity                             26600 non-null  object 
 1   Code                               23046 non-null  object 
 2   Year                               26600 non-null  int64  
 3   Annual CO₂ emissions (per capita)  26600 non-null  float64
dtypes: float64(1), int64(1), object(2)
memory usage: 831.4+ KB


In [234]:
emissions = emissions.rename(columns={"Annual CO₂ emissions (per capita)": "CO2"})
emissions.head()

Unnamed: 0,Entity,Code,Year,CO2
0,Afghanistan,AFG,1949,0.001992
1,Afghanistan,AFG,1950,0.011266
2,Afghanistan,AFG,1951,0.012098
3,Afghanistan,AFG,1952,0.011946
4,Afghanistan,AFG,1953,0.013685


In [235]:
micro_idx = (emissions['Entity']=='Micronesia (country)').fillna(False)
wrl_inx = emissions['Code'].str.contains('OWID_WRL', na=False)
kos_inx = emissions['Code'].str.contains('OWID_KOS', na=False)
emissions.loc[micro_idx, 'Entity'] = 'Micronesia'
emissions.loc[wrl_inx, 'Code'] = 'WRL'
emissions.loc[kos_inx, 'Code'] = 'KOS'

In [236]:
emissions_not_countries = emissions[(emissions['Code'].isnull()) | (emissions['Code']=='WRL')]
emissions_not_countries['Entity'].unique()

array(['Africa', 'Asia', 'Asia (excl. China and India)', 'Europe',
       'Europe (excl. EU-27)', 'Europe (excl. EU-28)',
       'European Union (27)', 'European Union (28)',
       'High-income countries', 'Low-income countries',
       'Lower-middle-income countries', 'North America',
       'North America (excl. USA)', 'Oceania', 'South America',
       'Upper-middle-income countries', 'World'], dtype=object)

In [237]:
emissions_countries = emissions[emissions['Code'].notnull() & (emissions['Code']!='WRL')]
emissions_countries.head()

Unnamed: 0,Entity,Code,Year,CO2
0,Afghanistan,AFG,1949,0.001992
1,Afghanistan,AFG,1950,0.011266
2,Afghanistan,AFG,1951,0.012098
3,Afghanistan,AFG,1952,0.011946
4,Afghanistan,AFG,1953,0.013685


In [238]:
countries_continents = [
    ('Afghanistan', 'Asia'), ('Albania', 'Europe'), ('Algeria', 'Africa'), ('Andorra', 'Europe'), 
    ('Angola', 'Africa'), ('Anguilla', 'North America'), ('Antigua and Barbuda', 'North America'), 
    ('Argentina', 'South America'), ('Armenia', 'Asia'), ('Aruba', 'North America'), 
    ('Australia', 'Oceania'), ('Austria', 'Europe'), ('Azerbaijan', 'Asia'), ('Bahamas', 'North America'), 
    ('Bahrain', 'Asia'), ('Bangladesh', 'Asia'), ('Barbados', 'North America'), ('Belarus', 'Europe'), 
    ('Belgium', 'Europe'), ('Belize', 'North America'), ('Benin', 'Africa'), ('Bermuda', 'North America'), 
    ('Bhutan', 'Asia'), ('Bolivia', 'South America'), ('Bonaire Sint Eustatius and Saba', 'North America'), 
    ('Bosnia and Herzegovina', 'Europe'), ('Botswana', 'Africa'), ('Brazil', 'South America'), 
    ('British Virgin Islands', 'North America'), ('Brunei', 'Asia'), ('Bulgaria', 'Europe'), 
    ('Burkina Faso', 'Africa'), ('Burundi', 'Africa'), ('Cambodia', 'Asia'), ('Cameroon', 'Africa'), 
    ('Canada', 'North America'), ('Cape Verde', 'Africa'), ('Central African Republic', 'Africa'), 
    ('Chad', 'Africa'), ('Chile', 'South America'), ('China', 'Asia'), ('Colombia', 'South America'), 
    ('Comoros', 'Africa'), ('Congo', 'Africa'), ('Cook Islands', 'Oceania'), ('Costa Rica', 'North America'), 
    ("Cote d'Ivoire", 'Africa'), ('Croatia', 'Europe'), ('Cuba', 'North America'), ('Curacao', 'North America'), 
    ('Cyprus', 'Europe'), ('Czechia', 'Europe'), ('Democratic Republic of Congo', 'Africa'), ('Denmark', 'Europe'), 
    ('Djibouti', 'Africa'), ('Dominica', 'North America'), ('Dominican Republic', 'North America'), 
    ('East Timor', 'Asia'), ('Ecuador', 'South America'), ('Egypt', 'Africa'), ('El Salvador', 'North America'), 
    ('Equatorial Guinea', 'Africa'), ('Eritrea', 'Africa'), ('Estonia', 'Europe'), ('Eswatini', 'Africa'), 
    ('Ethiopia', 'Africa'), ('Faroe Islands', 'Europe'), ('Fiji', 'Oceania'), ('Finland', 'Europe'), 
    ('France', 'Europe'), ('French Polynesia', 'Oceania'), ('Gabon', 'Africa'), ('Gambia', 'Africa'), 
    ('Georgia', 'Asia'), ('Germany', 'Europe'), ('Ghana', 'Africa'), ('Greece', 'Europe'), ('Greenland', 'North America'), 
    ('Grenada', 'North America'), ('Guatemala', 'North America'), ('Guinea', 'Africa'), ('Guinea-Bissau', 'Africa'), 
    ('Guyana', 'South America'), ('Haiti', 'North America'), ('Honduras', 'North America'), ('Hong Kong', 'Asia'), 
    ('Hungary', 'Europe'), ('Iceland', 'Europe'), ('India', 'Asia'), ('Indonesia', 'Asia'), ('Iran', 'Asia'), 
    ('Iraq', 'Asia'), ('Ireland', 'Europe'), ('Israel', 'Asia'), ('Italy', 'Europe'), ('Jamaica', 'North America'), 
    ('Japan', 'Asia'), ('Jordan', 'Asia'), ('Kazakhstan', 'Asia'), ('Kenya', 'Africa'), ('Kiribati', 'Oceania'), 
    ('Kosovo', 'Europe'), ('Kuwait', 'Asia'), ('Kyrgyzstan', 'Asia'), ('Laos', 'Asia'), ('Latvia', 'Europe'), 
    ('Lebanon', 'Asia'), ('Lesotho', 'Africa'), ('Liberia', 'Africa'), ('Libya', 'Africa'), ('Liechtenstein', 'Europe'), 
    ('Lithuania', 'Europe'), ('Luxembourg', 'Europe'), ('Macao', 'Asia'), ('Madagascar', 'Africa'), 
    ('Malawi', 'Africa'), ('Malaysia', 'Asia'), ('Maldives', 'Asia'), ('Mali', 'Africa'), ('Malta', 'Europe'), 
    ('Marshall Islands', 'Oceania'), ('Mauritania', 'Africa'), ('Mauritius', 'Africa'), ('Mexico', 'North America'), 
    ('Micronesia', 'Oceania'), ('Moldova', 'Europe'), ('Mongolia', 'Asia'), ('Montenegro', 'Europe'), 
    ('Montserrat', 'North America'), ('Morocco', 'Africa'), ('Mozambique', 'Africa'), ('Myanmar', 'Asia'), 
    ('Namibia', 'Africa'), ('Nauru', 'Oceania'), ('Nepal', 'Asia'), ('Netherlands', 'Europe'), 
    ('New Caledonia', 'Oceania'), ('New Zealand', 'Oceania'), ('Nicaragua', 'North America'), ('Niger', 'Africa'), 
    ('Nigeria', 'Africa'), ('Niue', 'Oceania'), ('North Korea', 'Asia'), ('North Macedonia', 'Europe'), 
    ('Norway', 'Europe'), ('Oman', 'Asia'), ('Pakistan', 'Asia'), ('Palau', 'Oceania'), ('Palestine', 'Asia'), 
    ('Panama', 'North America'), ('Papua New Guinea', 'Oceania'), ('Paraguay', 'South America'), 
    ('Peru', 'South America'), ('Philippines', 'Asia'), ('Poland', 'Europe'), ('Portugal', 'Europe'), 
    ('Qatar', 'Asia'), ('Romania', 'Europe'), ('Russia', 'Europe'), ('Rwanda', 'Africa'), ('Saint Helena', 'Africa'), 
    ('Saint Kitts and Nevis', 'North America'), ('Saint Lucia', 'North America'), 
    ('Saint Pierre and Miquelon', 'North America'), ('Saint Vincent and the Grenadines', 'North America'), 
    ('Samoa', 'Oceania'), ('Sao Tome and Principe', 'Africa'), ('Saudi Arabia', 'Asia'), ('Senegal', 'Africa'), 
    ('Serbia', 'Europe'), ('Seychelles', 'Africa'), ('Sierra Leone', 'Africa'), ('Singapore', 'Asia'), 
    ('Sint Maarten (Dutch part)', 'North America'), ('Slovakia', 'Europe'), ('Slovenia', 'Europe'), 
    ('Solomon Islands', 'Oceania'), ('Somalia', 'Africa'), ('South Africa', 'Africa'), ('South Korea', 'Asia'), 
    ('South Sudan', 'Africa'), ('Spain', 'Europe'), ('Sri Lanka', 'Asia'), ('Sudan', 'Africa'), 
    ('Suriname', 'South America'), ('Sweden', 'Europe'), ('Switzerland', 'Europe'), ('Syria', 'Asia'), 
    ('Taiwan', 'Asia'), ('Tajikistan', 'Asia'), ('Tanzania', 'Africa'), ('Thailand', 'Asia'), ('Togo', 'Africa'), 
    ('Tonga', 'Oceania'), ('Trinidad and Tobago', 'North America'), ('Tunisia', 'Africa'), ('Turkey', 'Asia'), 
    ('Turkmenistan', 'Asia'), ('Turks and Caicos Islands', 'North America'), ('Tuvalu', 'Oceania'), 
    ('Uganda', 'Africa'), ('Ukraine', 'Europe'), ('United Arab Emirates', 'Asia'), ('United Kingdom', 'Europe'), 
    ('United States', 'North America'), ('Uruguay', 'South America'), ('Uzbekistan', 'Asia'), ('Vanuatu', 'Oceania'),
    ('Venezuela', 'South America'), ('Vietnam', 'Asia'), ('Wallis and Futuna', 'Oceania'), 
    ('Yemen', 'Asia'), ('Zambia', 'Africa'), ('Zimbabwe', 'Africa')
]


In [239]:
for country, continent in countries_continents:
    emissions_countries.loc[emissions_countries['Entity']==country, 'Continent'] = continent

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  emissions_countries.loc[emissions_countries['Entity']==country, 'Continent'] = continent


In [240]:
emissions_countries.Continent.unique()
emissions_countries.to_csv('emissions_countries.csv', index=False)

In [241]:
emissions_2022_countries = emissions_countries[emissions_countries['Year'] == 2022]
emissions_2022_countries = emissions_2022_countries.drop(columns=['Year'])
emissions_2022_countries = emissions_2022_countries.sort_values(by='CO2', ascending=False).reset_index(drop=True)
emissions_2022_countries.shape
emissions_2022_countries.to_csv('emissions_2022_countries.csv', index=False)

In [242]:
emissions_2022_countries['CO2'].mean()

4.581112053560747

BONUS: "Emissions from international aviation and shipping are not included in any country or region's emissions. They are only included in the global total emissions."

In [243]:
emissions_2022_countries.head()

Unnamed: 0,Entity,Code,CO2,Continent
0,Qatar,QAT,37.601273,Asia
1,United Arab Emirates,ARE,25.833244,Asia
2,Bahrain,BHR,25.672274,Asia
3,Kuwait,KWT,25.578102,Asia
4,Brunei,BRN,23.950201,Asia
