In [1]:
import pandas as pd
import plotly.express as px

df = pd.read_csv('co-emissions-per-capita.csv')
df = df[~df['Code'].isna()]
df = df.rename(columns={'Annual CO₂ emissions (per capita)': 'Annual CO2 emissions (per capita)'})
df['Annual CO2 emissions (per capita)'] = df['Annual CO2 emissions (per capita)'].round(1)
top_n_row = 10
df

Unnamed: 0,Entity,Code,Year,Annual CO2 emissions (per capita)
0,Afghanistan,AFG,1949,0.0
1,Afghanistan,AFG,1950,0.0
2,Afghanistan,AFG,1951,0.0
3,Afghanistan,AFG,1952,0.0
4,Afghanistan,AFG,1953,0.0
...,...,...,...,...
26595,Zimbabwe,ZWE,2018,0.7
26596,Zimbabwe,ZWE,2019,0.6
26597,Zimbabwe,ZWE,2020,0.5
26598,Zimbabwe,ZWE,2021,0.5


## 1. First Section

### 1.1 Top N countries with the most CO2 emissions chosen 1 year 

In [2]:
df_last_year = df[df['Year']==max(df['Year'])]
df_last_year = df_last_year.sort_values('Annual CO2 emissions (per capita)', ascending=False).reset_index(drop=True)
df_last_year.iloc[:top_n_row].to_csv('co-emissions-per-capita-last-year.csv')
# df_last_year
px.bar(df_last_year.iloc[:top_n_row], x='Entity', y='Annual CO2 emissions (per capita)')

### 1.1 Top N countries with the most CO2 emissions chosen a decade 

In [3]:
df_last_decade = df[df['Year']>max(df['Year'])-10]
df_last_decade = df_last_decade[['Entity', 'Code', 'Annual CO2 emissions (per capita)']].groupby(['Entity', 'Code']).mean().reset_index()
df_last_decade = df_last_decade.sort_values('Annual CO2 emissions (per capita)', ascending=False).reset_index(drop=True)
df_last_decade.iloc[:top_n_row].to_csv('co-emissions-per-capita-last-decade.csv')
# df_last_decade
px.bar(df_last_decade.iloc[:top_n_row], x='Entity', y='Annual CO2 emissions (per capita)')

## 2. Second Section

### 2.1 Regions comparison of CO2 emissions (chosen 1 year)

In [4]:
df_last_year = df[df['Year']==max(df['Year'])].reset_index(drop=True)
df_last_year

Unnamed: 0,Entity,Code,Year,Annual CO2 emissions (per capita)
0,Afghanistan,AFG,2022,0.3
1,Albania,ALB,2022,1.7
2,Algeria,DZA,2022,3.9
3,Andorra,AND,2022,4.6
4,Angola,AGO,2022,0.5
...,...,...,...,...
210,Wallis and Futuna,WLF,2022,2.3
211,World,OWID_WRL,2022,4.7
212,Yemen,YEM,2022,0.3
213,Zambia,ZMB,2022,0.4


In [5]:
df_country_region_mapping = pd.read_csv('country-region-mapping.csv')
df_country_region_mapping

Unnamed: 0,Entity,Region
0,Afghanistan,Asia
1,Albania,Europe
2,Algeria,Africa
3,Andorra,Europe
4,Angola,Africa
...,...,...
209,Vietnam,Asia
210,Wallis and Futuna,Oceania
211,Yemen,Asia
212,Zambia,Africa


In [6]:
df_last_year_with_regions = pd.merge(df_last_year, df_country_region_mapping, on='Entity', how='left')
df_last_year_with_regions

Unnamed: 0,Entity,Code,Year,Annual CO2 emissions (per capita),Region
0,Afghanistan,AFG,2022,0.3,Asia
1,Albania,ALB,2022,1.7,Europe
2,Algeria,DZA,2022,3.9,Africa
3,Andorra,AND,2022,4.6,Europe
4,Angola,AGO,2022,0.5,Africa
...,...,...,...,...,...
210,Wallis and Futuna,WLF,2022,2.3,Oceania
211,World,OWID_WRL,2022,4.7,
212,Yemen,YEM,2022,0.3,Asia
213,Zambia,ZMB,2022,0.4,Africa


In [7]:
top_n = 5

df_last_year_with_regions_sorted = df_last_year_with_regions.sort_values(by=['Region', 'Annual CO2 emissions (per capita)'], ascending=[True, False])

df_last_year_with_regions_sorted['Rank'] = df_last_year_with_regions_sorted.groupby('Region')['Annual CO2 emissions (per capita)'].rank(method='first', ascending=False)

df_last_year_with_regions_sorted['Entity'] = df_last_year_with_regions_sorted.apply(lambda row: row['Entity'] if row['Rank'] <= top_n else 'Other', axis=1)

df_last_year_with_regions_aggregated = df_last_year_with_regions_sorted.groupby(['Region', 'Entity'], as_index=False).agg({'Annual CO2 emissions (per capita)': 'sum', 'Rank' : 'min'}).reset_index()

df_last_year_with_regions_aggregated = df_last_year_with_regions_aggregated.sort_values(by=['Region', 'Rank'], ascending=True)

df_last_year_with_regions_aggregated.to_csv('co-emissions-per-capita-last-year-region-comparison.csv')

df_last_year_with_regions_aggregated

Unnamed: 0,index,Region,Entity,Annual CO2 emissions (per capita),Rank
1,1,Africa,Libya,9.2,1.0
5,5,Africa,South Africa,6.7,2.0
4,4,Africa,Seychelles,6.1,3.0
0,0,Africa,Algeria,3.9,4.0
2,2,Africa,Mauritius,3.3,5.0
3,3,Africa,Other,36.6,6.0
10,10,Asia,Qatar,37.6,1.0
11,11,Asia,United Arab Emirates,25.8,2.0
6,6,Asia,Bahrain,25.7,3.0
8,8,Asia,Kuwait,25.6,4.0


## 3. Third Section

### 3.1 Countries comparison of CO2 emissions type (chosen 1 year)

In [8]:
df_emission_type = pd.read_csv('co2-fossil-plus-land-use.csv')
df_emission_type = df_emission_type[~((df_emission_type['Code'].isna())|(df_emission_type['Code']=='OWID_WRL'))]
df_emission_type = df_emission_type.rename(columns={
    'Annual CO₂ emissions including land-use change': 'Annual CO2 emissions including land-use change',
    'Annual CO₂ emissions from land-use change': 'Annual CO2 emissions from land-use change',
    'Annual CO₂ emissions': 'Annual CO2 emissions'
    })
df_emission_type

Unnamed: 0,Entity,Code,Year,Annual CO2 emissions including land-use change,Annual CO2 emissions from land-use change,Annual CO2 emissions
0,Afghanistan,AFG,1850,,2979601.5,
1,Afghanistan,AFG,1851,,3001842.0,
2,Afghanistan,AFG,1852,,3021554.2,
3,Afghanistan,AFG,1853,,3038335.5,
4,Afghanistan,AFG,1854,,3052734.8,
...,...,...,...,...,...,...
40594,Zimbabwe,ZWE,2018,19422790.0,8708192.0,10714598.0
40595,Zimbabwe,ZWE,2019,17854144.0,8078717.0,9775428.0
40596,Zimbabwe,ZWE,2020,15458631.0,7608992.0,7849639.0
40597,Zimbabwe,ZWE,2021,15513038.0,7116880.0,8396158.0


In [9]:
df_emission_type_last_year = df_emission_type[df_emission_type['Year']==max(df_emission_type['Year'])]
df_emission_type_last_year = df_emission_type_last_year.sort_values('Annual CO2 emissions', ascending=False).reset_index(drop=True)
df_emission_type_last_year = df_emission_type_last_year.iloc[:top_n_row]
df_emission_type_last_year['Annual CO2 emissions including land-use change'] = df_emission_type_last_year['Annual CO2 emissions including land-use change'].round(-7)
df_emission_type_last_year['Annual CO2 emissions from land-use change'] = df_emission_type_last_year['Annual CO2 emissions from land-use change'].round(-5)
df_emission_type_last_year['Annual CO2 emissions'] = df_emission_type_last_year['Annual CO2 emissions'].round(-7)
df_emission_type_last_year.to_csv('co2-emission-type-last-year.csv')
df_emission_type_last_year

Unnamed: 0,Entity,Code,Year,Annual CO2 emissions including land-use change,Annual CO2 emissions from land-use change,Annual CO2 emissions
0,China,CHN,2022,12380000000.0,981500000.0,11400000000.0
1,United States,USA,2022,5200000000.0,141700000.0,5060000000.0
2,India,IND,2022,2860000000.0,34400000.0,2830000000.0
3,Russia,RUS,2022,1970000000.0,313000000.0,1650000000.0
4,Japan,JPN,2022,1060000000.0,4200000.0,1050000000.0
5,Indonesia,IDN,2022,1650000000.0,918600000.0,730000000.0
6,Iran,IRN,2022,680000000.0,-13200000.0,690000000.0
7,Germany,DEU,2022,660000000.0,-8100000.0,670000000.0
8,Saudi Arabia,SAU,2022,670000000.0,4000000.0,660000000.0
9,South Korea,KOR,2022,590000000.0,-6600000.0,600000000.0
