# Emissions vs Disaster Severity
<br/>

In [169]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [170]:
plt.rcParams["figure.figsize"] = (12, 10)

# Datasets

### EM-DAT natural disasters dataset (aggregated disaster counts)

**Note**: Data exploration, processing and visualization for the EM-DAT dataset is in the [EM-DAT Data Exploration notebook](https://github.com/NLSanyu/CA682-Data-Visualization-Assignment/blob/main/EM-DAT%20Data%20Exploration.ipynb)

In [171]:
emdat_df = pd.read_csv("emdat_disaster_counts_2000_2021.csv", index_col=0)
emdat_df.head()

Unnamed: 0,Country,ISO,Disaster count,Continent
0,China,CHN,436,Asia
1,Afghanistan,AFG,92,Asia
2,Bangladesh,BGD,105,Asia
3,Bolivia (Plurinational State of),BOL,48,Americas
4,Brazil,BRA,104,Americas


In [172]:
emdat_df.rename(columns = {"ISO": "Code"}, inplace=True)
emdat_df.head()

Unnamed: 0,Country,Code,Disaster count,Continent
0,China,CHN,436,Asia
1,Afghanistan,AFG,92,Asia
2,Bangladesh,BGD,105,Asia
3,Bolivia (Plurinational State of),BOL,48,Americas
4,Brazil,BRA,104,Americas


In [173]:
emdat_df.isna().sum()

Country           0
Code              0
Disaster count    0
Continent         0
dtype: int64

### Our World in Data CO2 emissions dataset

In [174]:
owd_df = pd.read_csv("co2.csv")
owd_df.head()

Unnamed: 0,Entity,Code,Year,Annual CO₂ emissions (per capita)
0,Afghanistan,AFG,1949,0.001922
1,Afghanistan,AFG,1950,0.011266
2,Afghanistan,AFG,1951,0.012098
3,Afghanistan,AFG,1952,0.011946
4,Afghanistan,AFG,1953,0.013685


# <br/> Data Processing and Visualization

### Method 1: Merging the datasets directly using Pandas

#### Data subset choice (to match the subset used for the EM-DAT data in its separate [data exploration notebook](https://github.com/NLSanyu/CA682-Data-Visualization-Assignment/blob/main/EM-DAT%20Data%20Exploration.ipynb))

In [175]:
chosen_years = [
    2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 
    2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021
]

In [176]:
owd_df = owd_df[owd_df["Year"].isin(chosen_years)]
owd_df.head()

Unnamed: 0,Entity,Code,Year,Annual CO₂ emissions (per capita)
51,Afghanistan,AFG,2000,0.053581
52,Afghanistan,AFG,2001,0.0543
53,Afghanistan,AFG,2002,0.063856
54,Afghanistan,AFG,2003,0.068871
55,Afghanistan,AFG,2004,0.052529


In [177]:
owd_agg = owd_df.groupby(by="Entity").agg({"Annual CO₂ emissions (per capita)": "median"})
owd_agg.reset_index(inplace=True)
owd_agg.rename(columns={"Entity": "Country"}, inplace=True)
owd_agg.head()

Unnamed: 0,Country,Annual CO₂ emissions (per capita)
0,Afghanistan,0.269324
1,Africa,1.110198
2,Albania,1.612899
3,Algeria,3.350211
4,Andorra,6.874442


#### Merging the datasets

In [178]:
merged_df = emdat_df.merge(owd_agg)
merged_df.head()

Unnamed: 0,Country,Code,Disaster count,Continent,Annual CO₂ emissions (per capita)
0,China,CHN,436,Asia,6.678426
1,Afghanistan,AFG,92,Asia,0.269324
2,Bangladesh,BGD,105,Asia,0.370179
3,Brazil,BRA,104,Americas,2.191942
4,Bhutan,BTN,4,Asia,0.797168


#### Bubble chart on data merged in the above notebook cell

In [180]:
import plotly.express as px

fig = px.scatter(
    merged_df, x="Annual CO₂ emissions (per capita)", 
    y="Disaster count", size="Annual CO₂ emissions (per capita)",
    color="Continent", hover_name="Country"
)
fig.show()

### Method 2: Calculating median on Our World in Data CO2 emissions dataset 

**Note**: This median calculation is done on a subset `(2000-2021)` of the `C02 emissions dataset` that has been filtered using `Excel`. The median is calculated in order to get only `one value` for C02 emissions per country. The resulting dataset with the median is exported into a csv file in order to merge it with the `EM-DAT` dataset using `Excel` and `OpenRefine`

In [181]:
df_3 = pd.read_excel("annual-co-emissions-by-region2000.xlsx")
df_3.head()


Workbook contains no default style, apply openpyxl's default



Unnamed: 0,Entity,Code,Year,Annual CO₂ emissions (zero filled)
0,Afghanistan,AFG,2000,1047127.94
1,Afghanistan,AFG,2001,1069098.0
2,Afghanistan,AFG,2002,1340995.0
3,Afghanistan,AFG,2003,1559602.0
4,Afghanistan,AFG,2004,1237247.0


In [182]:
grouped_df = df_3.groupby(by="Entity").agg({"Annual CO₂ emissions (zero filled)": "median"})
grouped_df

Unnamed: 0_level_0,Annual CO₂ emissions (zero filled)
Entity,Unnamed: 1_level_1
Afghanistan,8.716201e+06
Africa,1.233028e+09
Africa (GCP),1.233017e+09
Aland Islands,0.000000e+00
Albania,4.625543e+06
...,...
Western Sahara,0.000000e+00
World,3.392568e+10
Yemen,1.786422e+07
Zambia,3.246334e+06


In [183]:
grouped_df.to_csv("annual_co2_median.csv")

#### Visualization on the dataset merged using Excel and OpenRefine

In [184]:
df_2 = pd.read_excel("CO2_median_disaster_2000_2021.xlsx")
df_2.head()


Unnamed: 0,Entity,Code,Continent,Disaster count,median CO₂ emissions,land area
0,Afghanistan,AFG,Asia,92.0,8716201.0,652860.0
1,Albania,ALB,Europe,15.0,4625543.0,27400.0
2,Algeria,DZA,Africa,38.0,119996200.0,2381741.0
3,Andorra,AND,Europe,,505632.0,470.0
4,Angola,AGO,Africa,49.0,23541800.0,1246700.0


#### Bubble chart on data merged using OpenRefine

In [190]:
import plotly.express as px

fig = px.scatter(df_2, x="median CO₂ emissions", y="Disaster count", 
                 size="land area", color="Continent", hover_name="Entity")
fig.update_layout(
    title = {"text": "Disaster counts vs Median CO2 emissions", "xanchor": "center", "yanchor": "top"},
)
fig.show()