# Test Notebook


# Introduction

**TODO:** Describe the topic and the perspectives.


In [2]:
# Import packages
import plotly.express as px
import plotly.graph_objects as go
import pandas as pd


In [3]:
# Retrieve dataset data.
# The CSV is read in chunks of `chunk_size` rows so it never has to be parsed
# in a single pass.
chunk_size = 1000000
dtypes = {"Country": str, "Mk": str, "Cn": str, "m (kg)": float, "Enedc (g/km)": float, "Ewltp (g/km)": float, "W (mm)": float, "Ft": str, "Ernedc (g/km)": float, "Erwltp (g/km)": float, "year": int}
chunk_container = pd.read_csv("CO2_data.csv", dtype=dtypes, chunksize=chunk_size)

# Split the dataset based on year.
# Each chunk's per-year slice is collected in a list and every year is
# concatenated exactly once afterwards. Calling pd.concat inside the chunk
# loop would re-copy all previously accumulated rows on every iteration.
dataset_years = range(2015, 2022)
parts_by_year = {year: [] for year in dataset_years}
for chunk in chunk_container:
    for year, parts in parts_by_year.items():
        parts.append(chunk[chunk["year"] == year])

# pd.concat raises on an empty list, so fall back to an empty frame when the
# CSV yielded no chunks at all.
co2_by_year = {
    year: pd.concat(parts, ignore_index=True) if parts else pd.DataFrame()
    for year, parts in parts_by_year.items()
}
# Drop the per-chunk slices; the concatenated frames hold their own copy.
del parts_by_year

# One frame per year, with a fresh 0-based index (ignore_index=True above).
co2_2015 = co2_by_year[2015]
co2_2016 = co2_by_year[2016]
co2_2017 = co2_by_year[2017]
co2_2018 = co2_by_year[2018]
co2_2019 = co2_by_year[2019]
co2_2020 = co2_by_year[2020]
co2_2021 = co2_by_year[2021]


# Dataset and preprocessing

**TODO:** Describe the dataset and how we preprocess it.


In [10]:
# Export switch: the compute cells below check this flag and, when it is True,
# write their result table to a CSV file. Set to False to skip writing files.
to_csv = True

In [11]:
# Compute data for bar graph with average CO2 emission per country.
# For every yearly frame, the mean and standard deviation of "Ewltp (g/km)"
# are computed per (Country, year). The per-year tables are collected in a
# list and concatenated once, instead of growing a DataFrame inside the loop.
yearly_emission_stats = []
for year_df in [co2_2018, co2_2019, co2_2020, co2_2021]:
    stats = (
        year_df[["Country", "year", "Ewltp (g/km)"]]
        .groupby(["Country", "year"])
        .agg({"Ewltp (g/km)": ['mean', 'std']})
        .reset_index()
    )
    # Flatten the two-level column index into single names such as
    # "Ewltp (g/km) mean"; strip() removes the trailing space left on the
    # key columns, whose second level is empty.
    stats.columns = stats.columns.map(' '.join).str.strip()
    yearly_emission_stats.append(stats)

country_emission = pd.concat(yearly_emission_stats, ignore_index=True)

if to_csv:
    country_emission.to_csv("country_emission.csv")


In [5]:
# Plot bar graph with average CO2 emission per country.
# One facet row per year (facet_col_wrap=1); the error bars show the standard
# deviation computed alongside the mean.
fig = px.bar(country_emission,
    x="Country",
    y="Ewltp (g/km) mean",
    error_y="Ewltp (g/km) std",
    facet_col="year",
    facet_col_wrap=1,
    # country_emission is built from the 2018-2021 frames, so the title
    # states that range.
    title="CO2 emission by passenger cars by country in EU from 2018-2021",
    height=800,
    labels={
        "Ewltp (g/km) mean": "CO2 emission WLTP (g/km)"
    },
)
fig.show()


**TODO:** Write a caption for the figure above.

In [12]:
# Compute data for bar graph with average CO2 emission reduced by innovative technologies per country.
# For every yearly frame, the mean and standard deviation of "Erwltp (g/km)"
# are computed per (Country, year). The per-year tables are collected in a
# list and concatenated once, instead of growing a DataFrame inside the loop.
yearly_reduction_stats = []
for year_df in [co2_2019, co2_2020, co2_2021]:
    stats = (
        year_df[["Country", "year", "Erwltp (g/km)"]]
        .groupby(["Country", "year"])
        .agg({"Erwltp (g/km)": ['mean', 'std']})
        .reset_index()
    )
    # Flatten the two-level column index into single names such as
    # "Erwltp (g/km) mean"; strip() removes the trailing space left on the
    # key columns, whose second level is empty.
    stats.columns = stats.columns.map(' '.join).str.strip()
    yearly_reduction_stats.append(stats)

country_emission_reduction = pd.concat(yearly_reduction_stats, ignore_index=True)

if to_csv:
    country_emission_reduction.to_csv("country_emission_reduction.csv")


In [7]:
# Bar chart of the mean emission reduction per country, drawn as one facet
# row per year with the standard deviation as error bars.
reduction_axis_labels = {"Erwltp (g/km) mean": "CO2 emission reduction WLTP (g/km)"}
fig = px.bar(
    country_emission_reduction,
    x="Country",
    y="Erwltp (g/km) mean",
    error_y="Erwltp (g/km) std",
    facet_col="year",
    facet_col_wrap=1,
    labels=reduction_axis_labels,
    title="CO2 emission reduction by country in EU from 2019-2021",
    height=800,
)
fig.show()


In [None]:
# Data for the fuel-type pie chart: mean "Ewltp (g/km)" of the 2021 frame,
# grouped by fuel type. The "Ft" values are lower-cased first so spellings
# that differ only in case end up in the same group.
fuel_type = co2_2021["Ft"].str.lower()
emission_by_fuel_type = co2_2021.groupby(fuel_type)["Ewltp (g/km)"]
ft_mean_emission = emission_by_fuel_type.mean()

if to_csv:
    ft_mean_emission.to_csv("ft_mean_emission.csv")


In [8]:
# Donut chart (hole=0.8) of the mean emission per fuel type in 2021.
# Slice names come from the index of ft_mean_emission.
pie_labels = {"Ewltp (g/km)": "CO2 emission WLTP (g/km)"}
fig = px.pie(
    ft_mean_emission,
    names=ft_mean_emission.index,
    values="Ewltp (g/km)",
    labels=pie_labels,
    title="CO2 emission by fuel type in 2021",
    hole=0.8,
)
# The legend is hidden because each slice carries its own label and
# percentage outside the ring.
fig.update_layout(showlegend=False)
fig.update_traces(textinfo="label + percent", textposition='outside')
fig.show()


In [None]:
# Average "Ewltp (g/km)" over all rows of each yearly frame (2017-2021),
# collected into a two-column table: year and mean emission.
yearly_frames = [co2_2017, co2_2018, co2_2019, co2_2020, co2_2021]

# The year is read from the row labelled 0 of each frame.
years = [frame["year"][0] for frame in yearly_frames]
mean_emission = [frame["Ewltp (g/km)"].mean() for frame in yearly_frames]

eu_emission = pd.DataFrame({"year": years, "Ewltp (g/km)": mean_emission})

if to_csv:
    eu_emission.to_csv("eu_emission.csv")


In [9]:
# Line chart of the EU-wide average emission, one point per year.
fig = px.line(
    eu_emission,
    x="year",
    y="Ewltp (g/km)",
    title="Average emission over the years in EU.",
)
# Use a categorical x-axis for the year values.
fig.update_xaxes(type='category')
fig.show()


# Links
Cleaned dataset: https://www.kaggle.com/datasets/yoshifu/co2-emission-from-cars-2015-2021

GitHub repository: https://github.com/FuYoshi/data_story_project

Original dataset: https://www.eea.europa.eu/data-and-maps/data/co2-cars-emission-18
