In [15]:
# Import packages
import plotly.express as px
import plotly.graph_objects as go
import pandas as pd

In [16]:
# Retrieve dataset data.
chunk_size = 1000000
dtypes = {"Country": str, "Mk": str, "Cn": str, "m (kg)": float, "Enedc (g/km)": float, "Ewltp (g/km)": float, "W (mm)": float, "Ft": str, "Ernedc (g/km)": float, "Erwltp (g/km)": float, "year": int}
path = "/Users/popkesnoek/INVIS/CO2_data_clean.csv"
chunk_container = pd.read_csv(path, dtype=dtypes, chunksize=chunk_size)

# Split the dataset based on year.
co2_2015 = pd.DataFrame()
co2_2016 = pd.DataFrame()
co2_2017 = pd.DataFrame()
co2_2018 = pd.DataFrame()
co2_2019 = pd.DataFrame()
co2_2020 = pd.DataFrame()
co2_2021 = pd.DataFrame()
for chunk in chunk_container:
    co2_2015 = pd.concat([co2_2015, chunk[chunk["year"] == 2015]], ignore_index=True)
    co2_2016 = pd.concat([co2_2016, chunk[chunk["year"] == 2016]], ignore_index=True)
    co2_2017 = pd.concat([co2_2017, chunk[chunk["year"] == 2017]], ignore_index=True)
    co2_2018 = pd.concat([co2_2018, chunk[chunk["year"] == 2018]], ignore_index=True)
    co2_2019 = pd.concat([co2_2019, chunk[chunk["year"] == 2019]], ignore_index=True)
    co2_2020 = pd.concat([co2_2020, chunk[chunk["year"] == 2020]], ignore_index=True)
    co2_2021 = pd.concat([co2_2021, chunk[chunk["year"] == 2021]], ignore_index=True)


ValueError: could not convert string to float: '1452    q2.0'

DATASET AND PREPROCESSING

In [5]:
# Compute data for bar graph with average CO2 emission per country
country_emission = pd.DataFrame()
for df in [co2_2015, co2_2016, co2_2017, co2_2018, co2_2019, co2_2020, co2_2021]:
    # Get subset of columns.
    df = df[["Country", "year", "Ewltp (g/km)"]]
    # Group by country and year and compute mean and std.
    df = df.groupby(["Country", "year"]).agg({"Ewltp (g/km)": ['mean', 'std']}).reset_index()
    # Change mutliindex to single index.
    df.columns = df.columns.map(' '.join).str.strip()
    country_emission = pd.concat([country_emission, df], ignore_index=True)



KeyError: "None of [Index(['Country', 'year', 'Ewltp (g/km)'], dtype='object')] are in the [columns]"

In [6]:
# Plot bar graph with average CO2 emission reduced by innovative technologies per country
fig = px.bar(country_emission,
    x="Country",
    y="Ewltp (g/km) mean",
    error_y="Ewltp (g/km) std",
    facet_col="year",
    facet_col_wrap=2,
    title="CO2 emission by passenger cars by country in EU from 2019-2021",
    height=800,
    labels={
        "Ewltp (g/km) mean": "CO2 emission WLTP (g/km)"
    },
)
fig.show()


ValueError: Value of 'x' is not the name of a column in 'data_frame'. Expected one of [] but received: Country

In [7]:
# Compute data for bar graph with average CO2 emission reduced by innovative technologies per country
country_emission_reduction = pd.DataFrame()
for df in [co2_2019, co2_2020, co2_2021]:
    # Get subset of columns.
    df = df[["Country", "year", "Erwltp (g/km)"]]
    # Group by country and year and compute mean and std.
    df = df.groupby(["Country", "year"]).agg({"Erwltp (g/km)": ['mean', 'std']}).reset_index()
    # Change mutliindex to single index.
    df.columns = df.columns.map(' '.join).str.strip()
    country_emission_reduction = pd.concat([country_emission_reduction, df], ignore_index=True)


KeyError: "None of [Index(['Country', 'year', 'Erwltp (g/km)'], dtype='object')] are in the [columns]"

In [8]:
# Plot bar graph with average CO2 emission reduced by innovative technologies per country
fig = px.bar(country_emission_reduction,
    x="Country",
    y="Erwltp (g/km) mean",
    error_y="Erwltp (g/km) std",
    facet_col="year",
    facet_col_wrap=2,
    title="CO2 emission reduction by country in EU from 2019-2021",
    height=800,
    labels={
        "Erwltp (g/km) mean": "CO2 emission reduction WLTP (g/km)"
    },
)
fig.show()


ValueError: Value of 'x' is not the name of a column in 'data_frame'. Expected one of [] but received: Country

In [9]:
# Pie chart with average CO2 emission per fuel type.
ft_mean_emission = co2_2021.groupby(co2_2021["Ft"].str.lower())["Ewltp (g/km)"].mean()

fig = px.pie(ft_mean_emission,
    names=ft_mean_emission.index,
    values="Ewltp (g/km)",
    title="CO2 emission by fuel type in 2021",
    hole=0.8,
    labels={
        "Ewltp (g/km)": "CO2 emission WLTP (g/km)"
    },
)
fig.update_layout(showlegend=False)
fig.update_traces(textposition='outside', textinfo="label + percent")
fig.show()


KeyError: 'Ft'

In [10]:
# Average emission in EU.
years = []
mean_emission = []
for df in [co2_2017, co2_2018, co2_2019, co2_2020, co2_2021]:
    mean_emission.append(df["Ewltp (g/km)"].mean())
    years.append(df["year"][0])

line_df = pd.DataFrame({"year": years, "Ewltp (g/km)": mean_emission})
fig = px.line(line_df, x="year", y="Ewltp (g/km)", title="Average emission over the years in EU.")
fig.update_xaxes(type='category')
fig.show()


KeyError: 'Ewltp (g/km)'

BY CAR BRAND EMISSION CO2 SCATTERPLOT

In [11]:
bycarbrand_2015 = co2_2015[['Mk', 'Enedc (g/km)', 'Ernedc (g/km)','Erwltp (g/km)','Ewltp (g/km)', 'year']]
bycarbrand_2016 = co2_2016[['Mk', 'Enedc (g/km)', 'Ernedc (g/km)','Erwltp (g/km)','Ewltp (g/km)', 'year']]
bycarbrand_2017 = co2_2017[['Mk', 'Enedc (g/km)', 'Ernedc (g/km)','Erwltp (g/km)','Ewltp (g/km)', 'year']]
bycarbrand_2018 = co2_2018[['Mk', 'Enedc (g/km)', 'Ernedc (g/km)','Erwltp (g/km)','Ewltp (g/km)', 'year']]
bycarbrand_2019 = co2_2019[['Mk', 'Enedc (g/km)', 'Ernedc (g/km)','Erwltp (g/km)','Ewltp (g/km)', 'year']]
bycarbrand_2020 = co2_2020[['Mk', 'Enedc (g/km)', 'Ernedc (g/km)','Erwltp (g/km)','Ewltp (g/km)', 'year']]
bycarbrand_2021 = co2_2021[['Mk', 'Enedc (g/km)', 'Ernedc (g/km)','Erwltp (g/km)','Ewltp (g/km)', 'year']]
# grouped2015car = bycarbrand_2015.groupby(['Mk']).mean()

bycarbrand_2016 = bycarbrand_2016.sort_values(['Mk'], ascending=False)
bycarbrand_2015 = bycarbrand_2015.sort_values(['Mk'], ascending=False)
bycarbrand_2017 = bycarbrand_2017.sort_values(['Mk'], ascending=False)
bycarbrand_2018 = bycarbrand_2018.sort_values(['Mk'], ascending=False)
bycarbrand_2019 = bycarbrand_2019.sort_values(['Mk'], ascending=False)
bycarbrand_2020 = bycarbrand_2020.sort_values(['Mk'], ascending=False)
bycarbrand_2021 = bycarbrand_2021.sort_values(['Mk'], ascending=False)


frames = [bycarbrand_2015, bycarbrand_2016, bycarbrand_2017, bycarbrand_2018, bycarbrand_2019, bycarbrand_2020, bycarbrand_2021]
result = pd.concat(frames)


KeyError: "None of [Index(['Mk', 'Enedc (g/km)', 'Ernedc (g/km)', 'Erwltp (g/km)', 'Ewltp (g/km)',\n       'year'],\n      dtype='object')] are in the [columns]"

In [12]:

df_mk = result.groupby['Mk'].mean()
result.head(10)

NameError: name 'result' is not defined

In [13]:
fig = px.scatter_3d(result, x=result[1], y='year', z='Enedc (g/km)', color='Mk')
fig.show()


NameError: name 'result' is not defined