In [None]:
# Import necessary libraries 
import pandas as pd
import plotly.express as px
import plotly.colors as pc

In [None]:
# Read the merged dataset from its CSV file (path is relative to the notebook's working directory)
merged_csv_path = '../data/merged_dataset.csv'
joined_df = pd.read_csv(filepath_or_buffer=merged_csv_path)

In [None]:
# Drop every row whose 'geo' value is 'European Union'
is_eu_row = joined_df['geo'].eq('European Union')
# Index labels of the matching rows; these are the rows removed below
point_to_exclude = joined_df.index[is_eu_row.to_numpy()]
joined_df = joined_df.drop(index=point_to_exclude)

Unnamed: 0.1,Unnamed: 0,geo,time_period,amount_passenger_cars,amount_zev,amount_ev,share_zev,emission_per_capita,zev_per_capita,ev_per_capita,total_emissions,zev_diff,ev_diff,emmission_diff,all_cars_diff,zev_norm,ev_norm,emmission_norm,all_cars_norm
0,0,Austria,2014,4694921,3389,4691532,0.000722,8.4,0.000398,0.551440,71465402.4,,,,,,,,
1,1,Austria,2015,4748048,5038,4743010,0.001061,8.7,0.000587,0.552481,74688856.2,1649.0,51478.0,3223453.8,53127.0,0.486574,0.010973,0.045105,0.011316
2,2,Austria,2016,4821557,9086,4812471,0.001884,8.6,0.001044,0.553128,74824050.6,4048.0,69461.0,135194.4,73509.0,0.803493,0.014645,0.001810,0.015482
3,3,Austria,2017,4898578,14637,4883941,0.002988,9.3,0.001668,0.556710,81587644.5,5551.0,71470.0,6763593.9,77021.0,0.610940,0.014851,0.090393,0.015974
4,4,Austria,2018,4978852,20855,4957997,0.004189,9.8,0.002364,0.561987,86458216.6,6218.0,74056.0,4870572.1,80274.0,0.424814,0.015163,0.059697,0.016387
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
275,255,Sweden,2019,4887116,30382,4856734,0.006217,1.2,0.002970,0.474745,12276222.0,13677.0,3460.0,-3916165.2,17137.0,0.818737,0.000713,-0.241852,0.003519
276,256,Sweden,2020,4943293,55829,4887464,0.011294,0.4,0.005406,0.473243,4131035.6,25447.0,30730.0,-8145186.4,56177.0,0.837568,0.006327,-0.663493,0.011495
277,257,Sweden,2021,4985979,110221,4875758,0.022106,0.5,0.010619,0.469758,5189647.5,54392.0,-11706.0,1058611.9,42686.0,0.974261,-0.002395,0.256258,0.008635
278,258,Sweden,2022,4979761,197751,4782010,0.039711,0.6,0.018919,0.457507,6271395.6,87530.0,-93748.0,1081748.1,-6218.0,0.794132,-0.019227,0.208443,-0.001247


In [None]:
# Construct the per-country change in the share of emission vehicles
# Share of emission vehicles among all passenger cars, computed row by row
joined_df['share_ev'] = joined_df['amount_ev'] / joined_df['amount_passenger_cars']
# Difference to the previous available period of the same country.
# Rows are ordered by 'time_period' before differencing so the result does not
# depend on the row order of the CSV; the assignment re-aligns on the index, so
# joined_df itself keeps its original row order (this relies on unique index labels).
# The first period of each country has no predecessor and therefore becomes NaN.
joined_df['share_ev_diff'] = (
    joined_df
    .sort_values('time_period', kind='stable')
    .groupby('geo')['share_ev']
    .diff()
)

In [None]:
# Keep only the relevant columns, then discard every row that has a NaN in any of them
plot_columns = ['share_ev_diff', 'geo', 'emmission_norm']
df_cleaned = joined_df.loc[:, plot_columns].dropna(how='any')

In [None]:
# Chain three qualitative palettes so there are enough distinct colors for all countries
colors = [*pc.qualitative.Alphabet, *pc.qualitative.Dark24, *pc.qualitative.Light24]

# Human-readable names shown on the axes and in the legend title
axis_labels = {
    'share_ev_diff': 'Change in Share of EV',
    'emmission_norm': 'Relative change in Total Emmissions',
    'geo': 'Countries',
}

# Scatter plot for sub question 1: points are colored per country, while
# trendline_scope='overall' requests a single OLS trendline across all points
scatter_options = dict(
    x='share_ev_diff',
    y='emmission_norm',
    color='geo',
    labels=axis_labels,
    title="Scatter Plot of change in Share of Emission Vehicles vs Relative change in Emission",
    color_discrete_sequence=colors,
    trendline='ols',
    trendline_scope='overall',
)
fig = px.scatter(df_cleaned, **scatter_options)

# Horizontal legend whose bottom edge is anchored at y=-0.5, i.e. below the plotting area
legend_layout = {'orientation': 'h', 'yanchor': 'bottom', 'y': -0.5}
fig.update_layout(legend=legend_layout)

fig.show()