In [130]:
import pandas as pd
import plotly.graph_objects as go
import plotly.express as px
import numpy as np
import plotly.colors as pc

In [None]:
# Read the merged dataset from its CSV file (path is relative to the notebook's
# working directory) into the DataFrame used throughout this notebook.
joined_df = pd.read_csv(filepath_or_buffer='../data/merged_dataset.csv')

In [131]:
# Remove every row whose 'geo' value is 'European Union'.
# point_to_exclude holds the index labels of those rows.
point_to_exclude = joined_df.index[joined_df['geo'].eq('European Union')]
joined_df = joined_df.drop(labels=point_to_exclude, axis='index')
# Display the remaining rows as the cell output
joined_df

Unnamed: 0.1,Unnamed: 0,geo,time_period,amount_passenger_cars,amount_zev,amount_ev,share_zev,emission_per_capita,zev_per_capita,ev_per_capita,total_emissions,zev_diff,ev_diff,emmission_diff,all_cars_diff,zev_norm,ev_norm,emmission_norm,all_cars_norm,share_zev_diff
0,0,Austria,2014,4694921,3389,4691532,0.000722,8.4,0.000398,0.551440,71465402.4,,,,,,,,,
1,1,Austria,2015,4748048,5038,4743010,0.001061,8.7,0.000587,0.552481,74688856.2,1649.0,51478.0,3223453.8,53127.0,0.486574,0.010973,0.045105,0.011316,0.000339
2,2,Austria,2016,4821557,9086,4812471,0.001884,8.6,0.001044,0.553128,74824050.6,4048.0,69461.0,135194.4,73509.0,0.803493,0.014645,0.001810,0.015482,0.000823
3,3,Austria,2017,4898578,14637,4883941,0.002988,9.3,0.001668,0.556710,81587644.5,5551.0,71470.0,6763593.9,77021.0,0.610940,0.014851,0.090393,0.015974,0.001104
4,4,Austria,2018,4978852,20855,4957997,0.004189,9.8,0.002364,0.561987,86458216.6,6218.0,74056.0,4870572.1,80274.0,0.424814,0.015163,0.059697,0.016387,0.001201
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
275,255,Sweden,2019,4887116,30382,4856734,0.006217,1.2,0.002970,0.474745,12276222.0,13677.0,3460.0,-3916165.2,17137.0,0.818737,0.000713,-0.241852,0.003519,0.002787
276,256,Sweden,2020,4943293,55829,4887464,0.011294,0.4,0.005406,0.473243,4131035.6,25447.0,30730.0,-8145186.4,56177.0,0.837568,0.006327,-0.663493,0.011495,0.005077
277,257,Sweden,2021,4985979,110221,4875758,0.022106,0.5,0.010619,0.469758,5189647.5,54392.0,-11706.0,1058611.9,42686.0,0.974261,-0.002395,0.256258,0.008635,0.010812
278,258,Sweden,2022,4979761,197751,4782010,0.039711,0.6,0.018919,0.457507,6271395.6,87530.0,-93748.0,1081748.1,-6218.0,0.794132,-0.019227,0.208443,-0.001247,0.017605


In [132]:
# Show the column labels of joined_df as the cell output
joined_df.columns

Index(['Unnamed: 0', 'geo', 'time_period', 'amount_passenger_cars',
       'amount_zev', 'amount_ev', 'share_zev', 'emission_per_capita',
       'zev_per_capita', 'ev_per_capita', 'total_emissions', 'zev_diff',
       'ev_diff', 'emmission_diff', 'all_cars_diff', 'zev_norm', 'ev_norm',
       'emmission_norm', 'all_cars_norm', 'share_zev_diff'],
      dtype='object')

In [133]:
# Change in 'share_zev' relative to the preceding time_period, computed separately
# for each country ('geo').
# The rows are ordered by (geo, time_period) before differencing so that each value is
# compared with the chronologically previous row of the same country, independent of
# the row order in the CSV. The assignment aligns on the index, so the row order of
# joined_df itself is not changed.
# The first row of every country has no predecessor and therefore becomes NaN.
# NOTE(review): a missing time_period inside a country would make the difference span
# more than one step — confirm the data has no gaps.
joined_df['share_zev_diff'] = (
    joined_df
    .sort_values(['geo', 'time_period'])
    .groupby('geo')['share_zev']
    .diff()
)

In [134]:
# Group the rows of joined_df by country: one group per distinct 'geo' value
groups = joined_df.groupby(by='geo')

In [135]:
# Build the analysis frame in a single chain:
#   1. keep only the columns used for the analysis,
#   2. drop rows that have a missing value in any of them,
#   3. force the three *_diff columns to numeric (unparseable values become NaN),
#   4. drop the rows that step 3 turned into NaN.
# .assign returns a new DataFrame, so no column is written into a frame derived from
# a selection of joined_df (which can raise SettingWithCopyWarning).
df_cleaned = (
    joined_df[[
        'share_zev_diff', 'ev_diff', 'emmission_diff', 'geo', 'time_period',
        'all_cars_diff', 'zev_diff', 'ev_norm', 'zev_norm', 'emmission_norm',
        'all_cars_norm',
    ]]
    .dropna()
    .assign(
        ev_diff=lambda frame: pd.to_numeric(frame['ev_diff'], errors='coerce'),
        emmission_diff=lambda frame: pd.to_numeric(frame['emmission_diff'], errors='coerce'),
        all_cars_diff=lambda frame: pd.to_numeric(frame['all_cars_diff'], errors='coerce'),
    )
    .dropna()
)


In [136]:
# Preview the first five rows of the cleaned frame
df_cleaned.head(n=5)

Unnamed: 0,share_zev_diff,ev_diff,emmission_diff,geo,time_period,all_cars_diff,zev_diff,ev_norm,zev_norm,emmission_norm,all_cars_norm
1,0.000339,51478.0,3223453.8,Austria,2015,53127.0,1649.0,0.010973,0.486574,0.045105,0.011316
2,0.000823,69461.0,135194.4,Austria,2016,73509.0,4048.0,0.014645,0.803493,0.00181,0.015482
3,0.001104,71470.0,6763593.9,Austria,2017,77021.0,5551.0,0.014851,0.61094,0.090393,0.015974
4,0.001201,74056.0,4870572.1,Austria,2018,80274.0,6218.0,0.015163,0.424814,0.059697,0.016387
5,0.001678,51987.0,-1413976.6,Austria,2019,60696.0,8709.0,0.010485,0.417598,-0.016354,0.012191


In [None]:
# Concatenate three qualitative palettes so that the colour sequence is long enough
# to give many countries a distinct colour before the sequence repeats.
colors = pc.qualitative.Alphabet + pc.qualitative.Dark24 + pc.qualitative.Light24

# Scatter of the change in ZEV share (x) against the change in total emissions (y),
# one colour per country. trendline_scope='overall' fits a single OLS line through
# all points instead of one line per country.
fig = px.scatter(
    df_cleaned,
    x='share_zev_diff',
    y='emmission_diff',
    color='geo',
    # Human-readable axis / legend titles; the dictionary keys are the column names
    labels={
        'share_zev_diff': 'Change in Share of ZEV',
        'emmission_diff': 'Change in Total Emissions',
        'geo': 'Countries',
    },
    trendline='ols',
    title="Scatter Plot of change in Share ZEV vs change in Emission",
    color_discrete_sequence=colors,
    trendline_scope='overall'
)

# Lay the legend out horizontally and move it below the plotting area
fig.update_layout(
    legend=dict(
        orientation="h",
        yanchor="bottom",
        y=-0.5
    )
)

fig.show()