In [1]:
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
import plotly.express as px
import plotly.graph_objects as go

In [6]:
df_sales = pd.read_csv('../data/Car_Historical_Data.csv')

In [7]:
df_sales

Unnamed: 0,region,category,parameter,mode,powertrain,year,unit,value
0,Australia,Historical,EV sales,Cars,BEV,2011,Vehicles,49
1,Australia,Historical,EV sales,Cars,BEV,2012,Vehicles,170
2,Australia,Historical,EV sales,Cars,PHEV,2012,Vehicles,80
3,Australia,Historical,EV sales,Cars,PHEV,2013,Vehicles,100
4,Australia,Historical,EV sales,Cars,BEV,2013,Vehicles,190
...,...,...,...,...,...,...,...,...
829,World,Historical,EV sales,Cars,PHEV,2020,Vehicles,970000
830,World,Historical,EV sales,Cars,PHEV,2021,Vehicles,1900000
831,World,Historical,EV sales,Cars,BEV,2021,Vehicles,4600000
832,World,Historical,EV sales,Cars,BEV,2022,Vehicles,7300000


In [8]:
df_sales.isnull

<bound method DataFrame.isnull of         region    category parameter  mode powertrain  year      unit    value
0    Australia  Historical  EV sales  Cars        BEV  2011  Vehicles       49
1    Australia  Historical  EV sales  Cars        BEV  2012  Vehicles      170
2    Australia  Historical  EV sales  Cars       PHEV  2012  Vehicles       80
3    Australia  Historical  EV sales  Cars       PHEV  2013  Vehicles      100
4    Australia  Historical  EV sales  Cars        BEV  2013  Vehicles      190
..         ...         ...       ...   ...        ...   ...       ...      ...
829      World  Historical  EV sales  Cars       PHEV  2020  Vehicles   970000
830      World  Historical  EV sales  Cars       PHEV  2021  Vehicles  1900000
831      World  Historical  EV sales  Cars        BEV  2021  Vehicles  4600000
832      World  Historical  EV sales  Cars        BEV  2022  Vehicles  7300000
833      World  Historical  EV sales  Cars       PHEV  2022  Vehicles  2900000

[834 rows x 8 col

In [9]:
df_sales.isnull().sum()

region        0
category      0
parameter     0
mode          0
powertrain    0
year          0
unit          0
value         0
dtype: int64

In [10]:
df_sales.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 834 entries, 0 to 833
Data columns (total 8 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   region      834 non-null    object
 1   category    834 non-null    object
 2   parameter   834 non-null    object
 3   mode        834 non-null    object
 4   powertrain  834 non-null    object
 5   year        834 non-null    int64 
 6   unit        834 non-null    object
 7   value       834 non-null    int64 
dtypes: int64(2), object(6)
memory usage: 52.3+ KB


In [11]:
df_sales['value'].describe()

count    8.340000e+02
mean     8.141432e+04
std      4.072362e+05
min      1.000000e+00
25%      3.400000e+02
50%      3.100000e+03
75%      2.300000e+04
max      7.300000e+06
Name: value, dtype: float64

In [12]:
df_sales.nunique()

region         36
category        1
parameter       1
mode            1
powertrain      2
year           13
unit            1
value         328
dtype: int64

In [13]:
df_sales.drop(columns=['category', 'mode', 'unit','parameter'], inplace=True)

In [14]:
df_sales

Unnamed: 0,region,powertrain,year,value
0,Australia,BEV,2011,49
1,Australia,BEV,2012,170
2,Australia,PHEV,2012,80
3,Australia,PHEV,2013,100
4,Australia,BEV,2013,190
...,...,...,...,...
829,World,PHEV,2020,970000
830,World,PHEV,2021,1900000
831,World,BEV,2021,4600000
832,World,BEV,2022,7300000


In [15]:
# Create a pivot table to separate BEV and PHEV sales
pivot_table = df_sales.pivot_table(
    values='value',  # The values to be aggregated (sales values)
    index=['region', 'year'],  # Columns to keep as index
    columns='powertrain',  # Separate columns for BEV and PHEV
    aggfunc='sum',  # Aggregation function (sum the sales values)
    fill_value=0,  # Fill NaN values with 0
).reset_index()
pivot_table.columns = ['region', 'year', 'BEV', 'PHEV']
# Group the data by year and sum the BEV and PHEV sales for each year
grouped_pivot = pivot_table.groupby('year')[['BEV', 'PHEV']].sum().reset_index()


In [16]:
grouped_pivot

Unnamed: 0,year,BEV,PHEV
0,2010,17795,779
1,2011,95589,18874
2,2012,145915,137091
3,2013,278215,232106
4,2014,490227,320320
5,2015,798692,612725
6,2016,1071902,763718
7,2017,1739120,1076099
8,2018,3105250,1588395
9,2019,3646510,1501703


#### comparing regions to see the contribution of each in total sales

In [18]:
# Calculating total sales for each region
region_sales = df_sales.groupby('region')['value'].sum()

# Calculating total sales across all regions
total_sales = region_sales.sum()

# Calculating percentage contribution of each region to the total sales
percentage_contribution = (region_sales / total_sales) * 100

df_sales_comparison = pd.DataFrame({'Total Sales': region_sales, 'Percentage Contribution (%)': percentage_contribution})

# Sorting df_sales_comparison in descending order
df_sales_comparison = df_sales_comparison.sort_values(by='Percentage Contribution (%)', ascending=False).reset_index()

In [19]:
# df_sales_comparison = df_sales_comparison.drop(index=0)
df_sales_comparison

Unnamed: 0,region,Total Sales,Percentage Contribution (%)
0,World,26984570,39.741902
1,China,13675150,20.140268
2,Europe,8134460,11.98014
3,EU27,5945789,8.756744
4,USA,3355000,4.941123
5,Germany,2211480,3.256988
6,United Kingdom,1134238,1.670465
7,France,1054016,1.552317
8,Norway,749420,1.103719
9,Sweden,511344,0.753089


In [22]:
fig = go.Figure()
fig.add_trace(go.Bar(x=grouped_pivot['year'],
                y=grouped_pivot['BEV'],
                name='BEV',
                marker_color='#f46036',
               
                     
                ))
fig.add_trace(go.Bar(x=grouped_pivot['year'],
                y=grouped_pivot['PHEV'],
                name='PHEV',
                marker_color='#2e294e',
               ))

fig.update_layout(
    title='EV Sales over Years',
    xaxis_tickfont_size=14,
    yaxis=dict(
        title='Value',
        titlefont_size=16,
        tickfont_size=14,
    ),
    legend=dict(
        x=0,
        y=1.0,
        bgcolor='rgba(255, 255, 255, 0)',
        bordercolor='rgba(255, 255, 255, 0)'
    ),
    barmode='group',
    bargap=0.15, # gap between bars of adjacent location coordinates.
    bargroupgap=0.1 # gap between bars of the same location coordinate.
)

# Style the plot 
fig.update_layout(
    plot_bgcolor='#e5ecf6',  # Set the background color
    font_family='Arial',
    font_color='black',  # Set font color to white
    title_font_family='Arial',
    title_font_color='black',  # Set title font color to white
    xaxis=dict(
        showline=True, linewidth=2, linecolor='white',  # Add x-axis line with specified properties
        tickmode='linear',  # Use linear tick mode
        tickvals=df_sales['year'],  # Set tick values to match the years in your data
        ticktext=df_sales['year'],  # Set tick labels to match the years in your data
        range=[min(df_sales['year']), 2023],  # Set the x-axis range
    ),
    yaxis=dict(showline=True, linewidth=2, linecolor='white'),  # Add y-axis line with specified properties
    yaxis_showgrid=True,  # Show y-axis grid lines
    yaxis_gridwidth=0.5,  # Set y-axis grid width
)
# # Annotate values for years 2018 and 2022
selected_years = [2018, 2022]

for year in selected_years:
    bev_value = grouped_pivot.loc[grouped_pivot['year'] == year, 'BEV'].values[0]
    phev_value = grouped_pivot.loc[grouped_pivot['year'] == year, 'PHEV'].values[0]
    
    fig.add_annotation(
        x=year, y=bev_value, text=f'{bev_value/1e6:.2f}M',
        font=dict(size=10, color='black'), yshift=10,xshift=-20,
        showarrow=True, arrowhead=1, arrowcolor='black'
    )
    
    fig.add_annotation(
        x=year, y=phev_value, text=f'{phev_value/1e6:.2f}M',
        font=dict(size=10, color='black'), yshift=10, xshift=22,
        showarrow=True, arrowhead=1, arrowcolor='black'
    )

fig.show()
# fig.write_image('../data/images/sales_over_years_bev_phev.png', format='png')
fig.write_html('../data/html/sales_over_years_bev_phev.html')


In [23]:
# Filtering and grouping
selected_regions = ['China', 'Europe', 'EU27', 'USA']
filtered_df = df_sales_comparison[df_sales_comparison['region'].isin(selected_regions)]
other_regions = df_sales_comparison[~df_sales_comparison['region'].isin(selected_regions)]

# Create an "Others" row for the pie chart
others_row = pd.DataFrame([{
    'region': 'Others',
    'Total Sales': other_regions['Total Sales'].sum(),
    'Percentage Contribution (%)': other_regions['Percentage Contribution (%)'].sum()
}])

In [25]:
import pandas as pd
import plotly.express as px

# Define the colors
colors = {
    'China': '#464545',
    'USA': '#258ca8',
    'Europe': '#ff7518',
    'EU27': '#f4a259',
    'Others': '#e9e9e9'
}

selected_regions = ['China', 'Europe', 'EU27', 'USA']
filtered_df = df_sales_comparison[df_sales_comparison['region'].isin(selected_regions)]
other_regions = df_sales_comparison[~df_sales_comparison['region'].isin(selected_regions) & (df_sales_comparison['region'] != 'World')]

# Create an "Others" row for the pie chart
others_row = pd.DataFrame([{
    'region': 'Others',
    'Total Sales': other_regions['Total Sales'].sum(),
    'Percentage Contribution (%)': other_regions['Percentage Contribution (%)'].sum()
}])

# Concatenate the DataFrames
filtered_df = pd.concat([filtered_df, others_row], ignore_index=True)

# Create the pie chart using Plotly
fig = px.pie(filtered_df,
             names='region',
             values='Percentage Contribution (%)',
             title='Percentage Contribution of Sales by Region',
             labels={'Percentage Contribution (%)': 'Percentage'},
             hover_data=['Total Sales'],
             hole=0.4)  # Adjust the hole size for a donut chart effect

# Update the layout
fig.update_layout(
    # font_family='Arial',
    # font_color='black',
    # title_font_family='Arial',
    # title_font_color='black',
    legend_title_text='Regions',
    legend=dict(
        x=1, y=0.6,  # Adjust the x and y position of the legend
        bgcolor='white',  # Set the background color of the legend
    ),
     font=dict(size=10) ,
    width=400,  # Set the figure width
    height=400  # Set the figure height
)

# Update the colors of the pie chart segments
fig.update_traces(marker=dict(colors=[colors[region] for region in filtered_df['region']]))
 
fig.show()
fig.write_html('../data/html/donut_chart_sales_share_regions.html')

In [26]:
#reading sales share csv

In [27]:
df_sales_share = pd.read_csv('../data/EV_sales _shareHistorical_Cars.csv')

In [28]:
df_sales_share

Unnamed: 0,region,category,parameter,mode,powertrain,year,unit,value
0,Australia,Historical,EV stock,Cars,BEV,2011,Vehicles,4.900000e+01
1,Australia,Historical,EV stock share,Cars,EV,2011,percent,4.600000e-04
2,Australia,Historical,EV sales share,Cars,EV,2011,percent,6.500000e-03
3,Australia,Historical,EV sales,Cars,BEV,2011,Vehicles,4.900000e+01
4,Australia,Historical,EV sales,Cars,BEV,2012,Vehicles,1.700000e+02
...,...,...,...,...,...,...,...,...
2771,World,Historical,Oil displacement Mbd,Cars,EV,2022,Milion barrels per day,4.300000e-01
2772,World,Historical,"Oil displacement, million lge",Cars,EV,2022,"Oil displacement, million lge",2.500000e+04
2773,World,Historical,EV sales,Cars,BEV,2022,Vehicles,7.300000e+06
2774,World,Historical,EV sales share,Cars,EV,2022,percent,1.400000e+01


In [29]:
df_sales_share_ev = df_sales_share[ ( df_sales_share['powertrain'] == 'EV' ) & ( df_sales_share['unit'] == 'percent' ) & (df_sales_share['parameter'] == 'EV sales share' )]

In [30]:
df_sales_share_ev

Unnamed: 0,region,category,parameter,mode,powertrain,year,unit,value
2,Australia,Historical,EV sales share,Cars,EV,2011,percent,0.0065
5,Australia,Historical,EV sales share,Cars,EV,2012,percent,0.0300
14,Australia,Historical,EV sales share,Cars,EV,2013,percent,0.0340
17,Australia,Historical,EV sales share,Cars,EV,2014,percent,0.1600
26,Australia,Historical,EV sales share,Cars,EV,2015,percent,0.2000
...,...,...,...,...,...,...,...,...
2735,World,Historical,EV sales share,Cars,EV,2018,percent,2.3000
2744,World,Historical,EV sales share,Cars,EV,2019,percent,2.6000
2755,World,Historical,EV sales share,Cars,EV,2020,percent,4.2000
2759,World,Historical,EV sales share,Cars,EV,2021,percent,8.7000


In [31]:
df_sales_share_ev_sorted = df_sales_share_ev.sort_values(by='value', ascending=False)

In [32]:
df_sales_share_ev_sorted

Unnamed: 0,region,category,parameter,mode,powertrain,year,unit,value
1813,Norway,Historical,EV sales share,Cars,EV,2022,percent,88.00000
1804,Norway,Historical,EV sales share,Cars,EV,2021,percent,86.00000
1801,Norway,Historical,EV sales share,Cars,EV,2020,percent,75.00000
1136,Iceland,Historical,EV sales share,Cars,EV,2021,percent,72.00000
1139,Iceland,Historical,EV sales share,Cars,EV,2022,percent,70.00000
...,...,...,...,...,...,...,...,...
2040,Rest of the world,Historical,EV sales share,Cars,EV,2013,percent,0.00120
1532,Mexico,Historical,EV sales share,Cars,EV,2013,percent,0.00088
2024,Rest of the world,Historical,EV sales share,Cars,EV,2011,percent,0.00033
1522,Mexico,Historical,EV sales share,Cars,EV,2011,percent,0.00026


In [35]:
# Filter the data for years 2018 to 2022
filtered_data = df_sales_share[(df_sales_share['year'] >= 2018) & (df_sales_share['year'] <= 2022)]

# Filter the top regions
top_regions = ['China', 'USA', 'Europe']

# Create the bar plot using Plotly
fig = go.Figure()

colors = {
    'China': '#464545',
    'USA': '#258ca8',
    'Europe': '#ff7518'
}

for region in top_regions:
    region_data = filtered_data[(filtered_data['region'] == region) & (filtered_data['parameter'] == 'EV sales share')]
    
    fig.add_trace(go.Bar(
        x=region_data['year'],  # Use years for x-axis
        y=region_data['value'],
        name=region,
        marker_color=colors[region],  # Set color for the legend
    ))

# Update layout
fig.update_layout(
    title='Sales Share by Region ',
    # xaxis_title='Year',  # Set x-axis title to Year
    yaxis=dict(
        title='Sales Share',
        titlefont=dict(color='rgb(55, 83, 109)'),
        tickfont=dict(color='rgb(55, 83, 109)'),
    ),
    legend_title_text='Regions',  # Set legend title to Region
    xaxis=dict(tickangle=-45),  # Rotate x-axis tick labels for better visibility
       font=dict(size=10) ,
    # width=400,  # Set the figure width
    # height=400  # Set the figure height
)


# Show the plot
fig.show()
fig.write_html('../data/html/bars_sales_shares_regions.html')



In [90]:
# Filter the data for years 2018 to 2022
filtered_data = df_sales_share[(df_sales_share['year'] >= 2018) & (df_sales_share['year'] <= 2022)]

# Filter the top regions
top_regions = ['China', 'USA', 'Europe']

# Create the bar plot using Plotly
fig = go.Figure()

colors = {
    'China': '#464545',
    'USA': '#258ca8',
    'Europe': '#ff7518'
}

for region in top_regions:
    region_data = filtered_data[(filtered_data['region'] == region) & (filtered_data['parameter'] == 'EV sales share')]
    
    fig.add_trace(go.Bar(
        x=region_data['year'],  # Use years for x-axis
        y=region_data['value'],
        name=region,
        marker_color=colors[region],  # Set color for the legend
    ))

# Update layout for aesthetics
fig.update_layout(
    title='Sales Share by Region',
    # xaxis_title='',  # Set x-axis title to Year
    yaxis=dict(
        title='Sales Share',
        titlefont=dict(color='rgb(55, 83, 109)'),
        tickfont=dict(color='rgb(55, 83, 109)'),
    ),
    legend_title_text='Regions',  # Set legend title to Region
    xaxis=dict(
        tickangle=-45,  # Rotate x-axis tick labels for better visibility
        tickvals=filtered_data['year'].unique(),  # Specify the x-axis tick positions
        ticktext=filtered_data['year'].unique()  # Specify the labels for the tick positions
    ),
    font=dict(size=10),
    width=400,  # Set the figure width
    height=400  # Set the figure height
)

# Show the plot
fig.show()
fig.write_html('../data/html/bars_sales_shares_regions.html')
