# Development Notebook

In this notebook we develop the functions used in our Streamlit EV Adoption Tracker app.

In [16]:
import pandas as pd

In [17]:
# IEA EV API endpoint; the query string asks for the "EV sales" parameter,
# the "Historical" category and the "Cars" mode, with csv=true.
ev_url = (
    "https://api.iea.org/evs"
    "?parameters=EV%20sales&category=Historical&mode=Cars&csv=true"
)

In [None]:
# Read the CSV served at ev_url into a DataFrame and preview its first rows
ev_df = pd.read_csv(filepath_or_buffer=ev_url)
ev_df.head(n=5)


In [None]:
# Summarise every column: descriptive statistics for numeric columns,
# the distinct values for everything else.
for col in ev_df.columns:
    print(f'Column: {col}')
    # Branch on "is numeric" rather than `dtype == "object"`: text columns are
    # not guaranteed to be stored as object dtype (pandas can infer a dedicated
    # string dtype), and describe() on them is not the summary we want here.
    if pd.api.types.is_numeric_dtype(ev_df[col]):
        print(f'{col}:{ev_df[col].describe()}')
    else:
        print(f'{col}:{ev_df[col].unique()}')
    print("\n")


In [20]:
# Remove the columns this analysis does not need: category, mode and unit
ev_df = ev_df.drop(["category", "mode", "unit"], axis="columns")

In [21]:
# Split the data by parameter: one frame for "EV sales", one for "EV sales share"
ev_sales_df = ev_df[ev_df["parameter"] == "EV sales"]
ev_sales_share_df = ev_df[ev_df["parameter"] == "EV sales share"]

# Drop the parameter column from both filtered frames (it is constant within
# each of them). ev_df itself is left untouched, so this cell can be re-run
# without a KeyError on the "parameter" lookups above.
ev_sales_df = ev_sales_df.drop(columns=["parameter"])
ev_sales_share_df = ev_sales_share_df.drop(columns=["parameter"])

In [None]:
# Preview the first rows of the EV-sales frame
ev_sales_df.head(n=5)

In [23]:
# Remove the powertrain column from the sales-share frame
ev_sales_share_df = ev_sales_share_df.drop("powertrain", axis="columns")


In [None]:
# Preview the first rows of the EV-sales-share frame
ev_sales_share_df.head(n=5)

In [25]:
def load_data(
    url="https://api.iea.org/evs?parameters=EV%20sales&category=Historical&mode=Cars&csv=true",
):
    """Load the EV dataset and split it into a sales and a sales-share frame.

    Parameters
    ----------
    url : str, optional
        Location of the CSV to read; it is passed straight to
        ``pandas.read_csv``, so a local file path works as well as a URL.
        Defaults to the IEA EV endpoint used throughout this notebook.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        ``(ev_sales_df, ev_sales_share_df)`` where

        * ``ev_sales_df`` holds the rows whose ``parameter`` is "EV sales",
          without the ``category``, ``mode``, ``unit`` and ``parameter`` columns;
        * ``ev_sales_share_df`` holds the rows whose ``parameter`` is
          "EV sales share", without those columns and without ``powertrain``.
    """
    ev_df = pd.read_csv(url)

    # Columns that are removed from both result frames
    common_drop = ["category", "mode", "unit", "parameter"]

    is_sales = ev_df["parameter"] == "EV sales"
    is_sales_share = ev_df["parameter"] == "EV sales share"

    ev_sales_df = ev_df[is_sales].drop(columns=common_drop)
    ev_sales_share_df = ev_df[is_sales_share].drop(columns=common_drop + ["powertrain"])
    return ev_sales_df, ev_sales_share_df

In [26]:
# a -> EV sales frame, b -> EV sales share frame (the order load_data returns them in)
a, b = load_data()

In [None]:
# Show the first rows of both frames returned by load_data
display(a.head(), b.head())

### top sales by country
This function takes the sales DataFrame and returns a sorted DataFrame containing the 10 countries with the most sales.

In [28]:
# Year to analyse. Defined here so the cell runs on a fresh kernel instead of
# relying on a `year` variable left over from earlier experimentation.
year = 2023

# keep only the rows for that year, then drop the (now constant) year column
ev_sales_df_2023 = ev_sales_df[ev_sales_df["year"] == year]
ev_sales_df_2023 = ev_sales_df_2023.drop(columns=["year"])

# region labels that are aggregates rather than individual countries
# NOTE(review): confirm these labels match the spellings used in the dataset's region column
non_country_regions = [
    "World", "OECD", "G20", "G7", "EU", "EU27", "EU28", "Rest of World",
    "Europe", "Asia", "North America", "South America", "Africa",
    "Middle East", "Oceania",
]
ev_sales_df_2023 = ev_sales_df_2023[~ev_sales_df_2023["region"].isin(non_country_regions)]

# aggregate the sales by region
ev_sales_df_2023_agg = ev_sales_df_2023.groupby("region")["value"].sum().reset_index()

In [None]:
# Turn the per-region totals into a ranked table:
#   * rename region -> country and value -> sales
#   * sort by the raw sales, descending (before rounding, so that rounding
#     cannot create artificial ties that scramble the ranking)
#   * express sales in millions, rounded to 2 decimal places, as "sales_(m)"
#   * drop the raw sales column
ev_sales_df_2023_agg = (
    ev_sales_df_2023_agg
    .rename(columns={"region": "country", "value": "sales"})
    .sort_values(by="sales", ascending=False)
    .assign(**{"sales_(m)": lambda df: (df["sales"] / 1000000).round(2)})
    .drop(columns=["sales"])
    .reset_index(drop=True)
)

# make the index start from 1 so it reads as a rank
ev_sales_df_2023_agg.index = ev_sales_df_2023_agg.index + 1

# show the top 10 countries (a bare expression: `return` is only valid inside a function)
ev_sales_df_2023_agg.head(10)

In [None]:
def top10_sales_by_country(ev_sales_df, year=2023, excluded_regions=None):
    """Return the ten countries with the highest total sales in ``year``.

    Parameters
    ----------
    ev_sales_df : pandas.DataFrame
        Sales data with at least the columns ``region``, ``year`` and ``value``.
    year : int, optional
        Year to rank; only rows whose ``year`` equals this value are used.
    excluded_regions : list of str, optional
        ``region`` labels to leave out because they are aggregates rather than
        countries. Defaults to the list defined below.

    Returns
    -------
    pandas.DataFrame
        At most ten rows with the columns ``country`` and ``sales_(m)`` (summed
        ``value`` divided by 1,000,000 and rounded to 2 decimals), ordered by
        sales in descending order, with an index that starts at 1.
    """
    if excluded_regions is None:
        # NOTE(review): confirm these labels match the spellings used in the
        # dataset's region column; any aggregate not listed here is ranked
        # as if it were a country.
        excluded_regions = [
            "World", "OECD", "G20", "G7", "EU", "EU27", "EU28", "Rest of World",
            "Europe", "Asia", "North America", "South America", "Africa",
            "Middle East", "Oceania",
        ]

    in_year = ev_sales_df["year"] == year
    is_country = ~ev_sales_df["region"].isin(excluded_regions)

    ranking = (
        ev_sales_df.loc[in_year & is_country]
        .groupby("region")["value"]
        .sum()
        .reset_index()
        .rename(columns={"region": "country", "value": "sales"})
        # Rank on the raw totals: sorting after rounding to 2 decimals lets
        # countries with different sales tie and end up in the wrong order.
        .sort_values(by="sales", ascending=False)
        .assign(**{"sales_(m)": lambda df: (df["sales"] / 1000000).round(2)})
        .drop(columns=["sales"])
        .reset_index(drop=True)
    )

    # make the index start from 1 so it reads as a rank
    ranking.index = ranking.index + 1

    return ranking.head(10)