In [1]:
import sys

# Install a pip package in the current Jupyter kernel
!{sys.executable} -m pip install pandas matplotlib scikit-learn xgboost

# Install a conda package in the current Jupyter kernel
#!conda install --yes --prefix {sys.prefix} pandas matplotlib scikit-learn xgboost

Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com



[notice] A new release of pip is available: 23.3.1 -> 24.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [2]:
import pandas as pd

from electric_cars.view import plot_column
from electric_cars.utils import load_data, preview_table, describe_column, fit_predicting_model, make_prediction, stack_tables, join_tables, apply_calculation_to_row, aggregate_sum

# Remove pandas' display truncation limits for both columns and rows, so
# rendered tables are shown in full. A single call sets both options.
pd.set_option(
    'display.max_columns', None,
    'display.max_rows', None,
)

# Assumptions

Here are some key assumptions that you may need for the modeling:
- In France, about 20% of the electric vehicle charging needs are covered through home charging outlets.
- Vehicle traffic in France depends heavily on the time of day. It can be assumed to double during the following time slots: 6:30-9:30, 12:00-14:00, 16:30-19:30.
- There is no competition between the various players on the selling price of electricity.
- The average speed for a home - work journey is 45 km/h (about 28 mph).
- The choice of charging stations in an area does not depend on the characteristics of the available stations.
- A French person uses their car on average 4 times a week.

# Available functions

Here is the list of functions made available to you:
- `load_data()`: loads the data from the csv files
- `preview_table(table)`: prints the first rows of the table (the preview of `table_1` below shows 10 rows)
- `describe_column(table, column_name)`: prints the description of the column
- `plot_column(table, column_name, zone_name)`: plots column_name for zone_name
- `fit_predicting_model(table, features, target, per_zone, model_type)`: fits a model on the table and returns it. The available model_types are "linear_regression", "decision_tree", "xgboost". per_zone = True will fit one independent model per zone while per_zone = False will fit the same model on all zones.
- `make_prediction(table, features, target, per_zone, model)`: makes a prediction on the table using the model. per_zone = True if you fitted independent models per zone, else False.
- `stack_tables(table_1, table_2, stack_type)`: stacks table_1 and table_2 either vertically or horizontally. `stack_type` can be "vertical" or "horizontal".
- `join_tables(table_1, table_2, join_key, join_type)`: joins table_1 and table_2 on the join_key. `join_type` can be "left", "right", "outer" or "inner".
- `apply_calculation_to_row(table, new_column_name, function, columns)`: applies the function to the columns and creates a new column named new_column_name
- `aggregate_sum(table, column_name, new_column_name)`: aggregates the column_name by summing it and creates a new column named new_column_name

# 0. Loading data

In [3]:
# Load the data tables from the CSV files via the project helper. The result
# is unpacked into exactly five names, so load_data() must yield five items.
table_1, table_2, table_3, table_4, table_5 = load_data()

# I. Data exploration


In [4]:
# Show the first rows of table_1 to inspect its columns and typical values.
preview_table(table_1)

Unnamed: 0,Date,Zone Name,Rainy Day,Population,Median Salary (EUR),Shops per 1k hab,Hospitals per 1M hab,Childcare per 100 children under 3 y.o.,Public_Transport_Quality,Infrastructure_Quality,Traffic,weekend
0,2012-01-01,Zone A,0,37489.3834,2028.4367,46.9499,45.8929,15.8448,0.063,0.1187,18693.0,1
1,2012-01-02,Zone A,1,37489.4073,2028.4421,46.9491,45.8939,15.8445,0.063,0.1187,18337.0,0
2,2012-01-03,Zone A,1,37489.4312,2028.4474,46.9483,45.8948,15.8442,0.063,0.1187,18337.0,0
3,2012-01-04,Zone A,1,37489.4551,2028.4528,46.9475,45.8958,15.8439,0.063,0.1187,18337.0,0
4,2012-01-05,Zone A,0,37489.479,2028.4581,46.9468,45.8967,15.8436,0.0631,0.1188,17803.0,0
5,2012-01-06,Zone A,0,37489.5029,2028.4635,46.946,45.8977,15.8433,0.0631,0.1188,17803.0,0
6,2012-01-07,Zone A,0,37489.5268,2028.4688,46.9452,45.8987,15.843,0.0631,0.1188,18693.0,1
7,2012-01-08,Zone A,1,37489.5507,2028.4742,46.9445,45.8996,15.8427,0.0631,0.1188,19253.0,1
8,2012-01-09,Zone A,1,37489.5746,2028.4796,46.9437,45.9006,15.8424,0.0631,0.1188,18337.0,0
9,2012-01-10,Zone A,1,37489.5985,2028.4849,46.9429,45.9015,15.842,0.0631,0.1188,18337.0,0


# I. Data analysis