In [1]:
# open questions: 
# - Filter out all data that was added after March 2020.
# - Aggregating the data into ~1 km² cells makes it coarser and adds an (estimated) variance of about 3%.
# - The definition of EV can differ considerably. In our case we count battery-powered electric cars as well as plug-in hybrid vehicles.
# --------------
# Import the Data
# --------------
import os
import pandas as pd
import numpy as np
import locale
from locale import atof
import math



In [2]:
# Population data (labelled "March 2020" by the authors): comma-separated,
# Latin-1 encoded, column names taken from the first row.
population_df = pd.read_csv(
    'data/population_density.csv',
    sep=',',
    encoding='latin-1',
    header=0,
)

# Charger locations (labelled "August 2022" by the authors): semicolon-separated,
# column names taken from row index 10 (the rows above it are skipped), and
# blanks that directly follow a delimiter are dropped.
charger_df = pd.read_csv(
    'data/charger_locations.csv',
    sep=';',
    encoding='ISO-8859-1',
    header=10,
    skipinitialspace=True,
)

In [3]:
# Chargers: keep only the two coordinate columns and give them prefixed English
# names ('Breitengrad' is German for latitude, 'Längengrad' for longitude).
charger_df = charger_df[['Breitengrad', 'Längengrad']].rename(
    columns={'Breitengrad': 'ch_latitude', 'Längengrad': 'ch_longitude'}
)

# Population: same naming scheme; no column is dropped here.
population_df = population_df.rename(
    columns={
        'Lat': 'pop_latitude',
        'Lon': 'pop_longitude',
        'Population': 'pop_density',
    }
)

In [4]:
# Fix wrong values
def preprocess_lat_long(data, column):
  """Replace decimal commas with decimal points in a text coordinate column.

  The column is overwritten inside ``data`` and the updated column is returned
  as well.

  Args:
    data: DataFrame that holds the column; it is modified in place.
    column: Name of a column containing strings such as ``'52,5'``.

  Returns:
    ``data[column]`` after the replacement.
  """
  # A single literal (non-regex) pass replaces every comma; repeating it would
  # change nothing. The vectorised string accessor leaves missing values
  # untouched instead of raising on them.
  data[column] = data[column].str.replace(',', '.', regex=False)

  return data[column]

# Normalise the decimal separator in both charger coordinate columns
# (latitude first, then longitude).
for coordinate_column in ('ch_latitude', 'ch_longitude'):
  preprocess_lat_long(charger_df, coordinate_column)

def preprocess_rstrip(data, column):
  """Strip trailing '.' characters from every value of a text column, in place.

  Written for stray dots at the end of values in the 'Breitengrad' (latitude)
  column. Note that ``str.rstrip('.')`` removes *all* trailing dots of a value,
  not only a single one.

  Args:
    data: DataFrame that holds the column; it is modified in place.
    column: Name of a column whose values are strings.

  Returns:
    None.
  """
  def _drop_trailing_dots(text):
    return text.rstrip('.')

  data[column] = data[column].apply(_drop_trailing_dots)

# Only the latitude column is cleaned of trailing dots.
preprocess_rstrip(data=charger_df, column='ch_latitude')

In [5]:
# Convert the charger coordinates from text to float64.
# The decimal separator was already normalised to '.', so the values are parsed
# with the locale-independent float conversion. locale.atof() honours the
# machine's LC_NUMERIC settings: under a locale whose thousands separator is '.'
# (e.g. de_DE) it would silently turn '52.5' into 525.0.
# The setlocale() call is retained only for its process-wide side effect, in case
# other cells depend on it; the conversion below does not.
locale.setlocale(locale.LC_NUMERIC, '')
charger_df['ch_latitude'] = charger_df['ch_latitude'].astype(np.float64)
charger_df['ch_longitude'] = charger_df['ch_longitude'].astype(np.float64)

In [6]:
# Report the number of missing values per column: chargers first, then population.
for frame in (charger_df, population_df):
  print(frame.isnull().sum())

ch_latitude     0
ch_longitude    0
dtype: int64
pop_latitude     0
pop_longitude    0
pop_density      0
dtype: int64


In [7]:
# Rough bounding box of Germany (decimal degrees)
# north: 55.060673
# east: 15.041248
# south: 47.259456
# west: 5.857632
# ====
# Latitude: 47.259456 - 55.060673
# Longitude: 5.857632 - 15.041248
# ====
# NOTE: the border constants below are named with "latitude" and "longitude"
# swapped: the north/south limits are latitudes and the west/east limits are
# longitudes. The names are kept unchanged because other cells refer to them.

longitude_border_south = 47.259456  # southern limit (a latitude value)
longitude_border_north = 55.060673  # northern limit (a latitude value)
latitude_border_west = 5.857632  # western limit (a longitude value)
latitude_border_east = 15.041248  # eastern limit (a longitude value)
degree_to_km_latitude = 113 # 1 degree of latitude is taken as 113 km => 2 decimal places give roughly 1 km accuracy
# NOTE(review): 40075 km / 360 is about 111.3 km per degree - confirm where 113 comes from.

# The length of one degree of longitude is 40075 km * cos(latitude) / 360, so the
# cosine must be taken of the LATITUDE of the location (not of a longitude).
# The _min/_max suffixes name the border the value is evaluated at (smallest /
# largest latitude of the box), not the size of the value.
degree_to_km_longitude_min = 40075 * (math.cos(math.radians(longitude_border_south)) / 360) # ~75.55 km at the southern border
degree_to_km_longitude_max = 40075 * (math.cos(math.radians(longitude_border_north)) / 360) # ~63.75 km at the northern border
# 2 decimal places therefore correspond to roughly 0.64 - 0.76 km in east-west direction.
# Because the spacing of the longitude lines depends on the latitude, the east-west
# size of a 0.01 degree cell differs by about 16% between the southern and northern
# border; either compute it per latitude or accept that variance.

def _inside_bounding_box(latitudes, longitudes):
  """Return a boolean mask that is True where a coordinate pair lies inside the box.

  All four borders are inclusive. The ``longitude_border_*`` constants hold the
  south/north limits and are compared with the latitudes; the
  ``latitude_border_*`` constants hold the west/east limits and are compared
  with the longitudes.
  """
  south_to_north = (latitudes >= longitude_border_south) & (latitudes <= longitude_border_north)
  west_to_east = (longitudes >= latitude_border_west) & (longitudes <= latitude_border_east)
  return south_to_north & west_to_east

# Drop every charger location outside the area of interest.
charger_df = charger_df[_inside_bounding_box(charger_df['ch_latitude'], charger_df['ch_longitude'])]

# Drop every population location outside the area of interest.
population_df = population_df[_inside_bounding_box(population_df['pop_latitude'], population_df['pop_longitude'])]


In [8]:
# Show the structure of both frames. DataFrame.info() writes its report itself and
# returns None, so each print() adds a line reading "None" after the report.
for frame in (charger_df, population_df):
  print(frame.info())

<class 'pandas.core.frame.DataFrame'>
Int64Index: 34720 entries, 0 to 34721
Data columns (total 2 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   ch_latitude   34720 non-null  float64
 1   ch_longitude  34720 non-null  float64
dtypes: float64(2)
memory usage: 813.8 KB
None
<class 'pandas.core.frame.DataFrame'>
Int64Index: 32446528 entries, 0 to 32446527
Data columns (total 3 columns):
 #   Column         Dtype  
---  ------         -----  
 0   pop_latitude   float64
 1   pop_longitude  float64
 2   pop_density    float64
dtypes: float64(3)
memory usage: 990.2 MB
None


In [9]:
# Snap all coordinates to a 0.01 degree grid (2 decimal places) and aggregate per
# grid cell.
# DataFrame.round is vectorised and returns a new frame: this avoids calling
# Python's round() once per element (very slow on the ~32 million population rows)
# and avoids assigning columns into a filtered frame.
# NOTE(review): 0.01 degree is about 1.1 km north-south but only about
# 0.64 - 0.76 km east-west at these latitudes, so a cell is somewhat smaller than
# 1 km² - confirm that this is acceptable for the "*_per_sqkm" columns.

# number of chargers per grid cell
charger_df = (
    charger_df
    .round({'ch_latitude': 2, 'ch_longitude': 2})
    .groupby(['ch_latitude', 'ch_longitude'])
    .size()
    .reset_index(name='ch_count_per_sqkm')
)

# summed population per grid cell
population_df = (
    population_df
    .round({'pop_latitude': 2, 'pop_longitude': 2})
    .groupby(['pop_latitude', 'pop_longitude'])['pop_density']
    .sum()
    .reset_index(name='pop_count_per_sqkm')
)


# Number of EVs in 2020 based on https://www.statista.com/statistics/1166543/electric-passenger-cars-number-by-segment-germany/ => 569,382 vehicles
total_EV = 569382

# Total recorded population (sum over all grid cells)
total_population = population_df['pop_count_per_sqkm'].sum()
print(total_population)

# Average number of EVs per person, rounded to 10 decimal places
ev_per_person = round(total_EV / total_population, 10)
print(ev_per_person)

# Distribute the EVs over the grid cells proportionally to the population.
# The multiplication and rounding are vectorised instead of calling a Python
# lambda once per row; results are still rounded to 10 decimal places.
population_df['ev_count_per_sqkm'] = (population_df['pop_count_per_sqkm'] * ev_per_person).round(10)



80392210.0085602
0.0070825519
