# Elektromobilität in den USA - Entwicklungen und Unterschiede

In [18]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import folium

from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
from sklearn.metrics import mean_squared_error
from folium.plugins import HeatMap

## Data Preparation

### Datensatz 1

In [19]:
# Dataset 1 - vehicle registrations.
# Load the CSV and shorten the electric-vehicle column name in one step.
registrations = (
    pd.read_csv('data/vehicle_registrations.csv')
    .rename(columns={'Electric (EV)': 'Electric'})
)

# Every column except 'State' is converted to integers; comma separators
# (e.g. '1,234') are stripped first so the cast succeeds.
for column_name in [name for name in registrations.columns if name != 'State']:
    without_commas = registrations[column_name].replace({',': ''}, regex=True)
    registrations[column_name] = without_commas.astype(int)

# Maps each new aggregate column to the source columns that are summed into
# it (row-wise) and dropped afterwards. Insertion order determines the order
# in which the aggregate columns are appended to the frame.
column_groups = {
    'Hybrid': ['Plug-In Hybrid Electric (PHEV)', 'Hybrid Electric (HEV)'],
    'Fuel': ['Gasoline', 'Diesel'],
    'Other': ['Biodiesel', 'Ethanol/Flex (E85)', 'Compressed Natural Gas (CNG)', 'Propane', 'Hydrogen', 'Methanol', 'Unknown Fuel'],
}

for aggregate_name, columns_to_remove in column_groups.items():
    registrations[aggregate_name] = registrations[columns_to_remove].sum(axis=1)
    registrations = registrations.drop(columns=columns_to_remove)

In [20]:
# Sanity check: print column names, non-null counts and dtypes of the prepared frame.
registrations.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 357 entries, 0 to 356
Data columns (total 6 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   Year      357 non-null    int64 
 1   State     357 non-null    object
 2   Electric  357 non-null    int64 
 3   Hybrid    357 non-null    int64 
 4   Fuel      357 non-null    int64 
 5   Other     357 non-null    int64 
dtypes: int64(5), object(1)
memory usage: 16.9+ KB


### Datensatz 2

In [21]:
# Dataset 2 - alternative fuel stations.
# Only these columns are read from the CSV.
columns_to_keep = ['State', 'Latitude', 'Longitude', 'Open Date']

stations = pd.read_csv(
    'data/alt_fuel_stations (Dec 2 2023).csv',
    usecols=columns_to_keep,
    engine='python',
)

# Parse the opening dates (expected as 'YYYY-MM-DD') into datetime values
# and store them back in the same column.
open_dates = pd.to_datetime(stations['Open Date'], format='%Y-%m-%d')
stations['Open Date'] = open_dates

In [22]:
# Dataset 3 - USA (data/usa.csv).
usa = pd.read_csv('data/usa.csv')

# These columns have their comma separators removed and are then parsed
# as numbers, one column at a time.
columns_to_int = ['Population', 'Land_area']
for numeric_column in columns_to_int:
    without_commas = usa[numeric_column].replace({',': ''}, regex=True)
    usa[numeric_column] = pd.to_numeric(without_commas)