# Data Exploration

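This notebook describes the data exploration steps for the two charging-station tables stored in `AMSE_database.sqlite`: `e_charging_stations` (Mobilithek) and `e_ladesäulenregister` (GovData).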

## Install dependencies

In [None]:
%pip install pandas
%pip install seaborn
%pip install matplotlib 
%pip install sqlalchemy
%pip install openpyxl

## Load data

In [None]:
import pandas as pd

# Both tables are read from the same SQLite file, so the connection URL is named once.
DATABASE_URL = 'sqlite:///AMSE_database.sqlite'

# One DataFrame per table; the later cells label csv_df as "Mobilithek" and excel_df as "GovData".
csv_df = pd.read_sql_table('e_charging_stations', DATABASE_URL)
excel_df = pd.read_sql_table('e_ladesäulenregister', DATABASE_URL)

### Look at the first rows

In [None]:
# Preview the first ten rows of csv_df.
csv_df.head(n=10)

In [None]:
# Preview the first ten rows of excel_df.
excel_df.head(n=10)

### Data exploration
Print some basic information about both tables, then compare them by city (`ort`), by the address columns they share, and by operator (`betreiber`).

In [None]:
# Print the DataFrame.info() summary for csv_df.
csv_df.info()

In [None]:
# Print the DataFrame.info() summary for excel_df.
excel_df.info()

In [None]:
# For each distinct "ort" value, count the non-null entries in every other column of csv_df.
csv_df.groupby("ort").count()

In [None]:
# For each distinct "ort" value, count the non-null entries in every other column of excel_df.
excel_df.groupby("ort").count()

In [None]:
# Columns that are described side by side for both tables.
shared_columns = ['betreiber', 'strasse', 'hausnummer', 'postleitzahl', 'ort']

print("\nSummary statistics for shared columns:")
# Each heading is printed right before the describe() output of its table.
for heading, frame in (("Mobilithek:", csv_df), ("\nGovData:", excel_df)):
    print(heading)
    print(frame[shared_columns].describe())

In [None]:
import matplotlib.pyplot as plt

# NOTE(review): 'Arial Unicode MS' is not installed on every system; when it is
# missing matplotlib warns and falls back to its default font. Confirm this
# font is really required (e.g. for umlauts) or switch to a portable one.
plt.rcParams['font.family'] = 'Arial Unicode MS'


def plot_operator_counts(frame, source_name):
    """Scatter-plot how many rows each operator has in ``frame``.

    Parameters
    ----------
    frame : pandas.DataFrame
        Table that contains a ``betreiber`` column.
    source_name : str
        Name of the data source; it is inserted into the plot title.

    Returns
    -------
    pandas.Series
        Number of rows per ``betreiber`` value, largest count first.
    """
    # value_counts() already returns the counts in descending order, so no
    # additional sort_values() call is needed.
    operator_counts = frame['betreiber'].value_counts()

    fig, ax = plt.subplots(figsize=(10, 6))
    ax.scatter(operator_counts.index, operator_counts.values)
    ax.set_xlabel('Operator')
    # NOTE(review): the plotted values are row counts per operator, while the
    # label says "charging points" - confirm that one row is one charging point.
    ax.set_ylabel('Number of Charging Points')
    ax.set_title(f'Number of Charging Points by Operator ({source_name})')
    ax.set_xticks([])  # hide the operator names on the x-axis
    plt.show()
    return operator_counts


# Same plot for both sources; the count Series stay available under their original names.
operator_counts_mobilithek = plot_operator_counts(csv_df, 'Mobilithek')
operator_counts_govdata = plot_operator_counts(excel_df, 'GovData')


In [None]:
import matplotlib.pyplot as plt

# Stack both tables, normalise the operator names and clean the rows.
# drop_duplicates() and fillna() return new objects instead of modifying the
# frame, so their results have to be kept; calling them as bare statements
# leaves combined_data unchanged.
combined_data = (
    pd.concat([csv_df, excel_df])
    # Strip surrounding whitespace so the same operator is not split into several groups.
    .assign(betreiber=lambda frame: frame['betreiber'].str.strip())
    .drop_duplicates()
    # Fill only the column that is summed below; a frame-wide fillna(0) would
    # also replace missing text fields such as 'betreiber' with the number 0.
    .assign(anzahl_ladepunkte=lambda frame: frame['anzahl_ladepunkte'].fillna(0))
)

# Total of 'anzahl_ladepunkte' per operator (rows with a missing operator are
# left out by groupby's default dropna behaviour).
charging_points_by_operator = combined_data.groupby('betreiber')['anzahl_ladepunkte'].sum()

# One value per operator, in the order of the grouped index.
charging_points = charging_points_by_operator.tolist()

# A single horizontal box showing how the per-operator totals are distributed.
fig, ax = plt.subplots(figsize=(12, 6))
ax.boxplot(charging_points, vert=False)
ax.set_xlabel('Number of Charging Points')
ax.set_ylabel('Operator')
ax.set_title('Boxplot: Number of Charging Points by Operator')
ax.set_yticks([])  # only one box is drawn, so the y tick carries no information
plt.show()







