# Data Exploration

## Preparation

### Install dependencies

In [None]:
%pip install pandas
%pip install pandasql
%pip install plotly
%pip install nbformat
%pip install folium
%pip install kaleido

### Import Modules

In [None]:
import os
import sqlite3

import folium
import pandas as pd
import pandasql as ps
import plotly.express as px
import plotly.graph_objects as go

### Load data

In [None]:
# Read the complete `data` table from the SQLite file into a DataFrame.
# read_sql_query fetches the whole result set before returning, so the
# connection is no longer needed afterwards; close it in `finally` so the
# database handle is released even when the query raises.
con = sqlite3.connect("../data/data.sqlite")
try:
    df = pd.read_sql_query("SELECT * FROM data;", con)
finally:
    con.close()

### Preview the loaded data and inspect its structure

In [None]:
# Show the first 20 rows; as the last expression of the cell, the returned
# DataFrame is rendered by the notebook.
df.head(20)

In [None]:
# Print a concise summary of the DataFrame (columns, non-null counts, dtypes).
df.info()

## Visualize data

### Plot locations of the measurement stations

In [None]:
# Coordinates ([lat, lon]) and tooltip text for every measurement site,
# in the order the markers are added to the map.
stations = [
    ([49.5030, 11.0549], "Weather Station: Nürnberg (03668)"),
    ([49.6497, 11.0075], "Weather Station: Möhrendorf-Kleinseebach(01279)"),
    ([49.5964, 11.0043], "Pedestrian Zone: Erlangen"),
]

# Base map centred on [49.58, 11.00]; one marker is attached per site.
m = folium.Map(location=[49.58, 11.00], zoom_start=12)
for coords, label in stations:
    folium.Marker(coords, tooltip=label).add_to(m)

# Trailing expression so the notebook renders the map.
m

### Print min and max values of each column (values will be used later for plotting)

In [None]:
# Smallest and largest value of each column. The names stay at cell scope so
# that later cells can refer to them.
dateMin, dateMax = df['date'].min(), df['date'].max()
pedsMin, pedsMax = df['pedestrians'].min(), df['pedestrians'].max()
rainMin, rainMax = df['rain'].min(), df['rain'].max()
tempMin, tempMax = df['temp'].min(), df['temp'].max()

# One "<label>: <value>" line per extreme; `!s` forces str() conversion.
for label, value in (
    ("Min Date", dateMin),
    ("Max Date", dateMax),
    ("Min Pedestrians", pedsMin),
    ("Max Pedestrians", pedsMax),
    ("Min Rain", rainMin),
    ("Max Rain", rainMax),
    ("Min Temperature", tempMin),
    ("Max Temperature", tempMax),
):
    print(f"{label}: {value!s}")

### Generate plots

In [None]:
# Scatter plot: temperature on x, rainfall on y; marker size and colour both
# encode the pedestrian count, and the date is added to the hover box.
fig = px.scatter(
    df,
    x="temp",
    y="rain",
    size="pedestrians",
    color="pedestrians",
    hover_data=["date"],
    labels={'date':'Date', 'pedestrians': 'Number of Pedestrians', 'temp': 'Temperature in °C', 'rain': 'Rain in mm'},
    title="Rainfall and temperature in relation to the number of pedestrians.",
    width=1000,
    height=400
)
fig.show()
# write_image does not create missing parent directories, so make sure the
# export folder exists before the first PDF of this cell is written.
os.makedirs("plots", exist_ok=True)
fig.write_image("plots/plot1.pdf")

# Bar chart of the pedestrian count over the date column; the bar colour
# encodes the `rain` column.
fig = px.bar(
    data_frame=df,
    x="date",
    y="pedestrians",
    color="rain",
    barmode="group",
    title="Number of pedestrians per day colored with rainfall in mm.",
    labels={
        'date': 'Date',
        'pedestrians': 'Number of Pedestrians',
        'rain': 'Rain in mm',
    },
    width=1000,
    height=400,
)
fig.show()
# Export the figure as PDF next to the other plots.
fig.write_image("plots/plot2.pdf")

# Bar chart of the pedestrian count over the date column; the bar colour
# encodes the `temp` column.
fig = px.bar(
    data_frame=df,
    x="date",
    y="pedestrians",
    color="temp",
    barmode="group",
    title="Number of pedestrians per day colored with temperature in °C.",
    labels={
        'date': 'Date',
        'pedestrians': 'Number of Pedestrians',
        'temp': 'Temperature in °C',
    },
    width=1000,
    height=400,
)
fig.show()
# Export the figure as PDF next to the other plots.
fig.write_image("plots/plot3.pdf")