# Data Exploration

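This notebook documents the data exploration steps: it loads the combined pedestrian and weather data, shows where the measurements were taken, and takes a first look at how the number of pedestrians relates to rainfall and temperature.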

## Install dependencies

In [None]:
%pip install pandas
%pip install plotly
%pip install nbformat
%pip install folium

## Load data

In [None]:
# import the modules
import sqlite3
import pandas as pd
import folium
import plotly.express as px
import plotly.graph_objects as go

# The data preparation is done in SQL:
# * Rain and temperature are each stored in two tables ('rainmoe'/'rainnue' and
#   'tempmoe'/'tempnue' -- presumably one table per weather station; TODO confirm).
# * Each CTE joins its two tables on the measurement date (MESS_DATUM) and
#   averages the two readings.
# * The final SELECT joins both CTEs with the table 'pedestrians' on the date and
#   returns the columns 'date', 'pedestrians', 'rain' and 'temp'.
#
# The averages divide by 2.0 (not 2): SQLite performs integer division when both
# operands are integers, which would silently truncate the mean.
# The column names '[  RS]' and '[ TMK]' contain leading spaces on purpose; they
# must match the column names in the database exactly.
QUERY = '''

WITH RainData AS (
    SELECT
        r1.MESS_DATUM AS date,
        (r1.[  RS] + r2.[  RS]) / 2.0 AS rain
    FROM rainmoe r1
    JOIN rainnue r2 ON r1.MESS_DATUM = r2.MESS_DATUM
),
TempData AS (
    SELECT
        t1.MESS_DATUM AS date,
        (t1.[ TMK] + t2.[ TMK]) / 2.0 AS temp
    FROM tempmoe t1
    JOIN tempnue t2 ON t1.MESS_DATUM = t2.MESS_DATUM
)

SELECT
    pd.timestamp AS date,
    pd.pedestrians_count AS pedestrians,
    rd.rain,
    td.temp
FROM pedestrians pd
JOIN RainData rd ON pd.timestamp = rd.date
JOIN TempData td ON pd.timestamp = td.date;

'''

# connect to the database
con = sqlite3.connect("../data/data.sqlite")
try:
    # Load the joined result into a DataFrame; all later cells work on `df`.
    df = pd.read_sql_query(QUERY, con)
finally:
    # Release the database file even if the query fails. The connection is only
    # needed for this one read, so it is closed right away.
    con.close()

In [None]:
# Show the first 20 rows of the joined data as a quick visual sanity check.
df.head(20)

In [None]:
# Summarise the columns: dtypes, non-null counts and memory usage.
df.info()

### Locations

In [None]:
# Base map; the markers for the measurement sites are added below.
m = folium.Map(location=[49.58, 11.00], zoom_start=12)

# (latitude, longitude, tooltip text) of every site shown on the map.
sites = (
    (49.5030, 11.0549, "Weather Station: Nürnberg (03668)"),
    (49.6497, 11.0075, "Weather Station: Möhrendorf-Kleinseebach(01279)"),
    (49.5964, 11.0043, "Pedestrian Zone: Erlangen"),
)
for latitude, longitude, label in sites:
    marker = folium.Marker([latitude, longitude], tooltip=label)
    marker.add_to(m)

# The last expression of the cell renders the interactive map.
m

### Data exploration
Print some basic information about the data: the minimum and maximum value of each column, as a first check of the value ranges. Further data exploration would continue here.

In [None]:
# Report the value range (minimum and maximum) of every measured column.
# Each pair is (label used in the printed text, column name in `df`).
for heading, column in (
    ("Pedestrians", "pedestrians"),
    ("Rain", "rain"),
    ("Temperature", "temp"),
):
    series = df[column]
    print(f"Min {heading}: {series.min()!s}")
    print(f"Max {heading}: {series.max()!s}")

### Print some figures

In [None]:
# Axis/legend labels shared by all figures in this cell.
base_labels = {'date': 'Date', 'pedestrians': 'Number of Pedestrians'}

# Figure 1: temperature vs. rain; marker size and colour both encode the
# number of pedestrians, and the date is shown on hover.
scatter_labels = {**base_labels, 'temp': 'Temperature in °C', 'rain': 'Rain in mm'}
fig = px.scatter(
    df,
    x="temp",
    y="rain",
    size="pedestrians",
    color="pedestrians",
    hover_data=["date"],
    labels=scatter_labels,
    title="Rainfall and temperature in relation to the number of pedestrians.",
)
fig.show()

# Figures 2 and 3: pedestrians per date as bars, coloured first by rain and
# then by temperature. Each entry is (colour column, its label, figure title).
bar_charts = (
    ("rain", "Rain in mm", "Number of pedestrians per day colored with rainfall in mm."),
    ("temp", "Temperature in °C", "Number of pedestrians per day colored with temperature in °C."),
)
for colour_column, colour_label, chart_title in bar_charts:
    fig = px.bar(
        df,
        x="date",
        y="pedestrians",
        color=colour_column,
        barmode="group",
        labels={**base_labels, colour_column: colour_label},
        title=chart_title,
    )
    fig.show()