In [1]:
import mysql.connector
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

In [2]:
def load_properties(filepath, sep='=', comment_char='#'):
    """
    Parse a simple ``key<sep>value`` properties file into a dictionary.

    Blank lines and lines that start with ``comment_char`` (after stripping
    surrounding whitespace) are skipped. Every other line is split at the
    first occurrence of ``sep``: the left part becomes the key, the right
    part the value. Both are whitespace-stripped, and leading/trailing
    double quotes are removed from the value. A line without ``sep`` maps
    its whole content to an empty string. If a key occurs more than once,
    the last occurrence wins.

    Parameters
    ----------
    filepath : path of the properties file to read (opened in text mode).
    sep : separator between key and value.
    comment_char : prefix that marks a line as a comment.

    Returns
    -------
    dict mapping each key (str) to its value (str).
    """
    settings = {}
    with open(filepath, "rt") as handle:
        for raw_line in handle:
            entry = raw_line.strip()
            # Guard clause: nothing to parse on empty or comment lines.
            if not entry or entry.startswith(comment_char):
                continue
            # Only the first separator splits; later ones stay in the value.
            name, _, remainder = entry.partition(sep)
            settings[name.strip()] = remainder.strip().strip('"')
    return settings

In [3]:
# Open the MySQL connection; every entry of the properties file is passed
# to mysql.connector.connect as a keyword argument.
connection = mysql.connector.connect(
    **load_properties('./project.properties')
)

In [12]:
# Weather rows for region 'RME' plus a trailing moving average of the snow
# depth over the current row and the two preceding rows (ordered by date).
# The ORDER BY inside OVER(...) only defines the window frame; the outer
# ORDER BY is what guarantees that rows come back in date order, which the
# time-series plots further down rely on.
query = '''
select *, avg(snowdepth) OVER(ORDER BY date
     ROWS BETWEEN 2 PRECEDING AND current row)
     as moving_average from weatherdailysnowdelay where sbbregion_isocode = 'RME'
order by date
'''

result = pd.read_sql(sql=query, con=connection)
# pd.read_sql already returns a DataFrame; the wrapper is kept so that both
# names (`result` and `result_df`) stay available to other cells.
result_df = pd.DataFrame(result)

In [13]:
# Preview the first five rows of the weather / moving-average result.
result_df.head(n=5)

Unnamed: 0,sbbregion_isocode,date,rainfall,snowdepth,temp,zugpuenktlichkeit,moving_average
0,RME,2021-01-01,0.390909,20.0,-2.85,97.08034,20.0
1,RME,2021-01-02,0.554545,18.6667,-2.408333,97.900474,19.33335
2,RME,2021-01-03,0.418182,18.5556,-3.466667,98.479371,19.0741
3,RME,2021-01-04,0.445455,19.0,-4.441667,96.476673,18.740767
4,RME,2021-01-05,0.227273,18.5556,-4.975,96.751683,18.703733


In [14]:
# Train punctuality per date for region 'RME'.
# Explicit ORDER BY: without it the database may return rows in any order,
# and this data is later drawn as a line over the date axis.
sbb_delay_query = '''
select date, zugpuenktlichkeit from sbbdelay
  where sbbregion_isocode = 'RME'
  order by date
'''

result_sbb_delay = pd.read_sql(sql=sbb_delay_query, con=connection)
# pd.read_sql already returns a DataFrame; the wrapper is kept so that both
# names (`result_sbb_delay` and `result_sbb_delay_df`) stay available.
result_sbb_delay_df = pd.DataFrame(result_sbb_delay)

In [15]:
# Convert punctuality (in %) to delay (in %) so that the correlation with
# rain and temperature is easier to see in the plots.
#
# The delay is derived from this frame's own punctuality column. Taking it
# from `result_df` (a different query) would align the two frames by row
# index instead of by date and silently produce wrong or NaN values as soon
# as the two results differ in length or row order.
#
# NOTE: the column is overwritten in place, so running this cell twice flips
# the values back to punctuality; re-run the query cell before re-running it.
result_sbb_delay_df['zugpuenktlichkeit'] = 100 - result_sbb_delay_df['zugpuenktlichkeit']

In [16]:
# Preview the first five rows of the converted delay frame.
result_sbb_delay_df.head(n=5)

Unnamed: 0,date,zugpuenktlichkeit
0,2021-01-01,2.91966
1,2021-01-02,2.099526
2,2021-01-03,1.520629
3,2021-01-04,3.523327
4,2021-01-05,3.248317


In [17]:
import plotly.io as pio
import plotly.graph_objects as go

### Lineplot

In [18]:
# Daily rainfall as bars.
datatrace1 = {
    'name': 'Rainfall',
    'type': 'bar',
    'x': result_df['date'],
    'y': result_df['rainfall']
}

# `moving_average` is computed in the SQL query as avg(snowdepth) over the
# current and the two preceding rows, so it is labelled as snow depth
# (the previous "Rainfall" label did not match the plotted data).
datatrace2 = {
    'name': 'Avg. Snow Depth (past 3 days)',
    'type': 'scatter',
    'x': result_df['date'],
    'y': result_df['moving_average']
}

# Read from `result_sbb_delay_df`, the frame whose column was converted from
# punctuality to delay. Whether `result_sbb_delay` reflects that conversion
# depends on whether the two frames share their data, which is not something
# this cell should rely on.
datatrace3 = {
    'name': 'Traindelay in %',
    'type': 'scatter',
    'x': result_sbb_delay_df['date'],
    'y': result_sbb_delay_df['zugpuenktlichkeit']
}

# Daily temperature as a line (legend typo "Temerature" corrected).
datatrace4 = {
    'name': 'Temperature',
    'type': 'scatter',
    'x': result_df['date'],
    'y': result_df['temp']
}


layout = {
    'title': 'SBB Verspätung für Wetter'
}

figdict = {'data': [datatrace1, datatrace2, datatrace3, datatrace4],
          'layout': layout}

# Last expression of the cell: the notebook renders the figure.
go.Figure(**figdict)