# Hydrology


We will take a look at some data from the [SMHI hydrological station network](https://vattenwebb.smhi.se/station/).

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

## Råån / Bröddebacken (station 2127)

In [None]:
# Report for station 2127, fetched straight from the SMHI web service.
url = "https://vattenwebb.smhi.se/station/rest/report/2127"

# Skip the first 13 rows of the sheet (presumably header/metadata; verify
# against the downloaded file) and impose our own column names.
raw_df = pd.read_excel(
    url,
    skiprows=13,
    names=["date", "flow", "quality"],
)


In [None]:
# Re-index the raw table by timestamp: parse the 'date' column, promote it
# to the index, and keep the remaining columns as data. `raw_df` itself is
# left untouched because `assign` returns a new frame.
df = raw_df.assign(date=pd.to_datetime(raw_df["date"])).set_index("date")

In [None]:
# Preview the first rows of the date-indexed frame.
df.head()

In [None]:
# Distribution of the flow values as a histogram.
df["flow"].plot(kind="hist")

In [None]:
# Full flow record on a wide canvas so the long series stays readable.
df["flow"].plot(figsize=(18, 4))

# RangeDetector

In [None]:
import plotly.graph_objects as go
from tsod.detectors import RangeDetector

# Flag flow values that fall outside the range given by min_value/max_value.
rd = RangeDetector(min_value=0.0, max_value=20.0)
anom = rd.detect(df.flow)

# Select the flagged rows once and reuse them for both trace axes.
flagged = df.loc[anom]

# One trace for the full series and one marker-only trace for the flagged
# points. Each trace is built directly; wrapping a Scatter in another
# Scatter only copies its properties and adds nothing.
data = [
    go.Scatter(x=df.index, y=df.flow, mode="markers+lines", name="Flow"),
    go.Scatter(x=flagged.index, y=flagged.flow, mode="markers", name="Anomalies"),
]
fig = go.Figure(data=data, layout=go.Layout(width=1000))
fig.show()

In [None]:
# Put the series on a regular daily grid, keeping the first value found in
# each day; this gives the index an explicit frequency.
df = df.resample('D').first()
# Display the resulting index frequency to confirm the resampling.
df.index.freq

In [None]:
from tsod.detectors import DiffRangeDetector

# Detector for step changes: flags differences between consecutive values
# that fall outside [min_value, max_value].
drd = DiffRangeDetector(min_value= -10.0, max_value=10.0) # a change of 10 m^3/s from day to day is questionable

# Alternative (left disabled): find the parameters from a period of normal
# data instead of hard-coding them. Rows are selected with .loc because
# selecting rows by string through df['1980'] was removed in pandas 2.0.
#drd.fit(df.loc['1980', 'flow'])

In [None]:
# Run the difference-based detector on the flow column; the result is used
# as a row mask for `df` when plotting.
anom = drd.detect(df['flow'])

In [None]:
# Select the flagged rows once and reuse them for both trace axes.
flagged = df.loc[anom]

# One trace for the full series and one marker-only trace for the flagged
# points. Each trace is built directly; wrapping a Scatter in another
# Scatter only copies its properties and adds nothing.
data = [
    go.Scatter(x=df.index, y=df.flow, mode="markers+lines", name="Flow"),
    go.Scatter(x=flagged.index, y=flagged.flow, mode="markers", name="Anomalies"),
]
fig = go.Figure(data=data, layout=go.Layout(width=1000))
fig.show()

## Seasonality

In [None]:
# Derive calendar features from the datetime index; each new column takes
# the name of the index attribute it is read from.
for field in ("dayofyear", "year", "month"):
    df[field] = getattr(df.index, field)

In [None]:
# Preview the frame, now including the calendar columns.
df.head()

In [None]:
# Flow against day of year; the low alpha lets dense regions show through
# where points from different years overlap.
df.plot(kind="scatter", x="dayofyear", y="flow", alpha=0.2)

In [None]:
import seaborn as sns

In [None]:
# Per-month distribution of flow as violins, drawn on a 12x8 inch figure.
plt.figure(figsize=(12, 8))
sns.violinplot(data=df, x="month", y="flow")

In [None]:
# Per-month distribution of flow as box plots, drawn on a 12x8 inch figure.
plt.figure(figsize=(12, 8))
sns.boxplot(data=df, x="month", y="flow")

# Stations in vicinity

River discharge temporal patterns should be similar in neighbouring water bodies.

## Vege å / Åbromölla (station 2196)

In [None]:
# Report for station 2196, fetched straight from the SMHI web service.
url = "https://vattenwebb.smhi.se/station/rest/report/2196"

# Skip the first 13 rows of the sheet (presumably header/metadata; verify
# against the downloaded file) and impose our own column names.
raw_df = pd.read_excel(
    url,
    skiprows=13,
    names=["date", "flow", "quality"],
)

In [None]:
# Preview the first rows of the freshly loaded table.
raw_df.head()

In [None]:
# Re-index the raw table by timestamp: parse the 'date' column, promote it
# to the index, and keep the remaining columns as data. `raw_df` itself is
# left untouched because `assign` returns a new frame.
df2 = raw_df.assign(date=pd.to_datetime(raw_df["date"])).set_index("date")

In [None]:
# Preview the first rows of the date-indexed frame for the second station.
df2.head()

In [37]:
# Compare both stations over the window from 2019 through 2020.
sel = slice('2019', '2020')

# Slice each frame once by label (.loc makes the row selection explicit)
# and reuse the subset for both axes of its trace.
raan = df.loc[sel]
vege = df2.loc[sel]

# Each trace is built directly; wrapping a Scatter in another Scatter only
# copies its properties and adds nothing.
data = [
    go.Scatter(x=raan.index, y=raan.flow, name="Råån"),
    go.Scatter(x=vege.index, y=vege.flow, name="Vege å"),
]
fig = go.Figure(data=data, layout=go.Layout(width=1000))
fig.show()

It is apparent that these two time series are highly correlated. A multivariate time series anomaly detector could make use of this relationship. To be implemented...