# Langtjern TOC interpolation at outlet

## Required Python libraries and files, and setup variables

In [1]:
import sys
import LangtjernWeather as lw
import numpy as np
import datetime
import pandas as pd
import getpass
import matplotlib
import matplotlib.pyplot as plt
import getpass
import itertools as IT
from itertools import compress
from matplotlib import rc
import shutil
import os
import re
import pickle
import pylab
import statsmodels.api as sm
from patsy import dmatrices

#Setting matplotlib style
# Use the 'ggplot' style sheet for every figure in this notebook.
matplotlib.style.use('ggplot')
# Render figures inline, directly below the cell that creates them.
%matplotlib inline
# Switch off pandas' chained-assignment (SettingWithCopy) warning for the whole session.
pd.options.mode.chained_assignment = None
# Typeset all figure text with LaTeX; this needs a working LaTeX installation.
rc('text', usetex=True)
# Font size 16 for the tick labels on both axes and for figure text in general.
rc('xtick', labelsize = 16)
rc('ytick', labelsize = 16)
rc('font', size = 16)
#login function
def login():
    """Ask interactively for a username and a password.

    The username prompt falls back to ``"JLG"`` when the answer is empty or
    consists only of whitespace; surrounding whitespace is removed from any
    other answer. The password is read with ``getpass`` so it is not echoed.

    Returns
    -------
    tuple of (str, str)
        ``(username, password)``.
    """
    # Strip so that a stray space is not mistaken for a real username
    username = input("Username: [JLG] ").strip() #default username JLG
    if not username :
        username = "JLG"
    password = getpass.getpass('Password: ')
    return username, password

#Creating directories to store files. This will overwrite if the directory already exists
def create_dir(dir_name) :
    """Create ``dir_name`` as an empty directory.

    If the path already exists it is removed together with everything inside
    it before being created again, so previous contents are lost.
    Missing parent directories are created as needed.
    """
    already_there = os.path.exists(dir_name)
    if already_there :
        # Start from a clean slate: drop the old tree first
        shutil.rmtree(dir_name)
    os.makedirs(dir_name)
#Helper function for plotting
def plot_df(df) :
    """Plot every column of ``df`` against its index, one subplot per column.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose columns are drawn in separate, vertically stacked axes.
        Each axis gets the column name as its y label.

    Returns
    -------
    matplotlib.figure.Figure
        The figure holding the subplots (20 wide, 5 high per column).
    """
    num_cols = len(df.columns)
    # squeeze=False makes `axes` a 2-D array even for a single column; without it
    # plt.subplots hands back a bare Axes object that cannot be zipped over.
    fig, axes = plt.subplots(nrows = num_cols, ncols = 1,
                             figsize = (20, num_cols * 5), squeeze = False)
    for name, ax_h in zip(df.columns, axes.ravel()) :
        ax_h.plot(df.index, df[name])
        ax_h.set_ylabel(name)
    return fig

def isnumber(x):
    """Tell whether ``float(x)`` succeeds.

    Parameters
    ----------
    x : object
        Any value, e.g. a cell of a DataFrame.

    Returns
    -------
    bool
        ``True`` when ``x`` can be converted with ``float``, ``False`` when the
        conversion fails (wrong type, unparsable text such as ``'< 1'``, or an
        integer too large for a float).
    """
    try:
        float(x)
    except (TypeError, ValueError, OverflowError):
        # Only conversion failures count as "not a number"; interrupts and
        # other unrelated exceptions are allowed to propagate.
        return False
    return True

#Sanity check: show which interpreter runs the notebook and where it looks for modules
print("Using python version:", sys.version, sep="\n")
print("Paths to python libraries:", sys.path, sep="\n")

#Creating directory to save results (create_dir empties it if it already exists)
result_dir = './results/'
create_dir(result_dir)

Using python version:
3.6.3 (default, Oct  3 2017, 21:45:48) 
[GCC 7.2.0]
Paths to python libraries:
['', '/home/jose-luis/Envs/prognos_get_data_py3/lib/python36.zip', '/home/jose-luis/Envs/prognos_get_data_py3/lib/python3.6', '/home/jose-luis/Envs/prognos_get_data_py3/lib/python3.6/lib-dynload', '/usr/lib/python3.6', '/home/jose-luis/Envs/prognos_get_data_py3/lib/python3.6/site-packages', '/home/jose-luis/Envs/prognos_get_data_py3/lib/python3.6/site-packages/IPython/extensions', '/home/jose-luis/.ipython']


## Asking for credentials to AquaMonitor.

In [2]:
#Credentials are asked for interactively, nothing is stored in the notebook
(username, password) = login()

#Folder where the downloaded data will be saved; create_dir wipes it when it already exists
root = "./DownloadedData/"
create_dir(root)

Username: [JLG]  
Password:  ··············


## Defining period to download

In [3]:
#First and last date of the period to load, written as year/month/day strings
fromDate, toDate = "1970/01/01", "2018/12/31"

## Loading data at inlet

In [4]:
#'Water' data for station 37933 over the requested period
chemistry = lw.getLangtjernData(username, password, root, fromDate, toDate, 37933, 'Water')

#Bookkeeping columns that are not used further down
metadata_columns = ['Depth1', 'Depth2', 'ProjectId', 'ProjectName', 'StationId',
                    'StationCode', 'StationName', 'SampleDate_dato', 'SampleDate_tid']
chemistry.drop(metadata_columns, inplace=True, axis=1)

#Keep only the cells float() can parse; every other cell becomes NaN
parseable = chemistry.applymap(isnumber)
chemistry = chemistry[parseable]

#Column names: '_' and '/' are replaced by spaces
chemistry = chemistry.rename(columns=lambda col: re.sub('[_/]', ' ', col))
chemistry = chemistry.astype('float64', errors='ignore')
print("The data that were read are: {}".format(chemistry.columns))

JSONDecodeError: Expecting value: line 1 column 1 (char 0)

In [None]:
#Pull out the TOC column, plot it as points, then keep the first entry of every repeated timestamp
TOC = chemistry['TOC mg l']
TOC.plot(subplots=True, figsize=(20, 5), linestyle='None', marker='.', markersize=1)
is_repeat = TOC.index.duplicated(keep='first')
TOC = TOC[~is_repeat]

## Getting CDOM

In [None]:
#'Water' data for station 63098 over the requested period
inlet = lw.getLangtjernData(username, password, root, fromDate, toDate, 63098, 'Water')

#Bookkeeping columns that are not used further down
inlet.drop(['Depth1', 'Depth2', 'ProjectId', 'ProjectName', 'StationId', 'StationCode',
            'StationName', 'SampleDate_dato', 'SampleDate_tid'],
           inplace=True, axis=1)

#Column names: '_' and '/' become spaces, then 'µ' becomes 'u'
inlet = inlet.rename(columns=lambda col: re.sub('µ', 'u', re.sub('[_/]', ' ', col)))
print("The data that were read are: {}".format(inlet.columns))

In [None]:
#One panel per inlet column, points only
inlet.plot(
    subplots=True,
    figsize=(20, 12),
    linestyle='None',
    marker='.',
    markersize=1,
)

## Getting radiation

In [None]:
#'Air' data for station 62040 over the requested period
radiation = lw.getLangtjernData(username,password,root,fromDate,toDate, 62040, 'Air')
#Selecting one column gives a Series, and assigning to `.columns` on a Series only
#creates a stray attribute. Series.rename is what actually names it 'radiation'.
radiation = radiation['Globalstråling_W/m2'].rename('radiation')
radiation = radiation.astype('float64')
radiation.plot(figsize=(20,3),marker='.',markersize=1,linestyle='')

In [None]:
#'Water' data for station 63099; only the rows whose Depth1 equals 0.15 are kept
soilTemp = lw.getLangtjernData(username, password, root, fromDate, toDate, 63099, 'Water')
at_depth = soilTemp['Depth1'] == 0.15
soilTemp = soilTemp.loc[at_depth, 'Temperatur_C'].astype('float64')
soilTemp.plot(figsize=(20, 3), marker='.', markersize=1, linestyle='')

## Finding the relation between CDOM, TOC and temperature at the inlet

### Interpolation using daily data

In [None]:
# --- TOC samples: drop gaps and repeated timestamps, column renamed to 'TOC' ---
C = TOC.copy()
C = C.dropna().to_frame()
C = C[~C.index.duplicated(keep='first')]
C.rename(columns={'TOC mg l':'TOC'},inplace=True)

# --- Inlet record: short column names, gaps filled by interpolation along the index ---
out = inlet.copy()
out.rename(columns={'CDOM ug L' : 'CDOM','Temperatur C':'Temperature', 'Vannstand m': 'gh'},inplace=True)
# Negative temperatures are set to zero
out['Temperature'] = out['Temperature'].apply(lambda x : 0.0 if x < 0.0 else x)
out.interpolate(inplace=True,method='index')

# --- Radiation as a one-column frame named 'radiation' ---
rad=radiation.copy()
rad=rad.to_frame()
rad.rename(columns={'Globalstråling_W/m2':'radiation'},inplace=True)
rad.interpolate(inplace=True,method='index')

# --- Soil temperature as a one-column frame named 'soilTemperature' ---
soilT = soilTemp.copy()
soilT = soilT.to_frame()
soilT.rename(columns={'Temperatur_C': 'soilTemperature'},inplace=True)
soilT.interpolate(inplace=True,method='index')

# --- Predictors aligned on the radiation index ---
# The join used to reference `oxy`, which is defined nowhere in this notebook, while
# `soilT` was prepared but never used although the regression needs 'soilTemperature'.
data = rad.join(soilT)
data = data.join(out)
# Un-interpolated copy, used later to build the full predicted series
allData = data.copy()
data.interpolate(inplace=True,method='index')

# Period used for the regression
start = pd.to_datetime('2014-08-01')
finish = pd.to_datetime('2017-06-01')

data = data.loc[start:finish,:]
C = C.loc[start:finish,:]

# NOTE(review): the TOC rows are appended *below* the predictor rows and the index is
# left unsorted (sort=True only orders the columns). The interpolation that follows
# presumably depends on this row order: with the default forward direction, leading
# NaNs in a column are not filled. Do not sort the index here without checking what
# ends up in the regression.
data = pd.concat([data,C],axis=0,sort=True)
data.interpolate(inplace=True,method='index')
data = data.dropna()

data.plot(subplots=True,figsize=(20,18),marker='o',markersize=3,linestyle='')
display(data)

## Regression

In [None]:
#Performing regression
# Response (y) and design matrix (X) built from the formula; both come back as DataFrames
y, X = dmatrices('TOC ~ CDOM +  soilTemperature:gh + radiation', data=data, return_type='dataframe')

# sm.OLS takes the response first and the design matrix second
model = sm.OLS(y, X).fit()
ypred = model.predict(X)

# Observed TOC against the fitted values
fig, ax = plt.subplots(figsize=(20,5))
ax.plot(X.index, y['TOC'], 'o', label="TOC")
ax.plot(X.index, ypred, 'b+', label="prediction")
# The labels above only show up once the legend is drawn
ax.legend()

# Last expression of the cell, so the fit summary is rendered as the cell output
model.summary()

## Creating time series of inlet inputs of CDOM
### Creating time series of grab samples

In [None]:
# Period for which the predicted TOC series is written out
start = pd.to_datetime('2014-08-12 14:00:00')
finish = pd.to_datetime('2017-05-31 00:00:00')
allData = allData.loc[start:finish]

allData.interpolate(method='time',inplace=True)

# Predict TOC for every row in a single call instead of one model.predict per row.
# NOTE(review): the columns are in the order the notebook has always used
# (constant, CDOM, soilTemperature*gh, radiation); it must match the columns of the
# design matrix the model was fitted on - confirm against model.params.index.
exog = np.column_stack([
    np.ones(len(allData)),
    allData['CDOM'],
    allData['soilTemperature'] * allData['gh'],
    allData['radiation'],
])
allData['TOC'] = model.predict(exog)

display(allData)


def to_string_values_acpy(t, depth, val):
    """Format one record as 'timestamp<TAB>-depth<TAB>val*1000<newline>'.

    `t` must offer strftime. The value is multiplied by 1000 before writing
    (presumably a unit conversion - TODO confirm).
    """
    # Uses the `t` argument; the previous lambda silently read the global loop variable `i`
    return t.strftime('%Y-%m-%d %H:%M:%S') + '\t' + \
           '-' + str(depth) + '\t' + str(val*1000) + '\n'


def to_string_values(t, val):
    """Format one record as 'timestamp<TAB>val*1000<newline>'."""
    return t.strftime('%Y-%m-%d %H:%M:%S') + '\t' + str(val*1000) + '\n'


filename = './results/TOC_inlet.dat'
filenameObs = './results/TOC_inlet.obs'

# The context manager closes both files even if a write fails part-way
with open(filename,'w') as fid, open(filenameObs,'w') as fidObs:
    for i,x in allData['TOC'].items():
        fid.write(to_string_values_acpy(i,0.0,x))
        fidObs.write(to_string_values(i,x))

allData.plot(subplots=True, figsize=(20,3*len(allData.columns) ), marker='.', markersize=1, linestyle='None')