In [1]:
import pandas as pd
import json
import requests
from statistics import mode
import datetime as dt
from joblib import delayed, Parallel

from basefunctions import ftpfunction as ftpfunc
from basefunctions import parserfunctions as pf
from basefunctions import grimmfunctions as grimm
from basefunctions import requestfunction as requestfunc
#from basefunctions import testfunctions as testfunc

import saqncredentials

# Basefunctions Library:

## parserfunctions as pf:

### pf.getThingFromProperties(url, domain, serialno)

function that uses url, operator domain and serialno as string inputs and returns the saqn database entry of the thing    

### pf.post_difference(targetdatastream, dataframe)

wrapper function that executes getSymmDiff and postObservations in succession

### pf.getSymmDiff(targetdatastream, dataframe)

function that checks a list of observations against existing observations in the database by timestamp input is targetdatastream, dataframe. returns an equally formatted, reduced dataframe of missing observations

### pf.postObservations(targetdatastream, dataframe)

function that posts observations to the server. returns a dictionary of counts of successfull and failed posts

## grimmfunctions as grimm:

### grimm.parseGrimmFile(filepath)

function that grabs a file from the ftp server when given a path and parses it, returning a dataframe

### grimm.formatDataframe(df,filepath)

function that formats the dataframe such that column heads are identical to saqn observedproperty iot.ids input is dataframe, filepath (to identify device type)

---

---

    
## OLD, not included

### grimm.updateSoftwareNo(inputline,datastream)

function that checks the ["properties"]["software_version"] field of a datastream with a new input of the form pd.Series({pandas timestamp, value}). mutates the datastream object (shallow copy!) and performs a patch request if necessary. returns the datastream so datastream=grimm.updateSoftwareNo(inputline,datastream) makes sense although the shallow copy should mutate it anyway

--> probably belongs to pf library as it is not grimm specific

---

---

## TODO

- last calibration time to utc so that it is captured by the post function

In [2]:
#wrapper function for iteration. may also be parallelized or done in multiple serial calls

# targeturl = "http://193.196.38.108:8080/FROST-Server/v1.0"
targeturl = "https://api.smartaq.net/v1.0"
operatordomain = "grimm-aerosol.com"

folder = saqncredentials.grimm.folder
things = ftpfunc.getData(folder)
print(folder)

#for thing in things[11:12]: 
for thing in ['SN19006']:

    print("------------------------")
    print("NEW THING")
    print("------------------------")
    filelist=ftpfunc.getData(folder + "/" + thing)

#     # reduce filelist so it contains the last 5 days plus the day 2 weeks ago
#     today = dt.datetime.now()
#     cropped_filelist = [file for file in filelist if dt.datetime.strptime(file[:10],'%Y-%m-%d')+ dt.timedelta(5) >= today]
#     cropped_filelist += [file for file in filelist if (dt.datetime.strptime(file[:10],'%Y-%m-%d') + dt.timedelta(14)).date() == today.date()]
#     print(cropped_filelist)
    
    for file in filelist:
    # for file in cropped_filelist:
        filepath = folder + "/" + thing + "/" + file
        print("------------------------")
        print(filepath)
        print("------------------------")
        
        # get the file, parse it and format it for further progressing
        df=grimm.parseGrimmFile(filepath)
        df_formatted=grimm.formatDataframe(df,filepath)
        
        # Check Serial Number Column whether they are all the same
        serialmode=mode(df_formatted["hardware.id"])
        
        #check if the serial no that occurs most occurs more than 95% of the time. if yes, nonoccurence is considered a typo (throw warning in log but keep)
        if(list(df_formatted["hardware.id"]).count(serialmode)/len(list(df_formatted["hardware.id"])) > 0.95 == False):
            print("Warning: several serial numbers in one datasheet, needs manual inspection: " + filepath)

        
        # get the database info corresponding to the thing and its datastreams
        saqnthing = pf.getThingFromProperties(targeturl, **{"operator.domain": operatordomain}, **{"hardware.id": serialmode})
        
        sess = requestfunc.session(3, 2)
        saqndatastreams = json.loads(sess.get(saqnthing["Datastreams@iot.navigationLink"] + "?$expand=ObservedProperty").text)["value"]

        # for each datastream, check for missing observations
        res=Parallel(n_jobs=2)(delayed(pf.post_difference)(targetdatastream,df_formatted) for targetdatastream in saqndatastreams)
        print(res)

Messnetz
------------------------
NEW THING
------------------------
------------------------
Messnetz/SN19006/2020-09-05-SN19006-measure.dat
------------------------
['None', 'None', 'None', 'None', 'None']
------------------------
Messnetz/SN19006/2020-09-06-SN19006-measure.dat
------------------------
['None', 'None', 'None', 'None', 'None']
------------------------
Messnetz/SN19006/2020-09-07-SN19006-measure.dat
------------------------
['None', 'None', 'None', 'None', 'None']
------------------------
Messnetz/SN19006/2020-09-08-SN19006-measure.dat
------------------------
['None', 'None', 'None', 'None', 'None']
------------------------
Messnetz/SN19006/2020-09-09-SN19006-measure.dat
------------------------
['None', 'None', {'success': 285, 'failed': 0}, {'success': 285, 'failed': 0}, {'success': 285, 'failed': 0}]
------------------------
Messnetz/SN19006/2020-09-10-SN19006-measure.dat
------------------------
['None', 'None', {'success': 283, 'failed': 0}, {'success': 283, 'fai

KeyError: 'value'

In [65]:
# TODOs

#lat lon filter für historical locations? mit gutem statistischen algorithmus ist das rauszubekommen
#-> FoI so lassen wäre ein interessanter datensatz für gps, allerdings sind dann die messdaten unbrauchbar