In [55]:
import requests
import pandas as pd
import numpy as np
from dotenv import dotenv_values

# Read key/value settings from the local ".env" file. The "apikey" entry is
# used below as the Bearer token for authenticated requests to the API.
config = dotenv_values(".env")



We start by registering the station data with the database.

In [None]:
# Station record to register. The keys are the column names sent to the
# /station endpoint.
station = {
    "stationname":"sniffer",
    "projectid":"SETx-UIFL Beaumont",
    "description": "Beaumont Run of the SNIFFER air quality sensor for VOCUS data",
    "contactname": "Pawell",
    "contactemail": "Pawell@utexas.edu",
    "active":True,
    "startdate": "Feb 23, 2023 17:05:57",
    "datetime": "Feb 25, 2023 12:18:11"
}

# Send the record as a JSON body so the boolean keeps its type instead of being
# form-encoded as the string "True".
# NOTE(review): re-running this cell posts the same station again; confirm
# whether the table rejects duplicates.
r = requests.post(
    "https://postgrest-dev.proudflower-a6582e11.centralus.azurecontainerapps.io/station",
    headers={'Authorization': f'Bearer {config["apikey"]}'},
    json=station,
    timeout=30,  # do not hang the notebook forever if the server is unreachable
)
# Fail loudly on 4xx/5xx so later cells do not run against a missing station.
r.raise_for_status()
print(r)

We can then clean the sensor data, which requires us to load 4 datasets:
- MassList provides the mass list, which generates each measurement's alias.
- sensor provides the 1-D measurement data from the Sniffer.
- engData provides the 1-D engineering metadata from the sensor.
- engDataNames is the header file for the engData.

In [3]:
# Mass list: read without a header row; column 0 is used later as the sensor
# aliases and as the measurement column names.
MassList = pd.read_csv(
    "2023Beaumont_partial/w_data/Export/UL/IonList/MassIDs_2023.02.23-17h36m01sUL.csv",
    header=None,
)

# Both .fdt exports are parsed as newline-separated numbers into flat 1-D arrays.
sensor = np.fromfile(
    "2023Beaumont_partial/w_data/Export/UL/CPS/cps2023.02.23-17h36m01sUL.fdt",
    sep="\n",
)
engData = np.fromfile(
    "2023Beaumont_partial/w_data/Export/EngData/EngData2023.02.23-17h36m01s.fdt",
    sep="\n",
)

# Column names for the engineering data, one per line. Splitting on "\n" keeps
# the final (possibly empty) entry, which the reshape cell drops with [:-1].
with open(
    "2023Beaumont_partial/w_data/Export/EngData/EngDataNames2023.02.23-17h36m01s.csv",
    encoding="ISO-8859-1",
) as names_file:
    raw_names = names_file.read()
engDataNames = raw_names.split("\n")

For the sensor table, we need to add the columns whose values are the same for every sensor.

In [41]:
# One sensor row per entry in the mass list; the remaining columns hold the
# same constant value for every sensor.
sensor_aliases = MassList[0].tolist()
sensor_table = pd.DataFrame({"alias": sensor_aliases}).assign(
    postprocess=True,
    postprocessscript=None,
    units='Counts Per Second',
    datatype=1,
)


The sensor data is linked to a specific station, which can be found by its project name, SETx-UIFL Beaumont. Below I'll make a request that filters by the project SETx-UIFL Beaumont and the station name sniffer.

To use a filter, we write the column name, followed by `=eq.` and the specific value,
e.g. `projectid=eq.SETx-UIFL Beaumont`


the following operators are available:

|  abbreviation |  meaning|
|---------------|---------|
|  eq | equals | 
|  gte | greater than or equal | 
| gt | greater than | 
| lte | less than or equal | 
| lt | less than | 
| neq | not equal | 
| like | LIKE operator (use * in place of %) | 
| ilike | ILIKE operator (use * in place of %) | 
| in | one of a list of values e.g. ?a=in.1,2,3 | 
| notin | not one of a list of values e.g. ?a=notin.1,2,3 | 
| is | checking for exact equality (null,true,false) | 
| isnot | checking for exact inequality (null,true,false) | 
| @@ | full-text search using to_tsquery | 
| @> | contains e.g. ?tags=@>.{example, new} | 
| <@ | contained in e.g. ?values=<@.{1,2,3} | 
| not | negates another operator, see below|

In [54]:
# Look up the station registered for this project and station name.
r = requests.get(
    "https://postgrest-dev.proudflower-a6582e11.centralus.azurecontainerapps.io/station?projectid=eq.SETx-UIFL Beaumont&stationname=eq.sniffer",
    timeout=30,  # do not hang the notebook forever if the server is unreachable
)
# Surface HTTP errors here instead of failing later on an unexpected JSON body.
r.raise_for_status()

matching_stations = r.json()
if not matching_stations:
    raise LookupError(
        "No station found for projectid 'SETx-UIFL Beaumont' and stationname 'sniffer'; "
        "register the station first."
    )

# Every sensor row is linked to the first matching station.
sensor_table['stationid'] = matching_stations[0]['stationid']

Then we can upload that sensor table into the database.

In [None]:
# Upload the sensor table as a CSV body (header row included, index omitted).
# NOTE(review): re-running this cell posts the same sensors again; confirm
# whether the table rejects duplicates.
r = requests.post(
    "https://postgrest-dev.proudflower-a6582e11.centralus.azurecontainerapps.io/sensor",
    headers={
        'Authorization': f'Bearer {config["apikey"]}',
        'Content-Type': 'text/csv',
    },
    data=sensor_table.to_csv(header=True, index=False),
    timeout=30,  # do not hang the notebook forever if the server is unreachable
)
# Fail loudly on 4xx/5xx; otherwise a rejected upload would go unnoticed.
r.raise_for_status()

Finally we will transform the measurement data into a useable format and upload the data.

We'll start by transforming engData and the sensor data (which becomes measurement_df) from 1-dimensional to 2-dimensional. We skip the first three values of each array because they are the header of the file.

In [7]:
# Each flat array starts with three header values; elements [1] and [2] give
# the dimensions used for the reshape and the payload starts at index 3.
# NOTE: this cell replaces the flat `engData` array with a DataFrame, so it can
# only be run once after the loading cell.
eng_dim_1, eng_dim_2 = int(engData[1]), int(engData[2])
engData = pd.DataFrame(
    # Engineering data is laid out column-major (Fortran order).
    engData[3:].reshape((eng_dim_1, eng_dim_2), order='F'),
    columns=engDataNames[:-1],  # drop the trailing entry left by split("\n")
)

sensor_dim_1, sensor_dim_2 = int(sensor[1]), int(sensor[2])
measurement_df = pd.DataFrame(
    # Default row-major reshape: one column per mass-list entry.
    sensor[3:].reshape((sensor_dim_2, sensor_dim_1)),
    columns=MassList[0].tolist(),
)

We then create the collection time for each measurement. This is based on the engData JulianDate column, which holds the number of days since Jan 01, 2009.

In [80]:
# JulianDate is a day offset from 2009-01-01, so convert it relative to that
# origin directly. Adding the offset to the absolute Julian date of 2009-01-01
# first would push the values to ~2.4 million days and cost float precision
# (tens of microseconds) before the conversion.
engData['collectiontime'] = pd.to_datetime(
    engData['JulianDate'],
    unit='D',
    origin=pd.Timestamp('2009-01-01T00:00:00'),
)


In [81]:
# Put the engineering columns (including collectiontime) to the left of the
# per-mass measurement columns, aligned on the row index.
# NOTE: re-running this cell prepends the engineering columns again.
frames_side_by_side = [engData, measurement_df]
measurement_df = pd.concat(frames_side_by_side, axis="columns")

In [83]:
# Placeholder columns exported with every measurement; they are left empty
# (None) here. The original cell assigned 'geometry' twice, so the redundant
# second assignment is removed.
measurement_df['geometry'] = None
measurement_df['elevation'] = None

In [86]:
# For each mass, look up its sensor id by alias and build the measurement rows
# (sensorid, collectiontime, geometry, elevation, measurementvalue) as CSV.
# NOTE(review): the lookup filters on alias only; if aliases can repeat across
# stations, also filter on stationid.
for m in MassList[0].tolist():
    print(m)

    r = requests.get(
        f"https://postgrest-dev.proudflower-a6582e11.centralus.azurecontainerapps.io/sensor?alias=eq.{m}",
        timeout=30,  # do not hang the notebook forever if the server is unreachable
    )
    # Surface HTTP errors here instead of failing later on an unexpected JSON body.
    r.raise_for_status()

    matching_sensors = r.json()
    if not matching_sensors:
        raise LookupError(f"No sensor found with alias {m}; upload the sensor table first.")

    # The same sensor id applies to every row; the value column is this mass's series.
    measurement_df["sensorid"]=matching_sensors[0]['sensorid']
    measurement_df['measurementvalue']=measurement_df[m]
    print(measurement_df[['sensorid','collectiontime', 'geometry', 'elevation', 'measurementvalue' ]].to_csv(header=True, index=False))
    # Preview only: stop after the first mass. Nothing is uploaded by this cell.
    break


12.3623
sensorid,collectiontime,geometry,elevation,measurementvalue
4,2023-02-23 17:36:01.008006400,,,-0.0003183083283
4,2023-02-23 17:36:01.958393344,,,-0.0003183083283
4,2023-02-23 17:36:02.995201280,,,-0.0003183083283
4,2023-02-23 17:36:04.032009216,,,-0.0003183083283
4,2023-02-23 17:36:04.982396416,,,-0.0003183083283
4,2023-02-23 17:36:06.019204352,,,-0.0003183083283
4,2023-02-23 17:36:06.969591296,,,-0.0003183083283
4,2023-02-23 17:36:08.006399232,,,-0.0003183083283
4,2023-02-23 17:36:09.043207168,,,-0.0003183083283
4,2023-02-23 17:36:09.993594368,,,-0.0003183083283
4,2023-02-23 17:36:11.030402304,,,-0.0003183083283
4,2023-02-23 17:36:11.980789248,,,-0.0003183083283
4,2023-02-23 17:36:13.017597184,,,-0.0003183083283
4,2023-02-23 17:36:13.967984384,,,-0.0003183083283
4,2023-02-23 17:36:15.004792320,,,-0.0003183083283
4,2023-02-23 17:36:16.041600000,,,-0.0003183083283
4,2023-02-23 17:36:16.991987200,,,-0.0003183083283
4,2023-02-23 17:36:18.028795136,,,-0.0003183083283
4,2023-02-23 1

0      -0.000318
1      -0.000318
2      -0.000318
3      -0.000318
4      -0.000318
          ...   
1795   -0.000318
1796   -0.000318
1797   -0.000318
1798   -0.000318
1799   -0.000318
Name: 12.3623, Length: 1800, dtype: float64
