In [35]:
import matplotlib.pyplot as plt
from pandas import merge
from pyniva import PUB_META, PUB_TSB, Vessel, token2header
import datetime

# Apply matplotlib's "ggplot" style sheet to any figures drawn in this notebook
plt.style.use("ggplot")

# Download Color Fantasy data 1.1.2019 - 30.9.2020

*Dag Hjermann (based on Zofia Rudjord's code)*  


## Preparations

In [9]:
# Location of the service-account token file (JSON) on the shared drive
path_to_token = "/home/jovyan/shared/01_datasets/tokens/niva-service-account.json"
# HEADER is passed as `header=` to every pyniva call below
# (presumably the authentication header built from the token - confirm in the pyniva docs)
HEADER = token2header(path_to_token)

## Get available vessels / data sources  
To check which Vessels have associated data, run the part below. Some of the abbreviations are  
* FA - *Color Fantasy* (Oslo - Kiel)  
* TF - *Trollfjord* (Hurtigruten, Bergen - Kirkenes)  
* NO - *Norbjørn* (Tromsø - Svalbard)

In [10]:
# Collect everything returned by Vessel.list into a plain list
boat = list(Vessel.list(PUB_META, header=HEADER))

# Show the path of each entry, one per line
print("\n".join(str(vessel.path) for vessel in boat))

NO
Survey_2019_04_test/SB_Echo
RA
Survey_2019_04/SB_Echo
NB
Survey_2018_03/SB_Echo
Survey_2019_test/Waveglider_1
TF
Survey_2019_04/Waveglider_1
Survey_2019_04_test/SeaGlider_1
Survey_2019_04/SeaGlider_1
SYNTH_FA
FA
Survey_2018_03/Waveglider_1
Survey_2018_03/SeaGlider_1


## Color Fantasy (Oslo - Kiel): Get available measurements/sensors   

If you uncomment the "print" part of the snippet below (long output!), you will see all available time series from this vessel. You may, for instance, want the following time series:   
* FA/ferrybox/INLET/TEMPERATURE  
* FA/ferrybox/CTD/SALINITY  
* FA/ferrybox/INLET/OXYGEN/CONCENTRATION  
* FA/ferrybox/SYSTEM/OBSTRUCTION  
* FA/ferrybox/SYSTEM/PUMP *(0 if the water pump is not running and the system doesn't work, 1 if the pump is running)* 
* FA/ferrybox/SYSTEM/TRIP_NUMBER *(consecutive numbers for every trip to/from Oslo)*   
  
(Longitude + latitude are metadata and will be downloaded separately afterwards)

In [11]:
platform_code = "FA"

# Upper bound for the optional listing in the commented-out print below.
# None lists all measurements; note that -1 would NOT, because the slice
# [:-1] drops the last entry.
first_n_measurements = None

# Pick the entry in `boat` whose path equals the platform code (first match)
FA = [v for v in boat if v.path == platform_code][0]

# All time series objects of that vessel; later cells select from these by `.path`
measurements = FA.get_all_tseries(PUB_META, header=HEADER)
# print(*[m.path for m in measurements][:first_n_measurements], sep="\n")

## Color Fantasy (Oslo - Kiel): Download data    
We will download the data for May 2020. The part below downloads data for each of the time series below and stores the result in a list called `data_list`  

In [13]:
# Period to download; the same two values are passed to every get_tseries call
start_time = "2020-05-01T00:00:00"
end_time = "2020-06-01T00:00:00"

# Time series to download, identified by their path
paths = ['FA/ferrybox/INLET/TEMPERATURE',
         'FA/ferrybox/CTD/SALINITY',
         'FA/ferrybox/INLET/OXYGEN/CONCENTRATION',
         'FA/ferrybox/CHLA_FLUORESCENCE/ADJUSTED',
         'FA/ferrybox/SYSTEM/OBSTRUCTION',
         'FA/ferrybox/SYSTEM/PUMP',
         'FA/ferrybox/SYSTEM/TRIP_NUMBER'
        ]

# Downloaded data sets, appended in the order of `paths`
data_list = []

# Go through all requested paths; for each one, scan the vessel's time series
# and download every series whose path matches.
for series_path in paths:
    print(series_path)
    n_before = len(data_list)
    for m in measurements:
        if m.path == series_path:
            data = m.get_tseries(
                PUB_TSB,
                header=HEADER,
                noqc=True,
                dt=0,
                start_time=start_time,
                end_time=end_time,
                )
            data_list.append(data)  # add data set to data_list
    # A path without any match would otherwise be skipped silently, which
    # shifts the positions in data_list relative to `paths`.
    if len(data_list) == n_before:
        print(f"  WARNING: no time series found for {series_path}")



FA/ferrybox/INLET/TEMPERATURE
FA/ferrybox/CTD/SALINITY
FA/ferrybox/INLET/OXYGEN/CONCENTRATION
FA/ferrybox/CHLA_FLUORESCENCE/ADJUSTED
FA/ferrybox/SYSTEM/OBSTRUCTION
FA/ferrybox/SYSTEM/PUMP
FA/ferrybox/SYSTEM/TRIP_NUMBER


### Check one element of `data_list`  
Each element of this list is a data set indexed by time (given as date + time in the UTC time zone), with the measured values in a single column

In [14]:
# data_list is filled in the order of `paths`, so index 1 is the second
# requested series (salinity), provided every requested path was found
print(data_list[1])

                     salinity
time                         
2020-05-01 00:00:18    26.830
2020-05-01 00:01:18    26.851
2020-05-01 00:02:18    26.838
2020-05-01 00:03:18    26.849
2020-05-01 00:04:18    26.879
...                       ...
2020-05-31 23:55:20    27.267
2020-05-31 23:56:20    27.274
2020-05-31 23:57:21    27.277
2020-05-31 23:58:21    27.271
2020-05-31 23:59:21    27.272

[44554 rows x 1 columns]


## Get coordinates   
We use the `start_time` and `end_time` given above  

In [15]:
# Path of the position series for the selected platform
path = platform_code + "/gpstrack"

# Reset first, so a value left over from an earlier run of this cell can
# never be picked up when no series matches
gpsdata = None
for m in measurements:
    if m.path == path:
        # Same period and options as for the sensor series
        gpsdata = m.get_tseries(
            PUB_TSB,
            header=HEADER,
            noqc=True,
            dt=0,
            start_time=start_time,
            end_time=end_time,
        )

# Fail with a clear message instead of a NameError further down
if gpsdata is None:
    raise ValueError(f"No time series with path '{path}' found")

print(gpsdata)

                     longitude  latitude
time                                    
2020-05-01 00:00:18     10.746   59.9046
2020-05-01 00:01:18     10.746   59.9046
2020-05-01 00:02:18     10.746   59.9046
2020-05-01 00:03:18     10.746   59.9046
2020-05-01 00:04:18     10.746   59.9046
...                        ...       ...
2020-05-31 23:55:20     10.746   59.9046
2020-05-31 23:56:20     10.746   59.9046
2020-05-31 23:57:21     10.746   59.9046
2020-05-31 23:58:21     10.746   59.9046
2020-05-31 23:59:21     10.746   59.9046

[44554 rows x 2 columns]


## Merge to a single data set 
We use the `time` for merging the data sets together "side-by-side", so we end with a data set with one line per time point and one column per variable  

In [17]:
# The position data forms the base table
data_merged = gpsdata

# Attach each downloaded series in turn, matching rows on "time".
# A left join keeps exactly the rows of the base table: values without
# longitude and latitude are not very useful.
for sensor_frame in data_list:
    data_merged = data_merged.merge(sensor_frame, how="left", on="time")

print(data_merged)

                     longitude  latitude  temperature  salinity  \
time                                                              
2020-05-01 00:00:18     10.746   59.9046        8.141    26.830   
2020-05-01 00:01:18     10.746   59.9046        8.111    26.851   
2020-05-01 00:02:18     10.746   59.9046        8.099    26.838   
2020-05-01 00:03:18     10.746   59.9046        8.253    26.849   
2020-05-01 00:04:18     10.746   59.9046        8.204    26.879   
...                        ...       ...          ...       ...   
2020-05-31 23:55:20     10.746   59.9046       12.339    27.267   
2020-05-31 23:56:20     10.746   59.9046       12.280    27.274   
2020-05-31 23:57:21     10.746   59.9046       12.333    27.277   
2020-05-31 23:58:21     10.746   59.9046       12.339    27.271   
2020-05-31 23:59:21     10.746   59.9046       12.248    27.272   

                     oxygen_concentration  chla_fluorescence  obstruction  \
time                                               

## Save the result to a csv (text) file  

In [39]:
# The downloaded period is May 2020 (see start_time / end_time), so the
# file name carries that year
data_merged.to_csv("pydata_Fantasy_May2020.csv")