In [1]:
import requests
import pandas as pd

import io  
from datetime import datetime as dt
import time

In [2]:
def extract_header_and_data(lines):
    header, data = [], 'timestamp  level\n'
    for line in lines:
        if line.startswith("#"):
            header.append(line)
        else:
            data += line + '\n'
    #pack header as dictionary
    header_dict = {}
    for line in header:
        if line.startswith('#'):
            line = line.strip('#')
            res = line.split(':')
            header_dict[res[0].strip()] = res[1].strip() 
    return header_dict, data

def parse_response(res):
    response_text = res.text.split('\r\n')
    header, data = extract_header_and_data(response_text)
    df = pd.read_csv(io.StringIO(data), sep='\s+', usecols = ['timestamp', 'level'])
    df['site'] = header['Site name']
    df['lat'] = header['Latitude']
    df['lon'] = header['Longitude']
    df['ref'] = header['Reference level']
    df['timestamp'] = df['timestamp'].apply(lambda x: dt.strptime(x,"%Y-%m-%dT%H:%M:%S+02:00"))
    return df

example header  
{'Site name': 'ANX', 'Abstract': 'Tides and observed water level from Andenes', 'Provider': 'Statens kartverk sjø (Norwegian Hydrographic Service)', 'Latitude': '69.32606700', 'Longitude': '16.13484800', 'Datum': 'EUREF89', 'Time interval': '600 seconds', 'Reference level': 'CD (Chart Datum)', 'Z0': '133.8 cm', 'Start time': '2022-09-15T01', 'End time': '2022-09-16T01', 'Unit': 'cm', 'Series1': 'Water level observations'}

In [3]:
tide_stations = pd.read_csv('tide_stations_loc.csv')
tide_stations.head()

Unnamed: 0,name,code,latitude,longitude,type
0,Andenes,ANX,69.326067,16.134848,PERM
1,Bergen,BGO,60.398046,5.320487,PERM
2,Bodø,BOO,67.29233,14.39977,PERM
3,Hammerfest,HFT,70.66475,23.67869,PERM
4,Harstad,HAR,68.801261,16.548236,PERM


In [4]:
start_date = '2022-09-15'
end_date = '2022-09-16'
data_type = 'OBS'
#url= url_template.format(lat, lon, datatype, place, start_date, end_date)
url_template = 'http://api.sehavniva.no/tideapi.php?tide_request=locationdata&lat={}&lon={}\
&datatype={}\
&file=txt&lang=en&place={}\
&dst=1&refcode=CD&fromtime={}&totime={}&interval=10'

urls = []

for i, row in tide_stations.iterrows():
    url = url_template.format(row['latitude'], row['longitude'], data_type, row['code'], start_date, end_date)
    print(url)
    urls.append(url)

http://api.sehavniva.no/tideapi.php?tide_request=locationdata&lat=69.326067&lon=16.134848&datatype=OBS&file=txt&lang=en&place=ANX&dst=1&refcode=CD&fromtime=2022-09-15&totime=2022-09-16&interval=10
http://api.sehavniva.no/tideapi.php?tide_request=locationdata&lat=60.398046&lon=5.320487&datatype=OBS&file=txt&lang=en&place=BGO&dst=1&refcode=CD&fromtime=2022-09-15&totime=2022-09-16&interval=10
http://api.sehavniva.no/tideapi.php?tide_request=locationdata&lat=67.29233&lon=14.39977&datatype=OBS&file=txt&lang=en&place=BOO&dst=1&refcode=CD&fromtime=2022-09-15&totime=2022-09-16&interval=10
http://api.sehavniva.no/tideapi.php?tide_request=locationdata&lat=70.66475&lon=23.67869&datatype=OBS&file=txt&lang=en&place=HFT&dst=1&refcode=CD&fromtime=2022-09-15&totime=2022-09-16&interval=10
http://api.sehavniva.no/tideapi.php?tide_request=locationdata&lat=68.801261&lon=16.548236&datatype=OBS&file=txt&lang=en&place=HAR&dst=1&refcode=CD&fromtime=2022-09-15&totime=2022-09-16&interval=10
http://api.sehavniva

In [5]:
columns = ['timestamp', 'level', 'site', 'lat', 'lon', 'ref']
df = pd.DataFrame(columns=columns)
for url in urls:
    res = requests.get(url)
    if res.status_code != 200:
        print("ups, rquest error  for ", url)
    else:
        temp_df = parse_response(res)
        df = pd.concat([df, temp_df])
df.reset_index(drop=True, inplace=True)

In [6]:
df.head()

Unnamed: 0,timestamp,level,site,lat,lon,ref
0,2022-09-15 01:00:00,118.0,ANX,69.326067,16.134848,CD (Chart Datum)
1,2022-09-15 01:10:00,124.9,ANX,69.326067,16.134848,CD (Chart Datum)
2,2022-09-15 01:20:00,131.9,ANX,69.326067,16.134848,CD (Chart Datum)
3,2022-09-15 01:30:00,138.8,ANX,69.326067,16.134848,CD (Chart Datum)
4,2022-09-15 01:40:00,145.8,ANX,69.326067,16.134848,CD (Chart Datum)


In [7]:
df.tail()

Unnamed: 0,timestamp,level,site,lat,lon,ref
3620,2022-09-16 00:20:00,140.4,AES,62.469414,6.151946,CD (Chart Datum)
3621,2022-09-16 00:30:00,145.7,AES,62.469414,6.151946,CD (Chart Datum)
3622,2022-09-16 00:40:00,150.9,AES,62.469414,6.151946,CD (Chart Datum)
3623,2022-09-16 00:50:00,156.0,AES,62.469414,6.151946,CD (Chart Datum)
3624,2022-09-16 01:00:00,160.7,AES,62.469414,6.151946,CD (Chart Datum)


In [8]:
df.site.unique()

array(['ANX', 'BGO', 'BOO', 'HFT', 'HAR', 'HEI', 'HRO', 'HVG', 'KAB',
       'KSU', 'MSU', 'MAY', 'NVK', 'NYA', 'OSC', 'OSL', 'RVK', 'SBG',
       'SVG', 'TRG', 'TOS', 'TRD', 'VAW', 'VIK', 'AES'], dtype=object)

In [10]:
file_name  = 'levels_from_{}_to_{}.csv'.format(start_date, end_date)
df.to_csv(file_name)   