An example that extracts the observations from a time series data source whose temperature values (TEMP1 attribute) are different from ("ne"), greater than ("gt"), greater than or equal to ("gte"), less than ("lt"), less than or equal to ("lte") or equal to ("eq") a certain value.
The data source used is available at http://catalogue.ec-meloa.eu. It is a public dataset that was collected between 29/10/2021 11:15:00 UTC and 28/02/2022 23:59:00 UTC by the drifter WAVY OCEAN 52 near La Palma Island in the Canary Islands (reference location 28.6484151820544, -17.9793222860329).


In [49]:
import sys
import urllib.request
import os
import csv
import json
from datetime import datetime
import pandas as pd
import folium
from IPython.display import display
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors

Base values

In [50]:
# Name of the comparison applied to each temp_1 value. The extraction cell
# accepts "eq", "ne", "gt", "gte", "lt" and "lte".
OPERATION = "gte" # alternative example: "lt"
# Threshold that every temp_1 value is compared against.
BASE_VALUE = 24 # alternative example: 22
# URL of the CSV resource that is downloaded and filtered.
DATASET = "http://catalogue.ec-meloa.eu/dataset/24116ae9-7425-45e8-a605-29fbf917649c/resource/c2f7d170-e0eb-4f35-a82f-5a8bc4be38f6/download/meloa_test_00064_00wo52_20211029t111600_20220122t193600_13_133.csv"


In [51]:
def bounding_box(points):
    """Return the axis-aligned extent of a collection of 2-D points.

    Args:
        points: Iterable of two-element sequences. The first element of
            each point is treated as the x component and the second as the
            y component.

    Returns:
        A list ``[min_x, min_y, max_x, max_y]``.

    Raises:
        ValueError: If ``points`` is empty or the points are not
            two-dimensional (the unpacking below fails).
    """
    # Transpose the point list into one tuple per axis.
    xs, ys = zip(*points)

    lower_corner = [min(xs), min(ys)]
    upper_corner = [max(xs), max(ys)]
    return lower_corner + upper_corner

In [52]:
# Download the dataset, keep the rows whose temp_1 value satisfies
# `temp_1 <OPERATION> BASE_VALUE`, and write two files into ./result:
#   * metadata.json - description, geometry, time range and bounding box
#   * result.csv    - the header row followed by the matching rows
import operator

# Supported operation names and the comparison each one performs. A lookup
# table is used instead of building an expression string and eval()-ing it:
# the cell values come from a file fetched over the network and must never be
# executed as code.
compare_op = {
    "eq": operator.eq,
    "ne": operator.ne,
    "gt": operator.gt,
    "gte": operator.ge,
    "lt": operator.lt,
    "lte": operator.le,
}

# Validate the request first so an unsupported operation does not trigger a
# pointless download.
if OPERATION not in compare_op:
    print("Operation not supported")
    sys.exit(1)

matches = compare_op[OPERATION]
# Compare numerically even if BASE_VALUE was given as a numeric string.
threshold = float(BASE_VALUE)

urllib.request.urlretrieve(DATASET, "to_parse.csv")

output_dir = "./result"
os.makedirs(output_dir, exist_ok=True)

coordinates = []
dates = []
temps1 = []
results = []
headers = None
# newline="" lets the csv module handle line endings itself.
with open("to_parse.csv", "r", newline="") as csv_file:
    reader = csv.reader(csv_file)
    headers = next(reader, None)
    if headers is None:
        raise ValueError("Downloaded dataset is empty: to_parse.csv")
    # The header row is the first row of the CSV written below.
    results.append(headers)

    # Column positions of the fields used by the filter and the metadata.
    temp1 = headers.index("temp_1")
    latitude = headers.index("latitude")
    longitude = headers.index("longitude")
    times = headers.index("timestamp")
    for row in reader:
        try:
            temperature = float(row[temp1])
        except (ValueError, IndexError):
            # A blank line or a missing/non-numeric temperature cannot
            # satisfy the comparison, so the row is skipped.
            continue
        if matches(temperature, threshold):
            results.append(row)
            temps1.append(row[temp1])
            coordinates.append((float(row[latitude]), float(row[longitude])))
            dates.append(row[times])

if not dates:
    # Without this check min()/max() below fail with an unhelpful message.
    raise ValueError(
        f"No observations found with temp_1 {OPERATION} {BASE_VALUE}"
    )

# NOTE(review): timestamps are compared as strings; this gives the true time
# range only if the timestamp format sorts chronologically (e.g. ISO 8601) -
# confirm against the dataset.
_from = min(dates)
_to = max(dates)
# NOTE(review): points are stored as (latitude, longitude), so the geometry
# and bbox below are latitude-first. GeoJSON (RFC 7946) specifies
# longitude-first - confirm what the consumer of metadata.json expects.
bbox = bounding_box(coordinates)
metadata = {
    "description": "MELOA_WO_TEMP1",
    "geometry": {"type": "MultiPoint", "coordinates": coordinates},
    "media_type": "TEXT",
    "start_datetime": _from,
    "end_datetime": _to,
    "bbox": bbox,
}

metadataFile = os.path.join(output_dir, "metadata.json")
# Overwrite rather than append: appending on a re-run would leave several
# concatenated JSON documents in the file, which is not valid JSON.
with open(metadataFile, "w") as metadata_handle:
    json.dump(metadata, metadata_handle)

with open(os.path.join(output_dir, "result.csv"), "w", newline="") as csvfile:
    writer = csv.writer(csvfile)
    writer.writerows(results)




Display a map with the results.


In [53]:
# Show the full drifter track together with the observations that matched
# the temperature filter.

# The full track is read from the local copy that the extraction step saved as
# "to_parse.csv" instead of downloading DATASET a second time; this also
# guarantees that both frames come from the same download.
dfo = pd.read_csv("to_parse.csv")
df = pd.read_csv(os.path.join(output_dir, "result.csv"))

# Centre the map on the mean position of the matching observations.
map_center = [df["latitude"].mean(), df["longitude"].mean()]
m = folium.Map(location=map_center, zoom_start=10)

# Draw the complete track as a semi-transparent red line.
track = dfo[["latitude", "longitude"]].values.tolist()
folium.PolyLine(
    locations=track,
    color="red",
    weight=2.5,
    opacity=0.5,
).add_to(m)

# Add one marker per matching observation; the popup shows its timestamp and
# temperature.
for _, row in df.iterrows():
    folium.Marker(
        location=[row["latitude"], row["longitude"]],
        popup=f"Timestamp: {row['timestamp']}<br>Temp_1: {row['temp_1']}",
    ).add_to(m)

# Render the map in the notebook output.
display(m)