# Requirements: wasteContainerValencia_LD.ipynb

In [None]:
! pip install \
    pathlib==1.0.1 \
    pandas==2.0.3 \
    missingno==0.5.2 \
    pysmartdatamodels==0.6.0

---

# Code

In [None]:
from pathlib import Path
import pandas as pd
import missingno as mn
from matplotlib import pyplot as plt


# Directory (relative path) that holds the raw waste-container dataset files
DATA_RAW = Path("data/private_datasets/wasteContainer_Valencia")

### Utils

In [None]:
from pysmartdatamodels import pysmartdatamodels as sdm


def attributes_checker(subject: str, datamodel: str, attribute: str) -> bool:
    """
    Check whether an attribute is listed for a data model.

    Args:
        subject (str): Subject the data model belongs to, e.g.
            "dataModel.WasteManagement".
        datamodel (str): Name of the data model, e.g. "WasteContainer".
        attribute (str): Name of the attribute to look up.

    Returns:
        bool: True if the attribute is listed for the data model; False if it
        is not listed or if no attribute list could be obtained.
    """
    known_attributes = sdm.attributes_datamodel(subject, datamodel)
    # A falsy, possibly non-iterable result (e.g. False for an unknown data
    # model) means there is nothing to search; report "not found" instead of
    # letting the membership test raise a TypeError.
    if not known_attributes:
        return False
    return attribute in known_attributes

## Data cleaning

In [None]:
# Show which files are available in the raw-data directory
print([entry for entry in DATA_RAW.glob("*")])

# Read the semicolon-separated container file and preview its first rows
df_wasteContainer = pd.read_csv(
    DATA_RAW.joinpath("contenidors-residus-solids-contenidores-residuos-solidos.csv"),
    sep=";",
)
df_wasteContainer.head()

Filas repetidas

In [None]:
# Remember the row count before de-duplicating so the number of removed rows
# can be reported afterwards
raw_n_rows = len(df_wasteContainer)
print("Nº contenedores: ", raw_n_rows)
df_wasteContainer = df_wasteContainer.drop_duplicates()
print("Nº contenedores duplicados: ", raw_n_rows - len(df_wasteContainer))

Valores faltantes

In [None]:
# mn.matrix(df_wasteContainer, figsize=(10, 10))
# plt.show()

In [None]:
# Keep only the rows without any missing value (one NaN is enough to drop a row)
df_no_nan = df_wasteContainer.dropna(how="any").copy()

# Report how many complete rows remain and which share of the rows counted
# before de-duplication has been lost
n_complete = len(df_no_nan)
print("Nº contenedores sin NaN: ", n_complete)
dropped_pct = round(((raw_n_rows - n_complete) / raw_n_rows) * 100, 3)
print("Percentage of rows dropped: ", dropped_pct, "%")

# Cast the "Cod. Via" and "Portal" columns to 64-bit integers
df_no_nan = df_no_nan.astype({"Cod. Via": "int64", "Portal": "int64"})

Elegimos con qué dataset trabajar: el original (incompleto, con valores faltantes) o el reducido (completo, sin valores faltantes)

In [None]:
# Working copy used by the rest of the notebook: the NaN-free subset.
# Swap in the commented line to work with the frame that still has missing values.
df_wasteWork = df_no_nan.copy()
# df_wasteWork = df_wasteContainer.copy()

# Show the column names of the working frame
set(df_wasteWork.columns)

In [None]:
# Preview the rows with the highest OBJECTID values
df_wasteWork.sort_values(by=["OBJECTID"], ascending=False).head()

In [None]:
# Count how many rows share each "Cod. Via" value
df_wasteWork["Cod. Via"].value_counts()

In [None]:
# Distinct values found in the "Productor" column
df_wasteWork["Productor"].unique()

In [None]:
# Pick one example row. Sampling (with a fixed seed for reproducibility) avoids
# depending on a hard-coded index label that may have been removed by the
# duplicate/NaN filtering above.
random_row = df_wasteWork.sample(n=1, random_state=42).iloc[0]
random_row

In [None]:
# "Portal" value of the selected example row
random_row.Portal

## URLs

In [None]:
import requests
import json
import random
import time

# Host names used to reach the context broker and QuantumLeap, and the tenant
# name sent in the "NGSILD-Tenant" header
IP_ORION = "fiware.orionld"
IP_QUANTUMLEAP = "fiware.quantumleap"
PROJECT_NAME = "citcom"

################################################################################
# do not edit!

# NGSI-LD endpoints of the context broker (entities and subscriptions)
URL_ORION_ENTITIES = f"http://{IP_ORION}:1026/ngsi-ld/v1/entities"
URL_ORION_SUBSCRIPTION = f"http://{IP_ORION}:1026/ngsi-ld/v1/subscriptions"

# QuantumLeap endpoints: notification receiver and entity queries
URL_QUANTUMLEAP_NOTIFY = f"http://{IP_QUANTUMLEAP}:8668/v2/notify"
URL_QUANTUMLEAP_QUERIES = f"http://{IP_QUANTUMLEAP}:8668/v2/entities"

# Default headers for requests that send a JSON-LD body to the broker
HEADERS = {"Content-Type": "application/ld+json", "NGSILD-Tenant": PROJECT_NAME}

################################################################################

## Suscripciones

In [None]:
# Subscription definition: watch "fillingLevel" and "location" on every
# WasteContainer entity and send both attributes to the QuantumLeap notify URL
payload = {
    "description": "Filling level changes.",
    "type": "Subscription",
    "entities": [{"type": "WasteContainer", "idPattern": ".*"}],
    "watchedAttributes": ["fillingLevel", "location"],
    "notification": {
        "attributes": ["fillingLevel", "location"],
        "format": "normalized",
        "endpoint": {
            "uri": URL_QUANTUMLEAP_NOTIFY,
            "accept": "application/json",
        },
    },
    "@context": "https://raw.githubusercontent.com/smart-data-models/dataModel.WasteManagement/master/context.jsonld",
}


# Register the subscription with the broker; the response is shown as cell output
requests.post(URL_ORION_SUBSCRIPTION, headers=HEADERS, data=json.dumps(payload))

Checking the subscriptions

```
curl -X GET \
  'http://localhost:1026/ngsi-ld/v1/subscriptions/' \
  -H 'NGSILD-Tenant: citcom'
```

In [None]:
# List the subscriptions registered for this tenant
requests.get(URL_ORION_SUBSCRIPTION, headers={"NGSILD-Tenant": PROJECT_NAME}).json()

Checking supported operations

In [None]:
# OPTIONS request on the entities collection; the response headers are displayed
requests.options(URL_ORION_ENTITIES).headers

In [None]:
# OPTIONS request on a single entity (id suffix 001)
requests.options(f"{URL_ORION_ENTITIES}/urn:ngsi-ld:WasteContainer:001").headers

In [None]:
# OPTIONS request on the attribute collection of that entity
requests.options(f"{URL_ORION_ENTITIES}/urn:ngsi-ld:WasteContainer:001/attrs").headers

In [None]:
# OPTIONS request on one specific attribute (fillingLevel) of that entity
requests.options(
    f"{URL_ORION_ENTITIES}/urn:ngsi-ld:WasteContainer:001/attrs/fillingLevel"
).headers

## Agregamos nuevas entidades

In [None]:
# Module-level counter backing the numeric suffix of the generated entity ids
id_entity = 0

# Values from which the simulated container status is drawn
_CONTAINER_STATES = (
    "ok",
    "lidOpen",
    "dropped",
    "moved",
    "vandalized",
    "burned",
    "unknown",
)


def _next_entity_id() -> str:
    """Increment the global entity counter and return it zero-padded to 3 digits."""
    global id_entity
    id_entity += 1
    return str(id_entity).zfill(3)


def _random_status() -> str:
    """Return one of the container states, chosen at random."""
    return random.choice(_CONTAINER_STATES)


def _assert_attributes_in_datamodel(payload: dict) -> None:
    """Check every payload key that does not start with "@" against the data model.

    Args:
        payload (dict): Entity payload whose keys are validated.

    Raises:
        AssertionError: If at least one key is not reported as an attribute of
            the "WasteContainer" model of "dataModel.WasteManagement".
    """
    # Keys starting with "@" (such as "@context") are skipped
    attribute_names = [key for key in payload if not key.startswith("@")]
    unknown = [
        name
        for name in attribute_names
        if not attributes_checker(  # slow function !!!
            "dataModel.WasteManagement", "WasteContainer", name
        )
    ]

    # Raised explicitly rather than through an ``assert`` statement so that the
    # validation is not stripped when Python runs with -O.
    if unknown:
        raise AssertionError(
            f"The following attributes are not in the datamodel: {unknown}"
        )
    print("[PASS!] - All attributes are in the datamodel.")


def add_entity_from_row(row, entity_type):
    """Build an entity payload from one dataset row and POST it to the broker.

    The "status" and "fillingLevel" values are generated randomly; the other
    values come from the row.

    Args:
        row: Row providing "geo_point_2d", "Ubicació / Ubicación", "Portal"
            and "Tipus Contenidor / Tipo Contenedor".
        entity_type: Entity type, used as the payload "type" and inside the id.

    Returns:
        The response object returned by ``requests.post``.

    Raises:
        AssertionError: Only while creating the first entity (counter == 1),
            if the payload has attributes that are not in the data model.
    """
    id_n = _next_entity_id()

    payload = {
        "id": f"urn:ngsi-ld:{entity_type}:{id_n}",
        "type": entity_type,
        "location": {
            "type": "Point",
            # NOTE(review): the two numbers are sent in the order in which they
            # appear in "geo_point_2d"; GeoJSON expects [longitude, latitude] —
            # confirm the order used by the dataset.
            "coordinates": list(map(float, row["geo_point_2d"].split(", "))),
        },
        "address": {
            "type": "Property",
            "value": {
                "addressCountry": {"type": "string", "value": "Spain"},
                "addressLocality": {"type": "string", "value": "Valencia"},
                "streetAddress": {
                    "type": "string",
                    "value": str(row["Ubicació / Ubicación"]),
                },
                "streetNr": {"type": "string", "value": str(row.Portal)},
            },
        },
        "status": {"type": "Property", "value": _random_status()},
        "storedWasteKind": {
            "type": "Property",
            "value": row["Tipus Contenidor / Tipo Contenedor"],
        },
        "fillingLevel": round(random.random(), 2),
        "@context": "https://raw.githubusercontent.com/smart-data-models/dataModel.WasteManagement/master/context.jsonld",
    }

    # The data-model lookup is slow, so only the first payload is validated
    if id_entity == 1:
        _assert_attributes_in_datamodel(payload)

    return requests.post(URL_ORION_ENTITIES, headers=HEADERS, data=json.dumps(payload))


def add_entities_from_df(df, entity_type="WasteContainer", delay=0.08):
    """Create one broker entity for every row of a DataFrame.

    Rows are processed in order. A response whose status code is not 201 is
    reported and the loop continues; any other exception raised for a row is
    printed together with the row and the loop continues as well. An
    AssertionError (failed data-model validation) stops the loop.

    Args:
        df: DataFrame whose rows are passed to ``add_entity_from_row``; the
            "OBJECTID" value of a row is printed when its request fails.
        entity_type: Entity type forwarded to ``add_entity_from_row``.
        delay: Seconds to pause after each request that returned a response.
    """
    for _, row in df.iterrows():
        try:
            response = add_entity_from_row(row, entity_type)
            # Anything other than 201 is reported, but the upload goes on
            if response.status_code != 201:
                print(f"Object ID error: {row.OBJECTID}")
                print(f"Status code: {response.status_code}")
                print(f"Error response: {response.text}")
            # Short pause between consecutive requests
            time.sleep(delay)
        except AssertionError as e:
            print(f"[Assertion!] - {e}")
            break
        except Exception as e:
            print(f"Error: {e}")
            print(f"Row: {row}")


# Upload every row of the working dataset as a WasteContainer entity
add_entities_from_df(df_wasteWork, "WasteContainer")

## Actualización de variables

In [None]:
# Set the fillingLevel attribute of the entity with id suffix 001 to 1
url = f"{URL_ORION_ENTITIES}/urn:ngsi-ld:WasteContainer:001/attrs/fillingLevel"
payload = {
    "value": 1,
    "@context": "https://raw.githubusercontent.com/smart-data-models/dataModel.WasteManagement/master/context.jsonld",
}

# PATCH is used for the update; the PUT variant is kept for reference only
# requests.put(url, headers=HEADERS, data=json.dumps(payload))
requests.patch(url, headers=HEADERS, data=json.dumps(payload))

In [None]:
import time

# Give every container a new random filling level. The entity ids are assumed
# to be the consecutive zero-padded numbers 001..N, one per row of df_wasteWork.
count = 0
failed_ids = []
for id_n in range(1, len(df_wasteWork.index) + 1):
    entity_suffix = str(id_n).zfill(3)
    url = f"{URL_ORION_ENTITIES}/urn:ngsi-ld:WasteContainer:{entity_suffix}/attrs/fillingLevel"
    payload = {
        "value": round(random.random(), 2),
        "@context": "https://raw.githubusercontent.com/smart-data-models/dataModel.WasteManagement/master/context.jsonld",
    }

    # requests.put(url, headers=HEADERS, data=json.dumps(payload))
    response = requests.patch(url, headers=HEADERS, data=json.dumps(payload))
    # Count only the requests the broker accepted; keep track of the others so
    # the summary does not claim updates that did not happen
    if response.ok:
        count += 1
    else:
        failed_ids.append(entity_suffix)
    # pause of 0.08 seconds between requests
    time.sleep(0.08)

print(f"Updated {count} containers.")
if failed_ids:
    print(f"Failed to update {len(failed_ids)} containers, first ids: {failed_ids[:10]}")

## Time Series Data Queries (QuantumLeap API)

[Source](https://ngsi-ld-tutorials.readthedocs.io/en/latest/time-series-data.html#:~:text=QuantumLeap%20API%20%2D%20List%20The%20First%20N%20Sampled%20Values)


In [None]:
# Request at most 4 stored fillingLevel values of the entity with id suffix 001
url = f"{URL_QUANTUMLEAP_QUERIES}/urn:ngsi-ld:WasteContainer:001/attrs/fillingLevel?limit=4"
# NOTE(review): no tenant header is sent here, although the entities were created
# with "NGSILD-Tenant" set to PROJECT_NAME — confirm the query reaches that data
headers = {
    "Accept": "application/json",
    "Fiware-ServicePath": "/",
}

requests.get(url, headers=headers).json()

## Listar entidades del broker

Muestra todos los atributos de las entidades

In [None]:
# Headers for reading from the broker: JSON-LD context passed through the Link
# header, tenant name, and JSON-LD as the accepted response format
headers = {
    "Link": '<https://raw.githubusercontent.com/smart-data-models/dataModel.WasteManagement/master/context.jsonld>; rel="http://www.w3.org/ns/json-ld#context"; type="application/ld+json"',
    "NGSILD-Tenant": PROJECT_NAME,
    "Accept": "application/ld+json",
}
# Restrict the query to WasteContainer entities
params = {"type": "WasteContainer"}

# Print how many entities this single request returned, then display them
lst_entities = requests.get(URL_ORION_ENTITIES, headers=headers, params=params).json()
print(len(lst_entities))
display(lst_entities)

Muestra los atributos de las entidades que se le indiquen

In [None]:
# Same read headers as in the previous listing
headers = {
    "Link": '<https://raw.githubusercontent.com/smart-data-models/dataModel.WasteManagement/master/context.jsonld>; rel="http://www.w3.org/ns/json-ld#context"; type="application/ld+json"',
    "NGSILD-Tenant": PROJECT_NAME,
    "Accept": "application/ld+json",
}
# Ask for the keyValues representation and only the location and fillingLevel attributes
params = {
    "type": "WasteContainer",
    "options": "keyValues",
    "attrs": "location,fillingLevel",
}

lst_entities = requests.get(URL_ORION_ENTITIES, headers=headers, params=params).json()
lst_entities

Usando la paginación para mostrar TODAS las entidades

In [None]:
# Read headers: JSON-LD context through the Link header, tenant name, and
# JSON-LD as the accepted response format
headers = {
    "Link": '<https://raw.githubusercontent.com/smart-data-models/dataModel.WasteManagement/master/context.jsonld>; rel="http://www.w3.org/ns/json-ld#context"; type="application/ld+json"',
    "NGSILD-Tenant": PROJECT_NAME,
    "Accept": "application/ld+json",
}

params = {
    "type": "WasteContainer",
    "limit": 1000,  # page size (maximum 1000)
    "offset": 0,  # start at the first page
    "options": "keyValues",
    "attrs": "location,fillingLevel",
}

all_entities = []

while True:
    response = requests.get(URL_ORION_ENTITIES, headers=headers, params=params)
    # Stop on an HTTP error instead of mixing an error document into the results
    response.raise_for_status()
    entities = response.json()
    if not isinstance(entities, list):
        raise ValueError(f"Unexpected response from the broker: {entities}")
    all_entities.extend(entities)

    # A page shorter than the limit means there are no more results
    if len(entities) < params["limit"]:
        break

    # Move the offset forward to request the next page
    params["offset"] += params["limit"]

print(f"Total de entidades recuperadas: {len(all_entities)}")
all_entities

In [None]:
# Flatten the list of entity dicts into a table and discard the JSON-LD metadata
df = pd.json_normalize(all_entities).drop(columns=["@context", "type"])

# Expand the coordinate pairs into two named columns (first element ->
# "latitude", second -> "longitude") and remove the original location columns
df = df.join(
    pd.DataFrame(
        df["location.coordinates"].tolist(),
        index=df.index,
        columns=["latitude", "longitude"],
    )
).drop(columns=["location.coordinates", "location.type"])

df