# Extract Time Series Data from SAP IOT

This notebook downloads the time series data from SAP IOT and stores the downloaded files in the
folder `../migration-data/iot-download` (relative to this notebook).

## Pre-requirements

- The `notebooks` folder must contain a file `.env` with the credentials to
access SAP PAI and SAP IOT. See the file `.env.sample` as a reference for all parameters
that need to be maintained. The values for these parameters can usually be taken from the
PAI and IOT service keys.

## Steps in the notebook

- install requirements
- fetch all equipment models which are linked to a thing type in SAP IOT
- get all property set types for each thing type
- initiate the download for each property set type within the date range (yearly slices)
- store the result of the download trigger
- poll the status of all triggered downloads until they are ready
- finally download the iot-data to the folder `migration-data/iot-download`

For detailed description see [documentation](../docs/iot-time-series-data.md).

In [None]:
# Identifier of the configuration used by all following cells; it is passed
# as config_id to the API wrappers and to get_config_by_id().
CONFIG_ID = "dca-test"

In [None]:
# Install the Python packages listed in ../requirements.txt
%pip install -r ../requirements.txt

In [None]:
# Fetch the equipment models from SAP PAI and the thing types from SAP IOT,
# then collect the thing type referenced by each equipment model.
from modules.pai.models import ModelAPIWrapper
from modules.iot.iot import SAPIoTAPIWrapper

iot_wrapper = SAPIoTAPIWrapper(config_id=CONFIG_ID)
model_wrapper = ModelAPIWrapper(config_id=CONFIG_ID)
equi_model = model_wrapper.get_equipment_models()
thing_types = iot_wrapper.get_thing_types()

print(f"fetched {len(equi_model)} equipment models")

# We expect that every model has a thing type: one of the comma-separated
# entries in "modelSearchTerms" should be the name of an existing thing type.
# NOTE(review): assumes each model is a dict-like object - confirm against
# ModelAPIWrapper.get_equipment_models().
thing_types_found = []
for model in equi_model:
    # a missing or empty field is treated as "no search terms" instead of crashing
    raw_terms = model.get("modelSearchTerms") or ""
    # tolerate blanks around the separators ("a, b")
    search_terms = [term.strip() for term in raw_terms.split(",")]
    for term in search_terms:
        if term and term in thing_types:
            # the first matching term wins
            thing_types_found.append(term)
            break
    else:
        # no search term of this model is a known thing type
        print(f"thing type {model.get('modelSearchTerms')} not found")

print(f"found {len(thing_types_found)} equipment models with existing thing types")

# Database Setup

We'll create a new database table "iot_export_status" in which we store the
request ID returned by the time series cold store. Later we use the status
column to keep track of which files can be downloaded.

In [None]:

from sqlalchemy import create_engine
from modules.database.tables import meta_obj
from modules.util.config import get_config_by_id

# Load the selected configuration and open the database it points to.
config = get_config_by_id(CONFIG_ID)
db_url = config["database"]["connection"]
engine = create_engine(db_url, echo=False)

# NOTE: drop_all() removes every table registered on meta_obj before
# create_all() recreates them, so re-running this cell starts from empty
# tables (presumably including iot_export_status - confirm in
# modules.database.tables).
with engine.connect() as conn:
    print("creating iot_export_status table")
    meta_obj.drop_all(bind=engine)
    meta_obj.create_all(bind=engine)
    conn.commit()

In [None]:
from modules.iot.iot import SAPIoTAPIWrapper
from modules.util.helpers import generate_yearly_slices
from sqlalchemy import create_engine, func, select
from modules.database.tables import iot_export_status_table
from modules.util.config import get_config_by_id, get_system_by_type
import time

# Trigger one time series export per property set type and yearly time slice
# and record every trigger (or its failure) in the iot_export_status table.
config = get_config_by_id(CONFIG_ID)
iot_config = get_system_by_type(config, "IOT")
engine = create_engine(config["database"]["connection"], echo=False)

iot_wrapper = SAPIoTAPIWrapper(config_id=CONFIG_ID)
thing_types = thing_types_found
time_slices = generate_yearly_slices(
    iot_config["config"]["time_range_from"], iot_config["config"]["time_range_to"]
)
# property set types whose export request was answered with HTTP 404
property_set_type_ignore_list = list()

# limit the thing_types for testing to 5 entries
# thing_types = thing_types[:5]

with engine.connect() as conn:
    for thing_type in thing_types:
        property_sets = iot_wrapper.get_property_sets_by_thing_type(thing_type)

        for property_set_type in property_sets:
            # skip property set types that already returned 404 for another thing type
            if property_set_type in property_set_type_ignore_list:
                continue
            print(f"start export for {property_set_type}")

            for time_slice in time_slices:
                # each slice is indexed as (start, end); dates are stored as YYYY-MM-DD
                start_date = time_slice[0].strftime("%Y-%m-%d")
                end_date = time_slice[1].strftime("%Y-%m-%d")

                try:
                    # if this slice is already recorded in the iot_export_status
                    # table (in any status), it is not requested again
                    query = iot_export_status_table.select().where(
                        (iot_export_status_table.c.tenant_id == config["config_id"]) &
                        (iot_export_status_table.c.thing_type == thing_type) &
                        (iot_export_status_table.c.property_set_type == property_set_type) &
                        (iot_export_status_table.c.start_date == start_date) &
                        (iot_export_status_table.c.end_date == end_date)
                    )
                    if conn.execute(query).fetchall():
                        continue

                    # initiate the export
                    request_id = iot_wrapper.initiate_time_series_export(
                        property_set_type=property_set_type,
                        start_date=start_date,
                        end_date=end_date,
                    )

                    # remember the request id together with the slice it belongs to
                    stmt = iot_export_status_table.insert().values(
                        tenant_id=config["config_id"],
                        thing_type=thing_type,
                        property_set_type=property_set_type,
                        start_date=start_date,
                        end_date=end_date,
                        status="Initiated",
                        request_id=request_id,
                    )
                    conn.execute(stmt)
                    conn.commit()

                except Exception as e:
                    # Only errors that carry an HTTP response can be classified
                    # below; anything else (e.g. a database error) is re-raised
                    # unchanged instead of being hidden by an AttributeError.
                    response = getattr(e, "response", None)
                    if response is None:
                        raise

                    if response.status_code == 404:
                        # Put the property set type on the ignore list and stop
                        # here, so that none of its remaining time slices is
                        # requested.
                        property_set_type_ignore_list.append(property_set_type)
                        break

                    # store the error message in the iot_export_status table
                    try:
                        error_message = response.json()
                    except ValueError:
                        # body is not valid JSON
                        # NOTE(review): assumes the response object raises a
                        # ValueError subclass here, as requests/httpx do - confirm
                        error_message = None

                    if isinstance(error_message, dict) and "message" in error_message:
                        message = error_message["message"]
                    else:
                        message = "Unknown error"

                    stmt = iot_export_status_table.insert().values(
                        tenant_id=config["config_id"],
                        thing_type=thing_type,
                        property_set_type=property_set_type,
                        start_date=start_date,
                        end_date=end_date,
                        status="Error",
                        message=message,
                        request_id="None",
                    )
                    conn.execute(stmt)
                    conn.commit()

    # print status report: how many exports are initiated and how many failed
    status_combined = (
        select(iot_export_status_table.c.status, func.count().label("count"))
        .where(iot_export_status_table.c.status.in_(["Error", "Initiated"]))
        .group_by(iot_export_status_table.c.status)
    )

    result = conn.execute(status_combined).fetchall()
    status_dict = {row[0]: row[1] for row in result}
    status_error = status_dict.get("Error", 0)
    status_initiated = status_dict.get("Initiated", 0)

    print(f"{status_error} entries failed and {status_initiated} entries are initiated")

print("done")

# Check processing status

Next, we need to check the processing status of all initiated downloads. Once all
downloads are ready, we can continue with the next step.

The following are the possible statuses:

- Initiated: The request is placed successfully.
- Submitted: The request for data export is initiated and the method is retrieving the data and preparing for the export process.
- Failed: The request for data export failed due to various reasons. The reasons are listed in the response payload.
- Exception: The system retried to initiate the data export but failed.
- Ready for Download: The request for data export succeeded and the data is available in a file format for download.
- Expired: The data that is ready for download is available only for seven days, beyond which the exported data is not available for download. You should re-initiate the request for data export.

In [None]:
import time

from modules.iot.iot import SAPIoTAPIWrapper
from sqlalchemy import create_engine, and_
from modules.database.tables import iot_export_status_table
from modules.util.config import get_config_by_id, get_system_by_type

# Poll SAP IOT for the status of every pending export until all of them have
# reached a final status, mirroring each status change into iot_export_status.

# statuses after which an export will not change any more
final_statuses = ("Failed", "Exception", "Ready for Download", "Expired")

iot_wrapper = SAPIoTAPIWrapper(config_id=CONFIG_ID)

config = get_config_by_id(CONFIG_ID)
iot_config = get_system_by_type(config, "IOT")
engine = create_engine(config["database"]["connection"], echo=False)

with engine.connect() as conn:
    # Select all exports that are still in progress. "Submitted" is included so
    # that re-running this cell after an interruption picks up exports whose
    # intermediate status has already been written to the table.
    query = iot_export_status_table.select().where(
        (iot_export_status_table.c.tenant_id == config["config_id"]) &
        (iot_export_status_table.c.status.in_(["Initiated", "Submitted"]))
    )

    # Convert the rows to plain dicts once, so that the status can be updated
    # in place and is remembered across polling rounds.
    results = [row._asdict() for row in conn.execute(query).fetchall()]

    all_exports_complete = False

    count = 1
    while not all_exports_complete:
        all_exports_complete = True
        for export_status in results:
            # exports that already reached a final status are not polled again
            if export_status["status"] in final_statuses:
                continue

            # get the status of the export
            status = iot_wrapper.get_time_series_export_status(
                request_id=export_status["request_id"]
            )
            print(
                f"Status for {export_status['thing_type']}/"
                f"{export_status['property_set_type']} from "
                f"{export_status['start_date']} to "
                f"{export_status['end_date']}: {status}"
            )

            if export_status["status"] != status:
                # Update exactly the row this export belongs to: the slice
                # dates come from the row itself, not from a loop variable
                # left over from another cell.
                stmt = (
                    iot_export_status_table.update()
                    .values(status=status)
                    .where(
                        and_(
                            iot_export_status_table.c.tenant_id == config["config_id"],
                            iot_export_status_table.c.thing_type == export_status["thing_type"],
                            iot_export_status_table.c.property_set_type == export_status["property_set_type"],
                            iot_export_status_table.c.start_date == export_status["start_date"],
                            iot_export_status_table.c.end_date == export_status["end_date"],
                        )
                    )
                )

                conn.execute(stmt)
                conn.commit()

                # keep the in-memory copy in sync with the table
                export_status["status"] = status

            # check if the status is one of the final statuses
            if status not in final_statuses:
                all_exports_complete = False

        # wait before checking again; the pause grows by 30 seconds per round
        if not all_exports_complete:
            time.sleep(30*count)
            count += 1

print("done")

In [None]:
import os
from modules.iot.iot import SAPIoTAPIWrapper
from sqlalchemy import create_engine, and_
from modules.database.tables import iot_export_status_table
from modules.util.config import get_config_by_id

# Download every export that is "Ready for Download" into
# DOWNLOAD_FOLDER/<thing type>/ and mark it as "Downloaded" in the table.
config = get_config_by_id(CONFIG_ID)
iot_wrapper = SAPIoTAPIWrapper(config_id=CONFIG_ID)
engine = create_engine(config["database"]["connection"], echo=False)

DOWNLOAD_FOLDER = "../migration-data/iot-download/"

os.makedirs(DOWNLOAD_FOLDER, exist_ok=True)


with engine.connect() as conn:
    # select all exports that are ready for download
    query = iot_export_status_table.select().where(
        and_(
            iot_export_status_table.c.tenant_id == config["config_id"],
            iot_export_status_table.c.status == "Ready for Download",
        )
    )

    results = conn.execute(query).fetchall()

    # iterate over the selected rows and download the data of each export
    for export_status in results:
        export_status = export_status._asdict()
        # download the data
        data = iot_wrapper.download_time_series_export(
            request_id=export_status["request_id"]
        )

        # one sub-folder per thing type below the download folder
        thing_type_folder = os.path.join(DOWNLOAD_FOLDER, export_status["thing_type"])
        os.makedirs(thing_type_folder, exist_ok=True)

        # save the data as a zip archive named after property set type and slice
        # NOTE(review): assumes download_time_series_export returns bytes - confirm
        archive_name = (
            f"{export_status['property_set_type']}-"
            f"{export_status['start_date']}-"
            f"{export_status['end_date']}.zip"
        )
        with open(os.path.join(thing_type_folder, archive_name), "wb") as f:
            f.write(data)

        # Mark exactly this export as downloaded: the slice dates come from the
        # row itself, not from a loop variable left over from another cell.
        stmt = (
            iot_export_status_table.update()
            .values(status="Downloaded")
            .where(
                and_(
                    iot_export_status_table.c.tenant_id == config["config_id"],
                    iot_export_status_table.c.thing_type == export_status["thing_type"],
                    iot_export_status_table.c.property_set_type == export_status["property_set_type"],
                    iot_export_status_table.c.start_date == export_status["start_date"],
                    iot_export_status_table.c.end_date == export_status["end_date"],
                )
            )
        )

        conn.execute(stmt)
        conn.commit()

print("done")

In [None]:
import gzip
import os
import shutil
import zipfile

DOWNLOAD_FOLDER = "../migration-data/iot-download/"


def extract_downloads(download_folder):
    """Unpack every ``.zip`` archive found below *download_folder*.

    Each archive ``<name>.zip`` is extracted into a sibling folder ``<name>``.
    Every ``.gz`` file found in that folder afterwards is decompressed next to
    itself (same name without the ``.gz`` suffix) and the ``.gz`` file is
    deleted.

    Returns the list of ``.zip`` paths that were processed.
    """
    # Collect the archives first: extracting creates new folders below
    # download_folder, which must not be picked up by the same walk.
    archives = []
    for root, _dirs, filenames in os.walk(download_folder):
        for filename in filenames:
            if filename.endswith(".zip"):
                archives.append(os.path.join(root, filename))

    for archive in archives:
        # Strip only the trailing ".zip"; other occurrences of ".zip" in the
        # path (e.g. in a folder name) must stay untouched.
        target_folder = archive[: -len(".zip")]
        os.makedirs(target_folder, exist_ok=True)

        print(f"unzipping {archive}")
        with zipfile.ZipFile(archive, "r") as zip_ref:
            zip_ref.extractall(target_folder)

        # now decompress all files ending in .gz in the extracted folder
        for root, _dirs, filenames in os.walk(target_folder):
            for filename in filenames:
                if not filename.endswith(".gz"):
                    continue
                print(f"unzipping {filename}")
                gz_path = os.path.join(root, filename)
                # strip only the trailing ".gz"
                out_path = gz_path[: -len(".gz")]
                with gzip.open(gz_path, "rb") as f_in, open(out_path, "wb") as f_out:
                    shutil.copyfileobj(f_in, f_out)
                # Delete the .gz file only after it has been closed; removing
                # an open file fails on Windows.
                os.remove(gz_path)

    return archives


files = extract_downloads(DOWNLOAD_FOLDER)

print("done")