# How to create a pipeline

This is a short introduction to importing data from a local directory on your computer into the modeltest database. The process requires some manual work and must be adapted to each individual campaign.

### STEP 1:

Initiate client

In [None]:
# Create the SDK client; the cells below use it for every call that creates or
# looks up campaigns, sensors, tests and timeseries.
# NOTE(review): SDKclient is not imported anywhere in this notebook — it must be
# brought into scope from the project SDK before this cell is run.
client = SDKclient()

### STEP 2:

Create the campaign with manual data insertion.

In [None]:
# Register the campaign; the returned object's id is passed as campaign_id when
# sensors and tests are created in the later steps.
# NOTE(review): campaign_dir is assigned in the STEP 4a cell, so that cell has to
# be run before this one. The campaign name is the last "/"-separated component
# of campaign_dir.
campaign = client.campaign.create(name=campaign_dir.split("/")[-1],
                                      description="Modeltest for SWACH",
                                      # NOTE(review): the expected date-string layout is defined by
                                      # get_datetime_date (not shown here) — confirm before editing.
                                      date=get_datetime_date("180120120000"),
                                      location="STADT TOWING TANK",
                                      waterline_diameter=70, 
                                      scale_factor=75,
                                      water_density=1025, 
                                      # 4.1 multiplied by 75, the same value as scale_factor —
                                      # presumably the tank depth converted to full scale; confirm.
                                      water_depth=4.1 * 75,
                                      # 3 * 60 * 60 = 10800 — presumably three hours in seconds; confirm unit.
                                      transient=3 * 60 * 60)

### STEP 3:

Add all sensors with manual data insertion.
This requires lookup of all data for all sensors. The following example is one of many sensor imports.

In [None]:
# Register one sensor for the campaign created in STEP 2. Repeat this call for
# every sensor. read_datapoints_from_csv_with_pandas (STEP 6) looks sensors up by
# the stripped CSV column header, so the name given here must match that header.
client.sensor.create(name='Wave 3 Front Slot 7 AI2-AI10',
                         description='Wave front - C',
                         unit='mm',
                         kind='length',
                         # Position values are divided by 1000 and multiplied by 75 (the campaign's
                         # scale_factor value) — presumably mm -> m and then model -> full scale; confirm.
                         x=(1925/1000)*75,
                         y=(370/1000)*75,
                         z=None,
                         is_local=True,
                         campaign_id=campaign.id)

### STEP 4a:

Set file location and concepts for import

In [None]:
import os

# Root directory of the campaign data on the local machine; adapt to your own setup.
# Its last "/"-separated component is also used as the campaign name in STEP 2.
campaign_dir = "C:/Users/hly/Documents/STT"
# Concept ids to import; STEP 4c expects one sub-directory of campaign_dir per id.
concept_ids = ["M206", "M207"]

### STEP 4b:

Locate and open all wave current calibrations in the file structure. The file structure and the file names are unique to each campaign. In the following example, wave current calibrations are imported with the test info gathered from either the filename or the file itself.

In [None]:
# Import every wave calibration found in the "WaveCalib" folder of the campaign.
#
# Expected layout (specific to this campaign):
#   WaveCalib/<spectrum>_<..>s<height>_<..>p<period>.../<prefix> <date> <time>/<prefix> ...
# One wave_current_calibration is created per calibration folder, and every
# matching file in its run folders is uploaded as timeseries for that calibration.
#
# Paths are built with os.path.join from the normalised working directory instead
# of hard-coded "\\" separators, and the working directory is changed only once
# (to the calibration root, where the original traversal also ended) so that an
# exception half-way through cannot leave the notebook in a nested folder.
os.chdir(campaign_dir)
calib_root = os.path.join(os.getcwd(), "WaveCalib")
os.chdir(calib_root)
calibs = os.listdir(calib_root)
for calib in calibs:
    calib_dir = os.path.join(calib_root, calib)
    if not os.path.isdir(calib_dir):
        # Stray files next to the calibration folders are not calibrations.
        continue

    # Find wave spectrum and wave height + period from the folder name.
    name_parts = calib.split("_")
    wave_spectrum = name_parts[0]
    if wave_spectrum == "Irreg":
        wave_spectrum = "jonswap"  # a JONSWAP spectrum was approximated in the SWACH tests
    elif wave_spectrum == "Reg":
        wave_spectrum = "regular"
    wave_height = float(name_parts[1].split("s")[1])
    wave_period = float(name_parts[2].split("p")[1])
    gamma = find_gamma(wave_height, wave_period)

    # Only sub-directories are runs (same filter as the floater-test import);
    # a loose file here would otherwise be treated as a run folder and crash.
    times = [entry for entry in os.listdir(calib_dir)
             if os.path.isdir(os.path.join(calib_dir, entry))]
    if not times:
        print(f"Skipping wave calibration {calib}: no run directories found")
        continue

    # Find test date and time from the name of the first run folder.
    date = times[0].split(" ")[1]
    timestamp = times[0].split(" ")[2]
    date_time = date + timestamp

    wave_current_calibration = client.wave_current_calibration.create(description=calib,
                                                                      test_date=get_datetime_date(date_time),
                                                                      campaign_id=campaign.id,
                                                                      wave_spectrum=wave_spectrum,
                                                                      wave_period=wave_period,
                                                                      wave_height=wave_height,
                                                                      gamma=gamma,
                                                                      wave_direction=0,
                                                                      current_velocity=0,
                                                                      current_direction=0)

    for time in times:
        time_dir = os.path.join(calib_dir, time)
        run_prefix = time.split(" ")[0]
        # Only files whose first space-separated token matches the run folder's belong to the run.
        files = [os.path.join(time_dir, x) for x in os.listdir(time_dir)
                 if x.split(" ")[0] == run_prefix]
        for file in files:
            # NOTE: THIS FUNCTION READS AND IMPORTS TIMESERIES
            read_datapoints_from_csv_with_pandas(file=file, test_id=wave_current_calibration.id, client=client)

### STEP 4c:

Locate and open all floater tests in the file structure. The file structure and the file names are unique to each campaign. In the following example, the floater tests of each concept id are imported with the test info gathered from the directory and file names.

Notice the add_floater_test function, which is step 5.

In [None]:
os.chdir(campaign_dir)
for concept_id in concept_ids:
    os.chdir(campaign_dir + "\\" + concept_id)
    tests = os.listdir(path='.')
    for test in tests:
        os.chdir(os.getcwd() + "\\" + test)
        times = [x for x in os.listdir(path='.') if os.path.isdir(x)]
        date = times[0].split(" ")[1]  # Fetch the date from directory name
        timestamp = times[0].split(" ")[2]
        date_time = date + timestamp
        for time in times:
            os.chdir(os.getcwd() + "\\" + time)
            files = [os.getcwd() + "\\" + x for x in os.listdir(path='.') if
                      x.split(" ")[0] == test]  # Only add to test files if start with test name

            floater_test = add_floater_test(files=files,
                                            campaign=campaign,
                                            testname=test,
                                            date=get_datetime_date(date_time),
                                            concept_id=concept_id, client=client)

            os.chdir(get_parent_dir(os.getcwd()))
        os.chdir(get_parent_dir(os.getcwd()))
    os.chdir(get_parent_dir(os.getcwd()))

### STEP 5:

Import all floater tests, and add relevant data for given test. 
In the future, the individual tests will also have a concept id connected to them, so this will need to be added to the pipeline.

In [None]:
# Create one floater test and upload its timeseries. STEP 4c refers to this
# logic as add_floater_test; it expects concept_id, testname, date, files,
# campaign, client and waveCalibDict to be defined.
orientation = 0  # same for all tests in SWACH

# Draft per concept, taken from the specs. Fail loudly on an unknown concept
# instead of leaving `draft` unbound (or silently reusing a stale notebook value).
if concept_id == "M206":
    draft = 29.5
elif concept_id == "M207":
    draft = 18
else:
    raise ValueError(f"No draft defined for concept id {concept_id!r}")

# The test category is hard-coded from the first "_"-separated part of the test name.
x = testname.split("_")[0]
if x == "waveReg":
    category = "regular wave"
elif x == "waveIrreg":
    category = "irregular wave"
elif x[:3] in ("X30", "Y30", "X20", "Y20"):
    category = "decay"
elif x[:3] == "X10":
    category = "pull out"
else:
    # Same reasoning as for draft: never continue with an unbound or stale category.
    raise ValueError(f"Cannot derive a test category from test name {testname!r}")

# Best-effort lookup of the matching wave calibration: a test without an entry
# in waveCalibDict (or whose calibration cannot be resolved) gets wave_id=None.
# `except Exception` keeps that behaviour but no longer swallows
# KeyboardInterrupt / SystemExit like the previous bare `except:` did.
try:
    wave_id = client.wave_current_calibration.get_id(waveCalibDict[testname])
except Exception:
    wave_id = None

floater_test = client.floater.create(description=testname,
                                      test_date=date,
                                      campaign_id=campaign.id,
                                      # measured_hs=10,  # a random value
                                      # measured_tp=13,  # a random value
                                      category=category,
                                      orientation=orientation,
                                      draft=draft,
                                      wave_id=wave_id,
                                      wind_id=None)

# Upload every file of the run as timeseries attached to the new floater test.
for file in files:
    read_datapoints_from_csv_with_pandas(file=file, test_id=floater_test.id, client=client)

### STEP 6:

Function to read datapoints for a given file and import them. Final part of the import

In [None]:
def str_to_datetime(s):
    """Parse a clock string into a ``datetime`` on the dummy date 1900-01-01.

    The string is read by position: hours from characters 0-1, minutes from
    3-4, seconds from 6-7 and, when present, up to six fractional-second
    digits starting at character 9 (e.g. ``"12:34:56"`` or ``"12:34:56.250"``).

    The fractional digits are right-padded with zeros to six places before
    being used as microseconds, so ``".5"`` means 500000 microseconds and
    ``".250"`` means 250000 microseconds. A string with no fractional digits
    gets zero microseconds.

    Args:
        s: Time-of-day string in the layout described above.

    Returns:
        A naive ``datetime.datetime`` with year 1900, month 1, day 1.

    Raises:
        ValueError: If a field is not numeric or out of range.
    """
    hour = int(s[0:2])
    minute = int(s[3:5])
    second = int(s[6:8])

    # Digits after the separator are a decimal fraction of a second, not a
    # microsecond count: pad to six digits so fewer digits keep their weight.
    fraction = s[9:15]
    microsecond = int(fraction.ljust(6, "0")) if fraction else 0

    return datetime.datetime(year=1900, month=1, day=1,
                             hour=hour, minute=minute, second=second,
                             microsecond=microsecond)


def read_datapoints_from_csv_with_pandas(file, test_id, client: SDKclient):
    """Upload every sensor column of a semicolon-separated CSV file as a timeseries.

    The first column of the file holds the time stamps (parsed with
    ``str_to_datetime``); each remaining column is looked up as a sensor by its
    stripped header. For each sensor a timeseries is created for ``test_id``,
    one data point is added per row that has a value, and the points are posted.
    Point times are seconds elapsed since the first row of the file.

    Args:
        file: Path of the CSV file to read.
        test_id: Id of the test the new timeseries are attached to.
        client: SDK client used for sensor lookup and timeseries creation.
    """
    frame = pd.read_csv(file, sep=';')
    headers = list(frame.columns)

    for sensor in headers[1:]:
        sensor_name = sensor.strip()
        tic = timer.perf_counter()

        sensor_id = client.sensor.get_id(sensor_name)
        timeseries = client.timeseries.create(sensor_id=sensor_id, test_id=test_id)

        # Rows of [time stamp, value] for this sensor only.
        rows = frame[[headers[0], sensor]].values.tolist()
        first_stamp, _first_value = rows[0]
        origin = str_to_datetime(first_stamp)

        for stamp, value in rows:
            # Rows with no value for this sensor are left out of the timeseries.
            if not pd.isna(value):
                moment = str_to_datetime(stamp)
                point = DataPoint(timeseries_id=timeseries.id,
                                  time=(moment - origin).total_seconds(),
                                  value=value,
                                  client=client)
                timeseries.data_points.append(point)

        timeseries.post_data_points()
        toc = timer.perf_counter()
        print(f"Posting timeseries for sensor {sensor} in file {file} took  {toc - tic:0.4f} seconds")
