### Generating sensor data

This notebook generates synthetic sensor data that resembles real production data.

In [1]:
import pandas as pd
import numpy as np
from os.path import join
from datetime import datetime, timedelta

In [2]:
# Probability that a signal's value is recorded at any given second.
# Each signal is decided independently every second (one Bernoulli trial per signal).
REC_P = 0.5

In [21]:
def _phase_duration(jitter):
    '''Draw a phase length in seconds: 600 shifted by a uniform integer in [-jitter, jitter).'''
    return 600 + np.random.randint(2 * jitter) - jitter


def _record_signals(data, equipment, t, duration, power_coef, temp_coef, rec_p):
    '''Append POWER/TEMP samples of one phase to `data`; return the time right after the phase.

    For every second of the phase each signal is recorded with probability
    `rec_p`. A recorded value is a triangular ramp (0 at both ends of the
    phase, peaking in the middle) scaled by the signal's coefficient, plus
    uniform noise from [0, 1).
    '''
    half = duration / 2
    for i in range(duration):
        ramp = half - abs(i - half)
        # POWER is always drawn before TEMP so the random stream keeps a fixed order.
        for signal, coef in (('POWER', power_coef), ('TEMP', temp_coef)):
            if np.random.binomial(1, p=rec_p):
                value = coef * ramp + np.random.rand()
                data.append(['{}.{}'.format(equipment, signal), str(t), value, value])
        t += timedelta(seconds=1)
    return t


def generate_batch(batch_id, start_datetime, rec_p=None):
    '''Generate the records of one batch in TagName/DateTime/Value/StringValue format.

    The batch passes through three pieces of equipment in sequence: EQ01
    (one phase), EQ02 (three phases, announced by STATUS 0, 1 and 2) and
    EQ03 (one phase). Each phase uses its own pair of random coefficients
    for the POWER and TEMP signals.

    Parameters
    ----------
    batch_id : int
        Batch number; the batch is labelled 'B<10000 + batch_id>'.
    start_datetime : datetime
        Timestamp of the first record.
    rec_p : float, optional
        Probability that a signal is recorded in a given second.
        Defaults to the notebook-level REC_P.

    Returns
    -------
    data : list of [TagName, DateTime, Value, StringValue] records
    t1_final : datetime at which the batch leaves the 1st equipment
    t : datetime right after the last phase of the batch
    target : float computed from the ten phase coefficients only
    '''
    if rec_p is None:
        rec_p = REC_P
    t = start_datetime
    b = 'B{}'.format(10000 + batch_id)
    coefs = np.random.rand(10)

    data = []
    # 1st equipment: batch label, status 0, then a single phase
    data.append(['EQ01.BATCH', str(t), np.nan, b])
    data.append(['EQ01.STATUS', str(t), 0, 0])
    t = _record_signals(data, 'EQ01', t, _phase_duration(30),
                        coefs[0], coefs[1], rec_p)
    t1_final = t

    # 2nd equipment: batch label, then three phases, each preceded by its status
    data.append(['EQ02.BATCH', str(t), np.nan, b])
    for status in range(3):
        data.append(['EQ02.STATUS', str(t), status, status])
        t = _record_signals(data, 'EQ02', t, _phase_duration(30),
                            coefs[2 + 2 * status], coefs[3 + 2 * status], rec_p)

    # 3rd equipment: batch label, status 0, then a single phase with a wider duration spread
    data.append(['EQ03.BATCH', str(t), np.nan, b])
    data.append(['EQ03.STATUS', str(t), 0, 0])
    t = _record_signals(data, 'EQ03', t, _phase_duration(60),
                        coefs[8], coefs[9], rec_p)

    target = np.sum(np.power(coefs, 2)) + np.dot(coefs[:5], coefs[5:])
    return data, t1_final, t, target

The generated data is exported as one CSV file per day.

In [6]:
def split(batch_data):
    '''Separate the records of a batch that runs across midnight.

    Records whose timestamp hour is greater than 12 are treated as the end
    of the current day; all remaining records are treated as the start of
    the next day. Record order is preserved within each part.

    Returns a (this_day, next_day) pair of lists.
    '''
    buckets = {True: [], False: []}
    for record in batch_data:
        hour = datetime.strptime(record[1], "%Y-%m-%d %H:%M:%S").hour
        buckets[hour > 12].append(record)
    return buckets[True], buckets[False]

In [None]:
np.random.seed(10)
start_datetime = datetime.strptime("2019-01-01 00:00:00", "%Y-%m-%d %H:%M:%S")
t = start_datetime
day_data = []       # records of the day that begins at start_datetime
next_day_data = []  # records already generated for the following day
target_data = []


def export_day(records, day_start):
    '''Write one day's records to ../data/<YYYY-MM-DD>.csv.'''
    df = pd.DataFrame(data=records, columns=['TagName', 'DateTime', 'Value', 'StringValue'])
    df.to_csv(join('..', 'data', str(day_start)[:10] + '.csv'), index=False)


for batch_id in range(1, 500):
    # generate batch
    batch_data, t1_final, batch_end, target = generate_batch(batch_id, t)
    target_data.append(['B{}'.format(10000+batch_id), target])
    day_end = start_datetime + timedelta(days=1)
    # check if batch goes through 2 days
    if batch_end >= day_end:
        this_day, next_day = split(batch_data)
        day_data.extend(this_day)
        next_day_data.extend(next_day)
    else:
        day_data.extend(batch_data)
    # The next batch enters the 1st equipment 30 minutes after this one left it,
    # i.e. while this batch is still being processed further down the line.
    t = t1_final + timedelta(seconds=1800)
    # Close the day only once the next batch starts after midnight. Closing it
    # as soon as one batch crosses midnight would put the pre-midnight records
    # of a following, overlapping batch into the wrong day's file.
    if t >= day_end:
        export_day(day_data, start_datetime)
        day_data, next_day_data = next_day_data, []
        start_datetime = day_end

# Export what is left after the last batch (a partial day), so that every
# batch listed in target_data also has its sensor records on disk.
for offset, records in enumerate((day_data, next_day_data)):
    if records:
        export_day(records, start_datetime + timedelta(days=offset))

Target values are written to a separate file, `target.csv`.

In [17]:
# One row per generated batch: its label and the target value derived from its coefficients.
target_path = join('..', 'data', 'target.csv')
target = pd.DataFrame(target_data, columns=['Batch', 'Target'])
target.to_csv(target_path, index=False)