In [145]:
import pandas as pd
import numpy as np
from os.path import join
from datetime import datetime, timedelta

In [146]:
def generate_batch(batch_id, start_datetime):
    '''Builds the event records of one batch as it moves through equipment 1 to 3.

    Every record is a list [TagName, DateTime, Value, StringValue]:
      * a batch record carries NaN as Value and the label 'B<10000 + batch_id>'
        as StringValue;
      * a status record carries the status number in both Value and StringValue.

    The clock starts at start_datetime and, after each stage, moves forward by
    the stage's nominal duration plus a random offset of -60..+59 seconds.

    Returns a tuple (records, end_time), where end_time is the clock value
    after the last stage.
    '''
    batch_label = 'B{}'.format(10000 + batch_id)

    # (nominal seconds until the next stage, events logged at this stage);
    # an event is (tag, status) and status None marks a batch record
    stages = (
        (1800, (('EQ01.BATCH', None), ('EQ01.STATUS', 0))),
        (2400, (('EQ02.BATCH', None), ('EQ02.STATUS', 0))),
        (3000, (('EQ02.STATUS', 1),)),
        (1200, (('EQ02.STATUS', 2),)),
        (600, (('EQ03.BATCH', None), ('EQ03.STATUS', 0))),
    )

    records = []
    now = start_datetime
    for nominal_gap, events in stages:
        stamp = str(now)
        for tag, status in events:
            if status is None:
                records.append([tag, stamp, np.nan, batch_label])
            else:
                records.append([tag, stamp, status, status])
        # exactly one random draw per stage, taken after its events are logged
        now = now + timedelta(seconds=nominal_gap + np.random.randint(120) - 60)

    return records, now

In [147]:
def generate_signal(equipment, tag, start_datetime, num=80000):
    '''Builds num consecutive one-second samples for the tag '<equipment>.<tag>'.

    Every record is a list [TagName, DateTime, Value, StringValue]. The first
    sample is stamped start_datetime and each following one is a second later.
    Value and StringValue both hold the same random integer from 0 to 9.

    Returns the list of records.
    '''
    full_tag = '{}.{}'.format(equipment, tag)
    tick = timedelta(seconds=1)

    rows, clock = [], start_datetime
    for _ in range(num):
        reading = np.random.randint(10)
        rows.append([full_tag, str(clock), reading, reading])
        clock = clock + tick
    return rows

In [148]:
def split(batch_data):
    '''Splits the records of a batch that runs over midnight into two days.

    batch_data is an iterable of records [TagName, DateTime, Value, StringValue]
    whose DateTime is a string formatted as "%Y-%m-%d %H:%M:%S".

    Only the hour of each timestamp is inspected, the date is ignored:
    records stamped in the second half of a day (12:00:00-23:59:59) are taken
    as belonging to the day that is ending, records stamped in the first half
    (00:00:00-11:59:59) as belonging to the day that is starting.

    Returns a tuple (this_day, next_day) of record lists; the relative order
    of the records is kept in both.

    Raises ValueError if a timestamp does not match the expected format.
    '''
    this_day = []
    next_day = []
    for record in batch_data:
        hour = datetime.strptime(record[1], "%Y-%m-%d %H:%M:%S").hour
        # ">=" rather than ">": the noon hour (12:xx) is still before midnight
        # and must not be pushed to the following day
        if hour >= 12:
            this_day.append(record)
        else:
            next_day.append(record)
    return this_day, next_day

In [149]:
# Seed NumPy's global generator (used by generate_batch and generate_signal)
# so that re-running the notebook from a fresh kernel writes identical files.
np.random.seed(42)

# start_datetime is the midnight that opens the day currently being filled;
# t is the running clock handed from one batch to the next.
start_datetime = datetime.strptime("2019-01-01 00:00:00", "%Y-%m-%d %H:%M:%S")
t = start_datetime
day_data = []

# NOTE: records still held in day_data when this loop finishes (the last day,
# which was not completed) are not written to any file.
for batch_id in range(1, 100):
    # generate batch; t becomes the time returned by generate_batch
    batch_data, t = generate_batch(batch_id, t)
    # check if the clock has moved past the end of the current day
    if (t-start_datetime).days > 0:
        this_day, next_day = split(batch_data)
        day_data.extend(this_day)
        # add one full day (24*60*60 one-second samples) of signal per sensor
        # NOTE(review): signal tags are prefixed 'EQ1'..'EQ3' while the batch
        # records use 'EQ01'..'EQ03' - confirm that this mismatch is intended.
        for equipment in ('EQ1', 'EQ2', 'EQ3'):
            for tag in ('POWER', 'TEMP'):
                signal = generate_signal(equipment, tag, start_datetime, 24*60*60)
                day_data.extend(signal)
        # save data to ../data/<YYYY-MM-DD>.csv
        df = pd.DataFrame(data=day_data, columns=['TagName', 'DateTime', 'Value', 'StringValue'])
        df.to_csv(join('..', 'data', str(start_datetime)[:10] + '.csv'), index=False)
        # start new day, carrying over the records that belong to it
        day_data = next_day
        start_datetime += timedelta(seconds=24*60*60)
    else:
        # add batch data
        day_data.extend(batch_data)