In [1]:
import os
import pickle
from string import Template
from collections import namedtuple

import matplotlib.pyplot as plt
import seaborn as sns

import numpy as np
import pandas as pd

import sys
sys.path.insert(0, '..')
from src.data import TimeSeries
from src.methods.spc import FControlChart, PatternFunction
from src.eval import (
    mean_time_from_event,
    classification_metrics
)

In [9]:
# Load every dataset directory found under ../data into `data`, keyed by the
# directory name:
#   data[name] = {'train': TimeSeries, 'test': TimeSeries, 'cols': [feature columns]}
LABEL_COL = 'PW_0.5h'
# Columns excluded from the feature set: the timestamp-related columns and the label.
NON_FEATURE_COLS = ['timestamp', LABEL_COL, 'date', 'time']

data = {}
# sorted(): os.listdir returns entries in arbitrary order; sorting keeps the
# iteration order (and therefore the printed log and the key order of `data`)
# stable across machines and re-runs.
# The loop variable is deliberately not called `dir`, which would shadow the
# builtin for the rest of the kernel session.
for dataset_name in sorted(os.listdir('../data')):
    data_dir_path = os.path.join('../data', dataset_name)

    # Skip the excluded dataset, and any stray non-directory entry
    # (e.g. a README or .DS_Store) that cannot contain train.csv / test.csv.
    if dataset_name == 'wrapper-machine' or not os.path.isdir(data_dir_path):
        continue

    print(dataset_name.upper())

    # Timestamp format: 'nitrogen-generator' has no fractional-seconds field,
    # every other dataset does.
    if dataset_name == 'nitrogen-generator':
        tf = '%Y-%m-%d %H:%M:%S'
    else:
        tf = '%Y-%m-%d %H:%M:%S.%f'

    # Data loading
    train_ts = TimeSeries.from_csv(
        'pandas',
        os.path.join(data_dir_path, 'train.csv')
    )
    test_ts = TimeSeries.from_csv(
        'pandas',
        os.path.join(data_dir_path, 'test.csv')
    )

    # Data prep
    train_ts.parse_datetime('timestamp', tf)
    test_ts.parse_datetime('timestamp', tf)

    # NOTE(review): presumably this is what populates `.time_series` with one
    # frame per day -- confirm against src.data.TimeSeries.
    train_ts.split_by_day()
    test_ts.split_by_day()

    # Stack all training frames (minus non-feature columns) so that constant
    # columns can be detected over the whole training split.
    train_features = pd.concat(
        [
            day_frame.drop(columns=NON_FEATURE_COLS)
            for day_frame in train_ts.time_series.values()
        ]
    )

    # Keep only columns whose standard deviation over the training data is non-zero.
    # FEATURE_COLS is rebound on every iteration, so after the loop it holds the
    # columns of the LAST dataset only; use data[name]['cols'] for per-dataset lists.
    FEATURE_COLS = [
        c for c in train_features.columns if np.std(train_features[c]) != 0
    ]
    data[dataset_name] = {
        'train': train_ts,
        'test': test_ts,
        'cols': FEATURE_COLS,
    }

    # Release the stacked copy before loading the next dataset.
    del train_features

BLOOD-REFRIGERATOR
NITROGEN-GENERATOR


In [5]:
# Per-dataset summary: number of entries in each split's `.time_series`,
# total row count, and the sum of the 'PW_0.5h' column, for train and test.
for dataset_name, splits in data.items():
    train_frames = splits['train'].time_series
    test_frames = splits['test'].time_series
    print(dataset_name, len(train_frames), len(test_frames))

    # Total number of rows across all frames of each split.
    train_count = sum(len(frame) for frame in train_frames.values())
    test_count = sum(len(frame) for frame in test_frames.values())

    # Sum of the 'PW_0.5h' column across all frames of each split.
    train_stop = sum(frame['PW_0.5h'].sum() for frame in train_frames.values())
    test_stop = sum(frame['PW_0.5h'].sum() for frame in test_frames.values())

    print(train_count, test_count)
    print(train_stop, test_stop)

blood-refrigerator 25 27
60166 65763
704 642
nitrogen-generator 29 8
40354 11162
810 242


In [10]:
# For each loaded dataset, print the raw bundle, its selected feature columns,
# and describe() statistics of the train and then the test frame.
for bundle in data.values():
    print(bundle)
    print(bundle['cols'])
    for split_name in ('train', 'test'):
        print(bundle[split_name].data.describe())

{'train': <src.data.TimeSeries object at 0x79509ec27b50>, 'test': <src.data.TimeSeries object at 0x79509b971570>, 'cols': ['Product temperature base [°C]', 'Evaportator temperature base [°C]', 'Power supply [V]', 'Condenser temperature base [°C]', 'Instant power consumption [W]', 'Signal [DBM]', 'Door_alert', 'Door_close', 'Door_open', 'Machine_cooling', 'Machine_defrost', 'Machine_pause']}
['Product temperature base [°C]', 'Evaportator temperature base [°C]', 'Power supply [V]', 'Condenser temperature base [°C]', 'Instant power consumption [W]', 'Signal [DBM]', 'Door_alert', 'Door_close', 'Door_open', 'Machine_cooling', 'Machine_defrost', 'Machine_pause']
       Product temperature base [°C]  Evaportator temperature base [°C]  \
count                   6.016600e+04                       6.016600e+04   
mean                    3.779106e-17                      -2.966598e-16   
std                     1.000008e+00                       1.000008e+00   
min                    -1.402715e+0