In [17]:
import os

import numpy as np
import pandas as pd

import sys
sys.path.insert(0, '../')
from src.data import (
    TimeSeries,
    parse_datetime
)
from src.methods import spc

In [18]:
# Presumably the random seed for this notebook.
# NOTE(review): SEED is not referenced by any cell visible here — confirm it is
# consumed further down, or remove it.
SEED = 3141

In [19]:
# Load both splits of the blood-refrigerator data set into one dict keyed by
# split name ('train' first, then 'test'); both files are read the same way.
br = {
    split: TimeSeries.from_csv('pandas', path, sep=',')
    for split, path in (
        ('train', '../data/blood-refrigerator/train.csv'),
        ('test', '../data/blood-refrigerator/test.csv'),
    )
}

In [20]:
# Sanity check: show the shape of every loaded split.
for split_name, series in br.items():
    print(split_name, series.shape)

train (60166, 14)
test (65763, 14)


In [21]:
# Parse the 'timestamp' column of every split (the split name is not needed here).
for series in br.values():
    series.parse_datetime('timestamp')

In [22]:
# Split every data set into per-day series and tally how many days there are
# across all splits.
days = 0
for series in br.values():
    series.split_by_day()
    n_days_in_split = len(series.time_series)
    days = days + n_days_in_split
days

52

In [23]:
# For every day (train days first, then test days) record how many rows have
# 'PW_0.5h' == 1 and how many rows the day has in total.
counts, totals = [], []
for split_name, series in br.items():
    for day, frame in series.time_series.items():
        flagged_rows = int((frame['PW_0.5h'] == 1).sum())
        counts.append(flagged_rows)
        totals.append(len(frame))
counts[:10], totals[:10]

([0, 0, 0, 0, 0, 0, 0, 0, 0, 55],
 [106, 2530, 2531, 2530, 2531, 2530, 2530, 2531, 2530, 2531])

In [46]:
# Positional index -> day key of the train split (kept so later cells can still
# look days up by position).
id2day = {i:d for i,d in enumerate(br['train'].time_series.keys())}

# Train only on days that contain no row with 'PW_0.5h' == 1.
# The selection is computed directly from each day's frame instead of by
# position in `counts`: that list is built in an earlier cell over train AND
# test days, so matching it to train days by index silently breaks if that cell
# is stale or the iteration order changes.
train_days = [
    day
    for day, frame in br['train'].time_series.items()
    if not (frame['PW_0.5h'] == 1).any()
]

# Target: the 'PW_0.5h' column of every selected day, stacked in day order.
y = pd.concat([
    br['train'].time_series[k]['PW_0.5h'] for k in train_days
])
# Features: every remaining column except the timestamp-derived ones and the target.
X = pd.concat([
    br['train'].time_series[k].drop(
        columns=['timestamp','PW_0.5h','date','time']
    ) for k in train_days
])

# Drop std=0 variables (columns that are constant over the selected days).
X = X[
    [c for c in X.columns if np.std(X[c]) != 0]
]

# Remember the surviving feature columns so the test data can be projected
# onto exactly the same variables.
keep_cols = X.columns

print(X.shape)
np.mean(X, axis=0)

(43123, 11)


Product temperature base [°C]       -0.214097
Evaportator temperature base [°C]   -0.118012
Power supply [V]                    -0.227221
Condenser temperature base [°C]     -0.060278
Instant power consumption [W]        0.158707
Signal [DBM]                        -0.215734
Door_close                           0.999977
Door_open                            0.000023
Machine_cooling                      0.353709
Machine_defrost                      0.001623
Machine_pause                        0.644668
dtype: float64

In [47]:
def exceeds_5_breaches(values:np.ndarray, ucl, min_breaches:int=5) -> bool:
    """Tell whether enough observations lie strictly above a control limit.

    Parameters
    ----------
    values : array-like
        Observations to inspect; anything ``np.asarray`` accepts.
    ucl : scalar or array-like
        Upper control limit each observation is compared against
        (must broadcast against ``values``).
    min_breaches : int, default 5
        Minimum number of observations above ``ucl`` required to return True.
        The default keeps the original "at least 5" rule.

    Returns
    -------
    bool
        True when at least ``min_breaches`` entries of ``values`` are greater
        than ``ucl``, otherwise False. Empty input yields False for any
        positive ``min_breaches``.
    """
    breach_count = np.count_nonzero(np.asarray(values) > ucl)
    # Cast so callers always receive a plain Python bool, as before.
    return bool(breach_count >= min_breaches)

In [48]:
# Build the control chart from the project-local `spc` module.
chart = spc.FControlChart()

# Fit the chart on the training feature matrix; `chart.ucl` is read right after
# this call, so it must run before the pattern below is created.
chart.determine_parameters(X.values)

# Wrap `exceeds_5_breaches` as a chart pattern, passing the chart's `ucl` as its
# keyword argument.
# NOTE(review): 15 is presumably the window length the function is applied to
# ("5 breaches per 15 points") — confirm against spc.PatternFunction.
exceeds_5per15 = spc.PatternFunction(
    exceeds_5_breaches,
    15,
    {'ucl': chart.ucl}
)

# Register the pattern under a descriptive key.
# NOTE(review): the "0.05" in the key presumably refers to the significance
# level behind `chart.ucl` — nothing in this cell sets it; confirm.
chart.add_patterns({'5per15at0.05':exceeds_5per15})

In [49]:
# Run the registered chart patterns on the first day of the test split, using
# the same feature columns that were kept for training.
# NOTE(review): the saved output of this cell is a KeyboardInterrupt, i.e. the
# run was aborted before `check_patterns` returned.
test_days = br['test'].time_series
eval_keys = list(test_days.keys())
k = eval_keys[0]

eval_frame = test_days[k]
test_vals = eval_frame[keep_cols].values
test_y = eval_frame['PW_0.5h']

hits = chart.check_patterns(test_vals)
hits

KeyboardInterrupt: 

In [36]:
# Sum of the 'PW_0.5h' column for the evaluated test day (0 in the saved output).
# NOTE(review): this cell's execution count (36) is lower than the preceding
# cell's (49), so the saved output may come from an earlier kernel state.
test_y.sum()

0