# Case - 成型機PHM,  案例分類 - 關聯規則

Copyright © 2019 Hsu Shih-Chieh 

利用關聯規則找出特定類型案例的參數變化規則

算法筆記: https://hackmd.io/@JHSU/BJCyWchPr


In [108]:
%load_ext autoreload
%autoreload 2
%matplotlib inline
import warnings, matplotlib, datetime
warnings.filterwarnings("ignore")
import numpy as np
import pandas as pd
import matplotlib as mpl
from matplotlib import pyplot as plt
from datasets import load_moldcase
from utils import set_font_cn
from IPython.display import display
from tqdm.notebook import tqdm
from collections import Counter
from sklearn.preprocessing import MinMaxScaler
from apyori import apriori
set_font_cn()

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


### Load Data

In [2]:
# Names of the cases to load: "case01" through "case05".
clist = [f"case{idx:02d}" for idx in range(1, 6)]


In [3]:
# Load the datasets for every case name listed in clist.
data= load_moldcase(clist)
# Print the description text that ships with the loaded dataset.
print(data.DESCR)

成型機案例數據
---------------------------

**Data Set Characteristics::**

    :控制器數據: 從控制器取出的螺桿位置, 油壓缸壓力, 料管溫度的數據統計值, 總共有44種數據
    
    :設備健康值數據: 透過高維度SPC監控方法(PCA T2/SPE)計算後的設備健康值, 設備運作時實時運算, 並存入sqlite
    
    :spccol_mapping: PLC數據欄位的中英文對照表
    
    :caseinfo: 每一個案例的詳細資訊



**讀取數據Sample Code**

::
    data= load_moldcase()
    c = data.case01_caseinfo
    df_ctr = data.case01_plc
    df_hv = data.case01_hv


**Model Characteristics**
    
    :設備異常監控: PCA T2/SPE
    
    :設備異常分類(方法一): 案例的參數趨勢分析
        - Source Code: data_molding_TrendAnalysis.ipynb
        - 算法筆記: https://hackmd.io/@JHSU/By3uWuwPH
    
    :設備異常分類(方法一): 分析每一個兩個案例的之間的參數變化相似性
        - Source Code: data_molding_DTW.ipynb
        - 算法筆記: https://hackmd.io/@JHSU/HyCnabcPH


專案說明：
這個專案與控制器廠商合作, 從控制器中取出螺桿位置, 油壓缸壓力, 料管溫度這三項特徵數據, 但因為控制器性能限制, 無法將實時數據取出, 退而求其次, 透過控制器本身的SPC監控功能, 取出這三項特徵的44種統計數據(ex: 最大值, 最小值,...), 並透過這些數據進行設備異常監控與設備異常分類





### Pre-process
- 準備關聯分析要用的數據
    - 訓練案例數據 - df_evt

In [4]:
# SPC columns that become the items of the association analysis.
spc_feature=['SPC_7','SPC_6','SPC_40','SPC_9','SPC_19','SPC_20','SPC_24','SPC_28','SPC_32','SPC_33']
# Lookup from an SPC column name to the display name used in the rule text.
spccol_mapping = data.spccol_mapping
# Build one frame of trend labels per training case (Case1~5, mold-cleaning cases).
dflist = []
for cname in clist:
    c = data[f'{cname}_caseinfo']
    print(c.cname)
    df_ctr = data[f'{cname}_plc']
    df_ctr['dt'] = df_ctr['dt'].map(lambda x: datetime.datetime.strptime(x, '%Y-%m-%d %H:%M:%S.%f'))
    df_hv = data[f'{cname}_hv']
    # A shot counts as defective when at least one of the four health flags is raised.
    df_hv['is_defect'] = (df_hv['isdefect_clamp'] + df_hv['isdefect_eject']
                          + df_hv['isdefect_inject'] + df_hv['isdefect_temp']) >= 1
    defectmolds = df_hv.loc[df_hv['is_defect'], 'moldidx'].values
    # Rows of the event window, and the rows whose SPC_0 is not a defective mold index.
    df_evt = df_ctr.loc[c.evt_str:c.evt_end]
    df_ctr_ok = df_ctr[~df_ctr['SPC_0'].isin(defectmolds)]
    # Deviation of every event row from the per-feature median of the non-defective rows.
    deviation = df_evt[spc_feature] - df_ctr_ok[spc_feature].median()
    dev = deviation.values
    # Label the sign of each deviation. The conditions are evaluated in order, so a NaN
    # (not > 0, but != 0) is labelled '變小', exactly as the element-wise lambda did.
    df_evtDev = pd.DataFrame(
        np.select([dev > 0, dev != 0], ['變大', '變小'], default='不變'),
        index=deviation.index,
        columns=deviation.columns,
    )
    # Same membership test as above (isin), applied to the event rows.
    df_evtDev['isDefect'] = np.where(df_evt['SPC_0'].isin(defectmolds), '異常', '正常')
    print(df_evtDev.shape)
    dflist.append(df_evtDev)

df_evt = pd.concat(dflist)
# NOTE(review): this result is discarded because it is not the last expression of the
# cell; wrap it in display(...) if the class balance should be shown.
df_evt['isDefect'].value_counts()

# Prefix every trend label with the feature's display name so that items coming from
# different columns stay distinguishable inside a transaction.
for f in spc_feature:
    df_evt[f] = spccol_mapping[f] +' '+ df_evt[f]



D2 A05 9/10 清模
(11, 11)
D2 A05 9/11 清模
(9, 11)
D2 A05 9/12 清模
(7, 11)
D2 A01 9/9 清模
(8, 11)
D2 A05 9/9 清模
(7, 11)


### 關聯分析
以下為分析步驟
1. 設定規則篩選條件
    - support > 健康值異常數據點出現機率的一半
    - confidence > 0.8
    - lift > 1
2. 只留下Y為“異常”的規則
    分析結果 - rulebase  

In [14]:

# Minimum support: half of the fraction of rows labelled '異常'.
min_support = df_evt[df_evt['isDefect']=='異常'].shape[0]/df_evt.shape[0]/2
# Every row of df_evt is one transaction; every cell is one item.
association_rules=apriori(np.array(df_evt),min_support=min_support, min_confidence=0.8, min_lift=1, max_length=11)
association_results = list(association_rules)

# Collect (antecedent text, confidence, support, lift) for every rule whose consequent
# contains '異常'.
rulebase = []
for rule in association_results:
    for st in rule.ordered_statistics:
        if '異常' not in st.items_add:
            continue
        # items_base is a frozenset, so its iteration order is arbitrary; sorting keeps
        # the rule text identical from one run to the next.
        antecedent = ','.join(sorted(st.items_base))
        rulebase.append((antecedent, np.round(st.confidence, 2), np.round(rule.support, 2), np.round(st.lift, 2)))

# Highest confidence first; the sort is stable, so ties keep their generation order.
rulebase = sorted(rulebase, key=lambda x : x[1], reverse=True)
print("規則數量",len(rulebase))
display(rulebase)


規則數量 31


[('溫度2段 變小,開模峰速 變大,頂出終點 變小', 0.86, 0.29, 1.89),
 ('溫度2段 變小,溫度3段 變大,開模峰速 變大,頂出終點 變小', 0.86, 0.29, 1.89),
 ('溫度2段 變小,射出最前 變小,開模峰速 變大,頂出終點 變小', 0.85, 0.26, 1.87),
 ('溫度2段 變小,射出終點 變小,開模峰速 變大,頂出終點 變小', 0.85, 0.26, 1.87),
 ('溫度2段 變小,開模峰速 變大,頂出終點 變小,射出開始位置 變大', 0.85, 0.26, 1.87),
 ('射出最前 變小,開模峰速 變大,溫度2段 變小,頂出終點 變小,射出終點 變小', 0.85, 0.26, 1.87),
 ('射出最前 變小,開模峰速 變大,溫度2段 變小,頂出終點 變小,溫度3段 變大', 0.85, 0.26, 1.87),
 ('開模峰速 變大,溫度2段 變小,頂出終點 變小,射出終點 變小,溫度3段 變大', 0.85, 0.26, 1.87),
 ('開模峰速 變大,溫度2段 變小,頂出終點 變小,射出開始位置 變大,溫度3段 變大', 0.85, 0.26, 1.87),
 ('射出最前 變小,開模峰速 變大,溫度2段 變小,頂出終點 變小,射出終點 變小,溫度3段 變大', 0.85, 0.26, 1.87),
 ('開模峰速 變大,頂出峰速 變小,射出開始位置 變大', 0.83, 0.24, 1.84),
 ('溫度2段 變小,開模峰速 變大,頂出峰速 變小', 0.83, 0.24, 1.84),
 ('溫度3段 變大,開模峰速 變大,頂出峰速 變小', 0.83, 0.24, 1.84),
 ('射出最前 變小,開模峰速 變大,射出開始位置 變大,頂出終點 變小', 0.83, 0.24, 1.84),
 ('射出終點 變小,開模峰速 變大,頂出終點 變小,射出開始位置 變大', 0.83, 0.24, 1.84),
 ('溫度2段 變小,開模峰速 變大,頂出峰速 變小,射出開始位置 變大', 0.83, 0.24, 1.84),
 ('頂出峰速 變小,溫度3段 變大,開模峰速 變大,射出開始位置 變大', 0.83, 0.24, 1.84),
 ('溫度2段 變小,溫度3段 變大

### Inference
- 推論Case和案例規則庫的吻合程度

Issue:
- 吻合的筆數很少

- TODO: 更改score的算法
    1. 算每一條規則的IOU (交集/聯集)
    2. 平均分數

In [98]:

def trendAnzlysis(cid):
    '''
    Return the dominant trend of every monitored SPC feature for one case.

    See data_molding_TrendAnalysis.ipynb for the analysis this is based on.

    The case is loaded with load_moldcase. The rows whose SPC_0 is a defective
    mold index and that fall inside the case's event window are compared with
    the per-feature median of the non-defective rows. Every deviation is
    labelled '變大' (> 0), '變小' (!= 0) or '不變', and the most frequent label
    of each feature is kept (ties resolve to the first of 不變, 變小, 變大).

    Parameters
    ----------
    cid : str
        Case name, e.g. 'case07'; used to build the '<cid>_caseinfo',
        '<cid>_plc' and '<cid>_hv' keys.

    Returns
    -------
    numpy.ndarray
        Object array with one '<display name> <trend>' string per feature,
        in the order of the feature list below.
    '''
    data_infer= load_moldcase([cid])
    c = data_infer[f'{cid}_caseinfo']
    df_ctr = data_infer[f'{cid}_plc']
    df_hv = data_infer[f'{cid}_hv']
    df_ctr['dt'] = df_ctr['dt'].map(lambda x: datetime.datetime.strptime(x, '%Y-%m-%d %H:%M:%S.%f'))
    # A shot counts as defective when at least one of the four health flags is raised.
    df_hv['is_defect'] = (df_hv['isdefect_clamp'] + df_hv['isdefect_eject']
                          + df_hv['isdefect_inject'] + df_hv['isdefect_temp']) >= 1
    defectmolds = df_hv.loc[df_hv['is_defect'], 'moldidx'].values
    is_defect_row = df_ctr['SPC_0'].isin(defectmolds)
    df_ctr_defect = df_ctr[is_defect_row].loc[c.evt_str:c.evt_end]
    df_ctr_ok = df_ctr[~is_defect_row]
    spc_feature=['SPC_7','SPC_6','SPC_40','SPC_9','SPC_19','SPC_20','SPC_24','SPC_28','SPC_32','SPC_33']
    # Take the name mapping from the dataset loaded here, so the function does not
    # depend on a notebook-level `data` variable being defined.
    spccol_mapping = data_infer.spccol_mapping

    # Deviation of every defective event row from the median of the non-defective rows.
    deviation = df_ctr_defect[spc_feature] - df_ctr_ok[spc_feature].median()
    dev = deviation.values
    # Conditions are evaluated in order, so a NaN (not > 0, but != 0) is labelled
    # '變小', exactly as the former element-wise lambda did.
    labels = pd.DataFrame(
        np.select([dev > 0, dev != 0], ['變大', '變小'], default='不變'),
        index=deviation.index,
        columns=deviation.columns,
    )

    # Count how often each label occurs per feature (rows: features, columns: labels).
    trends = ['不變', '變小', '變大']
    counts = pd.DataFrame({t: (labels == t).sum() for t in trends}, columns=trends)

    # idxmax returns the column *label* of the largest count. np.argmax on a Series
    # returns an integer position on pandas >= 1.0, which cannot be joined into the
    # feature string.
    dominant = counts.idxmax(axis=1)
    case_feature = np.array(
        [' '.join([spccol_mapping[col], trend]) for col, trend in dominant.items()],
        dtype=object,
    )
    return case_feature





In [107]:
#case_feature=['溫度3段 變大','射出開始位置 變大','射出終點 變小','溫度2段 變小','頂出終點 變小','開模峰速 變大','射出最前 變小']
case_feature = trendAnzlysis('case07')

# A rule matches when every item of its antecedent (comma-separated text in r[0])
# is one of the case's features; the score is the number of matching rules.
# NOTE(review): the slice [0:1] limits the check to the first rule of rulebase --
# confirm whether the whole rule base was meant to be scored.
score = sum(
    1
    for r in rulebase[0:1]
    if all(e in case_feature for e in r[0].split(','))
)

print('score:',score)

score: 0
