In [1]:
import numpy as np
import pandas as pd
import datetime
import os
import glob
import re
import fitz

# 2.timedataの作成

## 2.1 lotlistの読み込み
FLAG[2]を手入力済みのlotlistの読み込み・加工

In [2]:
def process_lotlist_data(lotlist_dir, LOT):
    """Load the lot list CSV file(s) and prepare the rows of one lot.

    Parameters
    ----------
    lotlist_dir : str
        Path or glob pattern of the lot list CSV file(s), read with cp932
        encoding. Every match is concatenated into one frame.
    LOT : str
        Lot number to keep (compared with the ``LotNo`` column, which is the
        renamed ``lotno`` column of the CSV).

    Returns
    -------
    tuple of pandas.DataFrame
        ``(lotlist_LOT, lotlist_1, lotlist_2)``:

        * ``lotlist_LOT`` - rows of the lot with ``FLAG`` 0 or 2, sorted by
          ``DATETIME_S``. ``No`` is a running number (as a string) over the
          ``FLAG == 0`` rows and ``'0'`` for ``FLAG == 2`` rows;
          ``ナンバリング`` is ``LotNo + '-' + No``.
        * ``lotlist_1`` - rows of the lot with ``FLAG == 1``.
        * ``lotlist_2`` - rows of the lot with ``FLAG == 2``.

        All three carry ``uid``, a 1-based serial number assigned in
        ``DATETIME_S`` order over every row of the lot.

    Raises
    ------
    ValueError
        If no file matches ``lotlist_dir``.
    """
    print("2.1 is started")
    csv_paths = glob.glob(lotlist_dir)
    if not csv_paths:
        raise ValueError(f"No lotlist CSV file matches: {lotlist_dir}")

    # Read every matching CSV and stack them into a single frame.
    lotlist = pd.concat(
        [pd.read_csv(path, encoding="cp932") for path in csv_paths],
        axis=0,
        ignore_index=True,
    ).rename(columns={'lotno': 'LotNo'})

    # Convert the time columns to datetime.
    lotlist['DATETIME_S'] = pd.to_datetime(lotlist['DATETIME_S'])
    lotlist['DATETIME_E'] = pd.to_datetime(lotlist['DATETIME_E'])

    # .copy() gives an independent frame, so the column assignments below do
    # not operate on a slice of ``lotlist`` (avoids SettingWithCopyWarning).
    lot_rows = lotlist.loc[lotlist['LotNo'] == LOT].sort_values('DATETIME_S').copy()
    lot_rows['uid'] = pd.RangeIndex(start=1, stop=len(lot_rows.index) + 1, step=1)
    # The saved index column only exists when the CSV was written with its
    # index; tolerate files that do not have it.
    lot_rows = lot_rows.drop(columns='Unnamed: 0', errors='ignore')
    lotlist_1 = lot_rows.loc[lot_rows['FLAG'] == 1].copy()
    lotlist_2 = lot_rows.loc[lot_rows['FLAG'] == 2].copy()

    # Number only the FLAG == 0 rows: 1, 2, 3, ... within the lot.
    numbered = lot_rows.loc[lot_rows['FLAG'] == 0].copy()
    numbered['No'] = (numbered.groupby(['LotNo']).cumcount() + 1).astype(str)

    # FLAG == 2 rows are added back; they have no running number and get '0'.
    lotlist_LOT = pd.concat([numbered, lotlist_2], axis=0).sort_values('DATETIME_S')
    lotlist_LOT['No'] = lotlist_LOT['No'].fillna('0').astype(str)
    lotlist_LOT['ナンバリング'] = lotlist_LOT['LotNo'] + '-' + lotlist_LOT['No']

    print("2.1 is finished")
    return lotlist_LOT, lotlist_1, lotlist_2

## 2.2 ドレッシング記録の読み込み

In [3]:
def read_dre_excel(dre_dir, LOT):
    """Read the dressing record workbook and return the rows of one lot.

    Parameters
    ----------
    dre_dir : str
        Path of the Excel workbook. The first row is skipped when reading.
    LOT : str
        Value to match against the ``s1_作業指図書no`` column.

    Returns
    -------
    pandas.DataFrame
        The listed ``s1_*`` columns that exist in the workbook, with
        ``s1_開始dt`` / ``s1_終了dt`` converted to datetime, restricted to the
        rows of ``LOT`` and re-indexed from 0.
    """
    print("2.2 is started")

    wanted_columns = [
        "s1_作業指図書no", "s1_品名", "s1_サイズ", "s1_オフセット1", "s1_オフセット2",
        "s1_ドレスサイクルs", "s1_ドレスサイクルe", "s1_開始dt", "s1_終了dt",
        "s1_開始日付手入力", "s1_終了日付手入力", "s1_開始時間手入力", "s1_終了時間手入力",
    ]
    records = pd.read_excel(dre_dir, skiprows=1).filter(items=wanted_columns)

    # Both timestamp columns are parsed the same way.
    for dt_column in ("s1_開始dt", "s1_終了dt"):
        records[dt_column] = pd.to_datetime(records[dt_column])

    belongs_to_lot = records['s1_作業指図書no'] == LOT
    lot_records = records.loc[belongs_to_lot].reset_index(drop=True)

    print("2.2 is finished")
    return lot_records

## 2.3 3次元測定データの読み込み
三次元測定データPDFからテーパ角度/直径情報を抽出

In [4]:
def extract_data_from_pdfs(PDF_dir, LOT=None):
    """Collect measurement values from the inspection PDFs under ``PDF_dir``.

    Every ``.pdf`` file found by walking ``PDF_dir`` is opened and its page
    text is split into lines. Values are picked at fixed line offsets after
    these label lines:

    * ``円錐角度_円錐2``      -> measured value (+6) and error (+4)
    * ``直径_円錐追加出力1``  -> measured value (+6) and error (+4)
    * ``位置度1``             -> measured value (+5)
    * ``真直度1``             -> measured value (+3)

    A row is emitted each time a ``真直度1`` line is met, using the values
    collected so far for that file.

    Parameters
    ----------
    PDF_dir : str
        Root directory that is searched recursively.
    LOT : str, optional
        Lot number to keep. When omitted, the module-level ``LOT`` variable is
        used (this is what earlier versions of the function always did).

    Returns
    -------
    pandas.DataFrame
        Columns ``ファイル名``, the six measurement columns, ``LotNo``
        (characters ``[-15:-4]`` of the file name), ``No`` (taken from the
        text after ``No.`` in the file name) and ``ナンバリング``
        (``LotNo + '-' + No``), restricted to rows whose ``LotNo`` equals
        ``LOT``.

    Raises
    ------
    ValueError
        If ``LOT`` is neither passed nor defined at module level.
    """
    print("2.3 is started")
    if LOT is None:
        # Backward compatibility with callers that rely on the global LOT.
        LOT = globals().get("LOT")
        if LOT is None:
            raise ValueError("LOT was not given and no module-level LOT is defined")

    columns = ["フォルダー名", "ファイル名", "円錐角度_測定値", "円錐角度_誤差値",
               "直径_測定値", "直径_誤差値", "位置_測定値", "真直_測定値"]
    rows = []
    for root, dirs, files in os.walk(PDF_dir):
        for f in files:
            # Only PDF files are inspection reports; the LotNo slice below also
            # assumes a four-character ".pdf" suffix.
            if not f.lower().endswith(".pdf"):
                continue
            doc = fitz.open(os.path.join(root, f))
            try:
                # Reset per file so a value missing in this file is not
                # silently taken from the previously processed file.
                data1 = data2 = data3 = data4 = data5 = data6 = None
                for page in range(len(doc)):
                    Text = doc[page].get_text().split('\n')
                    for line in Text:
                        if line == "円錐角度_円錐2":
                            Ti2 = Text.index("円錐角度_円錐2")
                            data1 = Text[Ti2 + 6]
                            data2 = Text[Ti2 + 4]
                        elif line == "直径_円錐追加出力1":
                            Ti3 = Text.index("直径_円錐追加出力1")
                            data3 = Text[Ti3 + 6]
                            data4 = Text[Ti3 + 4]
                        elif line == "位置度1":
                            Ti4 = Text.index("位置度1")
                            data5 = Text[Ti4 + 5]
                        elif line == "真直度1":
                            Ti5 = Text.index("真直度1")
                            data6 = Text[Ti5 + 3]
                            rows.append([root, f, data1, data2, data3, data4, data5, data6])
            finally:
                # Release the file handle even if text extraction fails.
                doc.close()

    # Build the frame once instead of concatenating inside the loop.
    df_inspection = pd.DataFrame(rows, columns=columns)
    df_inspection['LotNo'] = df_inspection['ファイル名'].str[-15:-4]

    dp1 = df_inspection.loc[df_inspection["LotNo"] == LOT].copy()
    dp1['memo'] = ''
    file_names = dp1['ファイル名'].astype(str)
    # Strip everything up to and including "No." so that memo starts with the
    # product number; the trailing 16 characters are then cut off to get No.
    dp1.loc[file_names.str.contains('No.'), 'memo'] = file_names.str.replace(r'(.*?)No.', '', regex=True)
    dp1['No'] = dp1['memo'].str[:-16]

    df_inspection = dp1.reset_index(drop=True)
    df_inspection = df_inspection[['ファイル名', '円錐角度_測定値', '円錐角度_誤差値', "直径_測定値",
                                   "直径_誤差値", "位置_測定値", "真直_測定値", 'LotNo', 'No']].copy()
    df_inspection["ナンバリング"] = df_inspection['LotNo'] + "-" + df_inspection['No']

    print("2.3 is finished")
    return df_inspection

## 2.4 lotlistと3次元測定データの結合

In [5]:
def _split_dms(text):
    """Split a string such as ``-0°01'23"`` into raw (degree, minute, second) substrings."""
    parts = re.split('[°"]', text)
    degree, minute_second = parts[0], parts[1]
    pieces = re.split("[']", minute_second)
    return degree, pieces[0], pieces[1]


def _error_to_degrees(text):
    """Convert a signed degree/minute/second error string to decimal degrees."""
    degree, minute, second = _split_dms(text)
    # The sign is read from the string because int('-0') would lose it.
    whole = re.search(r'\d+', degree)
    magnitude = (int(whole.group()) if whole else 0) + int(minute) / 60 + int(second) / 3600
    return -magnitude if '-' in degree else magnitude


def _measurement_to_degrees(text):
    """Convert a degree/minute/second measurement string to unsigned decimal degrees."""
    degree, minute, second = _split_dms(text)
    return abs(int(degree)) + int(minute) / 60 + int(second) / 3600


def lotlist_sanjigensoku(df_inspection, lotlist_LOT, lotlist_1=None, lotlist_2=None):
    """Join the 3D measurement data with the lot list and build ``timedata``.

    Parameters
    ----------
    df_inspection : pandas.DataFrame
        Measurement rows; joined on ``No``, ``LotNo`` and ``ナンバリング``.
    lotlist_LOT : pandas.DataFrame
        Lot list rows providing ``uid``, ``FLAG``, ``DATETIME_S``,
        ``DATETIME_E`` and the ``2:*`` columns.
    lotlist_1, lotlist_2 : pandas.DataFrame, optional
        Lot list rows with ``FLAG`` 1 and 2 that are appended to the result.
        When omitted, module-level variables of the same names are used if
        they exist (earlier versions always read those globals); if they do
        not exist, nothing is appended.

    Returns
    -------
    pandas.DataFrame
        One row per measurement plus the appended FLAG 1/2 rows, sorted by
        ``uid``. Adds ``誤差値(度)`` and ``測定値(度)`` (decimal degrees) and
        ``加工製品数`` (count of ``FLAG == 0`` rows since the last FLAG 1/2
        row); the diameter/position/straightness columns are converted to float.

    Notes
    -----
    ``誤差値(度)`` now includes the whole-degree part and is negative whenever
    the degree field contains ``-``. Results are unchanged for errors below
    one degree, which were the only ones handled correctly before.
    """
    print("2.4 is started")
    if lotlist_1 is None:
        lotlist_1 = globals().get("lotlist_1")
    if lotlist_2 is None:
        lotlist_2 = globals().get("lotlist_2")

    # Left join: keep every measurement row, attach lot list info when found.
    timedata = pd.merge(df_inspection, lotlist_LOT, how='left', on=['No', 'LotNo', 'ナンバリング'])
    timedata = timedata[['uid', 'ファイル名', 'LotNo', 'No', 'ナンバリング', 'FLAG',
                         '円錐角度_測定値', '円錐角度_誤差値', "直径_測定値", "直径_誤差値",
                         "位置_測定値", "真直_測定値", 'DATETIME_S', 'DATETIME_E',
                         '2:var818', '2:x_axis', '2:z_axis', '2:c_axis']].reset_index(drop=True)

    # Parse the angle strings in one pass each instead of growing helper
    # frames row by row.
    timedata = timedata.assign(**{
        '誤差値(度)': [_error_to_degrees(text) for text in timedata['円錐角度_誤差値']],
        '測定値(度)': [_measurement_to_degrees(text) for text in timedata['円錐角度_測定値']],
    })

    timedata = timedata[['uid', 'ファイル名', 'LotNo', 'No', 'ナンバリング', 'FLAG',
                         'DATETIME_S', 'DATETIME_E', '円錐角度_測定値', '円錐角度_誤差値',
                         '誤差値(度)', '測定値(度)', "直径_測定値", "直径_誤差値",
                         "位置_測定値", "真直_測定値",
                         '2:var818', '2:x_axis', '2:z_axis', '2:c_axis']]
    timedata = timedata.sort_values("DATETIME_S")
    extra_rows = [frame for frame in (lotlist_1, lotlist_2) if frame is not None]
    timedata = pd.concat([timedata] + extra_rows, axis=0)
    timedata = timedata.sort_values("uid").reset_index(drop=True)

    # Running count of FLAG == 0 rows; a FLAG 1 or 2 row resets it to 0.
    # Rows with any other FLAG value (e.g. missing) keep 0 without resetting.
    counts = []
    count = 0
    for flag in timedata['FLAG']:
        if flag == 1 or flag == 2:
            count = 0
            counts.append(0)
        elif flag == 0:
            count += 1
            counts.append(count)
        else:
            counts.append(0)
    timedata['加工製品数'] = counts

    # The values extracted from the PDFs are text; trim and convert to float.
    for column in ('直径_測定値', '直径_誤差値', '位置_測定値', '真直_測定値'):
        timedata[column] = timedata[column].str.strip().astype(float)

    # CSV export of timedata is currently disabled in this notebook, so no
    # output path is built here.

    print("2.4 is finished")
    print("timedata is complete")
    return timedata


# 3. eq-oneの作成

## 3.1 eq-dataの読み込み

In [6]:
def eq_data(eq_dir):
    """Read every equipment log under ``eq_dir`` and time-stamp each sample.

    Each file is read as cp932 CSV with its first two lines skipped. The
    sample time ``lt`` is derived from the file's last-modified time
    (converted with ``datetime.fromtimestamp``, i.e. local wall-clock time),
    minus 299.995 seconds, plus the sample's ``Time`` column (milliseconds),
    truncated to whole seconds.

    NOTE(review): 299.995 s is presumably the recording span of one file, so
    that the last sample coincides with the file's modification time -
    confirm against the logger settings.

    Parameters
    ----------
    eq_dir : str
        Root directory that is searched recursively; every file found is read.

    Returns
    -------
    pandas.DataFrame
        Columns ``lt`` (datetime) followed by the five ``*_TCMD(%)`` columns
        and ``SPEED(1/min)`` as float, for all files stacked in the order
        ``os.walk`` returned them.

    Raises
    ------
    FileNotFoundError
        If ``eq_dir`` contains no files.
    """
    print("3.1 is started")
    # Collect the path of every file under the given folder.
    eq_paths = [
        os.path.join(root, name)
        for root, dirs, files in os.walk(eq_dir)
        for name in files
    ]
    if not eq_paths:
        raise FileNotFoundError(f"No equipment data files found under: {eq_dir}")

    column_names = {
        'Time': 'Time(msec)', 'POSF': 'POSF(mm)', 'POSF.1': 'POSF.1(mm)',
        'TCMD': 'X軸_TCMD(%)', 'TCMD.1': 'Z軸_TCMD(%)', 'TCMD.2': 'B軸_TCMD(%)',
        'TCMD.3': 'C軸_TCMD(%)', 'TCMD.4': 'スピンドル_TCMD(%)', 'SPEED': 'SPEED(1/min)',
    }

    frames = []
    for path in eq_paths:
        # Last-modified time as a naive local datetime, cut to whole seconds.
        # The value is the same for every row of the file, so it is computed
        # once here instead of once per row.
        modified = datetime.datetime.fromtimestamp(os.path.getmtime(path)).replace(microsecond=0)
        lt_unix = int(pd.Timestamp(modified).timestamp())

        # Read and tidy the file.
        d = pd.read_csv(path, skiprows=2, encoding="cp932").rename(columns=column_names).dropna()
        elapsed_sec = d['Time(msec)'].astype(float).astype(int) / 1000
        lt_seconds = ((lt_unix - 299.995) + elapsed_sec).astype(int)
        frames.append(d.assign(lt=pd.to_datetime(lt_seconds, unit='s')))

    # Concatenate once; concatenating inside the loop copies all previously
    # read rows again for every file.
    combined = pd.concat(frames, axis=0, ignore_index=True)
    value_columns = ['X軸_TCMD(%)', 'Z軸_TCMD(%)', 'B軸_TCMD(%)', 'C軸_TCMD(%)',
                     'スピンドル_TCMD(%)', 'SPEED(1/min)']
    combined = combined[['lt'] + value_columns].astype({name: float for name in value_columns})

    print("3.1 is finished")
    return combined


## 3.2 条件抽出

In [7]:
def eq_selected(eq):
    """Detect machining cycles in the equipment log.

    Every 60th row of ``eq`` is kept and ordered by ``lt``. A row ``i`` of
    that sample marks a machining start when all of these hold:

    * ``10 < SPEED[i] <= 2500``
    * ``SPEED[i + 3] >= 3000``
    * the mean SPEED over rows ``i .. i + 1300`` is above 3300
    * ``C軸_TCMD(%)[i + 1000] < 0``

    Only rows for which row ``i + 1000`` exists are examined. The end time is
    the start time plus 9 min 55 s when the mean SPEED over rows
    ``i + 1500 .. i + 1900`` is at least 3000, otherwise plus 6 min 58 s (the
    original author's notes label these two cases "910" and "1214").

    Parameters
    ----------
    eq : pandas.DataFrame
        Needs the columns ``lt``, ``SPEED(1/min)`` and ``C軸_TCMD(%)``.

    Returns
    -------
    pandas.DataFrame
        Columns ``加工開始時間``, ``加工終了時間`` and ``経過時間``; one row
        per distinct start time, in order of first detection.
    """
    print("3.2 is started")
    # Thin the log out at a fixed interval.
    step = 60
    sampled = eq.loc[::step].reset_index(drop=True)
    # The rows are not guaranteed to be chronological, so sort them. ``lt``
    # only has one-second resolution; a stable sort keeps samples that share
    # a second in their recorded order, which the look-ahead tests rely on.
    sampled = sampled.sort_values('lt', kind='mergesort').reset_index(drop=True)

    speed = sampled['SPEED(1/min)']
    forward_window = pd.api.indexers.FixedForwardWindowIndexer
    # fh_s: mean SPEED of rows i .. i+1300 (original note: roughly the first
    # six minutes). sh_s: mean SPEED of rows i+1500 .. i+1900 (original note:
    # roughly one minute in the second half). Windows running past the end
    # use the rows that exist; a window with no rows at all counts as 0.
    fh_s = speed.rolling(forward_window(window_size=1301), min_periods=1).mean().fillna(0)
    sh_s = (
        speed.rolling(forward_window(window_size=401), min_periods=1)
        .mean()
        .shift(-1500)
        .fillna(0)
    )

    eqtime_list_s = []
    eqtime_list_g = []
    # zip stops at the shortest input, i.e. at the last row that still has a
    # C-axis value 1000 rows ahead.
    for time, speed_now, speed_ahead, fh_ave, sh_ave, c_tcmd in zip(
            sampled['lt'], speed, speed.iloc[3:], fh_s, sh_s,
            sampled['C軸_TCMD(%)'].iloc[1000:]):
        if (speed_now > 10.0) and (speed_now <= 2500.0) and (speed_ahead >= 3000.0) \
                and (fh_ave > 3300.0) and (c_tcmd < 0):
            eqtime_list_s.append(time)  # machining start
            if sh_ave >= 3000.0:
                eqtime_list_g.append(time + datetime.timedelta(minutes=9, seconds=55))
            else:
                eqtime_list_g.append(time + datetime.timedelta(minutes=6, seconds=58))

    # Keep the first detection of each start time together with its own end
    # time, so an end time can never be attached to a different start.
    first_end_by_start = {}
    for start, end in zip(eqtime_list_s, eqtime_list_g):
        first_end_by_start.setdefault(start, end)

    eq_s = pd.DataFrame(list(first_end_by_start.keys()), columns=['加工開始時間'])
    eq_g = pd.DataFrame(list(first_end_by_start.values()), columns=['加工終了時間'])
    eq_t = pd.DataFrame(
        [end - start for start, end in first_end_by_start.items()],
        columns=['経過時間'],
    )
    eq_processing_time = pd.concat([eq_s, eq_g, eq_t], axis=1)

    print("3.2 is finished")

    return eq_processing_time


## 3.3 timedataと結合

In [8]:
def process_data(timedata, eq_processing_time, LOT):
    """Attach the machining cycles detected in the equipment log to ``timedata``.

    Each ``FLAG == 0`` row of the lot is paired with the first not-yet-used
    equipment start time lying strictly within 10 minutes before or after its
    ``DATETIME_S``. ``FLAG`` 1/2 rows whose ``DATETIME_S`` falls between the
    first ``DATETIME_S`` and the last ``DATETIME_E`` of the ``FLAG == 0`` rows
    are appended without equipment times.

    Parameters
    ----------
    timedata : pandas.DataFrame
        Needs ``LotNo``, ``FLAG``, ``DATETIME_S`` and ``DATETIME_E``.
    eq_processing_time : pandas.DataFrame
        Needs ``加工開始時間``, ``加工終了時間`` and ``経過時間``.
    LOT : str
        Lot number whose ``FLAG == 0`` rows are matched.

    Returns
    -------
    pandas.DataFrame
        Columns ``LotNo``, ``FLAG``, ``DATETIME_S``, ``DATETIME_E``,
        ``eq_加工開始時間``, ``eq_加工終了時間`` and ``経過時間``, sorted by
        ``DATETIME_S``.

    Raises
    ------
    ValueError
        If ``timedata`` has no ``FLAG == 0`` row for ``LOT``.
    """
    print("3.3 is started")
    # Rows of the lot that represent machined products.
    timedata_0 = timedata.loc[(timedata["FLAG"] == 0) & (timedata["LotNo"] == LOT)]
    if timedata_0.empty:
        raise ValueError(f"timedata has no FLAG == 0 rows for lot {LOT!r}")

    # Period covered by those rows.
    START = timedata_0.iloc[0]["DATETIME_S"]
    FINISH = timedata_0.iloc[-1]["DATETIME_E"]

    timedata_12 = timedata.loc[(timedata["FLAG"] == 1) | (timedata["FLAG"] == 2)]
    timedata_12 = timedata_12.loc[(timedata_12['DATETIME_S'] >= START) & (timedata_12['DATETIME_S'] <= FINISH)]

    # time_start : machining start times taken from timedata
    # eq_start   : machining start times taken from the equipment data
    tolerance = datetime.timedelta(minutes=10)
    candidates = list(eq_processing_time['加工開始時間'])
    time_start = []
    eq_start = []
    used_time = set()  # sets make the "already paired" checks constant time
    used_eq = set()
    for a in timedata_0['DATETIME_S']:
        if a in used_time:
            continue
        for b in candidates:
            if b in used_eq:
                continue
            if (b < a + tolerance) and (b > a - tolerance):
                time_start.append(a)
                eq_start.append(b)
                used_time.add(a)
                used_eq.add(b)
                break  # each timedata start is paired at most once

    df_time_start = pd.DataFrame(time_start, columns=['DATETIME_S'])
    df_eq_start = pd.DataFrame(eq_start, columns=['加工開始時間'])
    df_comb_start = pd.concat([df_time_start, df_eq_start], axis=1)

    # df_comb : timedata joined with the equipment cycle times
    df_comb = pd.merge(df_comb_start, timedata_0, how="outer", on=["DATETIME_S"])
    df_comb["加工開始時間"] = pd.to_datetime(df_comb["加工開始時間"])
    df_comb = pd.merge(df_comb, eq_processing_time, how="left", on=["加工開始時間"])

    df_comb = pd.concat([df_comb, timedata_12], axis=0, ignore_index=True)
    df_comb = df_comb.rename(columns={'加工開始時間': 'eq_加工開始時間', '加工終了時間': 'eq_加工終了時間'})
    df_comb = df_comb[['LotNo', 'FLAG', 'DATETIME_S', 'DATETIME_E', 'eq_加工開始時間', 'eq_加工終了時間', '経過時間']]
    df_comb = df_comb.sort_values(['DATETIME_S']).reset_index(drop=True)

    print("3.3 is finished")
    return df_comb

## 3.4 CSV出力

In [9]:
def export_csv_files(df_comb, eq, out_dir, LOT):
    """Write one CSV of equipment samples per machining cycle.

    For every row of ``df_comb`` with both an equipment start and end time,
    the rows of ``eq`` whose ``lt`` lies in that closed interval are written
    to ``<out_dir>/<YYYYmmdd_HHMM> <LOT>-<NNN>.csv`` (cp932, no index).
    ``NNN`` is a three-digit serial number counting only the files actually
    written; cycles without any sample are skipped.

    Parameters
    ----------
    df_comb : pandas.DataFrame
        Needs datetime columns ``eq_加工開始時間`` and ``eq_加工終了時間``.
        A ``file`` column holding the formatted start time is added to this
        frame in place (kept from the earlier implementation).
    eq : pandas.DataFrame
        Equipment samples with a ``lt`` datetime column.
    out_dir : str
        Existing output directory.
    LOT : str
        Lot number used in the file names.
    """
    print("3.4 is started")
    # Formatted start time, used as the leading part of each file name.
    df_comb["file"] = df_comb['eq_加工開始時間'].dt.strftime('%Y%m%d_%H%M')

    i = 1
    # The label travels with its own row. Looking it up by the written-file
    # counter would give every later file the wrong time stamp as soon as one
    # cycle has no samples.
    for a, b, label in zip(df_comb['eq_加工開始時間'], df_comb['eq_加工終了時間'], df_comb["file"]):
        if pd.isna(a) or pd.isna(b):
            continue
        df_output = eq.loc[(eq['lt'] >= a) & (eq['lt'] <= b)]
        if df_output.empty:
            continue
        out_file = os.path.join(out_dir, label + " " + LOT + '-' + str(i).zfill(3) + ".csv")
        df_output.to_csv(out_file, index=False, encoding="cp932")
        i += 1
    print("3.4 is finished")
    print('Export finished')

# Main_File

In [10]:
if __name__ == '__main__':
    # Driver cell: set the input/output locations and the lot to process, then
    # run the timedata pipeline (2.x) followed by the eq_one pipeline (3.x).
    # The names assigned here are module-level; functions defined earlier in
    # this notebook may read some of them as globals, so keep them unchanged.
    
    ## Input paths
    # Lot list CSV in which the FLAG == 2 rows have already been entered by hand
    lotlist_dir = r"C:\Users\YAMAJI\Desktop\output\1\lotlist.csv"
    # Dressing record workbook
    dre_dir = r"C:\Users\YAMAJI\OneDrive\新入社員研修\メディカル\05_グライディングセンタ\ドレッシング記録20230216.xlsx"
    # Directory of the inspection data (3D measurement result PDFs)
    PDF_dir = r'C:\Users\YAMAJI\OneDrive\新入社員研修\メディカル\05_グライディングセンタ\python_output\Pdf'
    # Directory of the equipment (eq) data
    eq_dir = r"C:\Users\YAMAJI\Downloads\2300002-000_ver2\\"
    
    ## Output paths
    # Destination of the eq_one CSV files
    out_dir = r'C:\Users\YAMAJI\Desktop\output\2\\'
    # Destination of timedata
    out_dir1 = r'C:\Users\YAMAJI\Desktop\output\3\\'
    
    ## Product information
    LOT_PDF ='1214AZUL 32+0 2300002-000'
    LOT = '2300002-000'
    
    ## 2. Build timedata
    lotlist_LOT,lotlist_1,lotlist_2= process_lotlist_data(lotlist_dir, LOT)           # 2.1 read the lot list
    dreLOT = read_dre_excel(dre_dir, LOT)                                             # 2.2 read the dressing record
    df_inspection = extract_data_from_pdfs(PDF_dir)                                   # 2.3 read the 3D measurement data
    timedata = lotlist_sanjigensoku(df_inspection,lotlist_LOT)                        # 2.4 join lot list and 3D measurement data
    ## 3. Build eq_one
    # NOTE(review): the next line rebinds the name eq_data from the function to
    # its result, so running this cell a second time fails until the cell that
    # defines eq_data() is run again. A later cell displays eq_data, so the
    # name is left as is here.
    eq_data = eq_data(eq_dir)                                                         # 3.1 read the eq data
    eq_processing_time = eq_selected(eq_data)                                         # 3.2 extract machining cycles
    df_comb = process_data(timedata, eq_processing_time, LOT)                         # 3.3 join with timedata
    export_csv_files(df_comb, eq_data, out_dir, LOT)                                  # 3.4 write the CSV files

2.1 is started
2.1 is finished
2.2 is started
2.2 is finished
2.3 is started
2.3 is finished
2.4 is started
2.4 is finished
timedata is complete
3.1 is started
3.1 is finished
3.2 is started
3.2 is finished
3.3 is started
3.3 is finished
3.4 is started
3.4 is finished
Export finished


In [11]:
eq_data

Unnamed: 0,lt,X軸_TCMD(%),Z軸_TCMD(%),B軸_TCMD(%),C軸_TCMD(%),スピンドル_TCMD(%),SPEED(1/min)
0,2023-02-07 10:23:34,-0.247185,-1.318319,-0.617962,8.445482,-0.037842,0.2
1,2023-02-07 10:23:34,0.082395,-1.318319,-0.617962,8.555342,0.059814,-0.8
2,2023-02-07 10:23:34,-0.054930,-1.318319,-0.590497,8.225762,0.006104,-0.2
3,2023-02-07 10:23:34,-0.302115,-1.318319,-0.576765,8.335622,-0.012207,0.0
4,2023-02-07 10:23:34,-0.082395,-1.277122,-0.604230,8.459215,-0.012207,0.0
...,...,...,...,...,...,...,...
16799995,2023-02-08 09:57:37,-0.082395,-1.565504,1.112332,-7.539138,5.134277,3499.4
16799996,2023-02-08 09:57:37,-0.082395,-1.551771,1.180994,-5.836309,5.039062,3498.4
16799997,2023-02-08 09:57:37,-0.329580,-1.538039,1.057402,-8.472947,5.194092,3501.2
16799998,2023-02-08 09:57:37,-0.343312,-1.634166,1.071134,-8.321890,5.261230,3497.2
