# 1.ライブラリのインポート

In [1]:
import numpy as np
import pandas as pd
import datetime
import os
import glob
import re
import fitz

# 2.timedataの作成

## 2.1 lotlistの読み込み
FLAG[2]を手入力済みのlotlistの読み込み・加工

In [2]:
def process_lotlist_data(lotlist_dir, LOT):
    
    print("2.1 is started")
    
    # read in csv files and concatenate into one dataframe
    files1 = glob.glob(lotlist_dir)
    df_list = [pd.read_csv(file, encoding="cp932") for file in files1]
    lotlist = pd.concat(df_list, axis=0, ignore_index=True)
    lotlist = lotlist.rename(columns={'lotno': 'LotNo'})

    # convert time columns to datetime type
    lotlist['DATETIME_S'] = pd.to_datetime(lotlist['DATETIME_S'])
    lotlist['DATETIME_E'] = pd.to_datetime(lotlist['DATETIME_E'])

    # cleansing
    lotlist_LOT = lotlist.loc[lotlist['LotNo'] == LOT]
    lotlist_LOT = lotlist_LOT.sort_values('DATETIME_S')
    serial_num = pd.RangeIndex(start=1, stop=len(lotlist_LOT.index) + 1, step=1)
    lotlist_LOT['uid'] = serial_num
    
    # lotlist_LOT = lotlist_LOT.drop('Unnamed: 0',axis=1)
    lotlist_1 = lotlist_LOT.loc[(lotlist_LOT['FLAG'] == 1)]
    lotlist_2 = lotlist_LOT.loc[(lotlist_LOT['FLAG'] == 2)]

    lotlist_LOT = lotlist_LOT.loc[(lotlist_LOT['FLAG'] == 0)]
    lotlist_LOT['No'] = lotlist_LOT.groupby(['LotNo']).cumcount() + 1
    lotlist_LOT['No'] = lotlist_LOT['No'].astype(str)

    lotlist_LOT = pd.concat([lotlist_LOT, lotlist_2], axis=0)
    lotlist_LOT = lotlist_LOT.sort_values('DATETIME_S')
    lotlist_LOT['No'] = lotlist_LOT['No'].fillna(0)
    lotlist_LOT['No'] = lotlist_LOT['No'].astype(str)
    lotlist_LOT['ナンバリング'] = lotlist_LOT['LotNo'] + '-' + lotlist_LOT['No']
    
    print("2.1 is finished")
    return lotlist_LOT,lotlist_1,lotlist_2

## 2.2ドレッシング記録の読み込み

In [3]:
def read_dre_excel(dre_dir, LOT):
    
    print("2.2 is started")
    dre = pd.read_excel(dre_dir, skiprows=1)
    dre2 = dre.filter(items=["s1_作業指図書no","s1_品名","s1_サイズ","s1_オフセット1","s1_オフセット2"
                             ,"s1_ドレスサイクルs","s1_ドレスサイクルe","s1_開始dt","s1_終了dt"
                             ,"s1_開始日付手入力","s1_終了日付手入力","s1_開始時間手入力","s1_終了時間手入力"])
    
    dre2["s1_開始dt"] = pd.to_datetime(dre2["s1_開始dt"])
    dre2["s1_終了dt"] = pd.to_datetime(dre2["s1_終了dt"])
    dreLOT = dre2.loc[dre2['s1_作業指図書no']==LOT].reset_index(drop=True)
    
    print("2.2 is finished")
    return dreLOT

## 2.3 3次元測定データのファイル名を一括変更する

In [4]:
def RenamePDF(PDF_dir,OutPDF_dir):
    
    print("2.3 is start")
    ##PDFファイル取得
    flist = glob.glob(PDF_dir + '\*.pdf')
    
    ##PDFファイルの情報取得
    for file in flist:
        F = file.split('\\')
        FileName = F[-2]
        FileName = FileName.split(' ')
        Name0 = FileName[0]
        Name1 = FileName[1]
        Name2 = FileName[2]
        No =  F[-1]
        No = No.split('.')
        No = No[-2]

        ##ファイル名を一括で変更する。
        ReName = OutPDF_dir + Name0 +' '+ Name1 +' '+ 'No.' + No + ' '+ Name2 +'.pdf'
        os.rename(file,ReName)
        
    RenamePDF_dir = OutPDF_dir + '\*.pdf'
    print("2.3 is finished")
    
    return RenamePDF_dir

## 2.4 3次元測定データの読み込み
三次元測定データPDFからテーパ角度/直径情報を抽出

In [5]:
def extract_data_from_pdfs(PDF_dir):
    
    print("2.4 is started")
    df_inspection = pd.DataFrame(columns=["フォルダー名","ファイル名","円錐角度_測定値","円錐角度_誤差値"
                                          ,"直径_測定値","直径_誤差値","位置_測定値","真直_測定値"])
    for root, dirs, files in os.walk(OutPDF_dir):
        for f in files:
            full = os.path.join(root, f)
            doc = fitz.open(full)
            name = f
            Name = root
            for page in range(len(doc)):
                text = doc[page].get_text()   
                Text = text.split('\n')
                for i in Text:
                    if i == "円錐角度_円錐2":
                        Ti2 = Text.index("円錐角度_円錐2")
                        data1 = Text[Ti2+6]
                        data2 = Text[Ti2+4]


                    elif i == "直径_円錐追加出力1":
                        Ti3 = Text.index("直径_円錐追加出力1")
                        data3 = Text[Ti3+6]
                        data4 = Text[Ti3+4]

                    elif i == "位置度1":
                        Ti4 = Text.index("位置度1")
                        data5 = Text[Ti4+5]

                    elif i == "真直度1":
                        Ti5 = Text.index("真直度1")
                        data6 = Text[Ti5+3]                       
                        df = pd.DataFrame([[Name,name,data1,data2,data3,data4,data5,data6]],columns=["フォルダー名","ファイル名","円錐角度_測定値","円錐角度_誤差値"
                                                                                                     ,"直径_測定値","直径_誤差値","位置_測定値","真直_測定値"])
                        df_inspection = pd.concat([df_inspection,df],axis=0,ignore_index=True)
    df_inspection['LotNo'] = df_inspection['ファイル名'].str[-15:-4]
    df_inspection = df_inspection.loc[(df_inspection["LotNo"] == LOT)]
    dp1=df_inspection.loc[df_inspection["LotNo"] == LOT]
    dp1['memo']=''
    dp1.loc[dp1['ファイル名'].astype(str).str.contains('No.'), 'memo'] = dp1['ファイル名'].astype(str).str.replace(r'(.*?)No.','',regex=True)
    dp1['No'] = dp1['memo'].str[:-16]
    df_inspection = pd.concat([dp1],axis=0,ignore_index=True)
    df_inspection = df_inspection[['ファイル名','円錐角度_測定値','円錐角度_誤差値',"直径_測定値","直径_誤差値","位置_測定値","真直_測定値",'LotNo','No']]
    df_inspection["ナンバリング"] = df_inspection['LotNo'] + "-" + df_inspection['No']
    
    print("2.4 is finished")
    return df_inspection

## 2.5 lotlistと3次元測定データの結合

In [6]:
def lotlist_sanjigensoku(df_inspection,lotlist_LOT):
    print("2.5 is started")
    # データ整理
    timedata = pd.merge(df_inspection ,lotlist_LOT ,how='left' ,on=['No','LotNo','ナンバリング'])
    timedata = timedata[['uid','ファイル名','LotNo', 'No','ナンバリング','FLAG','円錐角度_測定値','円錐角度_誤差値'
                         ,"直径_測定値","直径_誤差値","位置_測定値","真直_測定値",'DATETIME_S', 'DATETIME_E'
                         ,'2:var818', '2:x_axis', '2:z_axis', '2:c_axis']]
    # timedata = timedata.dropna(how='any', axis='index')
    timedata.reset_index(inplace=True, drop=True)

    #誤差値を文字としてリスト化、カラム追加
    gosa1=pd.DataFrame()
    for i in timedata['円錐角度_誤差値']:
        ddd = re.split('[°"]' , i)#　度と分秒に分けて抽出
        data1=ddd[0]
        data2=ddd[1]
    #データフレーム化
        data3 = pd.DataFrame([[data1,data2]],columns=["度2","に2"])
        gosa1=pd.concat([gosa1,data3],axis=0,ignore_index=True)
    timedata=pd.concat([timedata,gosa1],axis=1)

    gosa2=pd.DataFrame()
    for h in timedata['に2']:
        ddd = re.split("[']" , h)#　分と秒を分ける
        data1=ddd[0]
        data2=ddd[1]
    #データフレーム化
        data3 = pd.DataFrame([[data1,data2]],columns=["分2","秒2"])
        gosa2=pd.concat([gosa2,data3],axis=0,ignore_index=True)
    timedata=pd.concat([timedata,gosa2],axis=1)

    # #int型に変換
    timedata['分2'] = timedata['分2'].astype(int)
    timedata['秒2'] = timedata['秒2'].astype(int)

    for i in timedata['度2']:
        if '-0' in i:
            timedata.loc[timedata['度2'] == i,'誤差値(度)'] = -(timedata['分2']/60 + timedata['秒2']/3600)
        else:
            timedata.loc[timedata['度2'] == i,'誤差値(度)'] = timedata['分2']/60 + timedata['秒2']/3600


    #測定値を文字としてリスト化、カラム追加
    real1=pd.DataFrame()
    for i in timedata['円錐角度_測定値']:
        ddd = re.split('[°"]' , i)
        data1=ddd[0]
        data2=ddd[1]
    #データフレーム化
        df = pd.DataFrame([[data1,data2]],columns=["度1","に1"])#　度と分秒に分けて抽出
        real1=pd.concat([real1,df],axis=0,ignore_index=True)
    timedata=pd.concat([timedata,real1],axis=1)


    real2=pd.DataFrame()
    for h in timedata['に1']:
        ddd = re.split("[']" , h)
        data1=ddd[0]
        data2=ddd[1]
    #データフレーム化
        df = pd.DataFrame([[data1,data2]],columns=["分1","秒1"])#　分と秒を分ける
        real2=pd.concat([real2,df],axis=0,ignore_index=True)
    timedata=pd.concat([timedata,real2],axis=1)

    ##int型に変換
    timedata['度1'] = timedata['度1'].astype(int)
    timedata['分1'] = timedata['分1'].astype(int)
    timedata['秒1'] = timedata['秒1'].astype(int)
    timedata['測定値(度)']=abs(timedata['度1']) + timedata['分1']/60 + timedata['秒1']/3600
    timedata = timedata[['uid','ファイル名','LotNo', 'No','ナンバリング','FLAG','DATETIME_S', 'DATETIME_E'
                         ,'円錐角度_測定値','円錐角度_誤差値','誤差値(度)','測定値(度)',"直径_測定値","直径_誤差値"
                         ,"位置_測定値","真直_測定値",'2:var818', '2:x_axis', '2:z_axis', '2:c_axis']]
    timedata = timedata.sort_values("DATETIME_S")
    timedata = pd.concat([timedata,lotlist_1,lotlist_2],axis= 0)
    timedata = timedata.sort_values("uid").reset_index(drop=True)

    # 新しい列を作成する
    count = 0
    timedata['加工製品数'] = 0
    for idx, flag in enumerate(timedata['FLAG']):
        if flag == 1 or flag == 2:
            count = 0
            timedata.loc[idx, '加工製品数'] = count
        elif flag == 0:
            count += 1
            timedata.loc[idx, '加工製品数'] = count
    timedata['直径_測定値'] = timedata['直径_測定値'].str.strip()
    timedata['直径_誤差値'] = timedata['直径_誤差値'].str.strip()
    timedata['位置_測定値'] = timedata['位置_測定値'].str.strip()
    timedata['真直_測定値'] = timedata['真直_測定値'].str.strip()
    timedata['直径_測定値'] = timedata['直径_測定値'].astype(float)
    timedata['直径_誤差値'] = timedata['直径_誤差値'].astype(float)
    timedata['位置_測定値'] = timedata['位置_測定値'].astype(float)
    timedata['真直_測定値'] = timedata['真直_測定値'].astype(float)
    
    out_file1 = out_dir1 + LOT_PDF + "Timedata" + ".csv"
    timedata.to_csv(out_file1, index=False,encoding="cp932")
    
    print("2.5 is finished")
    print("timedata is complete")
    return timedata


# 3. eq-oneの作成

## 3.1 eq-dataの読み込み

In [7]:
def eq_data(eq_dir):
    
    print("3.1 is started")
    eqPath = []
    for root, dirs, files in os.walk(eq_dir):
        for f in files:
            # 指定したフォルダ内のファイル情報をすべて取得
            full = os.path.join(root, f)
            eqPath.append(full)
            
    eq_data = pd.DataFrame()
    for p in eqPath:    
        t = os.path.getmtime(p)
        D = datetime.datetime.fromtimestamp(t).strftime('%Y/%m/%d %H:%M:%S.%f')[:-3]#最終更新日時をタイムスタンプ型に変換
        # ファイル読み込み、整形
        d = pd.read_csv(p, skiprows=2,encoding="cp932")
        d = d.rename(columns={'Time': 'Time(msec)', 'POSF': 'POSF(mm)','POSF.1': 'POSF.1(mm)','TCMD':'X軸_TCMD(%)','TCMD.1':'Z軸_TCMD(%)'
                              ,'TCMD.2':'B軸_TCMD(%)','TCMD.3':'C軸_TCMD(%)','TCMD.4':'スピンドル_TCMD(%)','SPEED':'SPEED(1/min)'})
        d = d.dropna()
        d["latest_time"] = D
        d["latest_time"] = pd.to_datetime(d["latest_time"])#最終更新日時をdatetime型に変換
        d['Time(msec)'] = (d['Time(msec)'].astype(float).astype(int))/1000 
        d['lt_unix'] = d.latest_time.apply(lambda t: int(t.timestamp()))
        d['lt_unix3'] = ((d['lt_unix'] - 299.995) + d['Time(msec)']).astype(int)
        d['lt'] = pd.to_datetime(d['lt_unix3'], unit='s')
        eq_data = pd.concat([eq_data,d], axis=0, ignore_index=True)
    eq_data = eq_data[['lt','X軸_TCMD(%)','Z軸_TCMD(%)','B軸_TCMD(%)','C軸_TCMD(%)','スピンドル_TCMD(%)','SPEED(1/min)']]
    for col in eq_data.columns[1:]:
        eq_data[col] = eq_data[col].astype(float)
    # eq_data = eq_data.drop(['Time(msec)','POSF(mm)','POSF.1(mm)'],axis=1)

    print("3.1 is finished")
    return eq_data

## 3.2 Excel出力

In [8]:
def export_csv_files(timedata, lotlist_LOT, eq_data, out_dir, LOT):

    print("3.2 is started")
    #FLAGで絞り込み
    timedata_0 = timedata.loc[(timedata["FLAG"] == 0) & (timedata["LotNo"] == LOT)]
    #時間抽出してリストに
    timedata_0["file"]= timedata_0["DATETIME_S"].dt.strftime('%Y%m%d_%H%M')
    filename = timedata_0["file"].dropna()
    filename = filename.to_numpy().tolist()

    lotlist_0 = lotlist_LOT.loc[lotlist_LOT['FLAG'] == 0]
    start = lotlist_0['DATETIME_S'].tolist()
    end = lotlist_0['DATETIME_E'].tolist()
    number = timedata_0['No'].tolist()
    i = 1
            
    for s,e,num in zip(start,end,number):
        eq_one = eq_data.loc[(s <= eq_data['lt']) & (eq_data['lt'] <= e)]
        if eq_one.empty:
            pass
        else:
            if i-1 < len(filename):
                out_file = out_dir + filename[i-1] + " " + LOT + '-' + str(num).zfill(3) + ".csv"
                eq_one.to_csv(out_file ,index=False,encoding="cp932")
                i += 1
            else:
                print("No more filenames available.")
                break



    print("3.2 is finished")
    print('Export finished')


# Main_File

# 注意事項
測定データの格納先フォルダ名は必ず以下の形式で！！
### r"\910AZUL 32+0 2222612-000\*.pdf"

ファイル名は以下のように製品番号だけあればよい
### "1.pdf"


In [None]:
if __name__ == '__main__':
    
    ##input_dir
    ##FLAG[2]を手入力済みのlotlistのディレクトリー
    lotlist_dir = r"C:\Users\user\Desktop\05_グライディングセンタ\lotlist_origin.csv"
    #ドレッシング記録のディレクトリー
    dre_dir = r"C:\Users\user\Desktop\05_グライディングセンタ\ドレッシング記録20230216.xlsx"
    #inspection_data(3次元測定結果）のディレクトリー
    PDF_dir = r"C:\Users\user\Desktop\05_グライディングセンタ\三次元測定結果\1214AZUL 32+0 2300002-000"
    #eq_dataのディレクトリー
    eq_dir = r"C:\Users\user\Desktop\05_グライディングセンタ\eq_data\追加分\2300002-000"
    
    #output_dir
    #renameされたPDFの出力先
    OutPDF_dir = r'C:\Users\user\Desktop\output\\'
    #eq_oneの出力先
    out_dir = r'C:\Users\user\Desktop\output2\\'
    #timedataの出力先
    out_dir1 = r'C:\Users\user\Desktop\output3\\'
    
    #製品情報
    LOT_PDF ='1214AZUL 32+0 2300002-000'
    LOT = '2300002-000'
    
    #2.timedata作成
    lotlist_LOT,lotlist_1,lotlist_2= process_lotlist_data(lotlist_dir, LOT)           #2.1 lotlistの読み込み
    dreLOT = read_dre_excel(dre_dir, LOT)                                             #2.2 ドレッシング記録の読み込み
    RenamePDF_dir = RenamePDF(PDF_dir,OutPDF_dir)                                     #2.3 3次元測定データのファイル名変更
    df_inspection = extract_data_from_pdfs(RenamePDF_dir)                             #2.4 3次元測定データの読み込み
    timedata = lotlist_sanjigensoku(df_inspection,lotlist_LOT)                        #2.5 lotlistと3次元測定データの結合
    # 3.eq_one作成
    eq_data = eq_data(eq_dir)                                                         #3.1 eq-dataの読み込み
    export_csv_files(timedata, lotlist_LOT, eq_data, out_dir, LOT)                    #3.2 Excel出力

2.1 is started
2.1 is finished
2.2 is started
2.2 is finished
2.3 is start
2.3 is finished
2.4 is started
2.4 is finished
2.5 is started
2.5 is finished
timedata is complete
3.1 is started
3.1 is finished
3.2 is started


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  timedata_0["file"]= timedata_0["DATETIME_S"].dt.strftime('%Y%m%d_%H%M')


In [None]:
df_inspection

In [None]:
RenamePDF_dir

In [None]:
timedata

In [None]:
eq_data

In [None]:
lotlist_LOT