In [2]:
import json
import os, pickle

# The star import stays ahead of the explicit imports below so that those
# explicit names (Database, np, pd, tqdm) win if prog.tools exports a clash.
from prog.tools import *
from prog.db import Database

import numpy as np
import pandas as pd
from tqdm import tqdm

In [3]:
# Run parameters: the id of the trained model to load and the date window
# that is handed to the database query.
input_ = dict(
    model_id="20230710105600",
    start_time="2023-06-12",
    end_time="2023-07-13",
    # Alternative window kept for reference:
    # start_time="2024-06-13",
    # end_time="2024-06-19",
)

In [4]:
# Unpack the run parameters into individual names.
model_name, start_time, end_time = (
    input_[key] for key in ("model_id", "start_time", "end_time")
)

# All paths are resolved relative to the current working directory.
root = os.getcwd()

# Prediction output directory (created when missing) and the files inside it.
pred_path = os.path.join(root, "data", "predict")
os.makedirs(pred_path, exist_ok=True)
output_json, data_csv, data_rm_csv, waste_json = (
    os.path.join(pred_path, file_name)
    for file_name in ("output.json", "data.csv", "data_rm.csv", "waste.json")
)

# Directory holding the pickled artifacts of the selected model.
model_detail = os.path.join(root, "data", "train", model_name, "model")

# Project configuration (read below for the database settings and plate limits).
config_path = os.path.join(root, "prog", "config.json")
with open(config_path, "r") as config_file:
    config = json.load(config_file)

### Get data from database

In [13]:
# Open the database using the "database" section of the config.
db_con = config["database"]
db = Database(db_con)

# Sorting weights are currently hard-coded. The disabled lines below show how
# they were previously read from the weight table.
# weights_df = db.get_data(db_con["table"]["weight"])
# weights = weights_df.iloc[-1][["e_ship_date", "length", "width", "area"]].astype(float).to_list()
weights = [0, 0, 0, 1]
print(f"weights = {weights}\n")

# Fetch the rows to predict for the configured time window.
df = db.get_data(db_con["table"]["predict"], input_["start_time"], input_["end_time"])

# A piece with a non-positive length or width cannot be cut: set those rows
# aside (all original columns kept) and continue with the remaining ones.
remove = (df[["length", "width"]] <= 0).any(axis=1)
df_rm = df.loc[remove].reset_index(drop=True)

model_columns = ["order_id", "cabinet", "item_name", "color", "length", "width", "e_ship_date"]
df = df.loc[~remove].reset_index(drop=True)[model_columns]

print(df.shape)
df.head(3)

weights = [0, 0, 0, 1]

(18526, 7)


Unnamed: 0,order_id,cabinet,item_name,color,length,width,e_ship_date
0,BC11205013,01_矮櫃,T腳,BJ8321TX_18,2600.0,130.0,2024-05-03
1,BC11205013,01_矮櫃,右側板,BJ8321TX_18,1320.0,560.0,2024-05-03
2,BC11205013,01_矮櫃,左側板,BJ8321TX_18,1320.0,560.0,2024-05-03


In [18]:
import datetime

In [19]:
# Date bounds (13 and 19 June 2024) used to filter rows by e_ship_date.
start, end = (datetime.date(2024, 6, day) for day in (13, 19))

In [24]:
# Keep the rows whose e_ship_date lies between start and end (both inclusive),
# then show how many distinct orders they belong to.
ships_in_window = (start <= df["e_ship_date"]) & (df["e_ship_date"] <= end)
df1 = df[ships_in_window]
df1["order_id"].nunique()

8

In [25]:
# Distinct order ids present in the filtered frame, in order of first appearance.
df1.order_id.unique()

array(['BS11206028', 'BS11206038', 'BS11206051', 'BS11206052',
       'BS11206104', 'BS11206111', 'BS11206122', 'BS11206123'],
      dtype=object)

### Load model

In [5]:
def _load_artifact(file_name):
    """Unpickle one file from the model directory (`model_detail`).

    The file is opened in a `with` block so its handle is closed as soon as
    the object has been loaded, instead of being left open for the lifetime
    of the kernel.

    Parameters
    ----------
    file_name : str
        Name of the pickle file inside `model_detail`.

    Returns
    -------
    object
        Whatever object was pickled into the file.
    """
    # NOTE(security): pickle.load can execute arbitrary code. Only load
    # artifacts that were produced by this project's own training run.
    with open(os.path.join(model_detail, file_name), "rb") as artifact_file:
        return pickle.load(artifact_file)


# Column names, in order, that are fed to the model.
features = _load_artifact("feat_order.pkl")
# Per-feature clamp limits, read as outlier_boundary[col]["min"] / ["max"].
outlier_boundary = _load_artifact("outlier_boundary.pkl")
# Columns that are passed through the power transformer.
skew_feat = _load_artifact("skew_feat.pkl")
# Fitted transformer applied to the skew_feat columns.
pt = _load_artifact("power_tf.pkl")
# Fitted scaler applied to all feature columns.
scaler = _load_artifact("scaler.pkl")
# Fitted estimator used to predict the label of each selected piece.
model = _load_artifact("model.pkl")

### Generate features

In [6]:
# Prepend two bookkeeping columns: "selected" (initially 0) at position 0 and
# "label" (initially None) at position 1.
# Re-running this cell raises, because the columns already exist.
for position, (column, initial_value) in enumerate((("selected", 0), ("label", None))):
    df.insert(position, column, initial_value)

In [7]:
# Maximum plate dimensions, taken from the "limit" section of the config.
plate_limit = config["limit"]
length_limit, width_limit = plate_limit["length"], plate_limit["width"]
print(f"length_limit = {length_limit}, width_limit = {width_limit}")

# Area of a full plate.
area_limit = length_limit * width_limit

# Rectangle area of each piece and the fraction of a full plate it occupies.
df["area"] = df["width"] * df["length"]
df["area_prob"] = df["area"] / area_limit
df.head(3)

length_limit = 3000, width_limit = 3000


Unnamed: 0,selected,label,order_id,cabinet,item_name,color,length,width,e_ship_date,area,area_prob
0,0,,BS11206028,00_共用,T腳,AL3612A_18,2500.0,120.0,2024-06-14,300000.0,0.033333
1,0,,BS11206028,00_共用,T腳,AL3612A_18,2500.0,120.0,2024-06-14,300000.0,0.033333
2,0,,BS11206028,00_共用,T腳,BK1185A_18,2500.0,120.0,2024-06-14,300000.0,0.033333


### 刪除無法排序的板材

In [8]:
# Pieces that exceed the plate limit in at least one dimension.
exceeds_plate = (df["length"] > length_limit) | (df["width"] > width_limit)

# Strip the columns added in this notebook so the rows line up with df_rm,
# then append them to the collection of removed rows.
df_rm1 = df[exceeds_plate].drop(columns=["selected", "label", "area", "area_prob"])
df_rm = pd.concat([df_rm, df_rm1], ignore_index=True)

In [9]:
# Keep exactly the rows that were NOT moved to df_rm, i.e. the negation of the
# removal condition "(length > limit) or (width > limit)".
# The previous filter "(length <= limit) or (width <= limit)" kept every piece
# that was oversized in only ONE dimension, so such pieces stayed in df and
# were also written to df_rm.
df = df.query("not ((length > @length_limit) or (width > @width_limit))").reset_index(drop = True)

### 初始排序

In [10]:
# Sort key -> ascending flag. Colour, order and cabinet ascending; within a
# cabinet the longest, then widest, pieces come first.
sort_spec = {
    "color": True,
    "order_id": True,
    "cabinet": True,
    "length": False,
    "width": False,
}
df = (
    df.sort_values(by=list(sort_spec), ascending=list(sort_spec.values()))
    .reset_index(drop=True)
)
df.head(3)

Unnamed: 0,selected,label,order_id,cabinet,item_name,color,length,width,e_ship_date,area,area_prob
0,0,,BS11206028,01_鞋櫃,背拉,*不拘_18,479.0,96.0,2024-06-14,45984.0,0.005109
1,0,,BS11206028,01_鞋櫃,背拉,*不拘_18,479.0,96.0,2024-06-14,45984.0,0.005109
2,0,,BS11206028,01_鞋櫃,背拉,*不拘_18,479.0,96.0,2024-06-14,45984.0,0.005109


### 計算第n個類型的產品的總數量

In [11]:
# target name -> grouping columns, processed in this exact order.
# Each calculate_num call returns the updated frame and a counter object.
# The "<target>_num" columns it adds are visible in the output of this cell.
global_count_specs = {
    "color": ["color"],
    "cabinet": ["cabinet"],
    "color_cabinet": ["color", "cabinet"],
    "item": ["color", "cabinet", "length", "width"],
    "color_item": ["color", "length", "width"],
}
global_counters = {}
for target_name, group_cols in global_count_specs.items():
    df, global_counters[target_name] = calculate_num(
        col=group_cols, df=df, target=target_name, flag="predict"
    )

# Expose the counters under the names used by the selection loop.
color_accum_counts = global_counters["color"]
cabinet_accum_counts = global_counters["cabinet"]
color_cabinet_accum_counts = global_counters["color_cabinet"]
item_accum_counts = global_counters["item"]
color_item_accum_counts = global_counters["color_item"]
df.head(3)

Unnamed: 0,selected,label,order_id,cabinet,item_name,color,length,width,e_ship_date,area,area_prob,color_num,cabinet_num,color_cabinet_num,item_num,color_item_num
0,0,,BS11206028,01_鞋櫃,背拉,*不拘_18,479.0,96.0,2024-06-14,45984.0,0.005109,95,18,3,3,3
1,0,,BS11206028,01_鞋櫃,背拉,*不拘_18,479.0,96.0,2024-06-14,45984.0,0.005109,95,18,3,3,3
2,0,,BS11206028,01_鞋櫃,背拉,*不拘_18,479.0,96.0,2024-06-14,45984.0,0.005109,95,18,3,3,3


In [12]:
# Same counts as the previous cell, but additionally grouped by order_id.
# target name -> grouping columns, processed in this exact order.
order_count_specs = {
    "color1": ["order_id", "color"],
    "cabinet1": ["order_id", "cabinet"],
    "color_cabinet1": ["order_id", "color", "cabinet"],
    "item1": ["order_id", "color", "cabinet", "length", "width"],
    "color_item1": ["order_id", "color", "length", "width"],
}
order_counters = {}
for target_name, group_cols in order_count_specs.items():
    df, order_counters[target_name] = calculate_num(
        col=group_cols, df=df, target=target_name, flag="predict"
    )

# Expose the counters under the names used by the selection loop.
color_accum_counts1 = order_counters["color1"]
cabinet_accum_counts1 = order_counters["cabinet1"]
color_cabinet_accum_counts1 = order_counters["color_cabinet1"]
item_accum_counts1 = order_counters["item1"]
color_item_accum_counts1 = order_counters["color_item1"]
df.head(3)

Unnamed: 0,selected,label,order_id,cabinet,item_name,color,length,width,e_ship_date,area,...,color_num,cabinet_num,color_cabinet_num,item_num,color_item_num,color1_num,cabinet1_num,color_cabinet1_num,item1_num,color_item1_num
0,0,,BS11206028,01_鞋櫃,背拉,*不拘_18,479.0,96.0,2024-06-14,45984.0,...,95,18,3,3,3,28,18,3,3,3
1,0,,BS11206028,01_鞋櫃,背拉,*不拘_18,479.0,96.0,2024-06-14,45984.0,...,95,18,3,3,3,28,18,3,3,3
2,0,,BS11206028,01_鞋櫃,背拉,*不拘_18,479.0,96.0,2024-06-14,45984.0,...,95,18,3,3,3,28,18,3,3,3


### 找下一個要切割的板材

In [13]:
# Column groups passed to get_next_plate; the list position becomes the key.
col_dict = dict(enumerate([
    ["color", "cabinet", "length", "width"],  # 0: same plate, same cabinet, same item
    ["color", "cabinet"],                     # 1: same plate, same cabinet, different item
    ["color", "length", "width"],             # 2: same plate, different cabinet, same item
    ["color"],                                # 3: same plate, different cabinet, different item
]))

### 依序選出板材排序

In [14]:
# Per-row state columns, initialised for every piece:
# accum_areas = 0, waste = 1, plate_id = 0, fail = 0.
df[["accum_areas", "waste", "plate_id", "fail"]] = [0, 1, 0, 0]

# Seed the first piece: it goes on plate 1, and the used area / waste ratio of
# that plate are derived from this single piece.
df.loc[0, "plate_id"] = 1
df.loc[0, "accum_areas"] = df.loc[0, "area"]  # area used so far on the plate
df.loc[0, "waste"] = (1 - (df.loc[0, "accum_areas"] / (length_limit * width_limit)))  # unused fraction of the plate

# (ratio column, grouping columns, running counter returned by calculate_num).
# For each selected row the counter of its group is incremented, and the
# "<target>_num" column (which holds the group total) is overwritten with
# running count / group total.
# NOTE(review): the counters and *_num columns are mutated in place, so
# re-running this cell without re-running the counting cells gives different
# ratios.
progress_counters = [
    ("color_num", ["color"], color_accum_counts),
    ("cabinet_num", ["cabinet"], cabinet_accum_counts),
    ("color_cabinet_num", ["color", "cabinet"], color_cabinet_accum_counts),
    ("item_num", ["color", "cabinet", "length", "width"], item_accum_counts),
    ("color_item_num", ["color", "length", "width"], color_item_accum_counts),
    ("color1_num", ["order_id", "color"], color_accum_counts1),
    ("cabinet1_num", ["order_id", "cabinet"], cabinet_accum_counts1),
    ("color_cabinet1_num", ["order_id", "color", "cabinet"], color_cabinet_accum_counts1),
    ("item1_num", ["order_id", "color", "cabinet", "length", "width"], item_accum_counts1),
    ("color_item1_num", ["order_id", "color", "length", "width"], color_item_accum_counts1),
]

# The loop handles every row except the last one, because each step also
# reads row index + 1 for the progress display.
# NOTE(review): the last row therefore never gets selected = 1 or a predicted
# label; confirm that this is intended.
n_steps = max(len(df) - 1, 0)

# The bar total now equals the number of iterations (it used to be one short,
# which made the bar overflow), and the `with` block closes the bar when the
# loop ends or raises.
with tqdm(total = n_steps) as pbar:
    for index in range(n_steps):  # pick the pieces one after another
        df.loc[index, "selected"] = 1  # mark the piece as selected

        # Update the running share of every group this piece belongs to.
        for ratio_col, key_cols, counter in progress_counters:
            # Single-column groups are keyed by the scalar value, multi-column
            # groups by a tuple of the values.
            if len(key_cols) == 1:
                key = df.loc[index, key_cols[0]]
            else:
                key = tuple(df.loc[index, key_cols])
            counter[key] += 1
            df.loc[index, ratio_col] = counter[key] / df.loc[index, ratio_col]

        # Build the single-row model input.
        x_test = df.loc[[index], features].copy()

        ## clamp every feature into the [min, max] range stored with the model
        for col in features:
            bounds = outlier_boundary[col]
            x_test[col] = x_test[col].clip(lower = bounds["min"], upper = bounds["max"])

        ## power-transform the skewed features
        x_test[skew_feat] = pt.transform(x_test[skew_feat])

        ## scale all features
        x_test[features] = scaler.transform(x_test[features])

        ## predict the label of the current piece
        df.loc[index, "label"] = model.predict(x_test)[0]

        # Let the helper decide which piece comes next. It returns the updated
        # frame, whose row index + 1 is read for the progress display below.
        df = get_next_plate(col_dict, index, weights, df, length_limit, width_limit)

        pbar.set_postfix({"plate_id": df.loc[index+1, "plate_id"], "waste": df.loc[index+1, "waste"]})
        pbar.update(1)

100%|██████████| 1693/1693 [01:14<00:00, 29.02it/s, plate_id=88, waste=0.875] 

### Estimated cutting time per order

In [15]:
# Seconds charged per position in the sequence: (8 h in seconds / 8000) * 2.
mean_cut_time = ((8 * 60 * 60) / 8000) * 2

# For every order with more than one row, record the index label of its second
# row ("start") and of its last row ("end").
# NOTE(review): "start" is taken from the second row (position 1), not the
# first. Confirm that this is intended.
df_time = pd.DataFrame()
for order_id, order_rows in df.groupby("order_id"):
    if len(order_rows) <= 1:
        continue
    df_time.loc[order_id, "start"] = order_rows.index[1]
    df_time.loc[order_id, "end"] = order_rows.index[-1]

# Number of sequence positions spanned by the order and the matching time in seconds.
df_time["item_gap"] = df_time["end"] - df_time["start"] + 1
df_time["time(秒)"] = df_time["item_gap"] * mean_cut_time

df_time[["time(秒)", "item_gap"]].describe().round(2)

Unnamed: 0,time(秒),item_gap
count,8.0,8.0
mean,4428.9,615.12
std,4342.13,603.07
min,21.6,3.0
25%,423.0,58.75
50%,4118.4,572.0
75%,6568.2,912.25
max,11736.0,1630.0


### 基礎耗損率

In [16]:
# Compute the waste summary for the finished sequence; the helper also receives
# the path of waste.json (presumably to write the summary there; verify in
# prog.tools). The "plate" entry of the result is displayed.
waste_score = calculate_mean_wast(df, waste_json = waste_json)
waste_score["plate"]

{'waste': 0.1218, 'plate': 41, 'no_rm_waste': 0.4291, 'no_rm_plate': 88}

### Save

In [17]:
# Store the final cutting sequence (the current row index) as an explicit column.
# Re-running this cell raises, because the column already exists.
df.insert(loc=0, column="order", value=df.index)

# Columns written to the result file, in output order.
keep_col = [
    "order", "order_id", "cabinet", "item_name", "color",
    "length", "width", "area", "area_prob",
    "plate_id", "accum_areas", "waste", "e_ship_date",
]
df1 = df.loc[:, keep_col].round(4)
df1.head(3)

Unnamed: 0,order,order_id,cabinet,item_name,color,length,width,area,area_prob,plate_id,accum_areas,waste,e_ship_date
0,0,BS11206028,01_鞋櫃,背拉,*不拘_18,479.0,96.0,45984.0,0.0051,1,45984.0,0.9949,2024-06-14
1,1,BS11206028,01_鞋櫃,背拉,*不拘_18,479.0,96.0,45984.0,0.0051,1,91968.0,0.9898,2024-06-14
2,2,BS11206028,01_鞋櫃,背拉,*不拘_18,479.0,96.0,45984.0,0.0051,1,137952.0,0.9847,2024-06-14


In [18]:
print(data_csv)

# Write the sequenced pieces and the removed pieces; the BOM-prefixed UTF-8
# encoding ("utf-8-sig") and the omitted index apply to both files.
for frame, destination in ((df1, data_csv), (df_rm, data_rm_csv)):
    frame.to_csv(destination, encoding="utf-8-sig", index=False)

c:\Users\tzuli\Documents\python\ChuanMai\data\predict\data.csv


1694it [01:30, 29.02it/s, plate_id=88, waste=0.875]                          