# 2025 造件数据

In [1]:
import pandas as pd
import numpy as np
import random
from sqlalchemy import create_engine
from tool.func_new import *

## 0. 定义参数 

In [2]:
# 通用参数

od_path = r'./file/2025OD_v31.xlsx'  # 文件路径
air_model_list = ['I', 'D', 'INF']  # 始发地，空侧
land_model_list = ['R']  # 始发地，陆侧

# 表格完整列名
parcel_col_list = [
    'small_id','parcel_id','parcel_type','src_type','dest_type',
    'plate_num','uld_num', 'arrive_time','send_time','plan_disallow_tm',
    'actual_disallow_tm','src_apt', 'dest_apt','dest_city_code', 'ident_des_zno',
    'plate_priority','is_mixture','inserted_on','modified_on']

# 按照数据库结构，修改对应的列名
db_columns_modify = {'model':'src_type', 'desc_model':'dest_type',
                     'Flight_ID':'plate_num','city':'dest_city_code',
                     'api':'dest_apt','apt':'src_apt',
                     'landing_time':'arrive_time'}

In [3]:
p_sheet_name = 'Parcel pcs'                # parcel表名
p_cols = 'B:YS'                             # 读取列范围
p_skip_num = 57                              # 跳过行数
p_cols_split = 5                            # 列索引切片
p_type_name = 'parcel'                     # 包裹类型
p_air_round_ceil = 0.97   # 空侧精度控制进位值
p_air_round_floor = 0.01   # 空侧精度控制置0值
p_land_round_ceil = 0.99   # 陆侧精度控制进位值
p_land_round_floor = 0.05   # 陆侧精度控制置0值

In [4]:
s_sheet_name = 'Small bag pcs'             # small表名
s_parse_cols = 'B:YS'                       # 读取列范围
s_parse_skip_num = 57                        # 跳过行数
s_parse_cols_split = 5                      # 列索引切片
s_type_name = 'small'               # 包裹类型
s_air_round_ceil = 0.97   # 空侧精度控制进位值
s_air_round_floor = 0.01   # 空侧精度控制置0值
s_land_round_ceil = 0.99   # 陆侧精度控制进位值
s_land_round_floor = 0.05   # 陆侧精度控制置0值

In [5]:
i_sheet_name = 'Irregular pcs'             # irregular表名
i_parse_cols = 'B:YS'                       # 读取列范围
i_parse_skip_num = 57                        # 跳过行数
i_parse_cols_split = 5                      # 列索引切片
i_type_name = 'irregular'               # 包裹类型
i_air_round_ceil = 0.97   # 空侧精度控制进位值
i_air_round_floor = 0.01   # 空侧精度控制置0值
i_land_round_ceil = 0.99   # 陆侧精度控制进位值
i_land_round_floor = 0.05   # 陆侧精度控制置0值

In [6]:
nc_sheet_name = 'NC pcs'                     # nc表名
nc_parse_cols = 'B:YS'                       # 读取列范围
nc_parse_skip_num = 57                      # 跳过行数
nc_parse_cols_split = 5                      # 列索引切片
nc_type_name = 'nc'                          # 包裹类型
nc_air_round_ceil = 0.97   # 空侧精度控制进位值
nc_air_round_floor = 0.01   # 空侧精度控制置0值
nc_land_round_ceil = 0.99   # 陆侧精度控制进位值
nc_land_round_floor = 0.05   # 陆侧精度控制置0值

In [7]:
isb_sheet_name = 'Mail pcs'                   # isb表名
isb_parse_cols = 'B:YS'                       # 读取列范围
isb_parse_skip_num = 57                        # 跳过行数
isb_parse_cols_split = 5                      # 列索引切片
isb_type_name = 'isb'               # 包裹类型
isb_air_round_ceil = 0.97   # 空侧精度控制进位值
isb_air_round_floor = 0.01   # 空侧精度控制置0值
isb_land_round_ceil = 0.99   # 陆侧精度控制进位值
isb_land_round_floor = 0.05   # 陆侧精度控制置0值

In [8]:
# 新版ULD对照表数据
uld_name = './file/0103ULD_id.xlsx'
uld_sheetname = 'ULD'
uld_parse_cols = 'A:C'

In [9]:
## 数据库信息
# 写入本地数据库
wirte_sql_name='i_od_parcel_2025v31_mix'
user_name='root'
password='root123'
db_name='ezhou'

engine = create_engine(
    "mysql+pymysql://{user_name}:{password}@localhost:3306/{db_name}?charset=utf8".format(
        user_name=user_name,
        password=password,
        db_name=db_name,)
)

# 1 读取parcel_pcs表

### 1.1 读取parcel_pcs，转置操作

In [10]:
# 读取parcel_pcs表
df_parcel = read_excel(
    excel_path=od_path,
    sheet_name=p_sheet_name,
    read_columns=p_cols,
    fill_val=0,
    skip_footer=p_skip_num)

In [11]:
df_parcel.head()

Unnamed: 0,apt,model,Flight ID,payload,landing time,10_PEK_D,20_CAN_D,21_PVG_D,22_TSN_D,23_CKG_D,...,712_712_R,713_713_R.1,714_714_R.1,715_715_R,716_716_R,717_717_R,722_722_R,724_724_R,728_728_R,792_792_R
0,WUX,D,CSS224,15801.457012,00:10:36.585000,0.0,296.022495,0.0,81.119819,87.239105,...,0.390378,0.266741,0.000191,0.00014,0.00014,9.7e-05,6.1e-05,5.4e-05,0.000181,0.0
1,HGH,D,CSS37,19052.423708,00:13:32.195000,0.0,0.0,0.0,87.21586,84.980792,...,1.060109,0.104269,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,PVG,D,CSS75,15018.120988,00:16:27.805000,32.264388,0.0,0.0,34.599533,49.161906,...,0.860304,0.438698,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,SZX,D,CSS1041,17323.418717,00:19:23.415000,14.913241,0.0,7.741348,84.423836,63.047272,...,3.907846,4.220644,3.17681,3.996641,4.733523,10.907394,2.114239,2.645777,3.11205,9.078392
4,WUX,D,CSS226,15798.509123,00:20:51.220000,0.0,295.967269,0.0,81.104685,87.22283,...,0.390305,0.266691,0.000191,0.00014,0.00014,9.7e-05,6.1e-05,5.4e-05,0.000181,0.0


In [12]:
df_parcel.shape

(205, 668)

In [13]:
# 取列名，替换空格
col_list = list(df_parcel.columns.str.replace(' ','_'))

# 重置列名
df_parcel.columns = col_list

In [14]:
# 横行变纵行
# melt参数
id_vars  = col_list[:p_cols_split]
value_vars = col_list[p_cols_split:]

# 整理parcel pcs表格，即每行的目的地只有一个
df_parcel = pd.melt(df_parcel,
                    id_vars=id_vars,
                    value_vars=value_vars,
                    var_name='desc_city_api_model',
                    value_name='parcel_sum')

In [15]:
df_parcel.head()

Unnamed: 0,apt,model,Flight_ID,payload,landing_time,desc_city_api_model,parcel_sum
0,WUX,D,CSS224,15801.457012,00:10:36.585000,10_PEK_D,0.0
1,HGH,D,CSS37,19052.423708,00:13:32.195000,10_PEK_D,0.0
2,PVG,D,CSS75,15018.120988,00:16:27.805000,10_PEK_D,32.264388
3,SZX,D,CSS1041,17323.418717,00:19:23.415000,10_PEK_D,14.913241
4,WUX,D,CSS226,15798.509123,00:20:51.220000,10_PEK_D,0.0


In [16]:
df_parcel.shape

(135915, 7)

### 1.2 分解 desc_city_api_model => dest_city / api / desc_model

In [17]:
# 重新读取desc_city_api_model列，生成DataFrame对象
a = df_parcel['desc_city_api_model'].str.split('_')
b = np.array(a)
c = list(b)
df_dest = pd.DataFrame(c,columns=['dest_city','api','desc_model'])

In [18]:
df_dest.head()

Unnamed: 0,dest_city,api,desc_model
0,10,PEK,D
1,10,PEK,D
2,10,PEK,D
3,10,PEK,D
4,10,PEK,D


In [19]:
# 将目的地的city,api，model的df对象与df_parcel合并,并删除desc_city_api_model列
df_parcel = pd.concat([df_parcel, df_dest], axis=1).drop('desc_city_api_model',axis='columns')

# 造parcel_type列
df_parcel['parcel_type'] = p_type_name

### 1.3 精度控制

In [20]:
#只读取国际:I，国内:D，INF出发数据
df_parcel_air = df_parcel.loc[df_parcel.model.isin(air_model_list), :].copy()
df_parcel_air_parcel_sum = df_parcel_air.parcel_sum.apply(lambda x:round_func(x, p_air_round_ceil, p_air_round_floor))
df_parcel_air.loc[:,'parcel_sum'] = df_parcel_air_parcel_sum

In [21]:
#只读取陆侧出发数据
df_parcel_land = df_parcel.loc[df_parcel.model.isin(air_model_list), :].copy()

# 陆侧精度控制
df_parcel_land_parcel_sum = df_parcel_land.parcel_sum.apply(lambda x:round_func(x, p_land_round_ceil, p_land_round_floor))
df_parcel_land.loc[:,'parcel_sum'] = df_parcel_land_parcel_sum

In [22]:
df_parcel_land.shape, df_parcel_air.shape

((68952, 10), (68952, 10))

In [23]:
df_parcel_land.parcel_sum.sum(),df_parcel_air.parcel_sum.sum()

(396968.0, 398409.0)

### 1.3 合并数据

In [24]:
df_parcel_total = pd.concat([df_parcel_air,df_parcel_land])
df_parcel_total.head()

Unnamed: 0,apt,model,Flight_ID,payload,landing_time,parcel_sum,dest_city,api,desc_model,parcel_type
0,WUX,D,CSS224,15801.457012,00:10:36.585000,0.0,10,PEK,D,parcel
1,HGH,D,CSS37,19052.423708,00:13:32.195000,0.0,10,PEK,D,parcel
2,PVG,D,CSS75,15018.120988,00:16:27.805000,32.0,10,PEK,D,parcel
3,SZX,D,CSS1041,17323.418717,00:19:23.415000,14.0,10,PEK,D,parcel
4,WUX,D,CSS226,15798.509123,00:20:51.220000,0.0,10,PEK,D,parcel


In [25]:
df_parcel_total.shape

(137904, 10)

### 1.4 数据处理

In [26]:
# parcel_sum 置空值列为0
series_sum = df_parcel_total['parcel_sum'].fillna(0)

In [27]:
# 读取parcel_sum列,统计每个去向的包裹数量
# 邮件数量统计
sum_small = series_sum.sum()
print('邮件总数量为：%s'%sum_small)

# 包裹数量统计
sum_parcel = series_sum.count()
print('包裹总数量为：%s'%sum_parcel)

邮件总数量为：795377.0
包裹总数量为：137904


In [28]:
# 去除 parcel_sum 的空值行
df_parcel_total = df_parcel_total.loc[df_parcel_total['parcel_sum'] != 0,:]
df_parcel_total.shape

(35204, 10)

In [29]:
parcel_column_list = list(df_parcel_total.columns)
parcel_sum_list = list(df_parcel_total['parcel_sum'].values)

In [30]:
parcel_total_list = list(np.array(df_parcel_total))
parcel_total_list_new = []
for i,parcel in enumerate(parcel_total_list):
    new = [parcel for i in range(int(parcel_sum_list[i]))]
    parcel_total_list_new += new

In [31]:
df_parcel_total = pd.DataFrame(parcel_total_list_new,columns=parcel_column_list)

In [32]:
# 修改列名（按照数据库）
df_parcel_total.rename(columns=db_columns_modify, inplace = True)

In [33]:
# 按数据库字段，重组columns结构
df_parcel_total = df_parcel_total.reindex(columns=parcel_col_list)
df_parcel_total.head()

Unnamed: 0,small_id,parcel_id,parcel_type,src_type,dest_type,plate_num,uld_num,arrive_time,send_time,plan_disallow_tm,actual_disallow_tm,src_apt,dest_apt,dest_city_code,ident_des_zno,plate_priority,is_mixture,inserted_on,modified_on
0,,,parcel,D,D,CSS75,,00:16:27.805000,,,,PVG,PEK,,,,,,
1,,,parcel,D,D,CSS75,,00:16:27.805000,,,,PVG,PEK,,,,,,
2,,,parcel,D,D,CSS75,,00:16:27.805000,,,,PVG,PEK,,,,,,
3,,,parcel,D,D,CSS75,,00:16:27.805000,,,,PVG,PEK,,,,,,
4,,,parcel,D,D,CSS75,,00:16:27.805000,,,,PVG,PEK,,,,,,


In [34]:
# 格式化时间
df_parcel_total['arrive_time'] = df_parcel_total['arrive_time'].apply(lambda x: x.strftime('%H:%M:%S'))

In [35]:
df_parcel_total.head(3)

Unnamed: 0,small_id,parcel_id,parcel_type,src_type,dest_type,plate_num,uld_num,arrive_time,send_time,plan_disallow_tm,actual_disallow_tm,src_apt,dest_apt,dest_city_code,ident_des_zno,plate_priority,is_mixture,inserted_on,modified_on
0,,,parcel,D,D,CSS75,,00:16:27,,,,PVG,PEK,,,,,,
1,,,parcel,D,D,CSS75,,00:16:27,,,,PVG,PEK,,,,,,
2,,,parcel,D,D,CSS75,,00:16:27,,,,PVG,PEK,,,,,,


In [36]:
df_parcel_total.shape

(795377, 19)

# 2 读取smalls bag pcs表

In [None]:
df_small.head()

In [None]:
# 取列名，替换空格，重置列名
df_small.columns = list(df_small.columns.str.replace(' ','_'))

In [None]:
## 宽行变纵行
# melt参数
id_vars_small  = col_list[:parse_cols_split]
value_vars_small = col_list[parse_cols_split:]

# 整理parcel pcs表格，即每行的目的地只有一个
df_small = pd.melt(df_small,
                    id_vars=id_vars_small,
                    value_vars=value_vars_small,
                    var_name='desc_city_api_model',
                    value_name='parcel_sum')

In [None]:
df_small.head()

In [None]:
df_small['parcel_sum']=df_small['parcel_sum'].apply(lambda x:x*20)

In [None]:
## 分解desc_city_api_model列
# 生成DataFrame对象，与原数据拼接
a_small=df_small['desc_city_api_model'].str.split('_')
b_small=np.array(a_small)
c_small=list(b_small)
df_desc_small= pd.DataFrame(c_small,columns=['city','api','desc_model'])

# 将目的地的city,api，model的df对象与df_small合并,并删除desc_city_api_model列
df_small = pd.concat([df_small, df_desc_small], axis=1).drop('desc_city_api_model',axis='columns')

In [None]:
# 增加parcel_type列，
df_small['parcel_type'] = small_type_name

In [None]:
## 精度转换
#只读取国际:I，国内:D，INF出发数据
df_small_air = df_small.loc[df_small.model.isin(small_air_model_list), :]

#只读取陆侧出发数据
df_small_land_land = df_small.loc[df_small.model.isin(['R']), :]


# 空侧精度控制
df_small_air['parcel_sum']=df_small_air['parcel_sum'].apply(lambda x:small_round_air(x))

# 陆侧精度控制
df_small_land_land['parcel_sum']=df_small_land_land['parcel_sum'].apply(lambda x:small_round_land(x))

In [None]:
## small类型空测包裹数据处理
# 读取parcel_sum列,每个去向的包裹数量
series_sum_air_small = df_small_air['parcel_sum'].fillna(0)
len_sum_air_small = series_sum_air_small.sum()
print('和为：%s'%len_sum_air_small)
arr_sum_small = list(np.array(series_sum_air_small,dtype=np.int64))

# 临时存放包裹
new_list_small = []

# 包裹的columns
df_small_columns_list =  list(df_small_air.columns)

arr1_list_small_air = list(np.array(df_small_air))
for i,val in enumerate(arr1_list_small_air):
    if arr_sum_small[i] > 0:
        for i in range(arr_sum_small[i]):
            new_list_small.append(val)
print('new_list_small长度：%s' % len(new_list_small))


# 根据包裹数量生成包裹
df_small_obj = pd.DataFrame(np.array(new_list_small),columns=df_small_columns_list)

# 修改列名（按照数据库）
df_small_obj.rename(columns=db_columns_modify, inplace = True)

# 按数据库字段，重组columns结构
air_small_data_new = df_small_obj.loc[df_small_obj.src_type.isin(small_air_model_list), parcel_col_list]

# 格式化时间
air_small_data_new['arrive_time'] = air_small_data_new['arrive_time'].apply(lambda x: x.strftime('%H:%M:%S'))

In [None]:
## small类型陆测包裹数据处理
# 读取parcel_sum列,每个去向的包裹数量
series_sum_land_small = df_small_land_land['parcel_sum'].fillna(0)

len_sum_air_small_land = series_sum_land_small.sum()
print('和为：%s'%len_sum_air_small_land)

arr_sum_small_land = list(np.array(series_sum_land_small,dtype=np.int64))

# 临时存放包裹
new_list_small_land = []

# 包裹的columns
df_small_columns_list =  list(df_small_land_land.columns)

arr1_list_small_land = list(np.array(df_small_land_land))
for i,val in enumerate(arr1_list_small_land):
    if arr_sum_small_land[i] > 0:
        for i in range(arr_sum_small_land[i]):
            new_list_small_land.append(val)
print('new_list_small_land长度：%s' % len(new_list_small_land))


# 根据包裹数量生成包裹
df_small_obj_land = pd.DataFrame(np.array(new_list_small_land),columns=df_small_columns_list)

# 修改列名（按照数据库）
df_small_obj_land.rename(columns=db_columns_modify, inplace = True)

# 按数据库字段，重组columns结构
land_small_data_new = df_small_obj_land.loc[df_small_obj_land.src_type.isin(['R']), parcel_col_list]

# 格式化时间
land_small_data_new['arrive_time'] = land_small_data_new['arrive_time'].apply(lambda x: x.strftime('%H:%M:%S'))

In [None]:
# small类型包裹合并
total_small=pd.concat([land_small_data_new,air_small_data_new])

In [None]:
total_small.head()

In [None]:
total_small.shape

# 3. 读取Irregular pcs表

In [None]:
# 读取irregular bag pcs表
df_irregular = read_excel(
    excel_path=parcel_path,
    sheet_name=irregular_sheet_name,
    read_columns=irregular_parse_cols,
    fill_val=0,
    skip_footer=irregular_parse_skip_num
)

# 取列名，替换空格，
col_list_irregular = list(df_irregular.columns.str.replace(' ','_'))
# 重置列名
df_irregular.columns = col_list_irregular

In [None]:
# melt参数
id_vars_nc  = col_list_irregular[:parse_cols_split]
value_vars_nc = col_list_irregular[parse_cols_split:]

# 整理parcel pcs表格，即每行的目的地只有一个
df_irregular = pd.melt(
    df_irregular,
    id_vars=id_vars_nc,
    value_vars=value_vars_nc,
    var_name='desc_city_api_model',
    value_name='parcel_sum'
)

In [None]:
# 重新读取desc_city_api_model列，生成DataFrame对象，与原数据拼接
a_irregular=df_irregular['desc_city_api_model'].str.split('_')
b_irregular=np.array(a_irregular)
c_irregular=list(b_irregular)
df_desc_irregular= pd.DataFrame(c_irregular,columns=['city','api','desc_model'])

# 将目的地的city,api，model的df对象与df_irregular合并,并删除desc_city_api_model列
df_irregular = pd.concat([df_irregular, df_desc_irregular], axis=1).drop('desc_city_api_model',axis='columns')

In [None]:
# 增加parcel_type列，
df_irregular['parcel_type'] = irregular_type_name

In [None]:
#只读取国际:I，国内:D，INF出发数据
df_irregular_air = df_irregular.loc[df_irregular.model.isin(irregular_air_model_list), :]

#只读取陆侧出发数据
df_irregular_land_land = df_irregular.loc[df_irregular.model.isin(['R']), :]

# 空侧精度控制
df_irregular_air['parcel_sum']=df_irregular_air['parcel_sum'].apply(lambda x:irregular_round_air(x))

# 陆侧精度控制
df_irregular_land_land['parcel_sum']=df_irregular_land_land['parcel_sum'].apply(lambda x:irregular_round_land(x))

In [None]:
# 根据包裹的数量，造irregular_id,parcel_id
# irregular类型空测包裹

# 读取parcel_sum列,每个去向的包裹数量
series_sum_air_irregular = df_irregular_air['parcel_sum'].fillna(0)
len_sum_air_irregular = series_sum_air_irregular.sum()
print('和为：%s'%len_sum_air_irregular)
arr_sum_irregular = list(np.array(series_sum_air_irregular,dtype=np.int64))

# 临时存放包裹
new_list_irregular = []

# 包裹的columns
df_irregular_columns_list =  list(df_irregular_air.columns)

arr1_list_irregular_air = list(np.array(df_irregular_air))
for i,val in enumerate(arr1_list_irregular_air):
    if arr_sum_irregular[i] > 0:
        for i in range(arr_sum_irregular[i]):
            new_list_irregular.append(val)
print('new_list_irregular长度：%s' % len(new_list_irregular))


# 根据包裹数量生成包裹
df_irregular_obj = pd.DataFrame(np.array(new_list_irregular),columns=df_irregular_columns_list)

# 修改列名（按照数据库）
df_irregular_obj.rename(columns=db_columns_modify, inplace = True)

# 按数据库字段，重组columns结构
air_irregular_data_new = df_irregular_obj.loc[df_irregular_obj.src_type.isin(irregular_air_model_list), parcel_col_list]

# 格式化时间
air_irregular_data_new['arrive_time'] = air_irregular_data_new['arrive_time'].apply(lambda x: x.strftime('%H:%M:%S'))

In [None]:
## irregular类型陆测包裹
# 读取parcel_sum列,每个去向的包裹数量
series_sum_land_irregular = df_irregular_land_land['parcel_sum'].fillna(0)

len_sum_air_irregular_land = series_sum_land_irregular.sum()
print('和为：%s'%len_sum_air_irregular_land)

arr_sum_irregular_land = list(np.array(series_sum_land_irregular,dtype=np.int64))

# 临时存放包裹
new_list_irregular_land = []

# 包裹的columns
df_irregular_columns_list =  list(df_irregular_land_land.columns)

arr1_list_irregular_land = list(np.array(df_irregular_land_land))
for i,val in enumerate(arr1_list_irregular_land):
    if arr_sum_irregular_land[i] > 0:
        for i in range(arr_sum_irregular_land[i]):
            new_list_irregular_land.append(val)
print('new_list_irregular_land长度：%s' % len(new_list_irregular_land))


# 根据包裹数量生成包裹
df_irregular_obj_land = pd.DataFrame(np.array(new_list_irregular_land),columns=df_irregular_columns_list)

# 修改列名（按照数据库）
df_irregular_obj_land.rename(columns=db_columns_modify, inplace = True)

# 按数据库字段，重组columns结构
land_irregular_data_new = df_irregular_obj_land.loc[df_irregular_obj_land.src_type.isin(['R']), parcel_col_list]

# 格式化时间
land_irregular_data_new['arrive_time'] = land_irregular_data_new['arrive_time'].apply(lambda x: x.strftime('%H:%M:%S'))

In [None]:
# irregular类型包裹
total_irregular=pd.concat([land_irregular_data_new,air_irregular_data_new])

In [None]:
total_irregular.head()

In [None]:
total_irregular.shape

# 4. 读取NC表

In [None]:
# 读取nc bag pcs表
df_nc = read_excel(
    excel_path=parcel_path,
    sheet_name=nc_sheet_name,
    read_columns=nc_parse_cols,
    fill_val=0,
    skip_footer=nc_parse_skip_num
)

# 取列名，替换空格
col_list_nc = list(df_nc.columns.str.replace(' ','_'))

# 重置列名
df_nc.columns = col_list_nc

In [None]:
# melt参数
id_vars_nc  = col_list_nc[:parse_cols_split]
value_vars_nc = col_list_nc[parse_cols_split:]

# 整理parcel pcs表格，即每行的目的地只有一个
df_nc = pd.melt(
    df_nc,
    id_vars=id_vars_nc,
    value_vars=value_vars_nc,
    var_name='desc_city_api_model',
    value_name='parcel_sum'
)

In [None]:
# 重新读取desc_city_api_model列，生成DataFrame对象，与原数据拼接
a_nc=df_nc['desc_city_api_model'].str.split('_')
b_nc=np.array(a_nc)
c_nc=list(b_nc)
df_desc_nc= pd.DataFrame(c_nc,columns=['city','api','desc_model'])

# 将目的地的city,api，model的df对象与df_nc合并,并删除desc_city_api_model列
df_nc = pd.concat([df_nc, df_desc_nc], axis=1).drop('desc_city_api_model',axis='columns')

In [None]:
# 增加parcel_type列，
df_nc['parcel_type'] = nc_type_name

In [None]:
#只读取国际:I，国内:D，INF出发数据
df_nc_air = df_nc.loc[df_nc.model.isin(nc_air_model_list), :]

#只读取陆侧出发数据
df_nc_land_land = df_nc.loc[df_nc.model.isin(['R']), :]


# 空侧精度控制
df_nc_air['parcel_sum']=df_nc_air['parcel_sum'].apply(lambda x:nc_round_air(x))

# 陆侧精度控制
df_nc_land_land['parcel_sum']=df_nc_land_land['parcel_sum'].apply(lambda x:nc_round_land(x))

In [None]:
# 根据包裹的数量，造nc_id,parcel_id
# nc类型空测包裹

# 读取parcel_sum列,每个去向的包裹数量
series_sum_air_nc = df_nc_air['parcel_sum'].fillna(0)
len_sum_air_nc = series_sum_air_nc.sum()
print('和为：%s'%len_sum_air_nc)
arr_sum_nc = list(np.array(series_sum_air_nc,dtype=np.int64))

# 临时存放包裹
new_list_nc = []

# 包裹的columns
df_nc_columns_list =  list(df_nc_air.columns)

arr1_list_nc_air = list(np.array(df_nc_air))
for i,val in enumerate(arr1_list_nc_air):
    if arr_sum_nc[i] > 0:
        for i in range(arr_sum_nc[i]):
            new_list_nc.append(val)
print('new_list_nc长度：%s' % len(new_list_nc))


# 根据包裹数量生成包裹
df_nc_obj = pd.DataFrame(np.array(new_list_nc),columns=df_nc_columns_list)

# 修改列名（按照数据库）
df_nc_obj.rename(columns=db_columns_modify, inplace = True)

# 按数据库字段，重组columns结构
air_nc_data_new = df_nc_obj.loc[df_nc_obj.src_type.isin(nc_air_model_list), parcel_col_list]

# 格式化时间
air_nc_data_new['arrive_time'] = air_nc_data_new['arrive_time'].apply(lambda x: x.strftime('%H:%M:%S'))

In [None]:
# nc类型陆测包裹
# 读取parcel_sum列,每个去向的包裹数量
series_sum_land_nc = df_nc_land_land['parcel_sum'].fillna(0)

len_sum_air_nc_land = series_sum_land_nc.sum()
print('和为：%s'%len_sum_air_nc_land)

arr_sum_nc_land = list(np.array(series_sum_land_nc,dtype=np.int64))

# 临时存放包裹
new_list_nc_land = []

# 包裹的columns
df_nc_columns_list =  list(df_nc_land_land.columns)

arr1_list_nc_land = list(np.array(df_nc_land_land))
for i,val in enumerate(arr1_list_nc_land):
    if arr_sum_nc_land[i] > 0:
        for i in range(arr_sum_nc_land[i]):
            new_list_nc_land.append(val)
print('new_list_nc_land长度：%s' % len(new_list_nc_land))


# 根据包裹数量生成包裹
df_nc_obj_land = pd.DataFrame(np.array(new_list_nc_land),columns=df_nc_columns_list)



# 修改列名（按照数据库）
df_nc_obj_land.rename(columns=db_columns_modify, inplace = True)

# 按数据库字段，重组columns结构
land_nc_data_new = df_nc_obj_land.loc[df_nc_obj_land.src_type.isin(['R']), parcel_col_list]

# 格式化时间
land_nc_data_new['arrive_time'] = land_nc_data_new['arrive_time'].apply(lambda x: x.strftime('%H:%M:%S'))



In [None]:
# NC类型包裹
total_nc=pd.concat([land_nc_data_new,air_nc_data_new])

In [None]:
total_nc.head()

In [None]:
total_nc.shape

# 5. 读取mail表

In [None]:
# 读取isb bag pcs表
df_isb = read_excel(
    excel_path=parcel_path,
    sheet_name=isb_sheet_name,
    read_columns=isb_parse_cols,
    fill_val=0,
    skip_footer=isb_parse_skip_num
)

# 取列名，替换空格
col_list_isb = list(df_isb.columns.str.replace(' ','_'))

# 重置列名
df_isb.columns = col_list_isb

In [None]:
# melt参数
id_vars_isb  = col_list_isb[:parse_cols_split]
value_vars_isb = col_list_isb[parse_cols_split:]

# 整理parcel pcs表格，即每行的目的地只有一个
df_isb = pd.melt(
    df_isb,
    id_vars=id_vars_isb,
    value_vars=value_vars_isb,
    var_name='desc_city_api_model',
    value_name='parcel_sum'
)

In [None]:
df_isb['parcel_sum']=df_isb['parcel_sum'].apply(lambda x:x*100)

In [None]:
# 重新读取desc_city_api_model列，生成DataFrame对象，与原数据拼接
a_isb=df_isb['desc_city_api_model'].str.split('_')
b_isb=np.array(a_isb)
c_isb=list(b_isb)
df_desc_isb= pd.DataFrame(c_isb,columns=['city','api','desc_model'])

# 将目的地的city,api，model的df对象与df_isb合并,并删除desc_city_api_model列
df_isb = pd.concat([df_isb, df_desc_isb], axis=1).drop('desc_city_api_model',axis='columns')

In [None]:
# 增加parcel_type列，
df_isb['parcel_type'] = isb_type_name

In [None]:
#只读取国际:I，国内:D，INF出发数据
df_isb_air = df_isb.loc[df_isb.model.isin(isb_air_model_list), :]

#只读取陆侧出发数据
df_isb_land_land = df_isb.loc[df_isb.model.isin(['R']), :]


# 空侧精度控制
df_isb_air['parcel_sum']=df_isb_air['parcel_sum'].apply(lambda x:mail_round_air(x))

# 陆侧精度控制
df_isb_land_land['parcel_sum']=df_isb_land_land['parcel_sum'].apply(lambda x:mail_round_land(x))

In [None]:
# 根据包裹的数量，造isb_id,parcel_id
# isb类型空测包裹

# 读取parcel_sum列,每个去向的包裹数量
series_sum_air_isb = df_isb_air['parcel_sum'].fillna(0)
len_sum_air_isb = series_sum_air_isb.sum()
print('和为：%s'%len_sum_air_isb)
arr_sum_isb = list(np.array(series_sum_air_isb,dtype=np.int64))

# 临时存放包裹
new_list_isb = []

# 包裹的columns
df_isb_columns_list =  list(df_isb_air.columns)

arr1_list_isb_air = list(np.array(df_isb_air))
for i,val in enumerate(arr1_list_isb_air):
    if arr_sum_isb[i] > 0:
        for i in range(arr_sum_isb[i]):
            new_list_isb.append(val)
print('new_list_isb长度：%s' % len(new_list_isb))


# 根据包裹数量生成包裹
df_isb_obj = pd.DataFrame(np.array(new_list_isb),columns=df_isb_columns_list)

# 修改列名（按照数据库）
df_isb_obj.rename(columns=db_columns_modify, inplace = True)

# 按数据库字段，重组columns结构
air_isb_data_new = df_isb_obj.loc[df_isb_obj.src_type.isin(isb_air_model_list), parcel_col_list]

# 格式化时间
air_isb_data_new['arrive_time'] = air_isb_data_new['arrive_time'].apply(lambda x: x.strftime('%H:%M:%S'))

In [None]:
# isb类型陆测包裹
# 读取parcel_sum列,每个去向的包裹数量
series_sum_land_isb = df_isb_land_land['parcel_sum'].fillna(0)

len_sum_air_isb_land = series_sum_land_isb.sum()
print('和为：%s'%len_sum_air_isb_land)

arr_sum_isb_land = list(np.array(series_sum_land_isb,dtype=np.int64))

# 临时存放包裹
new_list_isb_land = []

# 包裹的columns
df_isb_columns_list =  list(df_isb_land_land.columns)

arr1_list_isb_land = list(np.array(df_isb_land_land))
for i,val in enumerate(arr1_list_isb_land):
    if arr_sum_isb_land[i] > 0:
        for i in range(arr_sum_isb_land[i]):
            new_list_isb_land.append(val)
print('new_list_isb_land长度：%s' % len(new_list_isb_land))


# 根据包裹数量生成包裹
df_isb_obj_land = pd.DataFrame(np.array(new_list_isb_land),columns=df_isb_columns_list)

# 修改列名（按照数据库）
df_isb_obj_land.rename(columns=db_columns_modify, inplace = True)

# 按数据库字段，重组columns结构
land_isb_data_new = df_isb_obj_land.loc[df_isb_obj_land.src_type.isin(['R']), parcel_col_list]

# 格式化时间
land_isb_data_new['arrive_time'] = land_isb_data_new['arrive_time'].apply(lambda x: x.strftime('%H:%M:%S'))

In [None]:
# isb类型包裹
total_isb=pd.concat([land_isb_data_new,air_isb_data_new])

In [None]:
total_isb.head()

In [None]:
total_isb.shape

# 6. 合并所有类型的表

In [None]:
total=pd.concat([total_parcel,total_small,total_irregular,total_nc,total_isb])

In [None]:
total_isb.head()

In [None]:
total.shape

### 6.1 制造small_id

In [None]:
id_list_small =["{i:0>{n}}".format(i=i,n=7) for i in range(1000000,int(total.shape[0])+1000000)]
df_id_small = pd.DataFrame({"small_id":id_list_small})
print()

In [None]:
display(df_id_small.head(),df_id_small.tail())

In [None]:
total['small_id']=id_list_small

In [None]:
# 调整时间格式
total['arrive_time']=total['arrive_time'].apply(lambda x:"2045-02-07 "+x if int(x[0:2])>12 else "2045-02-08 "+x)

In [None]:
# 纯票类的 parcel ,irregular ,nc
total_a = total.loc[total.parcel_type.isin(['parcel','irregular','nc'])]

# 包类的 small ,isb
total_b = total.loc[total.parcel_type.isin(['small','isb'])]

total_a['parcel_id'] = total_a['small_id']

total=pd.concat([total_a,total_b])

total.reset_index(drop=True,inplace=True)

In [None]:
total[total['parcel_type']=='isb'].head()

In [None]:
total.shape

# 7. 读取航班与uld的关系

In [None]:
# 读取新版的ULD数据
uld_name = r'./file/0103ULD_id.xlsx'
uld_sheetname = 'ULD'
uld_parse_cols = 'A:C'

df_uld_base = pd.read_excel(
    io=uld_name,
    sheet_name=uld_sheetname,
    usecols=uld_parse_cols,
)

In [None]:
df_uld_base.shape

In [None]:
df_uld_base['property'].unique()

In [None]:
len(df_uld_base['ULD id'].unique())

In [None]:
len(df_uld_base['flight id'].unique())

In [None]:
# 选取为loaded的箱子
df_uld = df_uld_base.loc[df_uld_base.property.isin(['loaded']),:]

In [None]:
# 聚合 flight id
uld_gp = df_uld.groupby("flight id")

In [None]:
# 每个航班中有哪些ULD
uld_dic = uld_gp['ULD id'].apply(list).to_dict()

In [None]:
type(uld_dic)

In [None]:
len(uld_dic)

In [None]:
uld_dic['CSS1']

In [None]:
# 根据航班进行分组
tota_gp = total.groupby('plate_num')
# 每个航班中有哪些包裹
total_small_dic = tota_gp.small_id.apply(list).to_dict()  

In [None]:
len(total_small_dic)

In [None]:
total_small_dic.keys()

In [None]:
# 所有的ULD
uld_li=list(df_uld['ULD id'].drop_duplicates())
uld_con={i:[] for i in uld_li}

In [None]:
import random

for plate_num,uld_list in  uld_dic.items():  # plate_num:航班，uld_list：每个航班的ULD列表
    while total_small_dic[plate_num]: # 如果该航班不空，一直去排队去small_id
        for uld_num in uld_list:
#             print(uld_num)

            # 每个箱子排队拿一个small_id？一个箱子拿多少个呢？
            a_parcel = random.choice(total_small_dic[plate_num])
            total_small_dic[plate_num].remove(a_parcel)
            uld_con[uld_num].append(a_parcel)
            
            if total_small_dic[plate_num]==[]:
                break

In [None]:
len(uld_con)

In [None]:
# 赋值uld_num
for uld_num,small_list in uld_con.items():
    total.loc[total.small_id.isin(small_list),'uld_num'] = uld_num

In [None]:
total['uld_num'][:20]

# 8. 造parcel_id

In [None]:
total.index = list(total['small_id'])

In [None]:
total_small_base = total.loc[total.parcel_type.isin(['small']),['small_id','parcel_id','plate_num','uld_num','src_type','dest_type',]]

In [None]:
# small类型--空测
total_small = total_small_base.loc[total_small_base.src_type.isin(['I','D','INF']),:]
# small类型--陆测
total_small_src = total_small_base.loc[total_small_base.src_type.isin(['R']),:]

In [None]:
od_small = total_small.groupby('uld_num')

### 8.1 空测的用uld造

In [None]:
# 取出所有的 uld_num 字典
uld_num_dict = {}
uld_num_list = []
for uld_num,value in od_small:
    uld_num_dict[uld_num] = value
    uld_num_list.append(uld_num)

In [None]:
#  打乱 big_dict[uld_num] 中的数据
big_dict = {uld:None for uld in uld_num_list}

for uld_num,value in uld_num_dict.items():
    li = list(value['small_id'])
    random.shuffle(li)
    big_dict[uld_num] = li

In [None]:
# k2=1  #2263
for uld_num,val in big_dict.items():
    n = 1
    le = len(val) / 20
    b = len(val) // 20
    if le  > b:
        le = b + 1
    else:
        le = b
    for i in range(le):
        list1 = val[20*i:20*(i+1)]
        if list1:
            total['parcel_id'][list1] = uld_num + 's' + str(n) # 索引取值
            n += 1
#     print(k2)             
#     k2+=1

### 8.2  陆侧的用航班造

In [None]:
od_small_land = total_small_src.groupby('plate_num')

# plate_num : [small_id] 字典化
plate_num_dict = {}
plate_num_list = []
for plate_num,value in od_small_land:
    plate_num_dict[plate_num] = value
    plate_num_list.append(plate_num)
    
# plate_num : [small_id] 字典中的数据打乱
big_dict_land = {plate:None for plate in plate_num_list}

for plate_num,value in plate_num_dict.items():
    li = list(value['small_id'])
    random.shuffle(li)
    big_dict_land[plate_num] = li

In [None]:
# k3=1    # 101    
for plate_num,val in big_dict_land.items():
    
    n=1
    le=len(val)/20
    b=len(val)//20                                                                                                         
    if le>b:
        le=b+1
    else:
        le=b
    for i in range(le):
        list1=val[20*i:20*i+20]
        if list1:
            total['parcel_id'][list1] = plate_num + 's' + str(n) # 索引取值
            n += 1
            
#     print(k3)
#     k3+=1

### 9. 国际小件包造 uld_num / parcel_id

In [None]:
total[total['parcel_type'] == 'isb'].head()

In [None]:
total[total['parcel_type'] == 'isb']['uld_num'].unique()

In [None]:
len(total[total['parcel_type'] == 'isb']['uld_num'].unique())

In [None]:
# 国际小件包
total_isb_base = total.loc[total.parcel_type.isin(['isb']),['small_id','parcel_id','plate_num','uld_num','src_type','dest_type',]]

# isb类型--空测  total_isb
total_isb = total_isb_base.loc[total_isb_base.src_type.isin(['I','D','INF']),:]
# isb类型--陆测  total_isb_land  ？？？国际小件包是否没有陆侧
total_isb_land = total_isb_base.loc[total_isb_base.src_type.isin(['R']),:]

### 9.1 空侧数据处理

In [None]:
total_isb.src_type = total_isb.src_type.str.replace("INF", "I")
total_isb.dest_type = total_isb.dest_type.str.replace("INF", "I")

total_isb_gp = total_isb.groupby('uld_num')

uld_num_dict_isb = {}
uld_num_list_isb = []
for uld_num,value in total_isb_gp:
    uld_num_dict_isb[uld_num] = value
    uld_num_list_isb.append(uld_num)

In [None]:
uld_num_dict_isb['ULD6102062C0224'].groupby(['src_type','dest_type']).groups

In [None]:
# 根据每一个ULD箱子,创建一个字典，每个字典里，根据存储列表
# 'ULD39855532': {('I', 'D'): [], ('I', 'R'): []}
# 'ULD39855533': {('I', 'D'): [], ('I', 'R'): []}
big_dict_isb = {uld:{i:[] for i,j in uld_num_dict_isb[uld].groupby(['src_type','dest_type'])} 
                for uld in uld_num_list_isb}

for uld_num,value in uld_num_dict_isb.items():
    for i,j in value.groupby(['src_type','dest_type']):
        big_dict_isb[uld_num][i] = list(j['small_id'])

In [None]:
# k6=1
for uld_num,line_dict in big_dict_isb.items():
    n = 1
    for line,val in line_dict.items():
        print(uld_num,line)
        le = len(val)/20
        b = len(val)//20
        if le>b:
            le = b+1
        else:
            le=b
        for i in range(le):
            list1=val[20*i:20*(i+1)]
            if list1:
                total['parcel_id'][list1] = uld_num + 'i' + str(n)
                n += 1
#     print(k6)
#     k6+=1

### 9.2 陆侧数据处理

In [None]:
total_isb_land.src_type = total_isb_land.src_type.str.replace("INF", "I")
total_isb_land.dest_type = total_isb_land.dest_type.str.replace("INF", "I")

total_isb_land_gp=total_isb_land.groupby('plate_num')

plate_dict_isb_land={}
plate_list_isb_land=[]
for plate_num,value in total_isb_land_gp:
    plate_dict_isb_land[plate_num]=value
    plate_list_isb_land.append(plate_num)

    
big_dict_isb_land={plate:{i:[] for i,j in plate_dict_isb_land[plate].groupby(['src_type','dest_type'])} 
                   for plate in plate_list_isb_land}

for plate_num,value in plate_dict_isb_land.items():
    for i,j in value.groupby(['src_type','dest_type']):
        big_dict_isb_land[plate_num][i]=list(j['small_id'])

In [None]:
for plate_num,line_dict in big_dict_isb_land.items():
    n = 1
    for line,val in line_dict.items():
        le = len(val)/20
        b = len(val)//20
        if le>b:
            le = b+1
        else:
            le = b
        for i in range(le):
            list1 = val[20*i:20*i+20]
            if list1:
                total['parcel_id'][list1] = plate_num + 'i' + str(n)
                n += 1

# 10. 写入数据库

In [None]:
# total.to_sql(wirte_sql_name,engine,if_exists='append',chunksize=10000,index=False)