## 本程序调用构建好的网络，对读取的feature数据集进行预测，并做简单分析

## 输入文件有两个，一是训练好的模型，默认在“nn_train”文件夹下的h5文件；另一个是feature文件，默认在“datasets”文件夹下；
## 预测的输出的结果为txt文件，默认放置在“prediction”中

## 输入的feature文件需要是数组数据。下面是一个例子（去除开头的#）

In [None]:
# 1.562486e-01  1.549052e-01  1.508934e-01  ... -3.554264e-07 -5.424697e-07
# 2.557644e-01  2.537106e-01  2.477222e-01  ... -1.189495e-06 -1.089308e-06
# ...
# 2.487297e-01  2.467340e-01  2.409130e-01  ... -2.220299e-06 -1.854691e-06

last update: 2022.6.11

在46_预测分析_20220609的基础上进行修改，将其拆分成预测和分析两个模块，本模块为预测模块

contacts：zhaohf@ihep.ac.cn

# 配置环境

通用模块构建
  检查python、Tensorflow等模块的版本，确保搭建的框架能够稳定运行。

In [1]:
import os
# sys is used for the printout of the python version and for sys.exit() on errors
import sys
import pandas as pd
import numpy as np
from tensorflow import keras

输出导入模块的版本。

In [2]:
# Report the interpreter and library versions used for this run.
version_report = (
    ('python version:', sys.version),
    ('panda version:', pd.__version__),
    ('numpy version:', np.__version__),
    ('keras version:', keras.__version__),
)
for label, version in version_report:
    print(label, version)

python version: 3.9.12 (main, Apr  4 2022, 05:22:27) [MSC v.1916 64 bit (AMD64)]
panda version: 1.4.2
numpy version: 1.21.5
keras version: 2.7.0


# 参数设置

## 输入文件夹

In [3]:
#
# Folder (path relative to this code) and file name of the trained model to load.
# dir_model : Directory that ML model file exists
# file_saved_model : file name of saved ML model
#============================
dir_model = './../nn_train'
#
# Alternative saved models are kept below; only one file_saved_model line is active.
#file_saved_model = 'au150_DW_ann_chi_cn1_200_20_1_run_2022_05_28-13_54_31.h5'
#file_saved_model = 'au150_DW_ann_chi_cr1_200_20_1_run_2022_05_28-15_27_10.h5'
#file_saved_model = 'au150_DW_ann_k2chi_cn1_200_20_1_run_2022_05_29-23_50_32.h5'
#file_saved_model = 'au150_DW_ann_k2chi_cr1_200_20_1_run_2022_05_30-06_47_04.h5'
#file_saved_model = 'au150_DW_ann_xmu_cn1_200_20_1_run_2022_05_28-15_56_55.h5'
file_saved_model = 'au150_DW_ann_xmu_cr1_200_20_1_run_2022_05_28-16_45_51.h5'
#============================
#
# File name of the feature file to predict on and its folder (path relative to this code).
# dir_data : Directory for input data
# file_feature : file name of input feature
#===================================
dir_data = './../datasets'
file_feature = 'Au_xmu_test.txt'
#===================================
#

## 输出设置

设置预测结果和预测分析所放置的文件夹

In [5]:
#
# Folder that receives the output files (path relative to this code).
#======================================
dir_pred = './../prediction'
#=======================================
#

## 格式化输出设置

In [6]:
#
# Whether the predicted values are written with a fixed format and, if so,
# how many digits are kept.
# w_format_output : whether giving a formatted output?
#                   True : the number of digits (N_digit_pred) will be used
#                   False : non-formatted output of prediction
# N_digit_pred : number of digits of a printed prediction value, i.e. the sum
#                of the digit counts before and after the decimal point.
#=======================
w_format_output = True
N_digit_pred = 6
#=======================
#

# 函数、类模块

将字典格式化输出到文件

In [7]:
def dict2txt(filepath, dic, note='', wa='w'):
    """Print a dictionary of numeric values and write the same text to a file.

    The note is emitted first, followed by one line per entry. Each key is
    left-aligned in 25 characters and each value is right-aligned in 10
    characters as a fixed-point number with 7 decimals.

    Args:
        filepath: Path of the text file to write.
        dic: Mapping from string keys to numeric values.
        note: Header text emitted before the entries.
        wa: Mode passed to ``open``; 'w' overwrites, 'a' appends.
    """
    # The context manager closes the file even if formatting an entry raises.
    with open(filepath, wa) as fout:
        print(note)
        print(note, file=fout)
        for key, value in dic.items():
            line = f"  {key:<25s}: {value:>10.7f}"
            print(line)
            print(line, file=fout)
#

# 正文

## 确定当前目录和工作目录

In [10]:
# Absolute path of the current working directory.
path = os.path.abspath(os.curdir)
# Display the path (notebook cell output).
path

'D:\\haifeng\\work_20211031\\XAS-ML\\code\\neural network module\\jupyternotebook_code'

检查这些文件是否存在

In [11]:
# Build the path of the feature file from the configured folder and file name.
path_dir_data = os.path.join(path, dir_data)
path_file_data = os.path.join(path_dir_data, file_feature)
#
# Abort if the feature file is missing. isfile() also rejects a directory of
# that name, which could not be read as a table anyway.
exist = os.path.isfile(path_file_data)
if not exist:
    print()
    print(f"** Error!! cannot find file {file_feature} in directory {dir_data}! **")
    print()
    # Exit with a non-zero status: a bare sys.exit() would report success.
    sys.exit(1)

## 数据导入

### 读取test文件，制成数据集。

In [12]:
# Read the whitespace-separated feature table (no header row) and take its
# values as a NumPy array. df.values, df.to_numpy() and np.array(df) all work
# (df.as_matrix() was removed in pandas 1.0).
# sep=r'\s+' is the documented equivalent of the deprecated delim_whitespace=True.
data_df = pd.read_csv(path_file_data, header=None, sep=r'\s+')
feature_test = data_df.values
#

In [13]:
# Print the dimensions of the loaded feature array as a quick check of the load.
print(feature_test.shape)

(595, 400)


In [14]:
# Display the loaded feature array (notebook cell output).
feature_test

array([[0.48637 , 0.487087, 0.488887, ..., 0.669524, 0.668488, 0.667451],
       [0.490616, 0.491061, 0.49206 , ..., 0.671798, 0.670758, 0.669718],
       [0.455003, 0.455963, 0.458579, ..., 0.664318, 0.663289, 0.66226 ],
       ...,
       [0.491162, 0.491696, 0.493186, ..., 0.674351, 0.673308, 0.672265],
       [0.465608, 0.466137, 0.467324, ..., 0.671872, 0.670832, 0.669792],
       [0.462475, 0.46302 , 0.464253, ..., 0.671356, 0.670317, 0.669278]])

设置网络训练参数值，输入数组维数

## 模型导入

### 检查网络模型是否存在，若不存在，报错

In [15]:
# Build the path of the saved model from the configured folder and file name.
path_model = os.path.join(path, dir_model)
path_saved_model = os.path.join(path_model, file_saved_model)
#
# Abort if the saved model cannot be found.
exist = os.path.exists(path_saved_model)
if not exist:
    print()
    print('** Error!! cannot find ANN file: ', file_saved_model)
    print()
    # Exit with a non-zero status: a bare sys.exit() would report success.
    sys.exit(1)

## 预测

In [16]:
# Load the saved Keras model and run it on the feature array.
model = keras.models.load_model(path_saved_model)
pred = model.predict(feature_test)
# Display the predictions (notebook cell output).
pred

array([[2.8626764],
       [2.8689287],
       [2.8405063],
       [2.8795326],
       [2.8721554],
       [2.8362243],
       [2.8752635],
       [2.8732631],
       [2.9060059],
       [2.8860862],
       [2.8551948],
       [2.8709788],
       [2.8960836],
       [2.921078 ],
       [2.8631241],
       [2.874155 ],
       [2.8810503],
       [2.8838923],
       [2.815476 ],
       [2.8828428],
       [2.8899567],
       [2.8770077],
       [2.8593543],
       [2.8779998],
       [2.8637497],
       [2.8348527],
       [2.898757 ],
       [2.8477194],
       [2.8967772],
       [2.8402162],
       [2.8731399],
       [2.900366 ],
       [2.8825576],
       [2.903923 ],
       [2.8576767],
       [2.8204138],
       [2.8913927],
       [2.8800802],
       [2.8449485],
       [2.8529587],
       [2.8932219],
       [2.8962061],
       [2.831138 ],
       [2.8683991],
       [2.87974  ],
       [2.8810775],
       [2.8886082],
       [2.871709 ],
       [2.8498733],
       [2.8843737],


## 看一下预测结果小数点左右各多少，好格式化输出

In [17]:
if w_format_output:
    # Split the default string form of every prediction at the decimal point
    # and record the longest integer part and the longest fractional part.
    # partition() yields an empty fractional part when there is no point.
    pred_parts = [str(value).partition(".") for value in pred.flatten()]
    n_pred_before_dot = max((len(whole) for whole, _, _ in pred_parts), default=0)
    n_pred_after_dot = max((len(frac) for _, _, frac in pred_parts), default=0)
    # Too many digits: shorten the fractional part to fit N_digit_pred.
    if n_pred_before_dot + n_pred_after_dot > N_digit_pred:
        if n_pred_before_dot < N_digit_pred:
            n_pred_after_dot = N_digit_pred - n_pred_before_dot
            # Field width = all digits plus one character for the decimal point.
            n_pred = N_digit_pred + 1
        else:
            # The integer part alone uses up the budget: print no decimals.
            n_pred_after_dot = 0
            n_pred = n_pred_before_dot
    else:
        # The field width must also hold the decimal point whenever a
        # fractional part is printed; otherwise values with integer parts of
        # different lengths do not line up.
        n_pred = n_pred_before_dot + n_pred_after_dot + (1 if n_pred_after_dot else 0)
#

## 保存预测结果

In [18]:
# Name the output after the model file, with its extension stripped.
file_prefix = os.path.splitext(os.path.basename(path_saved_model))[0]
# Folder that receives the prediction file; created if it does not exist yet.
path_dir_pred = os.path.join(os.curdir, dir_pred)
os.makedirs(path_dir_pred, exist_ok=True)
file_pred_txt = file_prefix + '_pred.txt'
path_file_pred_txt = os.path.join(path_dir_pred, file_pred_txt)
# Build one text line per predicted value. flatten() matches the way the
# format widths are computed and avoids indexing past the last row when the
# model returns more than one value per sample.
if w_format_output:
    pred_lines = [f"{value:>{n_pred}.{n_pred_after_dot}f}" for value in pred.flatten()]
else:
    # Formatting is switched off, so n_pred / n_pred_after_dot do not exist;
    # fall back to the default string form of each value.
    pred_lines = [str(value) for value in pred.flatten()]
# Echo every line and write the same text to the file. The context manager
# closes the file even if a write fails.
with open(path_file_pred_txt, 'w') as fout:
    for line in pred_lines:
        print(line)
        print(line, file=fout)
#

2.86268
2.86893
2.84051
2.87953
2.87216
2.83622
2.87526
2.87326
2.90601
2.88609
2.85519
2.87098
2.89608
2.92108
2.86312
2.87416
2.88105
2.88389
2.81548
2.88284
2.88996
2.87701
2.85935
2.87800
2.86375
2.83485
2.89876
2.84772
2.89678
2.84022
2.87314
2.90037
2.88256
2.90392
2.85768
2.82041
2.89139
2.88008
2.84495
2.85296
2.89322
2.89621
2.83114
2.86840
2.87974
2.88108
2.88861
2.87171
2.84987
2.88437
2.90488
2.85811
2.86727
2.90171
2.88421
2.87195
2.85705
2.87829
2.85888
2.85404
2.84222
2.89879
2.84375
2.86370
2.87074
2.85345
2.85825
2.86080
2.87082
2.86483
2.90930
2.88904
2.84591
2.88236
2.89799
2.87826
2.86764
2.87338
2.88984
2.87801
2.84676
2.88178
2.83390
2.86499
2.87690
2.89194
2.87766
2.89305
2.86134
2.86790
2.85780
2.86898
2.86951
2.85759
2.87184
2.89887
2.87333
2.86145
2.89769
2.86923
2.88088
2.87279
2.87155
2.87382
2.88080
2.91253
2.89330
2.89833
2.87324
2.87969
2.86737
2.88599
2.87783
2.87006
2.87746
2.84486
2.86487
2.87739
2.88429
2.91012
2.90576
2.86972
2.86472
2.81297
2.88396


## 简单分析和输出

In [19]:
# Summary statistics of the predictions; each label is the key printed in the
# report, paired with the NumPy reduction that produces its value.
stat_functions = (
    ('Pred_maximum', np.max),
    ('Pred_minimum', np.min),
    ('Pred_mean', np.mean),
    ('Pred_variance', np.var),
    ('Pred_standard deviation', np.std),
    ('Pred_median', np.median),
)
dict_pred = {label: reduce_func(pred) for label, reduce_func in stat_functions}
# Print the statistics and write them next to the prediction file.
file_stat_txt = f"{file_prefix}_pred_statistics.txt"
path_file_stat = os.path.join(path_dir_pred, file_stat_txt)
dict2txt(path_file_stat, dict_pred, "\nPrediction Info:")


Prediction Info:
  Pred_maximum             :  2.9242852
  Pred_minimum             :  2.8037510
  Pred_mean                :  2.8730798
  Pred_variance            :  0.0004415
  Pred_standard deviation  :  0.0210118
  Pred_median              :  2.8742514
