In [1]:
#!pip install tensorflow

In [2]:
#!pip install numpy scipy matplotlib scikit-learn

In [3]:
#!pip install pandas

In [4]:
import tensorflow as tf

# Report the installed TensorFlow version.
print("TensorFlow version:", tf.__version__)

# Show every physical device that TensorFlow can see.
print("Physical devices:", tf.config.list_physical_devices())

# Report whether any GPU is available.
gpus = tf.config.list_physical_devices('GPU')
if not gpus:
    print("No GPU found. TensorFlow is using CPU.")
else:
    print("Num GPUs Available: ", len(gpus))

2024-12-26 23:54:38.261158: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-12-26 23:54:38.266054: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2024-12-26 23:54:38.306181: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2024-12-26 23:54:38.308158: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI AVX512_BF16 AVX_VNNI AMX_TILE AMX_INT8 AMX_BF16 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


TensorFlow version: 2.13.1
Physical devices: [PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU')]
No GPU found. TensorFlow is using CPU.


2024-12-26 23:54:39.369329: W tensorflow/core/common_runtime/gpu/gpu_device.cc:1960] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.
Skipping registering GPU devices...


In [5]:
## Import packages
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, models, optimizers
import numpy as np
from scipy.optimize import curve_fit
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, mean_squared_error
import pandas as pd

### Build the base model


In [6]:
def create_model(input_shape, num_classes):
    """Build the two-headed 1-D convolutional network.

    The trunk is a stack of eight ``Conv1D`` layers (ReLU, ``same`` padding)
    followed by max pooling (pool size 3) and flattening.  Two heads branch off
    the flattened features:

    * a sigmoid ``Dense`` head reshaped to ``(input_shape[0], 1)``;
    * a linear ``Dense`` head named ``'reg_output'`` with ``input_shape[0]``
      units.

    Both heads are sized from ``input_shape[0]`` so that they stay consistent
    with each other for any sequence length (the regression head used to be
    fixed at 100 units).

    Args:
        input_shape: Shape of one sample, e.g. ``(100, 1)``.
        num_classes: Kept for interface compatibility; not used by the
            network, whose classification head is a per-position sigmoid.

    Returns:
        An uncompiled ``tf.keras.Model`` whose outputs are
        ``[classification_output, reg_output]``.
    """
    sequence_length = input_shape[0]

    # (filters, kernel_size) for each convolutional layer, in order.
    conv_specs = [(40, 11), (20, 1), (10, 11), (20, 1),
                  (10, 1), (30, 11), (18, 1), (18, 3)]

    inputs = tf.keras.layers.Input(shape=input_shape)

    # Convolutional trunk; 'same' padding keeps the sequence length unchanged.
    x = inputs
    for filters, kernel_size in conv_specs:
        x = tf.keras.layers.Conv1D(filters, kernel_size, activation='relu', padding='same')(x)

    x = tf.keras.layers.MaxPooling1D(3)(x)
    x = tf.keras.layers.Flatten()(x)

    # Classification head: one sigmoid unit per input position, reshaped to
    # (sequence_length, 1).
    classification_output = tf.keras.layers.Dense(sequence_length, activation='sigmoid')(x)
    classification_output = tf.keras.layers.Reshape((sequence_length, 1))(classification_output)

    # Regression head: one linear unit per input position.
    reg_output = tf.keras.layers.Dense(sequence_length, name='reg_output')(x)

    return tf.keras.models.Model(inputs=inputs, outputs=[classification_output, reg_output])



In [7]:

input_shape = (100,1)  # example input shape
num_classes = 2  # assumed number of classes

# Create the model and print the shapes of its outputs
model = create_model(input_shape, num_classes)
print(model.output_shape)


[(None, 100, 1), (None, 100)]


In [8]:
def masked_loss_function(y_true, y_pred):
    """Mean squared error over the targets that are not the sentinel ``-1``.

    Positions where ``y_true == -1`` are treated as "no target" and dropped
    from both tensors before the error is averaged.

    Args:
        y_true: Target tensor; entries equal to ``-1`` are ignored.
        y_pred: Prediction tensor with the same shape as ``y_true``.

    Returns:
        A scalar tensor: the mean squared error over the kept positions, or
        ``0`` when every position is masked (the plain mean of an empty tensor
        would be NaN and would poison the total loss and its gradients).
    """
    y_pred = tf.convert_to_tensor(y_pred)
    y_true = tf.cast(y_true, y_pred.dtype)

    keep = tf.math.not_equal(y_true, -1)
    true_kept = tf.boolean_mask(y_true, keep)
    pred_kept = tf.boolean_mask(y_pred, keep)

    squared_error = tf.math.squared_difference(pred_kept, true_kept)
    kept_count = tf.cast(tf.size(squared_error), squared_error.dtype)

    # divide_no_nan yields 0 when kept_count is 0 instead of NaN.
    return tf.math.divide_no_nan(tf.reduce_sum(squared_error), kept_count)

## Train the model

In [9]:
def compile_model(model):
    """Compile the two-output model with Adam and per-output losses/metrics.

    The regression output is identified by its explicit layer name
    ``'reg_output'``; the classification output is whichever other output the
    model has.  Its name is read from ``model.output_names`` rather than being
    hard-coded, because Keras numbers unnamed layers per session
    (``reshape``, ``reshape_1``, ...), so a fixed name only matches one
    particular build of the model.

    Args:
        model: Keras model with exactly two outputs, one named
            ``'reg_output'``.

    Raises:
        ValueError: If the model does not have exactly two outputs with
            exactly one of them named ``'reg_output'``.
    """
    regression_name = 'reg_output'
    output_names = list(model.output_names)
    if len(output_names) != 2 or output_names.count(regression_name) != 1:
        raise ValueError(
            "compile_model expects exactly two outputs, one of them named "
            f"'{regression_name}'; got {output_names}"
        )
    classification_name = next(name for name in output_names if name != regression_name)

    # Adam optimizer with a learning rate of 0.002.
    optimizer = optimizers.Adam(learning_rate=0.002)

    # One loss per output: binary cross-entropy for the sigmoid head, masked
    # MSE for the regression head.
    losses = {
        classification_name: 'binary_crossentropy',
        regression_name: masked_loss_function,
    }

    # One metric per output: accuracy for the sigmoid head, masked MSE for the
    # regression head.
    metrics = {
        classification_name: 'accuracy',
        regression_name: masked_loss_function,
    }

    model.compile(optimizer=optimizer, loss=losses, metrics=metrics)

In [10]:
import pandas as pd
import numpy as np

def load_data(filename):
    """Load every worksheet of an Excel workbook as one sample.

    Each worksheet must contain the columns ``'X'``, ``'y_classification'``
    and ``'y_regression'``.  The columns of all sheets are stacked in
    worksheet order.

    Args:
        filename: Path of the Excel workbook to read.

    Returns:
        A tuple ``(X_data, classification_Y_data, regression_Y_data)`` of
        NumPy arrays, each with one row per worksheet (shape
        ``(num_sheets, rows_per_sheet)`` when all sheets have the same number
        of rows).  No feature axis is added.
    """
    X_data = []
    classification_Y_data = []
    regression_Y_data = []

    # The context manager closes the workbook handle even if a sheet fails to
    # parse or a column is missing.
    with pd.ExcelFile(filename) as xls:
        for sheet_name in xls.sheet_names:
            df = pd.read_excel(xls, sheet_name=sheet_name)

            X_data.append(df['X'].to_numpy())
            classification_Y_data.append(df['y_classification'].to_numpy())
            regression_Y_data.append(df['y_regression'].to_numpy())

    # Stack the per-sheet columns into arrays with one row per worksheet.
    X_data = np.array(X_data)
    classification_Y_data = np.array(classification_Y_data)
    regression_Y_data = np.array(regression_Y_data)

    return X_data, classification_Y_data, regression_Y_data

"""
# 示例调用
filename = "test_data.xlsx"
X_train, y_train_classification, y_train_regression = load_data(filename)

print(f'X_train shape: {X_train.shape}')
print(f'y_train_classification shape: {y_train_classification.shape}')
print(f'y_train_regression shape: {y_train_regression.shape}')
"""

'\n# 示例调用\nfilename = "test_data.xlsx"\nX_train, y_train_classification, y_train_regression = load_data(filename)\n\nprint(f\'X_train shape: {X_train.shape}\')\nprint(f\'y_train_classification shape: {y_train_classification.shape}\')\nprint(f\'y_train_regression shape: {y_train_regression.shape}\')\n'

In [11]:
# Read a single worksheet from the Excel workbook; the context manager closes
# the file handle once the sheet has been loaded.
with pd.ExcelFile('test_data.xlsx') as xls:
    df = pd.read_excel(xls, sheet_name='Sample_1')  # replace 'Sample_1' with the actual sheet name

# Print the contents and shape of the y_regression column
print(df['y_regression'])
print(df['y_regression'].shape)

0    -1
1    -1
2    -1
3    -1
4    -1
     ..
95   -1
96   -1
97   -1
98   -1
99   -1
Name: y_regression, Length: 100, dtype: int64
(100,)


In [12]:
from tensorflow.keras.callbacks import EarlyStopping
def train_classification_model(model, X_train, y_train_classification, y_train_regression, X_val, y_val_classification, y_val_regression):
    """Fit both outputs of ``model`` with full-batch training and early stopping.

    Args:
        model: Compiled Keras model with a classification and a regression output.
        X_train: Training inputs.
        y_train_classification: Training targets for the classification output.
        y_train_regression: Training targets for the regression output.
        X_val: Validation inputs.
        y_val_classification: Validation targets for the classification output.
        y_val_regression: Validation targets for the regression output.

    Returns:
        The ``History`` object returned by ``model.fit``.
    """
    # Stop once 'val_loss' has not improved for 50 epochs and roll back to the
    # best weights seen so far.
    stopper = EarlyStopping(monitor='val_loss', patience=50, restore_best_weights=True)

    train_targets = [y_train_classification, y_train_regression]
    val_targets = [y_val_classification, y_val_regression]

    # The batch size equals the training-set size, so each epoch is a single
    # step; training runs for at most 4000 epochs.
    return model.fit(
        X_train,
        train_targets,
        epochs=4000,
        batch_size=len(X_train),
        validation_data=(X_val, val_targets),
        callbacks=[stopper],
    )

In [13]:
from sklearn.metrics import classification_report, mean_squared_error, mean_absolute_error, r2_score


def evaluate_model(model, X_val, y_val_classification, y_val_regression):
    """Evaluate both model outputs and write the results to CSV files.

    Classification predictions are thresholded at 0.5 and compared with the
    flattened labels; the report is printed and saved to
    ``classification_report.csv``.

    Regression metrics (MSE, MAE, R^2) are computed only over positions whose
    target is not the sentinel ``-1``, matching ``masked_loss_function`` used
    for training.  They are printed and saved to ``regression_report.csv``.
    If no position has a valid target, all three metrics are NaN; R^2 is also
    NaN when fewer than two valid targets exist.

    Args:
        model: Trained Keras model returning ``(classification, regression)``
            predictions from ``predict``.
        X_val: Inputs to evaluate on.
        y_val_classification: NumPy array of 0/1 labels.
        y_val_regression: NumPy array of regression targets, ``-1`` meaning
            "no target".
    """
    # Predict both outputs for the evaluation inputs.
    y_pred_classification, y_pred_regression = model.predict(X_val)
    print(f'y_pred_classification shape: {y_pred_classification.shape}')
    print(f'y_pred_regression shape: {y_pred_regression.shape}')

    # Threshold the sigmoid outputs into integer labels.
    y_pred_classes = (y_pred_classification > 0.5).astype(int)

    print("Debugging Information:")
    print(f'y_val_classification shape: {y_val_classification.shape}, y_val_classification dtype: {y_val_classification.dtype}')
    print(f'y_pred_classes shape: {y_pred_classes.shape}, y_pred_classes dtype: {y_pred_classes.dtype}')

    # Flatten labels and predictions so they line up element by element.
    y_val_classes_flat = y_val_classification.flatten()
    y_pred_classes_flat = y_pred_classes.flatten()

    print("Classification Report:")
    class_report = classification_report(y_val_classes_flat, y_pred_classes_flat, output_dict=True)
    print(classification_report(y_val_classes_flat, y_pred_classes_flat))

    # Persist the classification report.
    classification_df = pd.DataFrame(class_report).transpose()
    classification_df.to_csv('classification_report.csv', index=True)

    # ---- Regression evaluation ----
    print(f'y_val_regression shape: {y_val_regression.shape}')
    print(f'y_pred_regression shape: {y_pred_regression.shape}')

    y_val_regression_flat = y_val_regression.flatten()
    y_pred_regression_flat = y_pred_regression.flatten()

    print(f'y_val_regression_flat shape: {y_val_regression_flat.shape}')
    print(f'y_pred_regression_flat shape: {y_pred_regression_flat.shape}')

    # Drop positions without a target (-1); scoring predictions against the
    # sentinel would measure how well the model predicts "-1", not the
    # quantity of interest.
    has_target = y_val_regression_flat != -1
    n_valid = int(np.count_nonzero(has_target))
    print(f'Regression targets used (not -1): {n_valid} of {has_target.size}')

    if n_valid == 0:
        mse = mae = r2 = float('nan')
    else:
        y_true_valid = y_val_regression_flat[has_target]
        y_pred_valid = y_pred_regression_flat[has_target]
        mse = mean_squared_error(y_true_valid, y_pred_valid)
        mae = mean_absolute_error(y_true_valid, y_pred_valid)
        # R^2 is undefined for fewer than two samples.
        r2 = r2_score(y_true_valid, y_pred_valid) if n_valid >= 2 else float('nan')

    print("Regression Evaluation:")
    print(f"Mean Squared Error (MSE): {mse}")
    print(f"Mean Absolute Error (MAE): {mae}")
    print(f"R^2 Score: {r2}")

    # Persist the regression metrics.
    regression_report = {
        'Metric': ['Mean Squared Error (MSE)', 'Mean Absolute Error (MAE)', 'R^2 Score'],
        'Value': [mse, mae, r2]
    }
    regression_df = pd.DataFrame(regression_report)
    regression_df.to_csv('regression_report.csv', index=False)






In [14]:
# Seed Python, NumPy and TensorFlow so that weight initialisation and the
# train/validation split are repeatable from run to run.
SEED = 42
tf.keras.utils.set_random_seed(SEED)

strategy = tf.distribute.MirroredStrategy()

with strategy.scope():
    # Input shape and number of classes
    input_shape = (100,1)  # example input shape
    num_classes = 2  # assumed to be 2 classes

    # Create the model
    model = create_model(input_shape, num_classes)
    model.summary()

    # Compile the model
    compile_model(model)

# Load the training data from train_data.xlsx
X_train, y_train_classification, y_train_regression = load_data('train_data.xlsx')
# Split into training and validation sets (80/20, seeded for reproducibility)
X_train, X_val, y_train_classification, y_val_classification, y_train_regression, y_val_regression = train_test_split(
    X_train, y_train_classification, y_train_regression, test_size=0.2, random_state=SEED)

# Load the test data from test_data.xlsx (expected to have the same structure)
X_test, y_test_classification, y_test_regression = load_data('test_data.xlsx')

# Train the model
history = train_classification_model(model, X_train, y_train_classification, y_train_regression, X_val, y_val_classification, y_val_regression)

# Evaluate the model on the test set
evaluate_model(model, X_test, y_test_classification, y_test_regression)



INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:CPU:0',)
Model: "model_1"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_2 (InputLayer)        [(None, 100, 1)]             0         []                            
                                                                                                  
 conv1d_8 (Conv1D)           (None, 100, 40)              480       ['input_2[0][0]']             
                                                                                                  
 conv1d_9 (Conv1D)           (None, 100, 20)              820       ['conv1d_8[0][0]']            
                                                                                                  
 conv1d_10 (Conv1D)          (None, 100, 10)              2210      ['conv1d_9[0][0]']   

Exception ignored in: <generator object iterparse.<locals>.iterator at 0x78ecce901f90>
Traceback (most recent call last):
  File "/home/boyang/.conda/envs/boyangenv/lib/python3.8/xml/etree/ElementTree.py", line 1227, in iterator
    yield from pullparser.read_events()
KeyboardInterrupt: 


In [15]:
# Print the shapes of the training, validation and test arrays.
print(f'X_train shape: {X_train.shape}')
print(f'y_train_classification shape: {y_train_classification.shape}')
print(f'y_train_regression shape: {y_train_regression.shape}')
print(f'X_val shape: {X_val.shape}')
print(f'y_val_classification shape: {y_val_classification.shape}')
print(f'y_val_regression shape: {y_val_regression.shape}')
print(f'X_test shape: {X_test.shape}')
print(f'y_test_classification shape: {y_test_classification.shape}')
print(f'y_test_regression shape: {y_test_regression.shape}')

X_train shape: (6220, 100)
y_train_classification shape: (6220, 100)
y_train_regression shape: (6220, 100)
X_val shape: (1556, 100)
y_val_classification shape: (1556, 100)
y_val_regression shape: (1556, 100)
X_test shape: (1024, 100)
y_test_classification shape: (1024, 100)
y_test_regression shape: (1024, 100)


In [16]:
# Predict on the validation inputs and print the predictions and targets of
# the sample at index 1 for a side-by-side look.
y_pred_classification, y_pred_regression = model.predict(X_val)
print(f'y_pred_classification : {y_pred_classification[1]}')
print(f'y_pred_regression : {y_pred_regression[1]}')
print(f'y_val_classification : {y_val_classification[1]}')


# NOTE(review): y_pred_regression[1] is printed a second time here, this time
# next to its regression target.
print(f'y_pred_regression : {y_pred_regression[1]}')
print(f'y_val_regression : {y_val_regression[1]}')

2024-12-26 15:19:39.795892: W tensorflow/core/grappler/optimizers/data/auto_shard.cc:695] AUTO sharding policy will apply DATA sharding policy as it failed to apply FILE sharding policy because of the following reason: Did not find a shardable source, walked to a node which is not a dataset: name: "FlatMapDataset/_9"
op: "FlatMapDataset"
input: "PrefetchDataset/_8"
attr {
  key: "Targuments"
  value {
    list {
    }
  }
}
attr {
  key: "f"
  value {
    func {
      name: "__inference_Dataset_flat_map_slice_batch_indices_68009"
    }
  }
}
attr {
  key: "output_shapes"
  value {
    list {
      shape {
        dim {
          size: -1
        }
      }
    }
  }
}
attr {
  key: "output_types"
  value {
    list {
      type: DT_INT64
    }
  }
}
. Consider either turning off auto-sharding or switching the auto_shard_policy to DATA to shard this dataset. You can do this by creating a new `tf.data.Options()` object then setting `options.experimental_distribute.auto_shard_policy = Auto

y_pred_classification : [[3.24509325e-28]
 [2.40306987e-30]
 [1.09750719e-28]
 [1.55921882e-27]
 [7.99315940e-31]
 [1.29024712e-29]
 [7.62607838e-31]
 [3.82242747e-29]
 [1.17351642e-28]
 [1.03041390e-30]
 [5.63263981e-27]
 [8.87126165e-27]
 [8.25070988e-28]
 [1.40468651e-28]
 [9.92297863e-30]
 [1.59350837e-31]
 [1.10503177e-27]
 [1.19083573e-31]
 [2.77032496e-34]
 [3.52776170e-27]
 [7.89793730e-01]
 [2.08199575e-28]
 [1.07963856e-29]
 [2.50243570e-27]
 [4.93447278e-28]
 [3.13118769e-27]
 [7.08966590e-28]
 [1.70172699e-36]
 [1.13370564e-31]
 [3.21666162e-27]
 [8.47297072e-01]
 [4.17964323e-29]
 [3.22838774e-28]
 [6.28644141e-28]
 [1.98134495e-32]
 [2.62250029e-27]
 [2.55251827e-30]
 [1.20707447e-27]
 [1.34368353e-36]
 [6.57428423e-33]
 [8.90540540e-01]
 [4.03799650e-29]
 [1.50552628e-28]
 [3.22335615e-27]
 [9.11881317e-28]
 [2.76155609e-27]
 [1.22288337e-27]
 [8.79808734e-29]
 [4.68411741e-31]
 [9.08506838e-27]
 [4.16152631e-27]
 [1.68963221e-27]
 [3.83145312e-27]
 [9.58666585e-28]
 [5.

In [17]:
# Print the regression prediction and target of the validation sample at index 1.
print(f'y_pred_regression : {y_pred_regression[1]}')
print(f'y_val_regression : {y_val_regression[1]}')

y_pred_regression : [-3.3306889e+01  1.6673649e+01  7.0968237e+00 -1.2861862e+00
  4.4406285e+00 -7.0464668e+00 -8.9593449e+00 -2.8251244e+01
 -1.1827633e+01  1.2985057e+01 -9.1978321e+00 -2.2736151e+01
  2.9539215e+01  1.1241053e+01 -3.1167656e+01  4.5179067e+00
  5.3231144e-01 -1.0838147e+01  1.6633549e+00 -9.6969757e+00
  5.9877884e+01 -5.6009769e+00  7.8245745e+00  2.2508661e+01
  2.2725537e+00 -1.7045006e+01 -1.2817270e+01 -1.1362809e+01
 -1.9667236e+01  1.0873064e+01  5.9815147e+01  4.9754610e+00
  4.7838511e+00  1.2172503e+01  3.0943792e+00 -3.3179478e+01
 -8.7419109e+00  2.8955703e+00  2.3888601e+01 -1.0161875e+01
  6.0247139e+01  7.1176310e+00 -1.7459095e+01  8.1837940e-01
 -1.1428383e+01 -1.4289207e+01 -7.2917876e+00  6.1519823e+00
 -1.0274847e+00  1.0325830e+00  8.9515953e+00 -1.1747462e+01
  7.5422783e+00  1.2034836e+01 -4.1569247e+00 -1.3526339e+01
 -5.5724335e+00  4.4150858e+00  1.4625599e+01  1.0364891e+01
  6.0236156e+01  1.0231855e+01 -5.9706335e+00 -1.5672596e+01
  5.