In [1]:
#!pip install tensorflow

In [2]:
#!pip install numpy scipy matplotlib scikit-learn

In [3]:
#!pip install pandas

In [None]:
import tensorflow as tf

# Report which TensorFlow version the kernel is running
print("TensorFlow version:", tf.__version__)

# Show every physical device TensorFlow can see
print("Physical devices:", tf.config.list_physical_devices())

# Report how many GPUs are visible, or state the CPU fallback when there are none
gpus = tf.config.list_physical_devices('GPU')
if not gpus:
    print("No GPU found. TensorFlow is using CPU.")
else:
    print("Num GPUs Available: ", len(gpus))

In [5]:
## import package
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, models, optimizers
import numpy as np
from scipy.optimize import curve_fit
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, mean_squared_error
import pandas as pd

### Build the basic model


In [6]:
def create_model(input_shape, num_classes, classification_output_name='reshape_1'):
    """Build a 1-D convolutional network with two per-position output heads.

    The network stacks eight ``Conv1D`` layers, max-pools, flattens, and then
    branches into a classification head and a regression head.

    Args:
        input_shape: Shape of one input sample, passed to ``Input``. Only
            ``input_shape[0]`` (the sequence length) is used to size the heads.
        num_classes: Number of classes predicted at every sequence position.
        classification_output_name: Name of the classification output layer.
            Defaults to ``'reshape_1'`` so that code referring to the output by
            that name keeps working; naming it explicitly also makes the name
            independent of how many layers were created earlier in the kernel.

    Returns:
        A ``tf.keras`` model with two outputs, in this order:
        class probabilities of shape ``(input_shape[0], num_classes)`` and
        regression values of shape ``(input_shape[0], 3)`` (layer ``reg_output``).
    """
    seq_len = input_shape[0]
    inputs = tf.keras.layers.Input(shape=input_shape)

    # Convolutional trunk: one Conv1D per (filters, kernel_size) pair
    filter_sizes = [40, 20, 10, 20, 10, 30, 18, 18]
    kernel_sizes = [11, 1, 11, 1, 1, 11, 1, 3]
    x = inputs
    for filters, kernel_size in zip(filter_sizes, kernel_sizes):
        x = tf.keras.layers.Conv1D(filters, kernel_size, activation='relu', padding='same')(x)

    # Down-sample along the sequence axis, then flatten for the dense heads
    x = tf.keras.layers.MaxPooling1D(3)(x)
    x = tf.keras.layers.Flatten()(x)

    # Classification head. The softmax must be applied AFTER the reshape so
    # that each position gets its own distribution over the classes; applying
    # it on the flat Dense output would normalise over all positions and
    # classes at once.
    class_logits = tf.keras.layers.Dense(seq_len * num_classes)(x)
    class_logits = tf.keras.layers.Reshape(
        (seq_len, num_classes),
        name=classification_output_name + '_logits',
    )(class_logits)
    classification_output = tf.keras.layers.Softmax(
        axis=-1, name=classification_output_name
    )(class_logits)

    # Regression head: three linear values per position
    reg_output = tf.keras.layers.Dense(seq_len * 3)(x)
    reg_output = tf.keras.layers.Reshape((seq_len, 3), name='reg_output')(reg_output)

    model = tf.keras.models.Model(inputs=inputs, outputs=[classification_output, reg_output])

    return model

In [None]:

# Example configuration: input shape (100, 1) and three classes (assumed values)
input_shape = (100, 1)
num_classes = 3

# Instantiate the network and show the shapes of its outputs
model = create_model(input_shape=input_shape, num_classes=num_classes)
print(model.output_shape)


## Train the model

In [8]:
def masked_mean_squared_error(y_true, y_pred):
    """Mean squared error that only counts entries whose target is positive.

    The mask is derived from the target values themselves (``y_true > 0``),
    not from the class labels.

    Args:
        y_true: Ground-truth regression targets.
        y_pred: Predicted regression values, same shape as ``y_true``
            (assumed to be ``(batch, positions, targets)`` -- TODO confirm).

    Returns:
        Scalar tensor: the mean over ALL entries of ``mask * per-position MSE``.

    Notes:
        * The per-position MSE is taken over the whole last axis before
          masking, so an entry with a positive target also carries the error
          of sibling entries at the same position whose target is <= 0.
        * The final mean divides by the total number of entries, not by the
          number of unmasked ones.
        NOTE(review): both points preserve the original behaviour; confirm
        that this weighting is intended.
    """
    # 1.0 where the target is strictly positive, 0.0 elsewhere
    mask = tf.cast(tf.math.greater(y_true, 0), dtype=tf.float32)
    # Squared error averaged over the last axis (one value per position)
    mse = tf.keras.losses.mean_squared_error(y_true, y_pred)
    # Restore the reduced axis with size 1 so it broadcasts against the mask;
    # this replaces a hard-coded repeat of 3 and works for any target width
    mse = tf.expand_dims(mse, -1)
    masked_mse = mask * mse
    return tf.reduce_mean(masked_mse)

In [9]:
def compile_model(model):
    """Compile the two-headed model with Adam and per-output losses/metrics.

    The classification output is looked up from ``model.output_names`` rather
    than by a hard-coded auto-generated layer name, because auto-generated
    names depend on how many layers were created earlier in the kernel.

    Args:
        model: Keras model with exactly two outputs, one of which is named
            ``'reg_output'``; the other is treated as the classification output.

    Raises:
        ValueError: If the model does not have exactly one ``'reg_output'``
            output plus exactly one other output.
    """
    regression_name = 'reg_output'
    output_names = list(model.output_names)
    classification_names = [name for name in output_names if name != regression_name]
    if regression_name not in output_names or len(classification_names) != 1:
        raise ValueError(
            "compile_model expects exactly two outputs, one of them named "
            f"'{regression_name}'; got {output_names}"
        )
    classification_name = classification_names[0]

    # Adam optimizer with a learning rate of 0.002
    optimizer = optimizers.Adam(learning_rate=0.002)

    # One loss per output: cross-entropy on integer labels for classification,
    # masked MSE for regression
    losses = {
        classification_name: 'sparse_categorical_crossentropy',
        regression_name: masked_mean_squared_error,
    }

    # One metric per output: accuracy for classification, plain (unmasked) MSE
    # for regression
    metrics = {
        classification_name: 'accuracy',
        regression_name: tf.keras.metrics.MeanSquaredError(),
    }

    model.compile(optimizer=optimizer, loss=losses, metrics=metrics)

In [5]:
import pandas as pd
import numpy as np
from tensorflow.keras.utils import to_categorical

def load_data(filename):
    """Load one sample per worksheet from an Excel workbook.

    Every sheet must provide the columns ``X``, ``Y``, ``Area``, ``Center``
    and ``Sigma``.

    Args:
        filename: Path of the Excel workbook to read.

    Returns:
        Tuple ``(X_data, classification_Y_data, regression_Y_data)`` of NumPy
        arrays built by stacking the per-sheet arrays: the ``X`` column, the
        ``Y`` column, and the ``[Area, Center, Sigma]`` columns respectively.
        With equally long sheets the shapes are ``(num_sheets, rows)``,
        ``(num_sheets, rows)`` and ``(num_sheets, rows, 3)``. No trailing
        feature axis is added to ``X_data``.

    Raises:
        KeyError: If a sheet lacks one of the required columns.
    """
    X_data = []
    classification_Y_data = []
    regression_Y_data = []

    # The context manager guarantees the workbook handle is closed even if a
    # sheet fails to parse
    with pd.ExcelFile(filename) as xls:
        for sheet_name in xls.sheet_names:
            df = pd.read_excel(xls, sheet_name=sheet_name)

            # Model input and per-row class label
            X_data.append(df['X'].to_numpy())
            classification_Y_data.append(df['Y'].to_numpy())

            # Regression targets; selecting three columns already yields (rows, 3)
            regression_Y_data.append(df[['Area', 'Center', 'Sigma']].to_numpy())

    # Stack the per-sheet arrays along a new leading (sample) axis
    X_data = np.array(X_data)
    classification_Y_data = np.array(classification_Y_data)
    regression_Y_data = np.array(regression_Y_data)

    return X_data, classification_Y_data, regression_Y_data

In [11]:
from tensorflow.keras.callbacks import EarlyStopping
def train_classification_model(model, X_train, y_train_classification, y_train_regression,
                               X_val, y_val_classification, y_val_regression,
                               epochs=4000, patience=35, batch_size=None):
    """Fit both output heads of the model with early stopping on validation loss.

    Args:
        model: Compiled Keras model whose outputs are ordered
            ``[classification, regression]``.
        X_train: Training inputs.
        y_train_classification: Training class labels.
        y_train_regression: Training regression targets.
        X_val: Validation inputs.
        y_val_classification: Validation class labels.
        y_val_regression: Validation regression targets.
        epochs: Maximum number of epochs (default 4000).
        patience: Epochs without ``val_loss`` improvement before training
            stops (default 35).
        batch_size: Samples per gradient step. ``None`` (default) uses the
            whole training set as a single batch.

    Returns:
        The ``History`` object returned by ``model.fit``.
    """
    # Default to full-batch training: one gradient step per epoch
    if batch_size is None:
        batch_size = len(X_train)

    # Stop once val_loss stalls and roll back to the best weights seen
    early_stopping = EarlyStopping(monitor='val_loss', patience=patience, restore_best_weights=True)

    history = model.fit(
        X_train,
        [y_train_classification, y_train_regression],
        epochs=epochs,
        batch_size=batch_size,
        validation_data=(X_val, [y_val_classification, y_val_regression]),
        callbacks=[early_stopping],
    )

    return history

In [12]:
from sklearn.metrics import classification_report, mean_squared_error, mean_absolute_error, r2_score


def evaluate_model(model, X_val, y_val_classification, y_val_regression):
    """Evaluate both heads of the model, print the results and save them as CSV.

    Args:
        model: Trained model whose ``predict`` returns
            ``(classification_probabilities, regression_values)``.
        X_val: Inputs to evaluate on.
        y_val_classification: Integer class labels (array with ``.shape``,
            ``.dtype`` and ``.flatten``).
        y_val_regression: Regression targets (array with ``.shape`` and
            ``.flatten``).

    Side effects:
        Prints shape diagnostics, a classification report and regression
        metrics; writes ``classification_report.csv`` and
        ``regression_report.csv`` to the current working directory.

    Note:
        The regression metrics are computed over every flattened element;
        no masking is applied here.
    """
    # --- Predictions -------------------------------------------------------
    pred_probs, pred_reg = model.predict(X_val)
    print(f'y_pred_classification shape: {pred_probs.shape}')
    print(f'y_pred_regression shape: {pred_reg.shape}')

    # Most probable class per position
    pred_labels = np.argmax(pred_probs, axis=-1)

    print("Debugging Information:")
    print(f'y_val_classification shape: {y_val_classification.shape}, y_val_classification dtype: {y_val_classification.dtype}')
    print(f'y_pred_classes shape: {pred_labels.shape}, y_pred_classes dtype: {pred_labels.dtype}')

    # --- Classification ----------------------------------------------------
    # scikit-learn expects 1-D label vectors
    true_labels_1d = y_val_classification.flatten()
    pred_labels_1d = pred_labels.flatten()

    print("Classification Report:")
    print(classification_report(true_labels_1d, pred_labels_1d))

    # Same report as a dict -> table -> CSV
    report_as_dict = classification_report(true_labels_1d, pred_labels_1d, output_dict=True)
    pd.DataFrame(report_as_dict).transpose().to_csv('classification_report.csv', index=True)

    # --- Regression --------------------------------------------------------
    print(f'y_val_regression shape: {y_val_regression.shape}')
    print(f'y_pred_regression shape: {pred_reg.shape}')

    true_reg_1d = y_val_regression.flatten()
    pred_reg_1d = pred_reg.flatten()

    # Shapes after flattening, to confirm both vectors line up
    print(f'y_val_regression_flat shape: {true_reg_1d.shape}')
    print(f'y_pred_regression_flat shape: {pred_reg_1d.shape}')

    mse = mean_squared_error(true_reg_1d, pred_reg_1d)
    mae = mean_absolute_error(true_reg_1d, pred_reg_1d)
    r2 = r2_score(true_reg_1d, pred_reg_1d)

    print("Regression Evaluation:")
    print(f"Mean Squared Error (MSE): {mse}")
    print(f"Mean Absolute Error (MAE): {mae}")
    print(f"R^2 Score: {r2}")

    # Persist the three regression metrics as a two-column table
    regression_df = pd.DataFrame({
        'Metric': ['Mean Squared Error (MSE)', 'Mean Absolute Error (MAE)', 'R^2 Score'],
        'Value': [mse, mae, r2],
    })
    regression_df.to_csv('regression_report.csv', index=False)







In [None]:
# Build and compile the model inside a MirroredStrategy scope
strategy = tf.distribute.MirroredStrategy()

with strategy.scope():
    # Input shape and number of classes (example values)
    input_shape = (100, 1)
    num_classes = 3

    # Create the model and show its layer summary
    model = create_model(input_shape, num_classes)
    model.summary()

    # Attach optimizer, losses and metrics
    compile_model(model)

# Fixed seed so the train/validation split is the same on every run
SPLIT_SEED = 42

# Load the full training workbook, then split it into training and validation
# sets (80% / 20%). The full arrays keep their own names so the split does not
# overwrite its own input.
X_all, y_all_classification, y_all_regression = load_data('train_data.xlsx')
(X_train, X_val,
 y_train_classification, y_val_classification,
 y_train_regression, y_val_regression) = train_test_split(
    X_all, y_all_classification, y_all_regression,
    test_size=0.2, random_state=SPLIT_SEED,
)

# Load the held-out test workbook (expected to have the same structure)
X_test, y_test_classification, y_test_regression = load_data('test_data.xlsx')

# Train the model
history = train_classification_model(model, X_train, y_train_classification, y_train_regression, X_val, y_val_classification, y_val_regression)

# Evaluate on the test set
evaluate_model(model, X_test, y_test_classification, y_test_regression)

In [8]:
# Load every sheet of the training workbook and report the shapes of the
# three arrays returned by load_data
X_data, classification_Y_data, regression_Y_data  = load_data('train_data.xlsx')

print(f'X_data shape: {X_data.shape}')
print(f'classification_Y_data shape: {classification_Y_data.shape}')
print(f'regression_Y_data shape: {regression_Y_data.shape}')

X_data shape: (1024, 100)
classification_Y_data shape: (1024, 100)
regression_Y_data shape: (1024, 100, 3)


In [None]:
# Sanity check: print the shapes of the train / validation / test arrays.
# None of these names is defined in this cell; the training cell that loads
# and splits the data must have been run first.
print(f'X_train shape: {X_train.shape}')
print(f'y_train_classification shape: {y_train_classification.shape}')
print(f'y_train_regression shape: {y_train_regression.shape}')
print(f'X_val shape: {X_val.shape}')
print(f'y_val_classification shape: {y_val_classification.shape}')
print(f'y_val_regression shape: {y_val_regression.shape}')
print(f'X_test shape: {X_test.shape}')
print(f'y_test_classification shape: {y_test_classification.shape}')
print(f'y_test_regression shape: {y_test_regression.shape}')