In [1]:
"""!pip install tensorflow
!pip install numpy scipy matplotlib scikit-learn
!pip install pandas
"""

'!pip install tensorflow\n!pip install numpy scipy matplotlib scikit-learn\n!pip install pandas\n'

In [1]:
import tensorflow as tf

# 检查 TensorFlow 版本
print("TensorFlow version:", tf.__version__)

# 列出所有可用的物理设备
print("Physical devices:", tf.config.list_physical_devices())

# 检查是否有可用的 GPU
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    print("Num GPUs Available: ", len(gpus))
else:
    print("No GPU found. TensorFlow is using CPU.")

TensorFlow version: 2.6.0
Physical devices: [PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU'), PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU'), PhysicalDevice(name='/physical_device:GPU:1', device_type='GPU'), PhysicalDevice(name='/physical_device:GPU:2', device_type='GPU'), PhysicalDevice(name='/physical_device:GPU:3', device_type='GPU')]
Num GPUs Available:  4


In [3]:
## import package
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, models, optimizers
import numpy as np
from scipy.optimize import curve_fit
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, mean_squared_error
import pandas as pd

### Build up basic model


In [4]:
def create_cnn_model(input_shape, num_classes):
    model = models.Sequential()

    model.add(layers.Input(shape=input_shape))
    
    # 使用一维卷积层 (Conv1D) 处理一维序列数据
    model.add(layers.Conv1D(32, 3, activation='relu', padding='same'))
    model.add(layers.Conv1D(32, 3, activation='relu', padding='same'))
    model.add(layers.Conv1D(64, 3, activation='relu', padding='same'))
    model.add(layers.Conv1D(64, 3, activation='relu', padding='same'))
    model.add(layers.MaxPooling1D(2))
    
    model.add(layers.Conv1D(128, 3, activation='relu', padding='same'))
    model.add(layers.Conv1D(128, 3, activation='relu', padding='same'))
    model.add(layers.MaxPooling1D(2))
    
    model.add(layers.Conv1D(256, 3, activation='relu', padding='same'))
    model.add(layers.Conv1D(256, 3, activation='relu', padding='same'))
    model.add(layers.MaxPooling1D(2))
    
    # 展平层
    model.add(layers.Flatten())
    
    # 全连接层
    model.add(layers.Dense(512, activation='relu'))
    
    # 输出层，假设输出是分类问题
    model.add(layers.Dense(input_shape[0] * num_classes, activation='softmax'))
    model.add(layers.Reshape((input_shape[0], num_classes)))  # 将输出重塑为 (sequence_length, num_classes)
    
    return model

"""
input_shape = (100, 1)  # 示例输入形状
num_classes = 3  # 假设有3个类别
model = create_cnn_model(input_shape, num_classes)
model.summary()
"""

'\ninput_shape = (100, 1)  # 示例输入形状\nnum_classes = 3  # 假设有3个类别\nmodel = create_cnn_model(input_shape, num_classes)\nmodel.summary()\n'

## Train the model

In [5]:
def compile_model(model,learning_rate=0.002):
    # 使用Adam优化器，学习率为0.002
    optimizer = optimizers.Adam(learning_rate=learning_rate)
    # 编译模型，使用交叉熵损失函数和准确率作为指标
    model.compile(optimizer=optimizer,
                  loss="sparse_categorical_crossentropy",
                  metrics=['accuracy'])

In [6]:
import pandas as pd
import numpy as np

def load_data(filename):
    # 读取 Excel 文件中的所有工作表名称
    xls = pd.ExcelFile(filename)
    sheet_names = xls.sheet_names
    
    # 初始化用于存储所有工作表数据的列表
    X_data = []
    Y_data = []
    
    # 遍历每个工作表
    for sheet_name in sheet_names:
        df = pd.read_excel(xls, sheet_name=sheet_name)
        
        # 提取 X 列和 Y 列的数据
        X = df['X'].to_numpy()
        Y = df['Y'].to_numpy()
        
        # 将数据添加到列表中
        X_data.append(X)
        Y_data.append(Y)
    
    # 转换为 NumPy 数组
    X_data = np.array(X_data)
    Y_data = np.array(Y_data)
    
    return X_data, Y_data

"""
# 从 train_data.xlsx 中加载数据
train_X, train_Y = load_data('train_data.xlsx')

# 从 test_data.xlsx 中加载数据（如果有相同的结构）
test_X, test_Y = load_data('test_data.xlsx')

# 打印一些数据以进行验证
print(f'Number of training samples: {train_X.shape[0]}')
print(f'First training sample X shape: {train_X[0].shape}')
print(f'First training sample Y shape: {train_Y[0].shape}')

print(f'Number of testing samples: {test_X.shape[0]}')
print(f'First testing sample X shape: {test_X[0].shape}')
print(f'First testing sample Y shape: {test_Y[0].shape}')

print(train_X[0])
print(train_Y[0])
"""

"\n# 从 train_data.xlsx 中加载数据\ntrain_X, train_Y = load_data('train_data.xlsx')\n\n# 从 test_data.xlsx 中加载数据（如果有相同的结构）\ntest_X, test_Y = load_data('test_data.xlsx')\n\n# 打印一些数据以进行验证\nprint(f'Number of training samples: {train_X.shape[0]}')\nprint(f'First training sample X shape: {train_X[0].shape}')\nprint(f'First training sample Y shape: {train_Y[0].shape}')\n\nprint(f'Number of testing samples: {test_X.shape[0]}')\nprint(f'First testing sample X shape: {test_X[0].shape}')\nprint(f'First testing sample Y shape: {test_Y[0].shape}')\n\nprint(train_X[0])\nprint(train_Y[0])\n"

In [7]:
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
def train_classification_model(model, X_train, y_train, X_val, y_val, epochs=4000):
    # 设置早停和模型检查点回调
    batch_size = 32
    early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
    model_checkpoint = ModelCheckpoint('best_model.h5', save_best_only=True, monitor='val_loss')

    # 训练模型，训练4000个epoch
    history = model.fit(X_train, y_train, validation_data=(X_val, y_val),epochs=epochs, batch_size = batch_size, callbacks=[early_stopping, model_checkpoint])
    return history

In [8]:
def evaluate_classification_model(model, X_val, y_val):
    # 评估模型在验证集上的性能
    test_loss, test_acc = model.evaluate(X_val, y_val)
    print(f'Test accuracy: {test_acc}')
    
    # 预测验证集
    y_pred = model.predict(X_val)
    print(f'y_pred shape: {y_pred.shape}')
    
    # 将预测结果转换为整数标签
    y_pred_classes = np.argmax(y_pred, axis=-1)
    
    """
    # 打印调试信息
    print("Debugging Information:")
    print(f'y_val shape: {y_val.shape}, y_val dtype: {y_val.dtype}')
    print(f'y_pred_classes shape: {y_pred_classes.shape}, y_pred_classes dtype: {y_pred_classes.dtype}')
    
    # 检查前10个元素
    print(f'y_val (first 10): {y_val[:10]}')
    print(f'y_pred_classes (first 10): {y_pred_classes[:10]}')
    """
    
    # 打印分类报告
    print(classification_report(y_val.flatten(), y_pred_classes.flatten()))

    report = classification_report(y_val.flatten(), y_pred_classes.flatten(), output_dict=True)

    # 转换为 DataFrame
    df = pd.DataFrame(report).transpose()

    # 写入 CSV 文件
    df.to_csv('classification_report.csv')

In [9]:
def visualize_data(X, y, num_samples=100):
    for i in range(num_samples):
        plt.plot(X[i], y[i])
        plt.title(f"Sample {i+1}")
        plt.xlabel("X")
        plt.ylabel("Y")
        plt.show()

"""
# 加载并可视化数据
X, y = load_data('train_data.xlsx')
visualize_data(X, y)
"""

"\n# 加载并可视化数据\nX, y = load_data('train_data.xlsx')\nvisualize_data(X, y)\n"

In [10]:
# 设置输入形状和类别数量
input_shape = (100,1)  # 示例输入形状
num_classes = 3  # 假设有3个类别

# 创建模型
model = create_cnn_model(input_shape, num_classes)
model.summary()

# 编译模型
compile_model(model)

# 加载和预处理数据

# 从 train_data.xlsx 中加载数据
num_classes = 3  
X_train, y_train = load_data('train_data.xlsx')
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=42)

X_train = np.expand_dims(X_train, axis=-1)
X_val = np.expand_dims(X_val, axis=-1)

# 从 test_data.xlsx 中加载数据（如果有相同的结构）
X_test, y_test = load_data('test_data.xlsx')
X_test = np.expand_dims(X_test, axis=-1)
# 训练模型
history = train_classification_model(model, X_train, y_train, X_val, y_val)

evaluate_classification_model(model, X_test, y_test)

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv1d (Conv1D)             (None, 100, 32)           128       
                                                                 
 conv1d_1 (Conv1D)           (None, 100, 32)           3104      
                                                                 
 conv1d_2 (Conv1D)           (None, 100, 64)           6208      
                                                                 
 conv1d_3 (Conv1D)           (None, 100, 64)           12352     
                                                                 
 max_pooling1d (MaxPooling1  (None, 50, 64)            0         
 D)                                                              
                                                                 
 conv1d_4 (Conv1D)           (None, 50, 128)           24704     
                                                        

  saving_api.save_model(


Epoch 3/4000
Epoch 4/4000
Epoch 5/4000
Epoch 6/4000
Epoch 7/4000
Epoch 8/4000
Epoch 9/4000
Epoch 10/4000
Epoch 11/4000
Epoch 12/4000
Epoch 13/4000
Epoch 14/4000
Epoch 15/4000
Epoch 16/4000
Epoch 17/4000
Epoch 18/4000
Epoch 19/4000
Epoch 20/4000
Epoch 21/4000
Epoch 22/4000
Epoch 23/4000
Epoch 24/4000
Epoch 25/4000
Epoch 26/4000
Epoch 27/4000
Epoch 28/4000
Epoch 29/4000
Test accuracy: 0.9755663871765137
y_pred shape: (1024, 100, 3)
              precision    recall  f1-score   support

           0       0.99      0.99      0.99     99264
           1       0.60      0.59      0.60      3136

    accuracy                           0.98    102400
   macro avg       0.80      0.79      0.79    102400
weighted avg       0.98      0.98      0.98    102400



In [11]:
print(f'X_train shape: {X_train.shape}')
print(f'y_train shape: {y_train.shape}')
print(f'X_val shape: {X_val.shape}')
print(f'y_val shape: {y_val.shape}')

X_train shape: (819, 100)
y_train shape: (819, 100)
X_val shape: (205, 100)
y_val shape: (205, 100)


In [12]:
print("y_train shape:", y_train.shape)
print("y_val shape:", y_val.shape)

y_train shape: (819, 100)
y_val shape: (205, 100)


In [13]:
def create_simple_cnn_model(input_shape, num_classes):
    model = models.Sequential()
    model.add(layers.Input(shape=input_shape))
    model.add(layers.Conv1D(32, 3, activation='relu', padding='same'))
    model.add(layers.MaxPooling1D(2))
    model.add(layers.Conv1D(64, 3, activation='relu', padding='same'))
    model.add(layers.MaxPooling1D(2))
    model.add(layers.Flatten())
    model.add(layers.Dense(128, activation='relu'))
    model.add(layers.Dense(input_shape[0] * num_classes, activation='softmax'))
    model.add(layers.Reshape((input_shape[0], num_classes)))  # 将输出重塑为 (sequence_length, num_classes)
    return model

# 创建和编译简单模型
simple_model = create_simple_cnn_model(input_shape, num_classes)
compile_model(simple_model)

history = train_classification_model(simple_model, X_train, y_train, X_val, y_val)

# 评估模型
y_pred = simple_model.predict(X_val)
y_pred_classes = np.argmax(y_pred, axis=-1)
print(classification_report(y_val.flatten(), y_pred_classes.flatten(), zero_division=1))

Epoch 1/4000
Epoch 2/4000
Epoch 3/4000


  saving_api.save_model(


Epoch 4/4000
Epoch 5/4000
Epoch 6/4000
Epoch 7/4000
Epoch 8/4000
Epoch 9/4000
Epoch 10/4000
Epoch 11/4000
Epoch 12/4000
Epoch 13/4000
Epoch 14/4000
Epoch 15/4000
Epoch 16/4000
Epoch 17/4000
Epoch 18/4000
Epoch 19/4000
Epoch 20/4000
Epoch 21/4000
Epoch 22/4000
Epoch 23/4000
Epoch 24/4000
Epoch 25/4000
Epoch 26/4000
Epoch 27/4000
Epoch 28/4000
Epoch 29/4000
Epoch 30/4000
Epoch 31/4000
Epoch 32/4000
Epoch 33/4000
Epoch 34/4000
              precision    recall  f1-score   support

           0       0.98      0.99      0.99     19719
           1       0.69      0.54      0.61       781

    accuracy                           0.97     20500
   macro avg       0.83      0.77      0.80     20500
weighted avg       0.97      0.97      0.97     20500



In [14]:
print('a')

a
