In [1]:
# import and set default
import os
import sys
import torch.nn as nn
import torch

base_path = os.getcwd()
print(f"Base_path is {base_path} !!")
sys.path.append(os.path.join(base_path))

Base_path is /mnt/mechanical/projects/engineering_practice/local !!


In [2]:
from sklearn.model_selection import train_test_split
import numpy as np
import pandas as pd

time_step = 8
num_class = 64

device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(f"We are on device {device} !!\n")

We are on device cuda !!



In [3]:
from sklearn.preprocessing import StandardScaler

df = pd.read_csv("./data/data.csv")
df = df[df.iloc[:, -1].between(1, num_class)]
scaler = StandardScaler()

X = df.iloc[:, :-1]
y = df.iloc[:, -1]
X = scaler.fit_transform(X)
X = pd.DataFrame(X)
df = pd.concat([X, y], axis=1)
print(df.describe())

# 保存均值和标准差
mean = scaler.mean_
scale = scaler.scale_

# 保存均值和标准差到文件
np.save("data/mean.npy", mean)
np.save("data/scale.npy", scale)

                  0             1             2             3             4  \
count  2.501450e+05  2.501450e+05  2.501450e+05  2.501450e+05  2.501450e+05   
mean  -8.980599e-16  6.249152e-17 -4.072174e-16  7.544430e-17 -2.599647e-16   
std    1.000002e+00  1.000002e+00  1.000002e+00  1.000002e+00  1.000002e+00   
min   -3.172120e+00 -7.282713e+00 -4.634124e+00 -6.873010e+00 -4.220964e+00   
25%   -6.637011e-01 -4.439173e-01 -5.904385e-01 -5.189889e-01 -6.259770e-01   
50%   -1.006514e-01  1.222280e-01 -2.775436e-03  9.519070e-02 -3.401265e-02   
75%    5.159171e-01  6.162396e-01  5.382176e-01  6.487910e-01  5.541272e-01   
max    7.671914e+00  3.586334e+00  5.774233e+00  3.485932e+00  7.329959e+00   

                  5             6             7             8             9  \
count  2.501450e+05  2.501450e+05  2.501450e+05  2.501450e+05  2.501450e+05   
mean   4.817528e-17  7.817121e-17 -1.408900e-16  6.362773e-18 -8.180708e-18   
std    1.000002e+00  1.000002e+00  1.000002e+00  1.

In [None]:
# Load data
grouped_data = []


for label in range(1, num_class + 1):
    person = df[df.iloc[:, -1] == label]
    i = 0
    while True:
        if i + time_step > len(person):
            break
        sample = person[i : i + time_step]
        i += time_step
        grouped_data.append(sample)

print(f"We got totally {len(grouped_data)} for {num_class} persons")
print(len(grouped_data))

X = [sample.iloc[:, :-1].values for sample in grouped_data]
labels = []
for sample in grouped_data:
    ll = sample.iloc[:, -1].values
    labels.append([ll[0]])
X = np.array(X)
y = np.array(labels)
print(f"X shape: {X.shape}, y shape: {y.shape}")

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)
X_train, X_val, y_train, y_val = train_test_split(
    X_train, y_train, test_size=0.3, random_state=42
)
print(
    f"""Train np shape: {X_train.shape}, {y_train.shape}
        Val np shape: {X_val.shape}, {y_val.shape}
        Test np shape: {X_test.shape}, {y_test.shape}
      """
)

We got totally 31264 for 64 persons
31264
X shape: (31264, 8, 40), y shape: (31264, 1)
Train np shape: (15318, 8, 40), (15318, 1)
        Val np shape: (6566, 8, 40), (6566, 1)
        Test np shape: (9380, 8, 40), (9380, 1)
      


In [None]:
from torch.utils.data import DataLoader, TensorDataset

# Convert data to PyTorch tensors
X_train_tensor = torch.Tensor(X_train)
y_train_tensor = torch.Tensor(y_train)
X_val_tensor = torch.Tensor(X_val)
y_val_tensor = torch.Tensor(y_val)
X_test_tensor = torch.Tensor(X_test)
y_test_tensor = torch.Tensor(y_test)

# Create TensorDatasets
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
val_dataset = TensorDataset(X_val_tensor, y_val_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

batch_size = 64

# Create DataLoaders
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

print(
    f"""Train loader shape: {len(train_loader)}
        Val loader shape: {len(val_loader)}
        Test loader shape: {len(test_loader)}
      """
)

for one in train_loader:
    print(len(one), len(one[0]))
    break

Train loader shape: 240
        Val loader shape: 103
        Test loader shape: 147
      
2 64


In [None]:
# Build model and Train example

from models.models import LSTMModel

print("Build model and Train example to valid data shape")
# Define model parameters
input_size = 40
hidden_size = 64
num_layers = 3
output_size = 1

# Instantiate the model
model = LSTMModel(input_size, hidden_size, num_layers, output_size).to(device)

# Generate sample input data
input_data = torch.randn(batch_size, time_step, input_size).to(device)
print(input_data.shape)

# Forward pass
output = model(input_data)
print("Output shape:", output.shape)

print("here we know that data shape is ok")

Build model and Train example to valid data shape
torch.Size([64, 8, 40])
Output shape: torch.Size([64, 1])
here we know that data shape is ok


In [12]:
# Actually Train
import torch.optim as optim

learning_rate = 0.001
num_epochs = 400

criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Train the model
for epoch in range(num_epochs):
    total_loss = 0
    for inputs, labels in train_loader:
        inputs, labels = inputs.float().to(device), labels.float().to(device)
        outputs = model(inputs)
        loss = criterion(outputs, labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        total_loss += loss.item()

    if (epoch + 1 + 600) in [800, 1000]:
        torch.save(
            model,
            os.path.join(
                base_path, "models", f"final_e{epoch+1+600}_full_model_for_64.pth"
            ),
        )
        torch.save(
            model.state_dict(),
            os.path.join(
                base_path, "models", f"final_e{epoch+1+600}_model_weights_for_64.pth"
            ),
        )
        print(f"saved model of {epoch+1}")


print("Finished Training")

saved model of 200
saved model of 400
Finished Training


In [25]:
# predict with torch
from sklearn.metrics import accuracy_score, f1_score


for e in [2, 4, 6, 8, 10, 20, 30, 40, 80, 120, 160, 200, 400, 600, 800, 1000]:
    all_y_true = np.zeros((0, 1))
    all_y_pred = np.zeros((0, 1))
    model_full = torch.load(f"models/final_e{e}_full_model_for_64.pth")
    # model.load_state_dict(torch.load(f"models/final_e{e}_model_weights_for_64.pth"))
    for inputs, y_true in val_loader:
        inputs, y_true = inputs.to(device), y_true.to(device)
        y_pred = model_full(inputs)

        y_pred = np.round(y_pred.to("cpu").detach().numpy())
        y_true = np.round(y_true.to("cpu").detach().numpy())
        all_y_true = np.concatenate([all_y_true, y_true])
        all_y_pred = np.concatenate([all_y_pred, y_pred])

    for inputs, y_true in test_loader:
        inputs, y_true = inputs.to(device), y_true.to(device)
        y_pred = model_full(inputs)

        y_pred = np.round(y_pred.to("cpu").detach().numpy())
        y_true = np.round(y_true.to("cpu").detach().numpy())
        all_y_true = np.concatenate([all_y_true, y_true])
        all_y_pred = np.concatenate([all_y_pred, y_pred])

    # 计算准确率
    accuracy = accuracy_score(all_y_true, all_y_pred)

    # 计算 F1 分数
    f1 = f1_score(all_y_true, all_y_pred, average="macro")
    print(f"for e = {e} : acc = {accuracy}, f1 = {f1}")

for e = 2 : acc = 0.017057569296375266, f1 = 0.0005241090146750524
for e = 4 : acc = 0.016367741126301266, f1 = 0.0005032547664589375
for e = 6 : acc = 0.08064718424683306, f1 = 0.06621835272369642
for e = 8 : acc = 0.13188260378778377, f1 = 0.11775124206390358
for e = 10 : acc = 0.21183995986454282, f1 = 0.2015388615678037
for e = 20 : acc = 0.5082779380408879, f1 = 0.4962581243949295
for e = 30 : acc = 0.675216355198796, f1 = 0.6590987806516768
for e = 40 : acc = 0.55863539445629, f1 = 0.554207148790254
for e = 80 : acc = 0.6777248212717923, f1 = 0.6767027345538582
for e = 120 : acc = 0.9436222250094067, f1 = 0.9435270586467183
for e = 160 : acc = 0.940674777373636, f1 = 0.9404597079865855
for e = 200 : acc = 0.9655085914963, f1 = 0.9654915471126182
for e = 400 : acc = 0.9705255236422927, f1 = 0.9704617094640144
for e = 600 : acc = 0.953154396086793, f1 = 0.952819959626162
for e = 800 : acc = 0.9798068481123793, f1 = 0.9796734488076233
for e = 1000 : acc = 0.97585601404741, f1 = 0.97

In [14]:
total_params = sum(p.numel() for p in model.parameters())
print("Total Parameters:", total_params)

Total Parameters: 93761


In [18]:
# export as onnx
import onnx
import onnxruntime

onnx_path = "models/final_lstm_64_class_dynamic.onnx"
# model = LSTMModel()
model = torch.load("models/final_e800_full_model_for_64.pth")
model = model.to("cpu")
# 虚拟输入张量，作为导出时的参数，其维度要跟输入维度相对应
dummy_input = torch.randn(1, 8, 40).to("cpu")  # 使用单个样本进行导出

torch.onnx.export(
    model,
    dummy_input,
    f=onnx_path,
    input_names=["input"],
    output_names=["output"],
    # 使用动态batch
    dynamic_axes={"input": {0: "batch_size"}, "output": {0: "batch_size"}},
)

Exported graph: graph(%input : Float(64, 8, 40, strides=[320, 40, 1], requires_grad=0, device=cpu),
      %fc.weight : Float(1, 64, strides=[64, 1], requires_grad=1, device=cpu),
      %fc.bias : Float(1, strides=[1], requires_grad=1, device=cpu),
      %onnx::LSTM_321 : Float(1, 256, 40, strides=[10240, 40, 1], requires_grad=0, device=cpu),
      %onnx::LSTM_322 : Float(1, 256, 64, strides=[16384, 64, 1], requires_grad=0, device=cpu),
      %onnx::LSTM_323 : Float(1, 512, strides=[512, 1], requires_grad=0, device=cpu),
      %onnx::LSTM_345 : Float(1, 256, 64, strides=[16384, 64, 1], requires_grad=0, device=cpu),
      %onnx::LSTM_346 : Float(1, 256, 64, strides=[16384, 64, 1], requires_grad=0, device=cpu),
      %onnx::LSTM_347 : Float(1, 512, strides=[512, 1], requires_grad=0, device=cpu),
      %onnx::LSTM_369 : Float(1, 256, 64, strides=[16384, 64, 1], requires_grad=0, device=cpu),
      %onnx::LSTM_370 : Float(1, 256, 64, strides=[16384, 64, 1], requires_grad=0, device=cpu),
    

In [31]:
# eval with onnx and batchsize=64 and without torch
onnx_model = onnx.load(onnx_path)
onnx.checker.check_model(onnx_model)

ort_session = onnxruntime.InferenceSession(onnx_path)

all_y_true = np.zeros((0, 1))
all_y_pred = np.zeros((0, 1))

for inputs, y_true in val_loader:
    inputs, y_true = (
        inputs.to("cpu").detach().numpy(),
        y_true.to("cpu").detach().numpy(),
    )
    # 跳过不符合要求的 batch
    if inputs.shape[0] != 64:
        continue
    # 进行推理
    y_pred = ort_session.run(None, {"input": inputs})[0]

    y_pred = np.round(y_pred)
    y_true = np.round(y_true)

    all_y_true = np.concatenate([all_y_true, y_true])
    all_y_pred = np.concatenate([all_y_pred, y_pred])

print(all_y_pred)
print(all_y_true)
# 计算准确率
accuracy = accuracy_score(all_y_true, all_y_pred)

# 计算 F1 分数
f1 = f1_score(all_y_true, all_y_pred, average="macro")
print(f"for e = {e} : acc = {accuracy}, f1 = {f1}")

[[36.]
 [36.]
 [46.]
 ...
 [56.]
 [31.]
 [54.]]
[[41.]
 [36.]
 [46.]
 ...
 [56.]
 [31.]
 [54.]]
for e = 1000 : acc = 0.9808517156862745, f1 = 0.980723155708039
