In [40]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

accel_df = pd.read_csv('./dataset/0412_accel.csv')
accel_df.head()

Unnamed: 0,accel_value,speed,timestamp
0,0,36,2024-04-12 07:28:03.701426+00:00
1,141,33,2024-04-12 07:28:04.063157+00:00
2,145,33,2024-04-12 07:28:05.082944+00:00
3,45,36,2024-04-12 07:28:05.643850+00:00
4,32,36,2024-04-12 07:28:05.644737+00:00


In [41]:
# min, max 정규화
max_accel = max(accel_df['accel_value'])
min_accel = min(accel_df['accel_value'])

# max가 0이면 에러 발생
assert max_accel > 1e-5, 'Accel Sensor Error'

print("Max accel value: ", max_accel)
print("Min accel value: ", min_accel)

normal_accel = (accel_df['accel_value'] - min_accel) / (max_accel - min_accel)
accel_df['normalize_accel'] = normal_accel
accel_df.head()

Max accel value:  167
Min accel value:  0


Unnamed: 0,accel_value,speed,timestamp,normalize_accel
0,0,36,2024-04-12 07:28:03.701426+00:00,0.0
1,141,33,2024-04-12 07:28:04.063157+00:00,0.844311
2,145,33,2024-04-12 07:28:05.082944+00:00,0.868263
3,45,36,2024-04-12 07:28:05.643850+00:00,0.269461
4,32,36,2024-04-12 07:28:05.644737+00:00,0.191617


In [42]:
# speed 정규화
accel_df['shift_speed'] = accel_df['speed'] - accel_df['speed'].shift(1)
accel_df['shift_speed'].fillna(0, inplace = True)

accel_df.head()

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  accel_df['shift_speed'].fillna(0, inplace = True)


Unnamed: 0,accel_value,speed,timestamp,normalize_accel,shift_speed
0,0,36,2024-04-12 07:28:03.701426+00:00,0.0,0.0
1,141,33,2024-04-12 07:28:04.063157+00:00,0.844311,-3.0
2,145,33,2024-04-12 07:28:05.082944+00:00,0.868263,0.0
3,45,36,2024-04-12 07:28:05.643850+00:00,0.269461,3.0
4,32,36,2024-04-12 07:28:05.644737+00:00,0.191617,0.0


In [43]:
# 정답값 지정
label = pd.Series(0 for i in range(0, len(accel_df)))

accel_df['target'] = label
accel_df.head()

Unnamed: 0,accel_value,speed,timestamp,normalize_accel,shift_speed,target
0,0,36,2024-04-12 07:28:03.701426+00:00,0.0,0.0,0
1,141,33,2024-04-12 07:28:04.063157+00:00,0.844311,-3.0,0
2,145,33,2024-04-12 07:28:05.082944+00:00,0.868263,0.0,0
3,45,36,2024-04-12 07:28:05.643850+00:00,0.269461,3.0,0
4,32,36,2024-04-12 07:28:05.644737+00:00,0.191617,0.0,0


In [44]:
# 5개의 데이터가 입력으로 들어감
seq_length = 5
# batch size는 임의로 지정
batch = 128

preprocess_data = accel_df.drop(labels=['accel_value', 'speed', 'timestamp'], axis = 1)
# 역순으로 정렬
preprocess_data = preprocess_data[::-1]
train_size = int(len(preprocess_data) * 0.7)
train_set = preprocess_data[0:train_size]
test_set = preprocess_data[train_size-seq_length:]

train_set.head()

Unnamed: 0,normalize_accel,shift_speed,target
745,0.0,0.0,0
744,0.0,0.0,0
743,0.0,1.0,0
742,0.0,0.0,0
741,0.0,1.0,0


In [56]:
from torch.utils.data import TensorDataset
from torch.utils.data import DataLoader
import torch.nn as nn
import torch.optim as optim

def build_dataset(time_series, seq_length):
    dataX = []
    dataY = []
    for i in range(0, len(time_series)-seq_length):
        _x = time_series[i:i+seq_length, :]
        _y = time_series[i+seq_length, [-1]]
        dataX.append(_x)
        dataY.append(_y)
        
    return np.array(dataX, dtype=np.float32), np.array(dataY, dtype=np.float32)

trainX, trainY = build_dataset(np.array(train_set), seq_length)
testX, testY = build_dataset(np.array(test_set), seq_length)

# 텐서로 변환
trainX_tensor = torch.FloatTensor(trainX)
trainY_tensor = torch.FloatTensor(trainY)

testX_tensor = torch.FloatTensor(testX)
testY_tensor = torch.FloatTensor(testY)

# 텐서 형태로 데이터 정의
dataset = TensorDataset(trainX_tensor, trainY_tensor)

# 데이터로더는 기본적으로 2개의 인자를 입력받으며 배치크기는 통상적으로 2의 배수를 사용
dataloader = DataLoader(dataset,
                        batch_size=batch,
                        shuffle=True,  
                        drop_last=True)


print(trainX)

[[[0. 0. 0.]
  [0. 0. 0.]
  [0. 1. 0.]
  [0. 0. 0.]
  [0. 1. 0.]]

 [[0. 0. 0.]
  [0. 1. 0.]
  [0. 0. 0.]
  [0. 1. 0.]
  [0. 1. 0.]]

 [[0. 1. 0.]
  [0. 0. 0.]
  [0. 1. 0.]
  [0. 1. 0.]
  [0. 0. 0.]]

 ...

 [[0. 0. 0.]
  [0. 0. 0.]
  [0. 0. 0.]
  [0. 0. 0.]
  [0. 0. 0.]]

 [[0. 0. 0.]
  [0. 0. 0.]
  [0. 0. 0.]
  [0. 0. 0.]
  [0. 0. 0.]]

 [[0. 0. 0.]
  [0. 0. 0.]
  [0. 0. 0.]
  [0. 0. 0.]
  [0. 0. 0.]]]


In [57]:
# 설정값
data_dim = 5
hidden_dim = 10 
output_dim = 1 
learning_rate = 0.01
nb_epochs = 100

class Net(nn.Module):
    # # 기본변수, layer를 초기화해주는 생성자
    def __init__(self, input_dim, hidden_dim, seq_len, output_dim, layers):
        super(Net, self).__init__()
        self.hidden_dim = hidden_dim
        self.seq_len = seq_len
        self.output_dim = output_dim
        self.layers = layers
        
        self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers=layers,
                            # dropout = 0.1,
                            batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_dim, bias = True) 
        
    # 학습 초기화를 위한 함수
    def reset_hidden_state(self): 
        self.hidden = (
                torch.zeros(self.layers, self.seq_len, self.hidden_dim),
                torch.zeros(self.layers, self.seq_len, self.hidden_dim))
    
    # 예측을 위한 함수
    def forward(self, x):
        x, _status = self.lstm(x)
        x = self.fc(x[:, -1])
        return x

In [58]:
def train_model(model, train_df, num_epochs = None, lr = None, verbose = 10, patience = 10):
     
    criterion = nn.MSELoss().to(device)
    optimizer = optim.Adam(model.parameters(), lr = learning_rate)
    nb_epochs = num_epochs
    
    # epoch마다 loss 저장
    train_hist = np.zeros(nb_epochs)

    for epoch in range(nb_epochs):
        avg_cost = 0
        total_batch = len(train_df)
        
        for batch_idx, samples in enumerate(train_df):

            x_train, y_train = samples
            
            # seq별 hidden state reset
            model.reset_hidden_state()
            
            # H(x) 계산
            outputs = model(x_train)
                
            # cost 계산
            loss = criterion(outputs, y_train)                    
            
            # cost로 H(x) 개선
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            
            avg_cost += loss/total_batch
               
        train_hist[epoch] = avg_cost        
        
        if epoch % verbose == 0:
            print('Epoch:', '%04d' % (epoch), 'train loss :', '{:.4f}'.format(avg_cost))
            
        # patience번째 마다 early stopping 여부 확인
        if (epoch % patience == 0) & (epoch != 0):
            
            # loss가 커졌다면 early stop
            if train_hist[epoch-patience] < train_hist[epoch]:
                print('\n Early Stopping')
                
                break
            
    return model.eval(), train_hist

In [59]:
# 모델 학습
device = torch.device('cuda')
net = Net(data_dim, hidden_dim, seq_length, output_dim, 1).to(device)  
model, train_hist = train_model(net, dataloader, num_epochs = nb_epochs, lr = learning_rate, verbose = 20, patience = 10)

RuntimeError: Input and parameter tensors are not at the same device, found input tensor at cpu and parameter tensor at cuda:0

In [52]:
# epoch별 손실값
fig = plt.figure(figsize=(10, 4))
plt.plot(train_hist, label="Training loss")
plt.legend()
plt.show()

NameError: name 'train_hist' is not defined

<Figure size 1000x400 with 0 Axes>

In [60]:
# 모델 저장    
PATH = "./Timeseries_LSTM_data-02-stock_daily_.pth"
torch.save(model.state_dict(), PATH)

# 불러오기
model = Net(data_dim, hidden_dim, seq_length, output_dim, 1).to(device)  
model.load_state_dict(torch.load(PATH), strict=False)
model.eval()

NameError: name 'model' is not defined

In [61]:
# 예측 테스트
with torch.no_grad(): 
    pred = []
    for pr in range(len(testX_tensor)):

        model.reset_hidden_state()

        predicted = model(torch.unsqueeze(testX_tensor[pr], 0))
        predicted = torch.flatten(predicted).item()
        pred.append(predicted)

    # INVERSE
    pred_inverse = scaler_y.inverse_transform(np.array(pred).reshape(-1, 1))
    testY_inverse = scaler_y.inverse_transform(testY_tensor)

def MAE(true, pred):
    return np.mean(np.abs(true-pred))

print('MAE SCORE : ', MAE(pred_inverse, testY_inverse))

NameError: name 'model' is not defined

In [62]:
fig = plt.figure(figsize=(8,3))
plt.plot(np.arange(len(pred_inverse)), pred_inverse, label = 'pred')
plt.plot(np.arange(len(testY_inverse)), testY_inverse, label = 'true')
plt.title("Loss plot")
plt.show()

NameError: name 'pred_inverse' is not defined

<Figure size 800x300 with 0 Axes>