In [2]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import LSTM, Dense
from sklearn.model_selection import train_test_split

# Load and preprocess the data
# Giả sử `data_path` là đường dẫn tới file CSV của bạn
data = pd.read_csv('AirPassengers.csv')
data['Month'] = pd.to_datetime(data['Month'])
data['#Passengers'] = data['#Passengers'].astype(float)

print(data[:5])

# Normalize the data
scaler = MinMaxScaler(feature_range=(0, 1))
data_scaled = scaler.fit_transform(data['#Passengers'].values.reshape(-1,1))

# Split into training and test sets
train_size = int(len(data_scaled) * 0.67)
test_size = len(data_scaled) - train_size
train, test = data_scaled[0:train_size,:], data_scaled[train_size:len(data_scaled),:]




       Month  #Passengers
0 1949-01-01        112.0
1 1949-02-01        118.0
2 1949-03-01        132.0
3 1949-04-01        129.0
4 1949-05-01        121.0


In [3]:
# Convert an array of values into a dataset matrix
def create_dataset(dataset, look_back=1):
    X, Y = [], []
    for i in range(len(dataset)-look_back-1):
        a = dataset[i:(i+look_back), 0]
        X.append(a)
        Y.append(dataset[i + look_back, 0])
    return np.array(X), np.array(Y)

# Reshape into X=t and Y=t+1
look_back = 1
X_train, Y_train = create_dataset(train, look_back)
X_test, Y_test = create_dataset(test, look_back)

# Reshape input to be [samples, time steps, features]
X_train = np.reshape(X_train, (X_train.shape[0], 1, X_train.shape[1]))
X_test = np.reshape(X_test, (X_test.shape[0], 1, X_test.shape[1]))

In [4]:
# Build the LSTM model
model = Sequential()
model.add(LSTM(4, input_shape=(1, look_back), return_sequences=True))
model.add(LSTM(4, return_sequences=False))
model.add(Dense(1))
model.compile(loss='mean_squared_error', optimizer='adam')

# Fit the model
model.fit(X_train, Y_train, epochs=100, batch_size=1, verbose=2)

# Making predictions
train_predict = model.predict(X_train)
test_predict = model.predict(X_test)

Epoch 1/100
94/94 - 4s - loss: 0.0440 - 4s/epoch - 39ms/step
Epoch 2/100
94/94 - 0s - loss: 0.0221 - 252ms/epoch - 3ms/step
Epoch 3/100
94/94 - 0s - loss: 0.0184 - 245ms/epoch - 3ms/step
Epoch 4/100
94/94 - 0s - loss: 0.0178 - 213ms/epoch - 2ms/step
Epoch 5/100
94/94 - 0s - loss: 0.0172 - 219ms/epoch - 2ms/step
Epoch 6/100
94/94 - 0s - loss: 0.0165 - 247ms/epoch - 3ms/step
Epoch 7/100
94/94 - 0s - loss: 0.0158 - 230ms/epoch - 2ms/step
Epoch 8/100
94/94 - 0s - loss: 0.0151 - 241ms/epoch - 3ms/step
Epoch 9/100
94/94 - 0s - loss: 0.0139 - 212ms/epoch - 2ms/step
Epoch 10/100
94/94 - 0s - loss: 0.0128 - 224ms/epoch - 2ms/step
Epoch 11/100
94/94 - 0s - loss: 0.0111 - 240ms/epoch - 3ms/step
Epoch 12/100
94/94 - 0s - loss: 0.0090 - 235ms/epoch - 2ms/step
Epoch 13/100
94/94 - 0s - loss: 0.0073 - 217ms/epoch - 2ms/step
Epoch 14/100
94/94 - 0s - loss: 0.0054 - 214ms/epoch - 2ms/step
Epoch 15/100
94/94 - 0s - loss: 0.0041 - 245ms/epoch - 3ms/step
Epoch 16/100
94/94 - 0s - loss: 0.0032 - 241ms/epoc

In [5]:
# Invert predictions
from sklearn.metrics import mean_squared_error

train_predict = scaler.inverse_transform(train_predict)
Y_train = scaler.inverse_transform([Y_train])
test_predict = scaler.inverse_transform(test_predict)
Y_test = scaler.inverse_transform([Y_test])

# Calculate root mean squared error
train_score = np.sqrt(mean_squared_error(Y_train[0], train_predict[:,0]))
print('Train Score: %.2f RMSE' % (train_score))
test_score = np.sqrt(mean_squared_error(Y_test[0], test_predict[:,0]))
print('Test Score: %.2f RMSE' % (test_score))

Train Score: 22.74 RMSE
Test Score: 49.69 RMSE
