In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_percentage_error

# Load the training and test sets from CSV files.
# NOTE(review): these are absolute, machine-specific paths; consider moving
# them to a configurable data directory so the notebook runs elsewhere.
train_path = "C:\\Users\\Laptop K1\\Downloads\\train.csv"
test_path = "C:\\Users\\Laptop K1\\Downloads\\test.csv"

train_df = pd.read_csv(train_path)
test_df = pd.read_csv(test_path)

# Convert the 'Date' column into the number of days elapsed since the
# earliest date found in the training set (the same origin is used for both
# frames so the offsets are comparable).
train_df["Date"] = pd.to_datetime(train_df["Date"])
test_df["Date"] = pd.to_datetime(test_df["Date"])

min_date = train_df["Date"].min()
train_df["Date"] = (train_df["Date"] - min_date).dt.days
test_df["Date"] = (test_df["Date"] - min_date).dt.days

# Fill missing 'Revenue' values in test_df with the column mean.
# The result is assigned back to the column instead of calling
# `fillna(..., inplace=True)` on the selected Series: that chained in-place
# form raises a FutureWarning and stops modifying test_df in pandas 3.0.
test_df["Revenue"] = test_df["Revenue"].fillna(test_df["Revenue"].mean())

# Scale the data with a MinMaxScaler (default settings), ignoring the
# 'ProductID' and 'Zip' columns when they are present. The scaler is fitted
# on the training frame only and then re-used, unchanged, on the test frame.
scaler = MinMaxScaler()
train_scaled = scaler.fit_transform(
    train_df.drop(columns=["ProductID", "Zip"], errors='ignore')
)
test_scaled = scaler.transform(
    test_df.drop(columns=["ProductID", "Zip"], errors='ignore')
)

# Split into inputs (X) and target (y): every column but the last is a
# feature, the last column is the target.
# NOTE(review): this assumes 'Revenue' is the last remaining column in both
# CSV files -- confirm against the actual column order.
X_train = train_scaled[:, :-1]
y_train = train_scaled[:, -1]  # target to predict (Revenue)
X_test = test_scaled[:, :-1]
y_true = test_scaled[:, -1]

# LSTM layers expect 3-D input (samples, time steps, features); insert a
# time-step axis of length 1 between the sample and feature axes.
X_train = np.expand_dims(X_train, axis=1)
X_test = np.expand_dims(X_test, axis=1)

# Seed Python, NumPy and TensorFlow RNGs so weight initialisation, dropout
# and batch shuffling are repeatable across "Restart & Run All".
tf.keras.utils.set_random_seed(42)

# Build the LSTM model: two stacked 50-unit LSTM layers, each followed by
# 20% dropout, then a small dense head that outputs a single scalar.
# The input shape is declared with an explicit Input object rather than the
# `input_shape=` layer argument, which Keras warns against in Sequential
# models.
# NOTE(review): with a single time step per sample the recurrent layers see
# no sequence history -- confirm this is intended.
model_lstm = Sequential([
    tf.keras.Input(shape=(X_train.shape[1], X_train.shape[2])),
    LSTM(50, return_sequences=True),
    Dropout(0.2),
    LSTM(50, return_sequences=False),
    Dropout(0.2),
    Dense(25, activation="relu"),
    Dense(1)
])

# Compile the model with the Adam optimizer and mean-squared-error loss.
model_lstm.compile(optimizer="adam", loss="mean_squared_error")

# Train for 10 epochs in batches of 64, holding out 10% of the training
# samples for validation.
history = model_lstm.fit(X_train, y_train, epochs=10, batch_size=64, validation_split=0.1, verbose=1)

# Predict on the test set (outputs are in the scaler's normalised space).
y_pred_lstm = model_lstm.predict(X_test)

# Convert the predictions back to original units. The scaler was fitted on
# the full (features + target) matrix, so the scaled features are
# re-attached in front of the predicted column before inverting, and the
# last column is then extracted.
y_pred_lstm_original = scaler.inverse_transform(np.concatenate((X_test[:, 0, :], y_pred_lstm), axis=1))[:, -1]

# Bring the ground truth back to original units as well. Comparing the
# still-scaled `y_true` against de-normalised predictions would mix two
# different scales and make every metric below meaningless.
y_true_original = scaler.inverse_transform(test_scaled)[:, -1]

# Compute the evaluation metrics, with both arrays in original units.
rmse = np.sqrt(mean_squared_error(y_true_original, y_pred_lstm_original))
r2 = r2_score(y_true_original, y_pred_lstm_original)
# NOTE(review): MAPE becomes extremely large when true values are at or near
# zero -- check whether the target column contains such rows.
mape = mean_absolute_percentage_error(y_true_original, y_pred_lstm_original)

print(f"R² Score: {r2:.4f}")
print(f"RMSE: {rmse:.4f}")
# scikit-learn returns MAPE as a fraction (1.0 == 100%), so scale it before
# printing it with a percent sign.
print(f"MAPE: {mape * 100:.2f}%")


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  test_df["Revenue"].fillna(test_df["Revenue"].mean(), inplace=True)
  super().__init__(**kwargs)


Epoch 1/10
[1m12679/12679[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 2ms/step - loss: 3.5984e-05 - val_loss: 3.2300e-06
Epoch 2/10
[1m12679/12679[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 2ms/step - loss: 2.1135e-05 - val_loss: 4.8289e-06
Epoch 3/10
[1m12679/12679[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 2ms/step - loss: 2.0211e-05 - val_loss: 3.5358e-06
Epoch 4/10
[1m12679/12679[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m44s[0m 3ms/step - loss: 2.0764e-05 - val_loss: 3.9749e-06
Epoch 5/10
[1m12679/12679[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m65s[0m 2ms/step - loss: 2.0555e-05 - val_loss: 3.6433e-06
Epoch 6/10
[1m12679/12679[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 2ms/step - loss: 2.0448e-05 - val_loss: 3.5437e-06
Epoch 7/10
[1m12679/12679[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 2ms/step - loss: 2.0196e-05 - val_loss: 5.2246e-06
Epoch 8/10
[1m12679/12679[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m