In [1]:
# In [1]
import pandas as pd
import numpy as np

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, LSTM

from sklearn.metrics import mean_squared_error, mean_absolute_percentage_error, mean_squared_log_error
from sklearn.preprocessing import MinMaxScaler

import matplotlib.pyplot as plt
%matplotlib inline


In [2]:
# In [2]
df = pd.read_csv('Samsung_clean.csv')   # <-- file của bạn
df.head(10)


Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,1/4/2010,16060,16180,16000,16180,11978.36523,11950800
1,1/5/2010,16520,16580,16300,16440,12170.84668,27925850
2,1/6/2010,16580,16820,16520,16820,12452.1709,22948850
3,1/7/2010,16820,16820,16260,16260,12037.59082,22107950
4,1/8/2010,16400,16420,16120,16420,12156.04102,14777550
5,1/11/2010,16420,16460,15940,15940,11800.6875,19896100
6,1/12/2010,15940,16180,15900,16180,11978.36523,19218050
7,1/13/2010,16100,16120,15940,15940,11800.6875,13375300
8,1/14/2010,16120,16620,16000,16540,12244.87988,26551550
9,1/15/2010,16640,16860,16580,16840,12466.97559,19905600


In [3]:
# In [2]
df = pd.read_csv('Samsung_clean.csv')   # <-- file của bạn
df.head(10)


Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,1/4/2010,16060,16180,16000,16180,11978.36523,11950800
1,1/5/2010,16520,16580,16300,16440,12170.84668,27925850
2,1/6/2010,16580,16820,16520,16820,12452.1709,22948850
3,1/7/2010,16820,16820,16260,16260,12037.59082,22107950
4,1/8/2010,16400,16420,16120,16420,12156.04102,14777550
5,1/11/2010,16420,16460,15940,15940,11800.6875,19896100
6,1/12/2010,15940,16180,15900,16180,11978.36523,19218050
7,1/13/2010,16100,16120,15940,15940,11800.6875,13375300
8,1/14/2010,16120,16620,16000,16540,12244.87988,26551550
9,1/15/2010,16640,16860,16580,16840,12466.97559,19905600


In [4]:
# In [3]
# Nếu có cột 'Date' chuyển về datetime và set làm index
if 'Date' in df.columns:
    df['Date'] = pd.to_datetime(df['Date'])
    df.set_index('Date', inplace=True)


In [5]:
# In [4]
df


Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2010-01-04,16060,16180,16000,16180,11978.36523,11950800
2010-01-05,16520,16580,16300,16440,12170.84668,27925850
2010-01-06,16580,16820,16520,16820,12452.17090,22948850
2010-01-07,16820,16820,16260,16260,12037.59082,22107950
2010-01-08,16400,16420,16120,16420,12156.04102,14777550
...,...,...,...,...,...,...
2025-10-23,96800,98500,96300,96500,96500.00000,18488581
2025-10-24,97900,99000,97700,98800,98800.00000,18801925
2025-10-27,101300,102000,100600,102000,102000.00000,22169970
2025-10-28,100900,101000,99100,99500,99500.00000,20002282


In [6]:
# In [5]
# Select 'Close' column
data = df['Close'].values

# Scale data
scaler = MinMaxScaler(feature_range=(0,1))
data_scaled = scaler.fit_transform(data.reshape(-1,1))


In [7]:
# In [6]
# Define sizes
train_size = int(len(data_scaled) * 0.7)
val_size = int(len(data_scaled) * 0.1)
test_size = len(data_scaled) - train_size - val_size

# Split
train_data = data_scaled[:train_size]
val_data = data_scaled[train_size: train_size + val_size]
test_data = data_scaled[train_size + val_size : ]

print("Train size:", len(train_data), "Val size:", len(val_data), "Test size:", len(test_data))


Train size: 2724 Val size: 389 Test size: 779


In [8]:
# In [7]
def add_data(data, sequence_length=1):
    X, Y = [], []
    for i in range(len(data) - sequence_length):
        X.append(data[i : (i + sequence_length), 0])
        Y.append(data[i + sequence_length, 0])
    return np.array(X), np.array(Y)


In [9]:
# In [8]
sequence_length = 60

# prepare windows for validation/test so they have preceding context
train_last_windowsize = train_data[-sequence_length:]
val_last_windowsize = val_data[-sequence_length:] if len(val_data) >= sequence_length else np.concatenate([train_last_windowsize[-(sequence_length - len(val_data)):], val_data], axis=0)
test_last_windowsize = test_data[-sequence_length:] if len(test_data) >= sequence_length else np.concatenate([val_data[-(sequence_length - len(test_data)):], test_data], axis=0)

# For training sequences directly from train_data
X_train, y_train = add_data(train_data, sequence_length)
# For validation: extend windows to ensure we can create sequences
val_extended = np.concatenate((train_data[-sequence_length:], val_data), axis=0)
X_val, y_val = add_data(val_extended, sequence_length)

# For test: extend with last part of val to build initial window
test_extended = np.concatenate((val_data[-sequence_length:], test_data), axis=0) if len(val_data) >= sequence_length else np.concatenate((train_data[-sequence_length:], test_data), axis=0)
X_test, y_test = add_data(test_extended, sequence_length)

# reshape to [samples, timesteps, features]
X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
X_val = X_val.reshape((X_val.shape[0], X_val.shape[1], 1))
X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))

# y already shape (n,) -> reshape to (n,1) for consistency
y_train = y_train.reshape(-1,1)
y_val = y_val.reshape(-1,1)
y_test = y_test.reshape(-1,1)

print("X_train shape:", X_train.shape, "y_train shape:", y_train.shape)
print("X_val shape:", X_val.shape, "y_val shape:", y_val.shape)
print("X_test shape:", X_test.shape, "y_test shape:", y_test.shape)


X_train shape: (2664, 60, 1) y_train shape: (2664, 1)
X_val shape: (389, 60, 1) y_val shape: (389, 1)
X_test shape: (779, 60, 1) y_test shape: (779, 1)


In [10]:
# In [9]
model = Sequential()

model.add(LSTM(units=50, return_sequences=True, input_shape=(X_train.shape[1], 1)))
model.add(Dropout(0.2))

model.add(LSTM(units=50, return_sequences=True))
model.add(Dropout(0.2))

model.add(LSTM(units=50, return_sequences=True))
model.add(Dropout(0.2))

model.add(LSTM(units=50))
model.add(Dropout(0.2))

model.add(Dense(units=1))

model.compile(optimizer='adam', loss='mean_squared_error')

model.summary()


  super().__init__(**kwargs)


In [11]:
# In [10]
history = model.fit(X_train, y_train, validation_data=(X_val, y_val), epochs=50, batch_size=32,verbose=2)


Epoch 1/50
84/84 - 19s - 226ms/step - loss: 0.0043 - val_loss: 0.0039
Epoch 2/50
84/84 - 7s - 85ms/step - loss: 0.0013 - val_loss: 0.0027
Epoch 3/50
84/84 - 10s - 123ms/step - loss: 0.0013 - val_loss: 8.8794e-04
Epoch 4/50
84/84 - 11s - 131ms/step - loss: 0.0011 - val_loss: 0.0031
Epoch 5/50
84/84 - 7s - 87ms/step - loss: 0.0011 - val_loss: 9.8063e-04
Epoch 6/50
84/84 - 11s - 131ms/step - loss: 9.3661e-04 - val_loss: 0.0010
Epoch 7/50
84/84 - 7s - 88ms/step - loss: 9.4035e-04 - val_loss: 0.0073
Epoch 8/50
84/84 - 8s - 94ms/step - loss: 9.6360e-04 - val_loss: 6.5523e-04
Epoch 9/50
84/84 - 9s - 109ms/step - loss: 9.3523e-04 - val_loss: 0.0028
Epoch 10/50
84/84 - 9s - 108ms/step - loss: 8.4538e-04 - val_loss: 0.0093
Epoch 11/50
84/84 - 9s - 102ms/step - loss: 8.3766e-04 - val_loss: 0.0020
Epoch 12/50
84/84 - 13s - 152ms/step - loss: 6.8136e-04 - val_loss: 0.0016
Epoch 13/50
84/84 - 10s - 117ms/step - loss: 7.0244e-04 - val_loss: 0.0031
Epoch 14/50
84/84 - 8s - 91ms/step - loss: 7.3756e-04