In [None]:
import pandas as pd
import numpy as np
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.ensemble import VotingRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.svm import SVR

In [None]:
# โหลดข้อมูลจากไฟล์ CSV
file_name = 'Thailand_Stock_Market_Data.csv'
data = pd.read_csv(file_name)
print("ข้อมูลเบื้องต้น:\n", data.head())
print("ข้อมูลที่หายไปในแต่ละคอลัมน์:\n", data.isnull().sum())

ข้อมูลเบื้องต้น:
         Date  Close Price  High Price  Low Price   Volume  \
0  5/17/2022      1035.42     1060.41    1008.71   4587.0   
1  5/18/2022      1834.77     1854.81    1820.66  48532.0   
2  5/19/2022      1299.43     1338.65    1279.73  58306.0   
3  5/20/2022      1613.39     1638.25    1589.38  25687.0   
4  5/21/2022      1005.77         NaN     977.20  34795.0   

   Moving Average (50 days)  Moving Average (200 days)  Change  \
0                   1023.00                    1008.98   -7.74   
1                       NaN                    1773.30   -3.34   
2                   1332.39                    1347.57     NaN   
3                       NaN                    1570.26     NaN   
4                    978.58                     986.95    0.86   

   Percentage Change  
0              -0.75  
1              -0.18  
2                NaN  
3                NaN  
4               0.09  
ข้อมูลที่หายไปในแต่ละคอลัมน์:
 Date                           0
Close Price     

In [None]:
# ตรวจสอบว่าคอลัมน์ที่ต้องการมีอยู่จริงหรือไม่
required_columns = ['Date', 'Close Price', 'High Price', 'Low Price', 'Volume', 'Change', 'Moving Average (50 days)', 'Moving Average (200 days)', 'Percentage Change']
for col in required_columns:
    if col not in data.columns:
        raise ValueError(f"Missing required column: {col}")

In [None]:
# แยก X และ y
X = data.drop(['Date', 'Close Price'], axis=1)
y = data['Close Price'].ffill()  # เติมค่าที่หายไปใน target

In [None]:
# เติมค่า missing values ใน X
imputer = SimpleImputer(strategy='mean')
X_imputed = pd.DataFrame(imputer.fit_transform(X), columns=X.columns)

In [None]:
# Scaling ข้อมูล
scaler = StandardScaler()
X_scaled = pd.DataFrame(scaler.fit_transform(X_imputed), columns=X.columns)

In [None]:
# Split ข้อมูล
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)


In [None]:
# สร้างและฝึกสอนโมเดล
knn = KNeighborsRegressor(n_neighbors=5)
decision_tree = DecisionTreeRegressor(random_state=42)
param_grid_svr = {'C': [1, 10, 100], 'epsilon': [0.01, 0.1, 1], 'kernel': ['linear', 'rbf']}

grid_search_svr = GridSearchCV(SVR(), param_grid_svr, scoring='r2', cv=5)
grid_search_svr.fit(X_train, y_train)
svm = grid_search_svr.best_estimator_

In [None]:
# Train Models
knn.fit(X_train, y_train)
decision_tree.fit(X_train, y_train)

In [None]:
# ฟังก์ชันประเมินผลโมเดล
def evaluate_model(model, X_test, y_test):
    y_pred = model.predict(X_test)
    mae = mean_absolute_error(y_test, y_pred)
    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)
    mape = np.mean(np.abs((y_test - y_pred) / y_test)) * 100
    accuracy = 100 - mape

    print(f"{model.__class__.__name__} Evaluation")
    print(f"MAE: {mae}")
    print(f"MSE: {mse}")
    print(f"R^2 Score: {r2}")
    print(f"MAPE: {mape}%")
    print(f"Prediction Accuracy: {accuracy}%")
    print("=" * 50)
    return y_pred

In [None]:
# Evaluate Models
evaluate_model(knn, X_test, y_test)
evaluate_model(decision_tree, X_test, y_test)
evaluate_model(svm, X_test, y_test)

KNeighborsRegressor Evaluation
MAE: 65.22198000000002
MSE: 13466.682323440004
R^2 Score: 0.8501622726257395
MAPE: 4.38520507817006%
Prediction Accuracy: 95.61479492182994%
DecisionTreeRegressor Evaluation
MAE: 62.483799999999995
MSE: 24925.21042005917
R^2 Score: 0.7226683756275851
MAPE: 4.095967917944737%
Prediction Accuracy: 95.90403208205527%
SVR Evaluation
MAE: 51.255052393686
MSE: 12857.7314109803
R^2 Score: 0.8569377959962304
MAPE: 3.4660482866449365%
Prediction Accuracy: 96.53395171335507%


array([1528.95668391, 1991.60116165, 1768.90231843, 1296.37809096,
       1142.79524275, 1483.63233827, 1166.88549051, 1304.2512343 ,
       1031.7029494 , 1775.43820565, 1201.44107479, 1753.66987522,
       1270.77248881, 1798.2055928 , 1483.63062642, 1853.50358424,
       1483.63080726,  996.58883234, 1669.92911152, 1736.43421258,
       1474.48562759, 1584.71160446, 1192.21023892, 1347.98197086,
       1395.60092054, 1483.6314177 , 1147.99322134, 1933.61411335,
       1977.94798655, 1547.72543779, 1918.52075765, 1727.4058261 ,
       1725.25488536, 1145.79554858, 1464.90777677, 1073.60025368,
       1535.13999588, 1716.91438184, 1460.28938572, 1543.85884326,
       1930.52914404, 1520.51022722, 1431.51333096, 1661.77602875,
       1933.67642012, 1716.13367852, 1447.18837726, 1954.8972227 ,
       1297.32079638, 1974.62022586, 1399.27418102, 1551.61073368,
       1564.77571973, 1500.81958317, 1774.22301016, 1373.94861187,
       1483.63010637, 1152.04806431, 1903.53025614, 1145.56438

In [None]:
# บันทึกโมเดล
joblib.dump(decision_tree, 'decision_tree_model.pkl')
joblib.dump(knn, 'knn_model.pkl')
joblib.dump(svm, 'svr_model.pkl')
joblib.dump(scaler, 'scaler.pkl')

['scaler.pkl']

In [None]:

# โหลดโมเดลที่เทรนไว้
model_1 = joblib.load('decision_tree_model.pkl')
model_2 = joblib.load('knn_model.pkl')
model_3 = joblib.load('svr_model.pkl')
scaler = joblib.load('scaler.pkl')

In [None]:

# โหลดข้อมูลใหม่
df = pd.read_csv('Thailand_Stock_Market_Data.csv')

# ลบช่องว่างด้านหน้า-หลังของชื่อคอลัมน์ทั้งหมด
df.columns = df.columns.str.strip()

# แยก Features (X) และ Target (y)
X_test_new = df[required_columns[2:]].copy()
y_test = df['Close Price'].ffill()  # เติมค่าหายไปใน target ด้วย forward fill

# จัดเรียงคอลัมน์ของ X_test_new ให้ตรงกับ X_train
X_test_new = X_test_new.reindex(columns=X_train.columns)

# เติมค่าที่หายไปใน X_test_new ด้วยค่าเฉลี่ยของ X_train
imputer = SimpleImputer(strategy='mean')
X_test_new = pd.DataFrame(imputer.fit_transform(X_test_new), columns=X_train.columns)

# ใช้ scaler ที่เคย fit กับ training data แล้ว
X_test_scaled = pd.DataFrame(scaler.transform(X_test_new), columns=X_train.columns)

# ตรวจสอบว่าไม่มีค่า missing เหลืออยู่
print(X_test_scaled.isnull().sum())
print(y_test.isnull().sum())


High Price                   0
Low Price                    0
Volume                       0
Moving Average (50 days)     0
Moving Average (200 days)    0
Change                       0
Percentage Change            0
dtype: int64
0


In [None]:
print(f"Shape of X_test_scaled: {X_test_scaled.shape}")
print(f"Shape of y_test: {y_test.shape}")


Shape of X_test_scaled: (1000, 7)
Shape of y_test: (1000,)


In [None]:
X_test_scaled = X_test_scaled[:len(y_test)]
print(f"Fixed X_test_scaled shape: {X_test_scaled.shape}")


Fixed X_test_scaled shape: (1000, 7)


In [None]:
X_test_scaled = X_test_scaled.reset_index(drop=True)
y_test = y_test.reset_index(drop=True)


In [None]:
# ทำนายด้วยโมเดล
pred_1 = model_1.predict(X_test_scaled)
pred_2 = model_2.predict(X_test_scaled)
pred_3 = model_3.predict(X_test_scaled)
y_pred_ensemble = (pred_1 + pred_2 + pred_3) / 3


In [None]:
mae = mean_absolute_error(y_test, y_pred_ensemble)
mse = mean_squared_error(y_test, y_pred_ensemble)
r2 = r2_score(y_test, y_pred_ensemble)
mape = np.mean(np.abs((y_test - y_pred_ensemble) / y_test)) * 100
accuracy = 100 - mape

print(f"MAE: {mae}")
print(f"MSE: {mse}")
print(f"R2 Score: {r2}")
print(f"MAPE: {mape}%")
print(f"Prediction Accuracy: {accuracy}%")

MAE: 38.3838300403303
MSE: 7078.100869776951
R2 Score: 0.9194065304203449
MAPE: 2.6276402146308797%
Prediction Accuracy: 97.37235978536913%


In [None]:
print(X_test_scaled.head())
print(y_test.head())


     High Price  Low Price    Volume  Moving Average (50 days)  \
0 -1.679278e+00  -1.659103 -1.642579                 -1.799712   
1  1.258137e+00   1.360983  0.016754                  0.000000   
2 -6.504432e-01  -0.651031  0.385813                 -0.575887   
3  4.573734e-01   0.500726 -0.845858                  0.000000   
4 -8.407487e-16  -1.776306 -0.501946                 -1.975420   

   Moving Average (200 days)        Change  Percentage Change  
0                  -1.886665 -1.834676e+00      -2.467527e+00  
1                   1.133378 -8.320639e-01      -6.553684e-01  
2                  -0.548801  1.264912e-17       1.103015e-17  
3                   0.331110  1.264912e-17       1.103015e-17  
4                  -1.973712  1.249747e-01       2.030223e-01  
0    1035.42
1    1834.77
2    1299.43
3    1613.39
4    1005.77
Name: Close Price, dtype: float64
