<a href="https://colab.research.google.com/github/JidaphaMekon/Opportunities-for-Pioneering-Practices-in-AI-Workshop/blob/main/model_1_nointerp_lag7_5_models.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

| ขั้นตอน | รายละเอียด                                     |
| ------- | ---------------------------------------------- |
| 1       | รวบรวมหุ้นหลายตัว (AAPL, MSFT, TSLA, …)        |
| 2       | ตรวจวันหยุดตลาด + Interpolation (3 วิธี)       |
| 3       | สร้าง features (Lag7, SMA, EMA, BB, RSI, MACD) |
| 4       | Scaling train เท่านั้น                         |
| 5       | ใช้ TimeSeriesSplit หรือ Sliding 70/15/15      |
| 6       | Train หลายโมเดล (Linear, XGB, LSTM, Prophet)   |
| 7       | เก็บ RMSE, MAE, R² ลง DataFrame                |
| 8       | สรุปเปรียบเทียบผลลัพธ์                         |


In [None]:
!pip install pandas_market_calendars
!pip install yfinance scikit-learn xgboost matplotlib
import yfinance as yf
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import pandas_market_calendars as mcal
import matplotlib.pyplot as plt

# ไลบรารีสำหรับ Machine Learning
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.svm import SVR
from xgboost import XGBRegressor
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
from sklearn.preprocessing import StandardScaler

Collecting pandas_market_calendars
  Downloading pandas_market_calendars-5.1.1-py3-none-any.whl.metadata (9.7 kB)
Collecting exchange-calendars>=3.3 (from pandas_market_calendars)
  Downloading exchange_calendars-4.11.1-py3-none-any.whl.metadata (38 kB)
Collecting pyluach (from exchange-calendars>=3.3->pandas_market_calendars)
  Downloading pyluach-2.3.0-py3-none-any.whl.metadata (4.3 kB)
Collecting korean_lunar_calendar (from exchange-calendars>=3.3->pandas_market_calendars)
  Downloading korean_lunar_calendar-0.3.1-py3-none-any.whl.metadata (2.8 kB)
Downloading pandas_market_calendars-5.1.1-py3-none-any.whl (127 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m127.4/127.4 kB[0m [31m6.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading exchange_calendars-4.11.1-py3-none-any.whl (208 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m208.9/208.9 kB[0m [31m17.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading korean_lunar_calendar-0.3.1-py3-none-any.whl 

In [None]:

# ------------------------------
# 0. Define the full list of tickers
# ------------------------------
tickers = ["AAPL", "AMD", "AVGO", "GOOG", "GOOGL", "META", "MSFT", "NVDA"]

# Date range for the download.
# NOTE(review): yfinance documents `end` as exclusive, so 2024-12-31 itself is
# presumably not part of the downloaded data — confirm this is intended.
start_date = datetime(2019, 1, 1)
end_date   = datetime(2024, 12, 31)

all_data_list = []


def clean_columns(cols):
    """Reduce downloaded column labels to bare field names.

    Any label containing 'Date' becomes 'Date'; every other label is cut down
    to its first whitespace-separated token (e.g. 'Close AAPL' -> 'Close').

    Defined once here instead of being re-created on every loop iteration.
    """
    return ['Date' if 'Date' in col else col.split()[0] for col in cols]


# ------------------------------
# 1. Download price data for every ticker
# ------------------------------
for ticker in tickers:
    print(f"📥 Downloading {ticker} ...")
    data = yf.download(
        ticker,
        start=start_date.strftime('%Y-%m-%d'),
        end=end_date.strftime('%Y-%m-%d'),
        # Pinned explicitly: the default of `auto_adjust` differs between
        # yfinance releases, so relying on it makes 'Close' version-dependent
        # (and recent releases warn about it on every call).
        auto_adjust=True,
    )

    if data.empty:
        print(f"❌ No data for {ticker}")
        continue

    # Turn the date index into a regular column
    data = data.reset_index()

    # Flatten MultiIndex columns into single space-joined strings
    if isinstance(data.columns, pd.MultiIndex):
        data.columns = [' '.join(col).strip() if isinstance(col, tuple) else col for col in data.columns.values]

    data.columns = clean_columns(data.columns)
    data['Symbol'] = ticker.upper()

    # Keep only the columns used downstream (those that are actually present)
    wanted_cols = ['Date', 'Close', 'Symbol']
    data = data[[col for col in wanted_cols if col in data.columns]]

    all_data_list.append(data)

# pd.concat raises an opaque "No objects to concatenate" on an empty list;
# fail with a message that names the real cause instead.
if not all_data_list:
    raise ValueError("No price data was downloaded for any ticker; cannot build flat_df")

# Stack the per-ticker frames into one long dataframe
flat_df = pd.concat(all_data_list, ignore_index=True)



📥 Downloading AAPL ...


  data = yf.download(ticker, start=start_date.strftime('%Y-%m-%d'), end=end_date.strftime('%Y-%m-%d'))
[*********************100%***********************]  1 of 1 completed
  data = yf.download(ticker, start=start_date.strftime('%Y-%m-%d'), end=end_date.strftime('%Y-%m-%d'))


📥 Downloading AMD ...


[*********************100%***********************]  1 of 1 completed
  data = yf.download(ticker, start=start_date.strftime('%Y-%m-%d'), end=end_date.strftime('%Y-%m-%d'))


📥 Downloading AVGO ...


[*********************100%***********************]  1 of 1 completed
  data = yf.download(ticker, start=start_date.strftime('%Y-%m-%d'), end=end_date.strftime('%Y-%m-%d'))


📥 Downloading GOOG ...


[*********************100%***********************]  1 of 1 completed
  data = yf.download(ticker, start=start_date.strftime('%Y-%m-%d'), end=end_date.strftime('%Y-%m-%d'))


📥 Downloading GOOGL ...


[*********************100%***********************]  1 of 1 completed
  data = yf.download(ticker, start=start_date.strftime('%Y-%m-%d'), end=end_date.strftime('%Y-%m-%d'))


📥 Downloading META ...


[*********************100%***********************]  1 of 1 completed
  data = yf.download(ticker, start=start_date.strftime('%Y-%m-%d'), end=end_date.strftime('%Y-%m-%d'))


📥 Downloading MSFT ...


[*********************100%***********************]  1 of 1 completed
  data = yf.download(ticker, start=start_date.strftime('%Y-%m-%d'), end=end_date.strftime('%Y-%m-%d'))


📥 Downloading NVDA ...


[*********************100%***********************]  1 of 1 completed


In [None]:
# Names of the seven lagged-close feature columns: Close_lag1 ... Close_lag7
lag_cols = [f'Close_lag{i}' for i in range(1, 8)]
X_cols = lag_cols

# Lagged closes, shifted within each Symbol group
for lag in range(1, 8):
    flat_df[lag_cols[lag - 1]] = flat_df.groupby('Symbol')['Close'].shift(lag)

# Target: the Close of the following row within the same Symbol group
flat_df['Close_next'] = flat_df.groupby('Symbol')['Close'].shift(-1)

# Feature matrix and target, taken over all symbols at once
X = flat_df[X_cols]
y = flat_df['Close_next']

# Preview the raw features and target
for title, frame in (("Feature Matrix X:", X), ("\nTarget y:", y)):
    print(title)
    print(frame.head(10))

# Combine X and y temporarily so rows with a NaN in either are dropped together
data_ml = X.join(y)
complete_rows = data_ml.notna().all(axis=1)
data_ml_clean = data_ml[complete_rows].reset_index(drop=True)

# Split back into features and target
X_clean = data_ml_clean[X_cols]
y_clean = data_ml_clean['Close_next']

# Preview the cleaned features and target
for title, frame in (("Feature Matrix X (clean):", X_clean), ("\nTarget y (clean):", y_clean)):
    print(title)
    print(frame.head(10))

import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from xgboost import XGBRegressor
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import pandas as pd

import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from xgboost import XGBRegressor
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

from pathlib import Path

# Sliding-window proportions, as fractions of each symbol's usable row count
window_train = 0.7
window_test = 0.15

# Models to compare
models = {
    'Linear': LinearRegression(),
    'RandomForest': RandomForestRegressor(n_estimators=100, random_state=42),
    'GradientBoosting': GradientBoostingRegressor(n_estimators=100, random_state=42),
    'XGBoost': XGBRegressor(n_estimators=100, random_state=42, verbosity=0),
    'SVR': SVR()
}

# Collects one metrics row per (symbol, window, model)
all_results = []

# Manually assigned label for this experiment
model_name = "Model 1 nointerp_lag7_5 models"

symbols = flat_df['Symbol'].unique()
lag_cols = [f'Close_lag{i}' for i in range(1, 8)]
X_cols = lag_cols


def _sliding_window_bounds(n_rows, train_frac, test_frac):
    """Yield (start, train_end, test_end) row offsets for each window.

    Training rows are [start, train_end) and test rows are
    [train_end, test_end). The window advances by the test length until it
    no longer fits in n_rows. Yields nothing when either length truncates
    to zero, since such a window would have an empty train or test set.
    """
    train_len = int(n_rows * train_frac)
    test_len = int(n_rows * test_frac)
    if train_len == 0 or test_len == 0:
        return
    start = 0
    while start + train_len + test_len <= n_rows:
        yield start, start + train_len, start + train_len + test_len
        start += test_len


for sym in symbols:
    df_sym = flat_df[flat_df['Symbol'] == sym].sort_values('Date').reset_index(drop=True)

    # Features and target for this symbol only
    X = df_sym[X_cols]
    y = df_sym['Close_next']

    # Drop rows where any lag or the target is NaN
    data_ml = pd.concat([X, y], axis=1).dropna().reset_index(drop=True)
    X_clean = data_ml[X_cols]
    y_clean = data_ml['Close_next']

    n = len(X_clean)
    windows = list(_sliding_window_bounds(n, window_train, window_test))
    if not windows:
        # Previously this case produced empty slices and crashed inside the
        # scaler/metrics; skip the symbol and keep evaluating the others.
        print(f"⚠️ Skipping {sym}: only {n} usable rows, too few for a sliding window")
        continue

    for start, train_end, test_end in windows:
        X_train = X_clean.iloc[start:train_end].values
        y_train = y_clean.iloc[start:train_end].values
        X_test  = X_clean.iloc[train_end:test_end].values
        y_test  = y_clean.iloc[train_end:test_end].values

        # Fit the scaler on the training rows only, then apply it to the test rows
        scaler = StandardScaler()
        X_train_scaled = scaler.fit_transform(X_train)
        X_test_scaled  = scaler.transform(X_test)

        # Train and evaluate every model on this window
        for name, model in models.items():
            model.fit(X_train_scaled, y_train)
            y_pred = model.predict(X_test_scaled)

            all_results.append({
                'Symbol': sym,
                'model': name,
                'start_idx': start,
                'rmse': np.sqrt(mean_squared_error(y_test, y_pred)),
                'mae': mean_absolute_error(y_test, y_pred),
                'r2': r2_score(y_test, y_pred),
                'Model Name': model_name
            })

# Convert the collected rows to a DataFrame
results_df = pd.DataFrame(all_results)

# Show every row
pd.set_option('display.max_rows', None)
print(results_df)

# Export to Google Drive (Colab only)
export_path = Path('/content/drive/MyDrive/results_all_models.csv')
if not export_path.parent.is_dir():
    # Drive is otherwise mounted only by a later cell of this notebook, so
    # writing here would fail on a missing directory; mount it first.
    from google.colab import drive
    drive.mount('/content/drive')
results_df.to_csv(export_path, index=False)
print("✅ Exported results to Drive: results_all_models.csv")


Feature Matrix X:
   Close_lag1  Close_lag2  Close_lag3  Close_lag4  Close_lag5  Close_lag6  \
0         NaN         NaN         NaN         NaN         NaN         NaN   
1   37.575214         NaN         NaN         NaN         NaN         NaN   
2   33.832443   37.575214         NaN         NaN         NaN         NaN   
3   35.276730   33.832443   37.575214         NaN         NaN         NaN   
4   35.198208   35.276730   33.832443   37.575214         NaN         NaN   
5   35.869186   35.198208   35.276730   33.832443   37.575214         NaN   
6   36.478298   35.869186   35.198208   35.276730   33.832443   37.575214   
7   36.594906   36.478298   35.869186   35.198208   35.276730   33.832443   
8   36.235607   36.594906   36.478298   35.869186   35.198208   35.276730   
9   35.690739   36.235607   36.594906   36.478298   35.869186   35.198208   

   Close_lag7  
0         NaN  
1         NaN  
2         NaN  
3         NaN  
4         NaN  
5         NaN  
6         NaN  
7   37

In [None]:
# -----------------------------
# 1️⃣ Run Sliding Window per Symbol
# -----------------------------
# (Done by the sliding-window cell earlier in this notebook.)
# Its per-window metrics are collected in the list named `all_results`.

# -----------------------------
# 2️⃣ Save the results to Google Drive
# -----------------------------
from google.colab import drive
import pandas as pd

# Mount Drive
drive.mount('/content/drive')

# Convert to a DataFrame. The sliding-window code stores its rows in
# `all_results`; the previously referenced name `results` is never defined
# and raised NameError.
df_results = pd.DataFrame(all_results)

# Destination path
file_path = '/content/drive/MyDrive/all_models_sliding_window_results.csv'

# Write the CSV
df_results.to_csv(file_path, index=False)

print(f"✅ Export เสร็จ! ไฟล์อยู่ที่ {file_path}")
