<a href="https://colab.research.google.com/github/JidaphaMekon/Opportunities-for-Pioneering-Practices-in-AI-Workshop/blob/main/Model_2_nointerp_lag7_indicators_5models.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

| ขั้นตอน | รายละเอียด                                     |
| ------- | ---------------------------------------------- |
| 1       | รวบรวมหุ้นหลายตัว (AAPL, MSFT, TSLA, …)        |
| 2       | ตรวจวันหยุดตลาด + Interpolation (3 วิธี)       |
| 3       | สร้าง features (Lag7, SMA, EMA, BB, RSI, MACD) |
| 4       | Scaling train เท่านั้น                         |
| 5       | ใช้ TimeSeriesSplit หรือ Sliding 70/15/15      |
| 6       | Train หลายโมเดล (Linear, XGB, LSTM, Prophet)   |
| 7       | เก็บ RMSE, R² ลง DataFrame                     |
| 8       | สรุปเปรียบเทียบผลลัพธ์                         |


In [1]:
!pip install pandas_market_calendars
!pip install yfinance scikit-learn xgboost matplotlib
import yfinance as yf
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import pandas_market_calendars as mcal
import matplotlib.pyplot as plt

# ไลบรารีสำหรับ Machine Learning
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.svm import SVR
from xgboost import XGBRegressor
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
from sklearn.preprocessing import StandardScaler

Collecting pandas_market_calendars
  Downloading pandas_market_calendars-5.1.1-py3-none-any.whl.metadata (9.7 kB)
Collecting exchange-calendars>=3.3 (from pandas_market_calendars)
  Downloading exchange_calendars-4.11.1-py3-none-any.whl.metadata (38 kB)
Collecting pyluach (from exchange-calendars>=3.3->pandas_market_calendars)
  Downloading pyluach-2.3.0-py3-none-any.whl.metadata (4.3 kB)
Collecting korean_lunar_calendar (from exchange-calendars>=3.3->pandas_market_calendars)
  Downloading korean_lunar_calendar-0.3.1-py3-none-any.whl.metadata (2.8 kB)
Downloading pandas_market_calendars-5.1.1-py3-none-any.whl (127 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m127.4/127.4 kB[0m [31m9.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading exchange_calendars-4.11.1-py3-none-any.whl (208 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m208.9/208.9 kB[0m [31m6.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading korean_lunar_calendar-0.3.1-py3-none-any.whl (

In [2]:

# ------------------------------
# 0. Define the full list of tickers
# ------------------------------
tickers = ["AAPL", "AMD", "AVGO", "GOOG", "GOOGL", "META", "MSFT", "NVDA"]

# Date range to download
start_date = datetime(2019, 1, 1)
end_date   = datetime(2024, 12, 31)

all_data_list = []


def clean_columns(cols):
    """Reduce downloaded column labels to their bare field name.

    Any label containing 'Date' becomes 'Date'. Every other label is cut
    down to its first whitespace-separated token, which removes the ticker
    suffix left behind once MultiIndex columns have been flattened
    (e.g. 'Close AAPL' -> 'Close').
    """
    return ['Date' if 'Date' in col else col.split()[0] for col in cols]


# ------------------------------
# 1. Download price data for every ticker
# ------------------------------
# The bounds are passed as strings; format them once instead of per ticker.
# NOTE(review): yfinance documents `end` as exclusive, so 2024-12-31 itself is
# presumably not downloaded -- confirm that is the intended range.
start_str = start_date.strftime('%Y-%m-%d')
end_str = end_date.strftime('%Y-%m-%d')

for ticker in tickers:
    print(f"📥 Downloading {ticker} ...")
    # auto_adjust is spelled out so the call does not depend on the library
    # default. NOTE(review): presumably this is what the warning echoed in the
    # saved cell output was about -- confirm against the installed yfinance.
    data = yf.download(ticker, start=start_str, end=end_str, auto_adjust=True)

    if data.empty:
        print(f"❌ No data for {ticker}")
        continue

    # Turn the date index into a regular 'Date' column.
    data.reset_index(inplace=True)

    # Flatten (field, ticker) column tuples into single 'field ticker' strings.
    if isinstance(data.columns, pd.MultiIndex):
        data.columns = [' '.join(col).strip() if isinstance(col, tuple) else col for col in data.columns.values]

    data.columns = clean_columns(data.columns)
    data['Symbol'] = ticker.upper()

    # Keep only the columns the later cells use; tolerate a missing one.
    wanted_cols = ['Date', 'Close', 'Symbol']
    data = data[[col for col in wanted_cols if col in data.columns]]

    all_data_list.append(data)

# pd.concat raises an unhelpful "No objects to concatenate" on an empty list,
# so fail with a message that names the real problem.
if not all_data_list:
    raise ValueError("No price data was downloaded for any ticker")

# Stack the per-ticker frames into one long table (one row per date per symbol).
flat_df = pd.concat(all_data_list, ignore_index=True)



📥 Downloading AAPL ...


  data = yf.download(ticker, start=start_date.strftime('%Y-%m-%d'), end=end_date.strftime('%Y-%m-%d'))
[*********************100%***********************]  1 of 1 completed
  data = yf.download(ticker, start=start_date.strftime('%Y-%m-%d'), end=end_date.strftime('%Y-%m-%d'))


📥 Downloading AMD ...


[*********************100%***********************]  1 of 1 completed
  data = yf.download(ticker, start=start_date.strftime('%Y-%m-%d'), end=end_date.strftime('%Y-%m-%d'))


📥 Downloading AVGO ...


[*********************100%***********************]  1 of 1 completed
  data = yf.download(ticker, start=start_date.strftime('%Y-%m-%d'), end=end_date.strftime('%Y-%m-%d'))


📥 Downloading GOOG ...


[*********************100%***********************]  1 of 1 completed
  data = yf.download(ticker, start=start_date.strftime('%Y-%m-%d'), end=end_date.strftime('%Y-%m-%d'))


📥 Downloading GOOGL ...


[*********************100%***********************]  1 of 1 completed
  data = yf.download(ticker, start=start_date.strftime('%Y-%m-%d'), end=end_date.strftime('%Y-%m-%d'))


📥 Downloading META ...


[*********************100%***********************]  1 of 1 completed
  data = yf.download(ticker, start=start_date.strftime('%Y-%m-%d'), end=end_date.strftime('%Y-%m-%d'))


📥 Downloading MSFT ...


[*********************100%***********************]  1 of 1 completed
  data = yf.download(ticker, start=start_date.strftime('%Y-%m-%d'), end=end_date.strftime('%Y-%m-%d'))


📥 Downloading NVDA ...


[*********************100%***********************]  1 of 1 completed


In [None]:
!pip install ta

Collecting ta
  Downloading ta-0.11.0.tar.gz (25 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: ta
  Building wheel for ta (setup.py) ... [?25l[?25hdone
  Created wheel for ta: filename=ta-0.11.0-py3-none-any.whl size=29412 sha256=986a028d74976fc2e150ba6a28255cc0a6f2fbc331a1390860cb31e7be02a997
  Stored in directory: /root/.cache/pip/wheels/5c/a1/5f/c6b85a7d9452057be4ce68a8e45d77ba34234a6d46581777c6
Successfully built ta


In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from xgboost import XGBRegressor
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import ta

# -----------------------------
# 0️⃣ Assumes flat_df has the columns 'Date', 'Symbol', 'Close'
# -----------------------------

# Every feature below is positional (rolling windows and shifts), so the rows of
# each symbol must be in chronological order. Sort explicitly instead of relying
# on the order the download happened to produce.
flat_df = flat_df.sort_values(['Symbol', 'Date']).reset_index(drop=True)

# Group once and reuse: all features are derived from Close within a symbol.
close_by_symbol = flat_df.groupby('Symbol')['Close']

# 1️⃣ SMA, EMA (20- and 50-row windows/spans)
flat_df['SMA_20'] = close_by_symbol.transform(lambda s: s.rolling(20).mean())
flat_df['SMA_50'] = close_by_symbol.transform(lambda s: s.rolling(50).mean())
flat_df['EMA_20'] = close_by_symbol.transform(lambda s: s.ewm(span=20, adjust=False).mean())
flat_df['EMA_50'] = close_by_symbol.transform(lambda s: s.ewm(span=50, adjust=False).mean())

# 2️⃣ Bollinger Bands: middle band is the 20-row SMA, bands sit 2 std away.
# The middle band is the same rolling mean as SMA_20, so reuse it; the rolling
# std is only an intermediate and is kept out of flat_df.
flat_df['BB_middle'] = flat_df['SMA_20']
bb_std = close_by_symbol.transform(lambda s: s.rolling(20).std())
flat_df['BB_upper'] = flat_df['BB_middle'] + 2 * bb_std
flat_df['BB_lower'] = flat_df['BB_middle'] - 2 * bb_std

# 3️⃣ Lag 1-7: Close from 1..7 rows earlier within the same symbol
lag_cols = [f'Close_lag{i}' for i in range(1, 8)]
for lag, col in enumerate(lag_cols, start=1):
    flat_df[col] = close_by_symbol.shift(lag)

# 4️⃣ Target: the next row's Close within the same symbol
flat_df['Close_next'] = close_by_symbol.shift(-1)

# 5️⃣ Feature columns used by the models
# NOTE(review): only the lag columns are used as features; the SMA/EMA/Bollinger
# columns above and the same-row Close are not -- confirm this is intended.
X_cols = lag_cols

# 6️⃣ NaN rows are dropped per symbol inside the sliding-window evaluation, so no
# pooled all-symbol X / y is built here (it was overwritten before any use).

# -----------------------------
# 7️⃣ Sliding Window
# -----------------------------
# Fractions of each symbol's usable rows. window_test is both the size of the
# evaluation slice and the distance the window slides between rounds.
window_train = 0.7
window_test = 0.15

# Estimators compared in every window. The same instances are refitted each
# round; results are recorded right after each fit.
models = {
    'Linear': LinearRegression(),
    'RandomForest': RandomForestRegressor(n_estimators=100, random_state=42),
    'GradientBoosting': GradientBoostingRegressor(n_estimators=100, random_state=42),
    'XGBoost': XGBRegressor(n_estimators=100, random_state=42, verbosity=0),
    'SVR': SVR()
}

# One dict per (symbol, window, model) with the error metrics.
results = []

for sym in flat_df['Symbol'].unique():
    # Chronological rows of this symbol only, with incomplete rows removed
    # (the first lags and the last target are NaN by construction).
    df_sym = flat_df[flat_df['Symbol']==sym].sort_values('Date').reset_index(drop=True)
    data_ml = df_sym[list(X_cols) + ['Close_next']].dropna().reset_index(drop=True)
    X_clean = data_ml[X_cols]
    y_clean = data_ml['Close_next']

    n = len(X_clean)
    train_size = int(n * window_train)
    step = int(n * window_test)

    # With very few rows the sizes truncate to 0, which would produce empty
    # train/test slices and a window that never advances. Skip such symbols.
    if train_size < 1 or step < 1:
        print(f"⚠️ Skipping {sym}: only {n} usable rows, too few for a sliding window")
        continue

    start = 0
    while start + train_size + step <= n:
        train_end = start + train_size
        test_end = train_end + step

        # Train on [start, train_end), evaluate on the slice right after it.
        X_train = X_clean.iloc[start:train_end].values
        y_train = y_clean.iloc[start:train_end].values
        X_test  = X_clean.iloc[train_end:test_end].values
        y_test  = y_clean.iloc[train_end:test_end].values

        # Scale: fit on the training slice only, then apply to the test slice,
        # so no test statistics leak into training.
        scaler = StandardScaler()
        X_train_scaled = scaler.fit_transform(X_train)
        X_test_scaled  = scaler.transform(X_test)

        # Train & Evaluate
        for name, model in models.items():
            model.fit(X_train_scaled, y_train)
            y_pred = model.predict(X_test_scaled)
            results.append({
                'Symbol': sym,
                'model': name,
                'start_idx': start,
                'rmse': np.sqrt(mean_squared_error(y_test, y_pred)),
                'mae': mean_absolute_error(y_test, y_pred),
                'r2': r2_score(y_test, y_pred)
            })

        start += step

# -----------------------------
# 8️⃣ Convert to a DataFrame and display everything
# -----------------------------
results_df = pd.DataFrame(results)
pd.set_option('display.max_rows', None)  # show every row
print(results_df)



In [None]:
from google.colab import drive
import pandas as pd

# Destination inside the mounted Drive for the exported results
file_path = '/content/drive/MyDrive/all_models_sliding_window_results.csv'

# `results` is assumed to be the list of metric dicts collected by the
# sliding-window evaluation across all models
df_results = pd.DataFrame(results)

# Mount Google Drive at /content/drive so the path above is writable
drive.mount('/content/drive')

# Export as CSV, leaving out the DataFrame index
df_results.to_csv(path_or_buf=file_path, index=False)

print(f"✅ Export เสร็จ! ไฟล์อยู่ที่ {file_path}")
