# 0. Setting

## 0.1. Installation

In [1]:
# !pip install python-dotenv
# !pip install python-binance

In [2]:
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt

from dotenv import load_dotenv
from datetime import datetime

In [3]:
load_dotenv()

api_key = os.environ.get("API_KEY") if os.environ.get("API_KEY") else ""
api_secret = os.environ.get("API_SECRET") if os.environ.get("API_SECRET") else ""

## 0.2. Connecting API

In [4]:
import requests
import json

url = "https://api.binance.com"
api_call = "/api/v3/ticker/price"
headers = {"content-type": "application/json", "X-MBX-APIKEY": api_key}

try:
    response = requests.get(url + api_call, headers=headers, timeout=10)
    response.raise_for_status()
    response_data = response.json()
    print(response_data[:1])
except requests.RequestException as e:
    print(e)
    response_data = []

[{'symbol': 'ETHBTC', 'price': '0.02535000'}]


In [5]:
if response_data:
    df = pd.DataFrame.from_records(response_data)
    print(df.head())
else:
    print("No data from API")

    symbol       price
0   ETHBTC  0.02535000
1   LTCBTC  0.00079400
2   BNBBTC  0.00580100
3   NEOBTC  0.00005310
4  QTUMETH  0.00074500


# 1. Binance API

```text
Documentation: https://developers.binance.com/docs/binance-spot-api-docs
```

In [6]:
class BinanceAPI:
    def __init__(self, api_key=None, api_secret=None):
        self.base_url = "https://api.binance.com"
        self.api_key = api_key
        self.api_secret = api_secret
        
    # ข้อมูลเทียนหรือกราฟแท่งเทียนในอดีตสำหรับคู่การเทรดที่กำหนดตาม Symbol
    def get_klines(self, symbol, interval, limit=1000, start_time=None, end_time=None):
        endpoint = "/api/v3/klines"
        params = {
            'symbol': symbol,
            'interval': interval,
            'limit': limit
        }
        
        if start_time:
            params['startTime'] = start_time
        if end_time:
            params['endTime'] = end_time
            
        response = requests.get(self.base_url + endpoint, params=params)
        return response.json()
    
    def get_n_symbol(self, n) :
        endpoint = "/api/v3/ticker/price"
        headers = {"content-type": "application/json", "X-MBX-APIKEY": self.api_key}
        response = requests.get(self.base_url + endpoint, headers=headers)
        response = json.loads(response.text)
        df = pd.DataFrame.from_records(response)
        return df.loc[:n, "symbol"] 
    
    def get_server_time(self, as_timestamp=False) :
        endpoint = "/api/v3/time"
        response = requests.get(self.base_url + endpoint)
        ts = response.json()["serverTime"]
        if as_timestamp:
            return ts
        time = datetime.fromtimestamp(ts / 1000)
        return time.strftime("%Y-%m-%d %H:%M:%S")
    
    # สถิติการเปลี่ยนแปลงราคา 24 ชั่วโมงสำหรับคู่การเทรดที่กำหนดตาม Symbol
    def get_24hr_ticker(self, symbol):
        endpoint = "/api/v3/ticker/24hr"
        params = {'symbol': symbol}
        response = requests.get(self.base_url + endpoint, params=params)
        return response.json()
    
    # ข้อมูล order book ปัจจุบันสำหรับคู่การเทรดที่กำหนดตาม Symbol
    def get_orderbook(self, symbol, limit=100):
        endpoint = "/api/v3/depth"
        params = {'symbol': symbol, 'limit': limit}
        response = requests.get(self.base_url + endpoint, params=params)
        return response.json()

In [7]:
api = BinanceAPI(api_key, api_secret)
api

<__main__.BinanceAPI at 0x24acbfccad0>

In [8]:
api.get_server_time()

'2025-07-15 17:45:13'

In [9]:
api.get_n_symbol(10)

0      ETHBTC
1      LTCBTC
2      BNBBTC
3      NEOBTC
4     QTUMETH
5      EOSETH
6      SNTETH
7      BNTETH
8      BCCBTC
9      GASBTC
10     BNBETH
Name: symbol, dtype: object

In [10]:
api.get_klines("ETHBTC", "1m", limit=5)

[[1752576060000,
  '0.02538000',
  '0.02538000',
  '0.02537000',
  '0.02537000',
  '1.55280000',
  1752576119999,
  '0.03939448',
  28,
  '0.03550000',
  '0.00090066',
  '0'],
 [1752576120000,
  '0.02537000',
  '0.02537000',
  '0.02537000',
  '0.02537000',
  '0.68990000',
  1752576179999,
  '0.01750258',
  52,
  '0.68990000',
  '0.01750258',
  '0'],
 [1752576180000,
  '0.02537000',
  '0.02537000',
  '0.02535000',
  '0.02535000',
  '13.46510000',
  1752576239999,
  '0.34147513',
  70,
  '0.00390000',
  '0.00009890',
  '0'],
 [1752576240000,
  '0.02536000',
  '0.02536000',
  '0.02535000',
  '0.02536000',
  '26.77840000',
  1752576299999,
  '0.67900770',
  81,
  '0.39420000',
  '0.00999671',
  '0'],
 [1752576300000,
  '0.02535000',
  '0.02535000',
  '0.02535000',
  '0.02535000',
  '6.21200000',
  1752576359999,
  '0.15747417',
  20,
  '0.00400000',
  '0.00010140',
  '0']]

# 2. Collect Data

In [11]:
def collect_historical_data(api: BinanceAPI, symbol, interval='1m', days=1, end_time=None):
    if end_time is None:
        end_time = int(api.get_server_time(as_timestamp=True))
    start_time = end_time - (days * 24 * 60 * 60 * 1000)
    
    klines = api.get_klines(
        symbol=symbol,
        interval=interval,
        start_time=start_time,
        end_time=end_time,
    )
    
    # Convert to DataFrame
    df = pd.DataFrame(klines, columns=[
        'timestamp', 'open', 'high', 'low', 'close', 'volume',
        'close_time', 'quote_asset_volume', 'number_of_trades',
        'taker_buy_base_asset_volume', 'taker_buy_quote_asset_volume', 'ignore'
    ])
    
    # Convert data types
    numeric_columns = ['open', 'high', 'low', 'close', 'volume', 'quote_asset_volume']
    for col in numeric_columns:
        df[col] = pd.to_numeric(df[col])
    
    df['timestamp'] = pd.to_datetime(df['timestamp'], unit='ms')
    df['close_time'] = pd.to_datetime(df['close_time'], unit='ms')
    df.drop(["ignore"], axis=1, inplace=True)
    
    return df

In [12]:
api = BinanceAPI(api_key, api_secret)
symbol = "ETHBTC"

train_df = collect_historical_data(api, symbol, interval="5m", days=4)
test_df = collect_historical_data(api, symbol, interval="5m", days=1)

test_df = test_df[test_df['timestamp'] > train_df['timestamp'].max()]

In [13]:
train_df

Unnamed: 0,timestamp,open,high,low,close,volume,close_time,quote_asset_volume,number_of_trades,taker_buy_base_asset_volume,taker_buy_quote_asset_volume
0,2025-07-11 10:50:00,0.02547,0.02547,0.02545,0.02545,12.0021,2025-07-11 10:54:59.999,0.305582,66,6.06980000,0.15454949
1,2025-07-11 10:55:00,0.02544,0.02546,0.02540,0.02546,70.9895,2025-07-11 10:59:59.999,1.804568,216,46.61600000,1.18496964
2,2025-07-11 11:00:00,0.02546,0.02546,0.02542,0.02543,117.8030,2025-07-11 11:04:59.999,2.995644,147,17.02680000,0.43308986
3,2025-07-11 11:05:00,0.02544,0.02544,0.02540,0.02541,44.0224,2025-07-11 11:09:59.999,1.119159,214,13.70600000,0.34841608
4,2025-07-11 11:10:00,0.02541,0.02543,0.02537,0.02539,59.4120,2025-07-11 11:14:59.999,1.508781,346,31.94290000,0.81135132
...,...,...,...,...,...,...,...,...,...,...,...
995,2025-07-14 21:45:00,0.02509,0.02510,0.02506,0.02508,13.4166,2025-07-14 21:49:59.999,0.336455,111,0.22260000,0.00558368
996,2025-07-14 21:50:00,0.02508,0.02508,0.02506,0.02507,4.3710,2025-07-14 21:54:59.999,0.109575,85,0.11340000,0.00284363
997,2025-07-14 21:55:00,0.02507,0.02508,0.02506,0.02507,12.9144,2025-07-14 21:59:59.999,0.323750,83,0.61710000,0.01547479
998,2025-07-14 22:00:00,0.02508,0.02508,0.02505,0.02505,15.6178,2025-07-14 22:04:59.999,0.391309,80,0.63960000,0.01603428


In [14]:
test_df

Unnamed: 0,timestamp,open,high,low,close,volume,close_time,quote_asset_volume,number_of_trades,taker_buy_base_asset_volume,taker_buy_quote_asset_volume
136,2025-07-14 22:10:00,0.02505,0.02506,0.02502,0.02503,36.2219,2025-07-14 22:14:59.999,0.907190,139,3.99780000,0.10014421
137,2025-07-14 22:15:00,0.02504,0.02504,0.02501,0.02502,14.7091,2025-07-14 22:19:59.999,0.367993,129,2.27670000,0.05696309
138,2025-07-14 22:20:00,0.02503,0.02506,0.02503,0.02506,47.2625,2025-07-14 22:24:59.999,1.184301,42,47.20470000,1.18285442
139,2025-07-14 22:25:00,0.02506,0.02506,0.02505,0.02505,20.3876,2025-07-14 22:29:59.999,0.510880,62,18.64960000,0.46734177
140,2025-07-14 22:30:00,0.02505,0.02510,0.02505,0.02510,19.5405,2025-07-14 22:34:59.999,0.490008,147,18.19540000,0.45628323
...,...,...,...,...,...,...,...,...,...,...,...
283,2025-07-15 10:25:00,0.02545,0.02545,0.02544,0.02544,11.3730,2025-07-15 10:29:59.999,0.289336,139,6.88510000,0.17516369
284,2025-07-15 10:30:00,0.02544,0.02544,0.02537,0.02537,56.7119,2025-07-15 10:34:59.999,1.440211,461,9.23410000,0.23448832
285,2025-07-15 10:35:00,0.02538,0.02540,0.02537,0.02537,17.9712,2025-07-15 10:39:59.999,0.456079,196,4.18140000,0.10612816
286,2025-07-15 10:40:00,0.02537,0.02538,0.02535,0.02536,46.6813,2025-07-15 10:44:59.999,1.183809,307,4.85140000,0.12307549


<b>Columns</b>

1) <b>open</b>: ราคา *แรกสุด* ที่มีการซื้อขายในช่วงเวลา t
2) <b>high</b>: ราคา *สูงสุด* ที่มีการซื้อขายในช่วงเวลา t
3) <b>low</b>: ราคา *ต่ำสุด* ที่มีการซื้อขายในช่วงเวลา t
4) <b>close</b>: ราคา *สุดท้าย* ที่มีการซื้อขายในช่วงเวลา t

``` 
4 Columns นี้มีการพิจารณาค่าตัวเลขเหมือนกัน เข่น 0.2389 คือ 1 ETH แลกได้ 0.2389 BTC 
```

---

5. <b>volume</b>: จำนวนเหรียญหลักรวมที่มีการซื้อขายในช่วงเวลา t
6. <b>quote_asset_volume</b>: จำนวนเหรียญคู่รวมที่มีการซื้อขายในช่วงเวลา t BTC รวม
7. <b>number_of_trades</b>: จำนวนครั้งที่มีการซื้อขายในช่วงเวลา t
8. <b>taker_buy_base_asset_volume</b>: จำนวนเหรียญหลักรวมที่มีการรีบซื้อในทันทีในช่วงเวลา t
9. <b>taker_buy_quote_asset_volume</b>: จำนวนเหรียญคู่รวมที่มีการรีบซื้อในทันทีในช่วงเวลา t

---

10. <b>timestamp</b>: เวลาเริ่มต้นของการซื้อขาย
11. <b>close_time</b>: เวลาสิ้นสุดของการซื้อขาย

---

In [15]:
train_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 11 columns):
 #   Column                        Non-Null Count  Dtype         
---  ------                        --------------  -----         
 0   timestamp                     1000 non-null   datetime64[ns]
 1   open                          1000 non-null   float64       
 2   high                          1000 non-null   float64       
 3   low                           1000 non-null   float64       
 4   close                         1000 non-null   float64       
 5   volume                        1000 non-null   float64       
 6   close_time                    1000 non-null   datetime64[ns]
 7   quote_asset_volume            1000 non-null   float64       
 8   number_of_trades              1000 non-null   int64         
 9   taker_buy_base_asset_volume   1000 non-null   object        
 10  taker_buy_quote_asset_volume  1000 non-null   object        
dtypes: datetime64[ns](2), float64(6

In [16]:
train_df["taker_buy_base_asset_volume"] = train_df["taker_buy_base_asset_volume"].astype(float)
train_df["taker_buy_quote_asset_volume"] = train_df["taker_buy_quote_asset_volume"].astype(float)

train_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 11 columns):
 #   Column                        Non-Null Count  Dtype         
---  ------                        --------------  -----         
 0   timestamp                     1000 non-null   datetime64[ns]
 1   open                          1000 non-null   float64       
 2   high                          1000 non-null   float64       
 3   low                           1000 non-null   float64       
 4   close                         1000 non-null   float64       
 5   volume                        1000 non-null   float64       
 6   close_time                    1000 non-null   datetime64[ns]
 7   quote_asset_volume            1000 non-null   float64       
 8   number_of_trades              1000 non-null   int64         
 9   taker_buy_base_asset_volume   1000 non-null   float64       
 10  taker_buy_quote_asset_volume  1000 non-null   float64       
dtypes: datetime64[ns](2), float64(8

In [17]:
test_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 152 entries, 136 to 287
Data columns (total 11 columns):
 #   Column                        Non-Null Count  Dtype         
---  ------                        --------------  -----         
 0   timestamp                     152 non-null    datetime64[ns]
 1   open                          152 non-null    float64       
 2   high                          152 non-null    float64       
 3   low                           152 non-null    float64       
 4   close                         152 non-null    float64       
 5   volume                        152 non-null    float64       
 6   close_time                    152 non-null    datetime64[ns]
 7   quote_asset_volume            152 non-null    float64       
 8   number_of_trades              152 non-null    int64         
 9   taker_buy_base_asset_volume   152 non-null    object        
 10  taker_buy_quote_asset_volume  152 non-null    object        
dtypes: datetime64[ns](2), float64(6), i

In [18]:
test_df["taker_buy_base_asset_volume"] = test_df["taker_buy_base_asset_volume"].astype(float)
test_df["taker_buy_quote_asset_volume"] = test_df["taker_buy_quote_asset_volume"].astype(float)

test_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 152 entries, 136 to 287
Data columns (total 11 columns):
 #   Column                        Non-Null Count  Dtype         
---  ------                        --------------  -----         
 0   timestamp                     152 non-null    datetime64[ns]
 1   open                          152 non-null    float64       
 2   high                          152 non-null    float64       
 3   low                           152 non-null    float64       
 4   close                         152 non-null    float64       
 5   volume                        152 non-null    float64       
 6   close_time                    152 non-null    datetime64[ns]
 7   quote_asset_volume            152 non-null    float64       
 8   number_of_trades              152 non-null    int64         
 9   taker_buy_base_asset_volume   152 non-null    float64       
 10  taker_buy_quote_asset_volume  152 non-null    float64       
dtypes: datetime64[ns](2), float64(8), i

# 3. Create Indicators

## 3.1. Moving Average

คำนวณค่าเฉลี่ยแบบเคลื่อนที่ทุกๆ n จุด แล้วดูแนวโน้มค่าเฉลี่ยเหล่านั้น

- Bullish: มีแนวโน้มว่าในอนาคต ราคาสูงขึ้น -> 1 ETH มีแนวโน้มจะได้ BTC มากขึ้น
    - ถ้าเรามี ETH อยู่ เราควรถือไว้ หรือซื้อ ETH เพิ่มเติม
    - ถ้าเรามี BTC อยู่ เราควรขายเพื่อซื้อ ETH
     
- Bearish: มีแนวโน้มว่าในอนาคต ราคาลดลง -> 1 ETH มีแนวโน้มจะได้ BTC น้อยลง
    - ถ้าเรามี ETH อยู่ เราควรขายเพื่อซื้อ BTC
    - ถ้าเรามี BTC อยู่ เราควรถือไว้ หรือซื้อ BTC เพิ่มเติม

In [19]:
def add_moving_average(df: pd.DataFrame):
    df = df.copy()
    
    price_col = "close"
    
    # SMA
    for period in [20, 50, 200]:
        df[f'SMA_{period}'] = df[price_col].rolling(period, min_periods=1).mean()
    
    # EMA   
    for period in [12, 26]:
        df[f'EMA_{period}'] = df[price_col].ewm(span=period, adjust=False).mean()
    
    # Golden Cross: 50-day SMA crosses above 200-day SMA (Long-term Bullish)
    df['golden_cross'] = ((df['SMA_50'] > df['SMA_200']) & 
                         (df['SMA_50'].shift(1) <= df['SMA_200'].shift(1))).astype(int)
    
    # Death Cross: 50-day SMA crosses below 200-day SMA (Long-term Bearish)
    df['death_cross'] = ((df['SMA_50'] < df['SMA_200']) & 
                        (df['SMA_50'].shift(1) >= df['SMA_200'].shift(1))).astype(int)
    
    # Bullish Cross: 20-day SMA crosses above 50-day SMA (Short-term Bullish)
    df['bullish_cross'] = ((df['SMA_20'] > df['SMA_50']) & 
                          (df['SMA_20'].shift(1) <= df['SMA_50'].shift(1))).astype(int)
    
    # Bearish Cross: 20-day SMA crosses below 50-day SMA (Short-term Bearish)
    df['bearish_cross'] = ((df['SMA_20'] < df['SMA_50']) & 
                          (df['SMA_20'].shift(1) >= df['SMA_50'].shift(1))).astype(int)
    
    # EMA Bullish Cross: 12-day EMA crosses above 26-day EMA (Momentum turning up)
    df['ema_bullish_cross'] = ((df['EMA_12'] > df['EMA_26']) & 
                              (df['EMA_12'].shift(1) <= df['EMA_26'].shift(1))).astype(int)
    
    # EMA Bearish Cross: 12-day EMA crosses below 26-day EMA (Momentum turning down)
    df['ema_bearish_cross'] = ((df['EMA_12'] < df['EMA_26']) & 
                              (df['EMA_12'].shift(1) >= df['EMA_26'].shift(1))).astype(int)
    
    # 0 = Very Bearish, 5 = Very Bullish
    df['trend_strength'] = ((df[price_col] > df['SMA_20']).astype(int) + 
                           (df[price_col] > df['SMA_50']).astype(int) + 
                           (df[price_col] > df['SMA_200']).astype(int) +
                           (df[price_col] > df['EMA_12']).astype(int) +
                           (df[price_col] > df['EMA_26']).astype(int))
    
    # Price Distance from MAs
    # Positive = Above MA, Negative = Below MA
    df['price_sma20_dist'] = ((df[price_col] - df['SMA_20']) / df['SMA_20']).fillna(0)
    df['price_sma50_dist'] = ((df[price_col] - df['SMA_50']) / df['SMA_50']).fillna(0)
    df['price_sma200_dist'] = ((df[price_col] - df['SMA_200']) / df['SMA_200']).fillna(0)
    
    # Price Distance from EMAs
    # Positive = Above MA, Negative = Below MA
    df['price_ema12_dist'] = ((df[price_col] - df['EMA_12']) / df['EMA_12']).fillna(0)
    df['price_ema26_dist'] = ((df[price_col] - df['EMA_26']) / df['EMA_26']).fillna(0)
    
    # Bullish Alignment: SMA_20 > SMA_50 > SMA_200
    df['bullish_alignment'] = ((df['SMA_20'] > df['SMA_50']) & 
                              (df['SMA_50'] > df['SMA_200'])).astype(int)
    
    return df

## 3.2. Relative Strength Index (RSI)

https://www.investopedia.com/terms/r/rsi.asp
- บ่งบอกความแรงของราคา (Momentum) ในช่วงเวลาที่กำหนด ซึ่งมักนิยมใช้ 14 วัน
- มีค่าอยู่ในช่วงระหว่าง 0 ถึง 100

พิจารณา ETHBTC
- RSI สูง (>70) = คนซื้อ ETH ด้วย BTC เยอะมาก 
    - ETH อาจแพงเกินไปที่จะซื้อตอนนี้ 
    - ตอนนี้เราควรขาย ETH และซื้อ BTC
- RSI ต่ำ (<30) = คนขาย ETH เพื่อซื้อ BTC เยอะมาก 
    - ETH อาจถูกเกินไปที่จะขายตอนนี้ 
    - ตอนนี้เราควรซื้อ ETH และขาย BTC
- RSI กลางๆ (~50) = การซื้อขาย ETH/BTC ปกติดี  
    - ตอนนี้ควรรอดูสถานการณ์ก่อน

In [20]:
def add_rsi(df:pd.DataFrame, period=14):
    df = df.copy()
    
    price_col='close'
    
    delta = df[price_col].diff()
    
    gains = delta.where(delta > 0, 0)
    losses = -delta.where(delta < 0, 0)
    
    avg_gains = gains.ewm(alpha=1/period, adjust=False).mean()
    avg_losses = losses.ewm(alpha=1/period, adjust=False).mean()
    
    # Calculate RSI
    rs = avg_gains / avg_losses
    rsi = 100 - (100 / (1 + rs))

    df['rsi'] = rsi
    
    return df

## 3.3. MACD

- ใช้ดูแนวโน้ม (trend) และโมเมนตัม (momentum) ของราคา 
- ดูจากความแตกต่างของค่าเฉลี่ยเคลื่อนที่แบบ EMA สองเส้น (fast: EMA12 ลบกับ slow: EMA26)

พิจารณา ETHBTC
- MACD > 0 แสดงว่าราคากำลังขึ้น หรือก็คือมีแนวโน้มว่าในอนาคต BTC จะมีราคาสูงขึ้น
    - ถ้าเราถือ ETH เราควรถือไว้ รอขายในอนาคต
    - ถ้าเราถือ BTC เราควรขายเพื่อซื้อ ETH
- MACD < 0 แสดงว่าราคากำลังลง หรือก็คือมีแนวโน้มว่าในอนาคต BTC จะมีราคาต่ำขึ้น
    - ถ้าเราถือ ETH เราควรขายเพื่อซื้อ BTC
    - ถ้าเราถือ BTC เราควรถือไว้ รอขายในอนาคต

** ยิ่งค่าห่างจาก 0 ยิ่งมีแนวโน้มที่จะไปทางนั้นๆ สูง

- จังหวะที่ควรซื้อ หรือขาย คือจังหวะที่เส้นของ MACD ตัดกับเส้น MACD_Signal
    - MACD ตัดแล้วขึ้นสูงกว่า MACD_Signal: เป็นช่วงราคากำลังขึ้น
    - MACD ตัดแล้วต่ำกว่า MACD_Signal: เป็นช่วงราคากำลังลง

In [21]:
def add_macd(df, fast=12, slow=26, signal=9):
    # Calculate EMA fast and slow
    df['ema_fast'] = df['close'].ewm(span=fast, adjust=False).mean()
    df['ema_slow'] = df['close'].ewm(span=slow, adjust=False).mean()
    
    # MACD line
    df['macd'] = df['ema_fast'] - df['ema_slow']
    
    # Signal line
    df['macd_signal'] = df['macd'].ewm(span=signal, adjust=False).mean()
    
    return df

## 3.4. Bollinger Bands

- ประกอบด้วย 3 เส้นหลัก:

1. เส้นกลาง (Middle Band) : SMA ของ close หมายถึงแนวโน้มราคากลาง ๆ ในช่วงเวลาที่กำหนด

2. เส้นบน (Upper Band): SMA + 2SD บ่งบอกขอบเขตราคาที่ "สูงกว่าปกติ" หรือเป็นระดับแนวต้าน

3. เส้นล่าง (Lower Band): SMA - 2SD บ่งบอกขอบเขตราคาที่ "ต่ำกว่าปกติ" หรือเป็นระดับแนวรับ

พิจารณา ETHBTC
- มี 4 Case ที่เกิดขึ้นได้
    1. Close Price ชิด Upper Band: ราคาสูงเกินไปแล้ว อาจมีโอกาสราคาตกลงในเร็ว ๆ นี้
        - ถ้าเราถือ ETH ควรขายเพื่อซื้อ BTC
        - ถ้าเราถือ BTC ควรถือไว้ รอขายในอนาคต
    2. Close Price ชิด Lower Band: ราคาต่ำเกินไปแล้ว อาจมีโอกาสราคาขึ้นในเร็ว ๆ นี้
        - ถ้าเราถือ ETH ควรถือไว้ รอขายในอนาคต
        - ถ้าเราถือ BTC ควรขายเพื่อซื้อ ETH
    3. Upper Band กับ Lower Band เข้ามาชิดกัน: ความผันผวนต่ำ เตรียมเคลื่อนไหว
        - ถ้า Close Price สูงกว่า Upper Band อาจเป็นสัญญาณซื้อ ETH ขาย BTC
        - ถ้า Close Price ต่ำกว่า Lower Band อาจเป็นสัญญาณขาย ETH ซื้อ BTC
    4. Upper Band กับ Lower Band ห่างออกจากกัน: ความผันผวนสูง
        - ถ้า Close Price สูงขึ้น และอยู่ใกล้ Upper Band ควรถือ ETH ต่อเนื่อง หรือซื้อเพิ่ม
        - ถ้า Close Price ลดลง และอยู่ใกล้ Lower Band → ควรขาย ETH ซื้อ BTC

In [22]:
def add_bollinger(df, period=20, std_dev=2):
    df['bb_middle'] = df['close'].rolling(window=period).mean()
    df['bb_std'] = df['close'].rolling(window=period).std()
    
    df['bb_upper'] = df['bb_middle'] + std_dev * df['bb_std']
    df['bb_lower'] = df['bb_middle'] - std_dev * df['bb_std']

    return df

In [23]:
def add_all_indicators(df):
    df = df.copy() 
    df = add_moving_average(df)
    df = add_rsi(df)
    df = add_macd(df)
    df = add_bollinger(df)
    return df

# 4. Create Model

## EDA

In [24]:
ind_train_df = add_all_indicators(train_df)
ind_train_df.set_index("timestamp", inplace=True)

ind_test_df = add_all_indicators(test_df)
ind_test_df.set_index("timestamp", inplace=True)

In [25]:
ind_train_df

Unnamed: 0_level_0,open,high,low,close,volume,close_time,quote_asset_volume,number_of_trades,taker_buy_base_asset_volume,taker_buy_quote_asset_volume,...,bullish_alignment,rsi,ema_fast,ema_slow,macd,macd_signal,bb_middle,bb_std,bb_upper,bb_lower
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2025-07-11 10:50:00,0.02547,0.02547,0.02545,0.02545,12.0021,2025-07-11 10:54:59.999,0.305582,66,6.0698,0.154549,...,0,,0.025450,0.025450,0.000000e+00,0.000000e+00,,,,
2025-07-11 10:55:00,0.02544,0.02546,0.02540,0.02546,70.9895,2025-07-11 10:59:59.999,1.804568,216,46.6160,1.184970,...,0,100.000000,0.025452,0.025451,7.977208e-07,1.595442e-07,,,,
2025-07-11 11:00:00,0.02546,0.02546,0.02542,0.02543,117.8030,2025-07-11 11:04:59.999,2.995644,147,17.0268,0.433090,...,0,23.636364,0.025448,0.025449,-9.795375e-07,-6.827217e-08,,,,
2025-07-11 11:05:00,0.02544,0.02544,0.02540,0.02541,44.0224,2025-07-11 11:09:59.999,1.119159,214,13.7060,0.348416,...,0,15.266486,0.025442,0.025446,-3.956255e-06,-8.458688e-07,,,,
2025-07-11 11:10:00,0.02541,0.02543,0.02537,0.02539,59.4120,2025-07-11 11:14:59.999,1.508781,346,31.9429,0.811351,...,0,11.051864,0.025434,0.025442,-7.838797e-06,-2.244454e-06,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2025-07-14 21:45:00,0.02509,0.02510,0.02506,0.02508,13.4166,2025-07-14 21:49:59.999,0.336455,111,0.2226,0.005584,...,1,60.946723,0.025060,0.025044,1.622803e-05,8.616811e-06,0.025035,0.000034,0.025104,0.024967
2025-07-14 21:50:00,0.02508,0.02508,0.02506,0.02507,4.3710,2025-07-14 21:54:59.999,0.109575,85,0.1134,0.002844,...,1,57.476935,0.025062,0.025046,1.579645e-05,1.005274e-05,0.025040,0.000032,0.025105,0.024975
2025-07-14 21:55:00,0.02507,0.02508,0.02506,0.02507,12.9144,2025-07-14 21:59:59.999,0.323750,83,0.6171,0.015475,...,1,57.476935,0.025063,0.025048,1.527830e-05,1.109785e-05,0.025043,0.000032,0.025108,0.024978
2025-07-14 22:00:00,0.02508,0.02508,0.02505,0.02505,15.6178,2025-07-14 22:04:59.999,0.391309,80,0.6396,0.016034,...,1,50.772252,0.025061,0.025048,1.310279e-05,1.149884e-05,0.025045,0.000031,0.025107,0.024984


In [26]:
ind_test_df

Unnamed: 0_level_0,open,high,low,close,volume,close_time,quote_asset_volume,number_of_trades,taker_buy_base_asset_volume,taker_buy_quote_asset_volume,...,bullish_alignment,rsi,ema_fast,ema_slow,macd,macd_signal,bb_middle,bb_std,bb_upper,bb_lower
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2025-07-14 22:10:00,0.02505,0.02506,0.02502,0.02503,36.2219,2025-07-14 22:14:59.999,0.907190,139,3.9978,0.100144,...,0,,0.025030,0.025030,0.000000e+00,0.000000e+00,,,,
2025-07-14 22:15:00,0.02504,0.02504,0.02501,0.02502,14.7091,2025-07-14 22:19:59.999,0.367993,129,2.2767,0.056963,...,0,0.000000,0.025028,0.025029,-7.977208e-07,-1.595442e-07,,,,
2025-07-14 22:20:00,0.02503,0.02506,0.02503,0.02506,47.2625,2025-07-14 22:24:59.999,1.184301,42,47.2047,1.182854,...,0,81.159420,0.025033,0.025032,1.777258e-06,2.278163e-07,,,,
2025-07-14 22:25:00,0.02506,0.02506,0.02505,0.02505,20.3876,2025-07-14 22:29:59.999,0.510880,62,18.6496,0.467342,...,0,66.605672,0.025036,0.025033,2.976718e-06,7.775966e-07,,,,
2025-07-14 22:30:00,0.02505,0.02510,0.02505,0.02510,19.5405,2025-07-14 22:34:59.999,0.490008,147,18.1954,0.456283,...,0,83.010491,0.025046,0.025038,7.871146e-06,2.196307e-06,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2025-07-15 10:25:00,0.02545,0.02545,0.02544,0.02544,11.3730,2025-07-15 10:29:59.999,0.289336,139,6.8851,0.175164,...,0,43.211855,0.025469,0.025474,-4.432925e-06,2.106381e-07,0.025478,0.000020,0.025517,0.025439
2025-07-15 10:30:00,0.02544,0.02544,0.02537,0.02537,56.7119,2025-07-15 10:34:59.999,1.440211,461,9.2341,0.234488,...,0,33.673791,0.025454,0.025466,-1.203363e-05,-2.238215e-06,0.025472,0.000031,0.025535,0.025410
2025-07-15 10:35:00,0.02538,0.02540,0.02537,0.02537,17.9712,2025-07-15 10:39:59.999,0.456079,196,4.1814,0.106128,...,0,33.673791,0.025441,0.025459,-1.785146e-05,-5.360865e-06,0.025467,0.000039,0.025545,0.025390
2025-07-15 10:40:00,0.02537,0.02538,0.02535,0.02536,46.6813,2025-07-15 10:44:59.999,1.183809,307,4.8514,0.123075,...,0,32.485776,0.025429,0.025452,-2.300388e-05,-8.889467e-06,0.025460,0.000045,0.025550,0.025371


In [27]:
ind_train_df.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 1000 entries, 2025-07-11 10:50:00 to 2025-07-14 22:05:00
Data columns (total 37 columns):
 #   Column                        Non-Null Count  Dtype         
---  ------                        --------------  -----         
 0   open                          1000 non-null   float64       
 1   high                          1000 non-null   float64       
 2   low                           1000 non-null   float64       
 3   close                         1000 non-null   float64       
 4   volume                        1000 non-null   float64       
 5   close_time                    1000 non-null   datetime64[ns]
 6   quote_asset_volume            1000 non-null   float64       
 7   number_of_trades              1000 non-null   int64         
 8   taker_buy_base_asset_volume   1000 non-null   float64       
 9   taker_buy_quote_asset_volume  1000 non-null   float64       
 10  SMA_20                        1000 non-null   float64       

In [28]:
ind_train_df.isna().sum()

open                             0
high                             0
low                              0
close                            0
volume                           0
close_time                       0
quote_asset_volume               0
number_of_trades                 0
taker_buy_base_asset_volume      0
taker_buy_quote_asset_volume     0
SMA_20                           0
SMA_50                           0
SMA_200                          0
EMA_12                           0
EMA_26                           0
golden_cross                     0
death_cross                      0
bullish_cross                    0
bearish_cross                    0
ema_bullish_cross                0
ema_bearish_cross                0
trend_strength                   0
price_sma20_dist                 0
price_sma50_dist                 0
price_sma200_dist                0
price_ema12_dist                 0
price_ema26_dist                 0
bullish_alignment                0
rsi                 

In [29]:
print("bb_middle\n", ind_train_df["bb_middle"])
print("bb_std\n", ind_train_df["bb_std"])
print("bb_upper\n", ind_train_df["bb_upper"])
print("bb_lower\n", ind_train_df["bb_lower"])
print("rsi\n", ind_train_df["rsi"])

bb_middle
 timestamp
2025-07-11 10:50:00         NaN
2025-07-11 10:55:00         NaN
2025-07-11 11:00:00         NaN
2025-07-11 11:05:00         NaN
2025-07-11 11:10:00         NaN
                         ...   
2025-07-14 21:45:00    0.025035
2025-07-14 21:50:00    0.025040
2025-07-14 21:55:00    0.025043
2025-07-14 22:00:00    0.025045
2025-07-14 22:05:00    0.025047
Name: bb_middle, Length: 1000, dtype: float64
bb_std
 timestamp
2025-07-11 10:50:00         NaN
2025-07-11 10:55:00         NaN
2025-07-11 11:00:00         NaN
2025-07-11 11:05:00         NaN
2025-07-11 11:10:00         NaN
                         ...   
2025-07-14 21:45:00    0.000034
2025-07-14 21:50:00    0.000032
2025-07-14 21:55:00    0.000032
2025-07-14 22:00:00    0.000031
2025-07-14 22:05:00    0.000030
Name: bb_std, Length: 1000, dtype: float64
bb_upper
 timestamp
2025-07-11 10:50:00         NaN
2025-07-11 10:55:00         NaN
2025-07-11 11:00:00         NaN
2025-07-11 11:05:00         NaN
2025-07-11 11:10:00 

In [30]:
ind_train_df.dropna(subset=['bb_lower', 'bb_upper', 'bb_middle', 'bb_std', 'rsi'], inplace=True)

In [31]:
ind_train_df.isna().sum()

open                            0
high                            0
low                             0
close                           0
volume                          0
close_time                      0
quote_asset_volume              0
number_of_trades                0
taker_buy_base_asset_volume     0
taker_buy_quote_asset_volume    0
SMA_20                          0
SMA_50                          0
SMA_200                         0
EMA_12                          0
EMA_26                          0
golden_cross                    0
death_cross                     0
bullish_cross                   0
bearish_cross                   0
ema_bullish_cross               0
ema_bearish_cross               0
trend_strength                  0
price_sma20_dist                0
price_sma50_dist                0
price_sma200_dist               0
price_ema12_dist                0
price_ema26_dist                0
bullish_alignment               0
rsi                             0
ema_fast      

In [32]:
ind_train_df['price_direction'] = np.where(
    ind_train_df['close'].shift(-1) < ind_train_df['close'], -1, 1
)
ind_train_df['price_direction'].value_counts(normalize=True)

price_direction
 1    0.605505
-1    0.394495
Name: proportion, dtype: float64

- -1 คือ ราคาปิดในอนาคต ***น้อยกว่า*** ราคาปิดปัจจุบัน -> ในตอนนี้ควรขาย
- 1 คือ ราคาปิดในอนาคต ***มากกว่าหรือเท่ากับ*** ราคาปิดปัจจุบัน -> ในตอนนี้ควรซื้อ

In [33]:
ind_test_df.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 152 entries, 2025-07-14 22:10:00 to 2025-07-15 10:45:00
Data columns (total 37 columns):
 #   Column                        Non-Null Count  Dtype         
---  ------                        --------------  -----         
 0   open                          152 non-null    float64       
 1   high                          152 non-null    float64       
 2   low                           152 non-null    float64       
 3   close                         152 non-null    float64       
 4   volume                        152 non-null    float64       
 5   close_time                    152 non-null    datetime64[ns]
 6   quote_asset_volume            152 non-null    float64       
 7   number_of_trades              152 non-null    int64         
 8   taker_buy_base_asset_volume   152 non-null    float64       
 9   taker_buy_quote_asset_volume  152 non-null    float64       
 10  SMA_20                        152 non-null    float64       


In [34]:
ind_test_df.isna().sum()

open                             0
high                             0
low                              0
close                            0
volume                           0
close_time                       0
quote_asset_volume               0
number_of_trades                 0
taker_buy_base_asset_volume      0
taker_buy_quote_asset_volume     0
SMA_20                           0
SMA_50                           0
SMA_200                          0
EMA_12                           0
EMA_26                           0
golden_cross                     0
death_cross                      0
bullish_cross                    0
bearish_cross                    0
ema_bullish_cross                0
ema_bearish_cross                0
trend_strength                   0
price_sma20_dist                 0
price_sma50_dist                 0
price_sma200_dist                0
price_ema12_dist                 0
price_ema26_dist                 0
bullish_alignment                0
rsi                 

In [35]:
print("bb_middle\n", ind_test_df["bb_middle"])
print("bb_std\n", ind_test_df["bb_std"])
print("bb_upper\n", ind_test_df["bb_upper"])
print("bb_lower\n", ind_test_df["bb_lower"])
print("rsi\n", ind_test_df["rsi"])

bb_middle
 timestamp
2025-07-14 22:10:00         NaN
2025-07-14 22:15:00         NaN
2025-07-14 22:20:00         NaN
2025-07-14 22:25:00         NaN
2025-07-14 22:30:00         NaN
                         ...   
2025-07-15 10:25:00    0.025478
2025-07-15 10:30:00    0.025472
2025-07-15 10:35:00    0.025467
2025-07-15 10:40:00    0.025460
2025-07-15 10:45:00    0.025453
Name: bb_middle, Length: 152, dtype: float64
bb_std
 timestamp
2025-07-14 22:10:00         NaN
2025-07-14 22:15:00         NaN
2025-07-14 22:20:00         NaN
2025-07-14 22:25:00         NaN
2025-07-14 22:30:00         NaN
                         ...   
2025-07-15 10:25:00    0.000020
2025-07-15 10:30:00    0.000031
2025-07-15 10:35:00    0.000039
2025-07-15 10:40:00    0.000045
2025-07-15 10:45:00    0.000050
Name: bb_std, Length: 152, dtype: float64
bb_upper
 timestamp
2025-07-14 22:10:00         NaN
2025-07-14 22:15:00         NaN
2025-07-14 22:20:00         NaN
2025-07-14 22:25:00         NaN
2025-07-14 22:30:00   

In [36]:
ind_test_df.dropna(subset=['bb_lower', 'bb_upper', 'bb_middle', 'bb_std', 'rsi'], inplace=True)

In [37]:
ind_test_df.isna().sum()

open                            0
high                            0
low                             0
close                           0
volume                          0
close_time                      0
quote_asset_volume              0
number_of_trades                0
taker_buy_base_asset_volume     0
taker_buy_quote_asset_volume    0
SMA_20                          0
SMA_50                          0
SMA_200                         0
EMA_12                          0
EMA_26                          0
golden_cross                    0
death_cross                     0
bullish_cross                   0
bearish_cross                   0
ema_bullish_cross               0
ema_bearish_cross               0
trend_strength                  0
price_sma20_dist                0
price_sma50_dist                0
price_sma200_dist               0
price_ema12_dist                0
price_ema26_dist                0
bullish_alignment               0
rsi                             0
ema_fast      

In [38]:
ind_test_df['price_direction'] = np.where(
    ind_test_df['close'].shift(-1) < ind_test_df['close'], -1, 1
)
ind_test_df['price_direction'].value_counts(normalize=True)

price_direction
 1    0.639098
-1    0.360902
Name: proportion, dtype: float64

- -1 คือ ราคาปิดในอนาคต ***น้อยกว่า*** ราคาปิดปัจจุบัน -> ในตอนนี้ควรขาย
- 1 คือ ราคาปิดในอนาคต ***มากกว่าหรือเท่ากับ*** ราคาปิดปัจจุบัน -> ในตอนนี้ควรซื้อ

## Preparing to Train Model

In [39]:
ind_train_df.columns

Index(['open', 'high', 'low', 'close', 'volume', 'close_time',
       'quote_asset_volume', 'number_of_trades', 'taker_buy_base_asset_volume',
       'taker_buy_quote_asset_volume', 'SMA_20', 'SMA_50', 'SMA_200', 'EMA_12',
       'EMA_26', 'golden_cross', 'death_cross', 'bullish_cross',
       'bearish_cross', 'ema_bullish_cross', 'ema_bearish_cross',
       'trend_strength', 'price_sma20_dist', 'price_sma50_dist',
       'price_sma200_dist', 'price_ema12_dist', 'price_ema26_dist',
       'bullish_alignment', 'rsi', 'ema_fast', 'ema_slow', 'macd',
       'macd_signal', 'bb_middle', 'bb_std', 'bb_upper', 'bb_lower',
       'price_direction'],
      dtype='object')

In [40]:
ind_train_df['price_direction'].value_counts(normalize=True)

price_direction
 1    0.605505
-1    0.394495
Name: proportion, dtype: float64

In [41]:
random_state = 2025

In [42]:
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.metrics import roc_auc_score, classification_report, confusion_matrix, accuracy_score, precision_score, recall_score, f1_score
from imblearn.over_sampling import SMOTE

features = [
    'SMA_20', 'SMA_50', 'SMA_200',
    'EMA_12', 'EMA_26',
    'rsi', 
    'macd', 'macd_signal',
    'bb_upper', 'bb_middle', 'bb_lower',
    'trend_strength', 'price_sma20_dist', 'price_sma50_dist',
    'price_sma200_dist',
    'price_ema12_dist', 'price_ema26_dist', 'bullish_alignment'
]

target = 'price_direction'  

In [43]:
def create_empty_evaluation(model, features, target):
    return {
        "model": model,
        "features": features,
        "target": target,
        "accuracy": 0.0,
        "precision_sell": 0.0,
        "precision_buy": 0.0,
        "recall_sell": 0.0,
        "recall_buy": 0.0,
        "f1_sell": 0.0,
        "f1_buy": 0.0,
        "roc_auc": 0.5
    }

In [44]:
def model_evaluation(model, features, target, X_test, y_test):
    try:
        if len(X_test) == 0:
            return create_empty_evaluation(model, features, target)
        
        y_pred = model.predict(X_test)
        
        if set(np.unique(y_test)).issubset({-1, 1}) and set(np.unique(y_pred)).issubset({0, 1}):
            y_pred = np.where(y_pred == 0, -1, 1)
            
        if set(np.unique(y_test)).issubset({0, 1}):
            y_pred = np.where(y_pred == 0, -1, 1)
            y_test = np.where(y_test == 0, -1, 1)
        
        if len(np.unique(y_pred)) == 1:
            return create_empty_evaluation(model, features, target)
            
        accuracy = accuracy_score(y_test, y_pred)
        
        precision_all = precision_score(y_test, y_pred, average=None, labels=[-1, 1], zero_division=0)
        recall_all = recall_score(y_test, y_pred, average=None, labels=[-1, 1], zero_division=0)
        f1_all = f1_score(y_test, y_pred, average=None, labels=[-1, 1], zero_division=0)
        
        precision_sell = precision_all[0] if len(precision_all) > 0 else 0
        precision_buy = precision_all[1] if len(precision_all) > 1 else 0
        recall_sell = recall_all[0] if len(recall_all) > 0 else 0
        recall_buy = recall_all[1] if len(recall_all) > 1 else 0
        f1_sell = f1_all[0] if len(f1_all) > 0 else 0
        f1_buy = f1_all[1] if len(f1_all) > 1 else 0
        
        try:
            if hasattr(model, "predict_proba"):
                y_score = model.predict_proba(X_test)
                if y_score.shape[1] == 2:
                    y_score = y_score[:, 1]
                else:
                    y_score = y_score.ravel()
            else:
                y_score = model.decision_function(X_test)
            
            if len(np.unique(y_test)) == 2:
                roc_auc = roc_auc_score(y_test, y_score)
            else:
                roc_auc = 0.5
        except Exception as e:
            roc_auc = 0.5
        
        return {
            "model": model,
            "features": features,
            "target": target,
            "accuracy": accuracy,
            "precision_sell": precision_sell,
            "precision_buy": precision_buy,  
            "recall_sell": recall_sell,
            "recall_buy": recall_buy,
            "f1_sell": f1_sell,
            "f1_buy": f1_buy,
            "roc_auc": roc_auc
        }
        
    except Exception as e:
        return create_empty_evaluation(model, features, target)


In [45]:
def show_model_results(lr_evaluation, rf_evaluation, xgb_evaluation, knn_evaluation, svm_evaluation): 
    data = {
        "model": ["LR", "RF", "XGB", "KNN", "SVM"],
        "accuracy": [lr_evaluation["accuracy"], rf_evaluation["accuracy"], 
                    xgb_evaluation["accuracy"], knn_evaluation["accuracy"], 
                    svm_evaluation["accuracy"]],
        "precision_sell": [lr_evaluation["precision_sell"], rf_evaluation["precision_sell"],
                            xgb_evaluation["precision_sell"], knn_evaluation["precision_sell"], 
                            svm_evaluation["precision_sell"]],
        "precision_buy": [lr_evaluation["precision_buy"], rf_evaluation["precision_buy"],
                            xgb_evaluation["precision_buy"], knn_evaluation["precision_buy"],
                            svm_evaluation["precision_buy"]],
        "recall_sell": [lr_evaluation["recall_sell"], rf_evaluation["recall_sell"],
                            xgb_evaluation["recall_sell"], knn_evaluation["recall_sell"],
                            svm_evaluation["recall_sell"]],
        "recall_buy": [lr_evaluation["recall_buy"], rf_evaluation["recall_buy"],
                            xgb_evaluation["recall_buy"], knn_evaluation["recall_buy"],
                            svm_evaluation["recall_buy"]],
        "f1_sell": [lr_evaluation["f1_sell"], rf_evaluation["f1_sell"],
                        xgb_evaluation["f1_sell"], knn_evaluation["f1_sell"],
                        svm_evaluation["f1_sell"]],
        "f1_buy": [lr_evaluation["f1_buy"], rf_evaluation["f1_buy"],
                        xgb_evaluation["f1_buy"], knn_evaluation["f1_buy"],
                        svm_evaluation["f1_buy"]],
        "roc_auc": [lr_evaluation["roc_auc"], rf_evaluation["roc_auc"],
                    xgb_evaluation["roc_auc"], knn_evaluation["roc_auc"],
                    svm_evaluation["roc_auc"]]
    }
    
    results_df = pd.DataFrame(data)
    results_df.set_index("model", inplace=True)
    return results_df, {"LR": lr_evaluation["features"], "RF": rf_evaluation["features"],
                        "XGB": xgb_evaluation["features"], "KNN": knn_evaluation["features"],
                        "SVM": svm_evaluation["features"]}

In [46]:
def custom_train_test_split(X, y, test_size=0.2):    
    min_test_size = max(1, int(len(X) * 0.1))
    actual_test_size = min(test_size, min_test_size / len(X))
    
    return train_test_split(X, y, test_size=actual_test_size, shuffle=False, random_state=random_state)

## Model Training V1

### Logistic Regression

In [47]:
def lr_model_v1(df):
    X = df[features]
    y = df[target]
    
    # 1. Feature Scaling
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)

    # 2. Train/Test split
    X_train, X_test, y_train, y_test = custom_train_test_split(X_scaled, y)
    model = LogisticRegression(random_state=random_state)
    model.fit(X_train, y_train)

    # 3. Scoring
    lr_evaluation = model_evaluation(model, features, target, X_test, y_test)
    return lr_evaluation, model

### Random Forest Classification

In [48]:
def rf_model_v1(df):
    X = df[features]
    y = df[target]
    
    # 1. Feature Scaling
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)

    # 2. Train/Test split
    X_train, X_test, y_train, y_test = custom_train_test_split(X_scaled, y)
    model = RandomForestClassifier(random_state=random_state)
    model.fit(X_train, y_train)

    # 3. Scoring
    rf_evaluation = model_evaluation(model, features, target, X_test, y_test)
    return rf_evaluation, model

### XGBoost

In [49]:
def xgb_model_v1(df):
    X = df[features]
    y = df[target].map({-1: 0, 1: 1})
    
    # 1. Feature Scaling
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)

    # 2. Train/Test split
    X_train, X_test, y_train, y_test = custom_train_test_split(X_scaled, y)

    model = XGBClassifier(random_state=random_state)
    model.fit(X_train, y_train)

    # 3. Scoring
    xgb_evaluation = model_evaluation(model, features, target, X_test, y_test)
    return xgb_evaluation, model

### K-Nearest Neighbors

In [50]:
def knn_model_v1(df):
    X = df[features]
    y = df[target]
    
    # 1. Feature Scaling
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)

    # 2. Train/Test split
    X_train, X_test, y_train, y_test = custom_train_test_split(X_scaled, y)
    model = KNeighborsClassifier()
    model.fit(X_train, y_train)

    # 3. Scoring
    knn_evaluation = model_evaluation(model, features, target, X_test, y_test)
    return knn_evaluation, model

### Support Vector Machine

In [51]:
def svm_model_v1(df):
    X = df[features]
    y = df[target]
    
    # 1. Feature Scaling
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)

    # 2. Train/Test split
    X_train, X_test, y_train, y_test = custom_train_test_split(X_scaled, y)
    model = SVC(random_state=random_state)
    model.fit(X_train, y_train)

    # 3. Scoring
    svm_evaluation = model_evaluation(model, features, target, X_test, y_test)
    return svm_evaluation, model

## Model Summary V1

In [52]:
lr_evaluation, lr_model = lr_model_v1(ind_train_df)
rf_evaluation, rf_model = rf_model_v1(ind_train_df)
xgb_evaluation, xgb_model = xgb_model_v1(ind_train_df)
knn_evaluation, knn_model = knn_model_v1(ind_train_df)
svm_evaluation, svm_model = svm_model_v1(ind_train_df)

results_df_v1, result_features_v1 = show_model_results(lr_evaluation, rf_evaluation, xgb_evaluation, 
                                                       knn_evaluation, svm_evaluation)

results_df_v1

Unnamed: 0_level_0,accuracy,precision_sell,precision_buy,recall_sell,recall_buy,f1_sell,f1_buy,roc_auc
model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
LR,0.561224,0.571429,0.56044,0.090909,0.944444,0.156863,0.703448,0.552609
RF,0.591837,0.555556,0.612903,0.454545,0.703704,0.5,0.655172,0.547559
XGB,0.520408,0.457143,0.555556,0.363636,0.648148,0.405063,0.598291,0.494108
KNN,0.510204,0.428571,0.542857,0.272727,0.703704,0.333333,0.612903,0.455177
SVM,0.571429,0.666667,0.565217,0.090909,0.962963,0.16,0.712329,0.521044


## Model Training V2


- เพิ่ม Hyperparameter Tuning 
- เพิ่ม SMOTE

In [53]:
from collections import Counter

features = [
    'SMA_20', 'SMA_50', 'SMA_200',
    'EMA_12', 'EMA_26',
    'rsi', 
    'macd', 'macd_signal',
    'bb_upper', 'bb_middle', 'bb_lower',
    'trend_strength', 'price_sma20_dist', 'price_sma50_dist',
    'price_sma200_dist',
    'price_ema12_dist', 'price_ema26_dist', 'bullish_alignment'
]

target = 'price_direction' 

In [54]:
def custom_smote(X_train, y_train):
    try:
        counter = Counter(y_train)
        min_class_count = min(counter.values())
        
        if min_class_count <= 1:
            return X_train, y_train
            
        k_neighbors = min(5, min_class_count - 1)
        sm = SMOTE(random_state=random_state, k_neighbors=k_neighbors)
        return sm.fit_resample(X_train, y_train)
        
    except Exception as e:
        return X_train, y_train

### Logistic Regression

In [55]:
def lr_model_v2(df):
    X = df[features]
    y = df[target]
    
    # 1. Feature Scaling
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)

    # 2. Train/Test split
    X_train, X_test, y_train, y_test = custom_train_test_split(X_scaled, y)

    # 3. Balance Data (SMOTE)
    X_train_res, y_train_res = custom_smote(X_train, y_train)

    # 4. Hyperparameter Tuning
    param_grid = {
        'C': [0.1, 1, 10, 100],
        'solver': ['lbfgs', 'liblinear'],
        'class_weight': ['balanced']
    }
    grid = GridSearchCV(
        LogisticRegression(max_iter=1000, random_state=random_state), 
        param_grid, scoring='accuracy'
    )
    grid.fit(X_train_res, y_train_res)
    # print("Best Params:", grid.best_params_)
    # print("Best CV Score:", grid.best_score_)

    # 5. Train model
    model = LogisticRegression(max_iter=1000, **grid.best_params_, random_state=random_state)
    model.fit(X_train_res, y_train_res)

    # 6. Scoring
    lr_evaluation = model_evaluation(model, features, target, X_test, y_test)
    return lr_evaluation, model

### Random Forest Regression

In [56]:
def rf_model_v2(df):
    X = df[features]
    y = df[target]
    
    # 1. Feature Scaling
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)

    # 2. Train/Test split
    X_train, X_test, y_train, y_test = custom_train_test_split(X_scaled, y)

    # 3. Balance Data (SMOTE)
    X_train_res, y_train_res = custom_smote(X_train, y_train)

    # 4. Hyperparameter Tuning
    param_grid = {
        'n_estimators': [100, 200],
        'max_depth': [4, 8, 16, None],
        'min_samples_split': [2, 5, 10],
        'class_weight': ['balanced']
    }

    grid = GridSearchCV(RandomForestClassifier(random_state=random_state), param_grid,
                        scoring='accuracy')
    grid.fit(X_train_res, y_train_res)
    # print("Best Params:", grid.best_params_)
    # print("Best CV Score:", grid.best_score_)

    # 5. Train model
    model = RandomForestClassifier(random_state=random_state, **grid.best_params_)
    model.fit(X_train_res, y_train_res)

    # 6. Scoring
    rf_evaluation = model_evaluation(model, features, target, X_test, y_test)
    return rf_evaluation, model

### XGBoost

In [57]:
def xgb_model_v2(df):
    X = df[features]
    y = df[target].map({-1: 0, 1: 1})
    
    # 1. Feature Scaling
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)

    # 2. Train/Test split
    X_train, X_test, y_train, y_test = custom_train_test_split(X_scaled, y)

    # 3. Balance Data (SMOTE)
    X_train_res, y_train_res = custom_smote(X_train, y_train)

    # 4. Hyperparameter Tuning
    param_grid = {
        'n_estimators': [100, 200],
        'max_depth': [3, 6, 10],
        'learning_rate': [0.01, 0.1, 0.2],
        'subsample': [0.8, 1.0],
    }

    grid = GridSearchCV(XGBClassifier(eval_metric='logloss', random_state=random_state),
        param_grid, scoring='accuracy'
    )
    grid.fit(X_train_res, y_train_res)
    # print("Best Params:", grid.best_params_)
    # print("Best CV Score:", grid.best_score_)

    # 5. Train model
    model = XGBClassifier(use_label_encoder=False, eval_metric='logloss',
                        random_state=random_state, **grid.best_params_)
    model.fit(X_train_res, y_train_res)

    # 6. Scoring
    xgb_evaluation = model_evaluation(model, features, target, X_test, y_test)
    return xgb_evaluation, model

### K-Nearest Neighbors

In [58]:
def knn_model_v2(df):
    X = df[features]
    y = df[target]
    
    # 1. Feature Scaling
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)

    # 2. Train/Test split
    X_train, X_test, y_train, y_test = custom_train_test_split(X_scaled, y)

    # 3. Balance Data (SMOTE)
    X_train_res, y_train_res = custom_smote(X_train, y_train)

    # 4. Hyperparameter Tuning
    param_grid = {
        'n_neighbors': [3, 5, 7, 9],
        'weights': ['uniform', 'distance'],
        'metric': ['euclidean', 'manhattan'],
    }
    grid = GridSearchCV(KNeighborsClassifier(), param_grid, scoring='accuracy')
    grid.fit(X_train_res, y_train_res)
    # print("Best Params:", grid.best_params_)
    # print("Best CV Score:", grid.best_score_)

    # 5. Train model
    model = KNeighborsClassifier(**grid.best_params_)
    model.fit(X_train_res, y_train_res)

    # 6. Scoring
    knn_evaluation = model_evaluation(model, features, target, X_test, y_test)
    return knn_evaluation, model

### Support Vector Machine

In [59]:
def svm_model_v2(df):
    X = df[features]
    y = df[target]
    
    # 1. Feature Scaling
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)

    # 2. Train/Test split
    X_train, X_test, y_train, y_test = custom_train_test_split(X_scaled, y)

    # 3. Balance Data (SMOTE)
    X_train_res, y_train_res = custom_smote(X_train, y_train)

    # 4. Hyperparameter Tuning
    param_grid = {
        'C': [1, 10, 100],
        'kernel': ['linear', 'rbf', 'poly'],
        'gamma': ['scale', 'auto'],
        'class_weight': ['balanced']
    }
    grid = GridSearchCV(SVC(), param_grid, scoring='accuracy')
    grid.fit(X_train_res, y_train_res)
    # print("Best Params:", grid.best_params_)
    # print("Best CV Score:", grid.best_score_)

    # 5. Train model
    model = SVC(**grid.best_params_, random_state=random_state)
    model.fit(X_train_res, y_train_res)

    # 6. Scoring
    svm_evaluation = model_evaluation(model, features, target, X_test, y_test)
    return svm_evaluation, model

## Model Result V2

In [60]:
lr_evaluation_v2, lr_model2 = lr_model_v2(ind_train_df)
rf_evaluation_v2, rf_model2 = rf_model_v2(ind_train_df)
xgb_evaluation_v2, xgb_model2 = xgb_model_v2(ind_train_df)
knn_evaluation_v2, knn_model2 = knn_model_v2(ind_train_df)
svm_evaluation_v2, svm_model2 = svm_model_v2(ind_train_df)

results_df_v2, result_features_v2 = show_model_results(lr_evaluation_v2, 
                                                       rf_evaluation_v2, xgb_evaluation_v2, 
                                                       knn_evaluation_v2, svm_evaluation_v2)

results_df_v2

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Unnamed: 0_level_0,accuracy,precision_sell,precision_buy,recall_sell,recall_buy,f1_sell,f1_buy,roc_auc
model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
LR,0.408163,0.402778,0.423077,0.659091,0.203704,0.5,0.275,0.533249
RF,0.520408,0.457143,0.555556,0.363636,0.648148,0.405063,0.598291,0.527988
XGB,0.55102,0.5,0.573529,0.340909,0.722222,0.405405,0.639344,0.547138
KNN,0.520408,0.469388,0.571429,0.522727,0.518519,0.494624,0.543689,0.490951
SVM,0.510204,0.464286,0.571429,0.590909,0.444444,0.52,0.5,0.572391


## Model Training V3

- เพิ่มการทำ Dynamic Features Selection
- เพิ่ม Class Weight Hyperparameter

In [61]:
features = [
    'SMA_20', 'SMA_50', 'SMA_200',
    'EMA_12', 'EMA_26',
    'rsi', 
    'macd', 'macd_signal',
    'bb_upper', 'bb_middle', 'bb_lower',
    'trend_strength', 'price_sma20_dist', 'price_sma50_dist',
    'price_sma200_dist',
    'price_ema12_dist', 'price_ema26_dist', 'bullish_alignment'
]

target = 'price_direction' 

In [62]:
def get_top_features_by_coef(model, top_n=10):
    coef_abs = np.abs(model.coef_[0])
    feature_importance = pd.Series(coef_abs, index=features)
    top_feature_importance = feature_importance.sort_values(ascending=False).head(top_n)
    return top_feature_importance.index.tolist()

In [63]:
def get_top_features_by_importance(model, top_n=10):
    importances = model.feature_importances_
    feature_importance = pd.Series(importances, index=features)
    top_feature_importance = feature_importance.sort_values(ascending=False).head(top_n)
    return top_feature_importance.index.tolist()

### Logistic Regression

In [64]:
def lr_model_v3(df):
    X = df[features]
    y = df[target]
    
    # 1. Feature Scaling
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)

    # 2. Train/Test split
    X_train, X_test, y_train, y_test = custom_train_test_split(X_scaled, y)

    # 3. Balance Data (SMOTE)
    X_train_res, y_train_res = custom_smote(X_train, y_train)

    # 4. Fit model ด้วย features ทั้งหมด
    model_all = LogisticRegression(max_iter=1000, random_state=random_state, class_weight='balanced')
    model_all.fit(X_train_res, y_train_res)

    # 5. เลือก top N feature
    top_features = get_top_features_by_coef(model_all)

    # 6. เตรียมข้อมูลใหม่เฉพาะ top feature
    X_top = df[top_features]
    X_top_scaled = scaler.fit_transform(X_top)
    X_train_top, X_test_top, y_train_top, y_test_top = custom_train_test_split(X_top_scaled, y)

    # 7. Balance Data (SMOTE) สำหรับ Top Features
    X_train_res_top, y_train_res_top = custom_smote(X_train_top, y_train_top)

    # 8. Hyperparameter Tuning
    param_grid = {
        'C': [0.1, 1, 10, 100],
        'solver': ['lbfgs', 'liblinear'],
        'class_weight': [{-1: 3, 1: 1}, "balanced", {-1: 1, 1: 3}]
    }
    grid = GridSearchCV(
        LogisticRegression(max_iter=1000, random_state=random_state), 
        param_grid, scoring='accuracy'
    )
    grid.fit(X_train_res_top, y_train_res_top)

    # 9. Train model final
    model = LogisticRegression(max_iter=1000, **grid.best_params_, random_state=random_state)
    model.fit(X_train_res_top, y_train_res_top)

    lr_evaluation = model_evaluation(model, top_features, target, X_test_top, y_test_top)
    
    return lr_evaluation, model

### Random Forest Classification

In [65]:
def rf_model_v3(df):
    X = df[features]
    y = df[target]
    
    # 1. Feature Scaling
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)

    # 2. Train/Test split
    X_train, X_test, y_train, y_test = custom_train_test_split(X_scaled, y)

    # 3. Balance Data (SMOTE)
    X_train_res, y_train_res = custom_smote(X_train, y_train)

    # 4. Fit model ด้วย features ทั้งหมด
    model_all = RandomForestClassifier(random_state=random_state, class_weight='balanced')
    model_all.fit(X_train_res, y_train_res)

    # 5. เลือก top N feature
    top_features = get_top_features_by_importance(model_all)

    # 6. เตรียมข้อมูลใหม่เฉพาะ top feature
    X_top = df[top_features]
    X_top_scaled = scaler.fit_transform(X_top)
    X_train_top, X_test_top, y_train_top, y_test_top = custom_train_test_split(X_top_scaled, y)

    # 7. Balance Data (SMOTE) สำหรับ Top Features
    X_train_res_top, y_train_res_top = custom_smote(X_train_top, y_train_top)

    # 8. Hyperparameter Tuning
    param_grid = {
        'n_estimators': [100, 200],
        'max_depth': [4, 8, 16, None],
        'min_samples_split': [2, 5, 10],
        'class_weight': [{-1: 3, 1: 1}, "balanced", {-1: 1, 1: 3}]
    }
    grid = GridSearchCV(
        RandomForestClassifier(random_state=random_state),
        param_grid, scoring='accuracy'
    )
    grid.fit(X_train_res_top, y_train_res_top)

    # 9. Train model final
    model = RandomForestClassifier(random_state=random_state, **grid.best_params_)
    model.fit(X_train_res_top, y_train_res_top)
    
    rf_evaluation = model_evaluation(model, top_features, target, X_test_top, y_test_top)

    return rf_evaluation, model

### XGBoost

In [66]:
def xgb_model_v3(df):
    X = df[features]
    y = df[target].map({-1: 0, 1: 1})
    
    # 1. Feature Scaling
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)

    # 2. Train/Test split
    X_train, X_test, y_train, y_test = custom_train_test_split(X_scaled, y)

    # 3. Balance Data (SMOTE)
    X_train_res, y_train_res = custom_smote(X_train, y_train)

    # 4. Fit model ด้วย features ทั้งหมด
    model_all = XGBClassifier(eval_metric='logloss', random_state=random_state)
    model_all.fit(X_train_res, y_train_res)

    # 5. เลือก top N feature (ถ้าเท่ากับ features ทั้งหมด ให้ลด top_n)
    top_n = 10
    top_features = get_top_features_by_importance(model_all, top_n=top_n)
    while len(top_features) == len(features) and top_n > 1:
        top_n -= 1
        top_features = get_top_features_by_importance(model_all, top_n=top_n)

    # 6. เตรียมข้อมูลใหม่เฉพาะ top feature
    X_top = df[top_features]
    scaler_top = StandardScaler()
    X_top_scaled = scaler_top.fit_transform(X_top)
    X_train_top, X_test_top, y_train_top, y_test_top = custom_train_test_split(X_top_scaled, y)

    # 7. Balance Data (SMOTE) สำหรับ Top Features
    X_train_res_top, y_train_res_top = custom_smote(X_train_top, y_train_top)

    # 8. Hyperparameter Tuning
    neg_count = sum(y_train_res_top == 0)
    pos_count = sum(y_train_res_top == 1)
    scale_pos_weight = neg_count / pos_count if pos_count > 0 else 1
    param_grid = {
        'n_estimators': [100, 200],
        'max_depth': [3, 6, 10],
        'learning_rate': [0.01, 0.1, 0.2],
        'subsample': [0.8, 1.0],
        'scale_pos_weight': [1, scale_pos_weight, scale_pos_weight * 2],
    }
    grid = GridSearchCV(
        XGBClassifier(eval_metric='logloss', random_state=random_state),
        param_grid, scoring='accuracy'
    )
    grid.fit(X_train_res_top, y_train_res_top)

    # 9. Train model final
    model = XGBClassifier(eval_metric='logloss', random_state=random_state, **grid.best_params_)
    model.fit(X_train_res_top, y_train_res_top)

    xgb_evaluation = model_evaluation(model, top_features, target, X_test_top, y_test_top)

    return xgb_evaluation, model

### K-Nearest Neighbors

In [67]:
from sklearn.feature_selection import SelectKBest, f_classif

def knn_model_v3(df):
    X = df[features]
    y = df[target]
    
    # 1. Feature Scaling
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)

    # 2. Train/Test split
    X_train, X_test, y_train, y_test = custom_train_test_split(X_scaled, y)

    # 3. Balance Data (SMOTE)
    X_train_res, y_train_res = custom_smote(X_train, y_train)

    # 4. Fit model ด้วย features ทั้งหมด
    top_n = 10
    selector = SelectKBest(score_func=f_classif, k=min(top_n, X_train_res.shape[1]))
    selector.fit(X_train_res, y_train_res)
    mask = selector.get_support()
    top_features = [feature for feature, selected in zip(features, mask) if selected]

    # 6. เตรียมข้อมูลใหม่เฉพาะ top feature
    X_top = df[top_features]
    X_top_scaled = scaler.fit_transform(X_top)
    X_train_top, X_test_top, y_train_top, y_test_top = custom_train_test_split(X_top_scaled, y)

    # 7. Balance Data (SMOTE) สำหรับ Top Features
    X_train_res_top, y_train_res_top = custom_smote(X_train_top, y_train_top)

    # 8. Hyperparameter Tuning
    param_grid = {
        'n_neighbors': [3, 5, 7, 9],
        'weights': ['uniform', 'distance'],
        'metric': ['euclidean', 'manhattan'],        
    }
    grid = GridSearchCV(KNeighborsClassifier(), param_grid, scoring='accuracy')
    grid.fit(X_train_res_top, y_train_res_top)

    # 9. Train model final
    model = KNeighborsClassifier(**grid.best_params_)
    model.fit(X_train_res_top, y_train_res_top)

    knn_evaluation = model_evaluation(model, top_features, target, X_test_top, y_test_top)

    return knn_evaluation, model

### Support Vector Machine

In [68]:
def svm_model_v3(df):
    X = df[features]
    y = df[target]
    
    # 1. Feature Scaling
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)

    # 2. Train/Test split
    X_train, X_test, y_train, y_test = custom_train_test_split(X_scaled, y)

    # 3. Balance Data (SMOTE)
    X_train_res, y_train_res = custom_smote(X_train, y_train)

    # 4. Fit model ด้วย features ทั้งหมด
    model_all = SVC(kernel='linear', class_weight='balanced', random_state=random_state)
    model_all.fit(X_train_res, y_train_res)

    # 5. เลือก top N feature (ใช้ absolute coef_)
    top_features = get_top_features_by_coef(model_all)

    # 6. เตรียมข้อมูลใหม่เฉพาะ top feature
    X_top = df[top_features]
    X_top_scaled = scaler.fit_transform(X_top)
    X_train_top, X_test_top, y_train_top, y_test_top = custom_train_test_split(X_top_scaled, y)

    # 7. Balance Data (SMOTE) สำหรับ Top Features
    X_train_res_top, y_train_res_top = custom_smote(X_train_top, y_train_top)

    # 8. Hyperparameter Tuning
    param_grid = {
        'C': [1, 10, 100],
        'kernel': ['linear', 'rbf', 'poly'],
        'gamma': ['scale', 'auto'],
        'class_weight': [{-1: 3, 1: 1}, "balanced", {-1: 1, 1: 3}]
    }
    grid = GridSearchCV(SVC(), param_grid, scoring='accuracy')
    grid.fit(X_train_res_top, y_train_res_top)

    # 9. Train model final
    model = SVC(**grid.best_params_, random_state=random_state)
    model.fit(X_train_res_top, y_train_res_top)

    svm_evaluation = model_evaluation(model, top_features, target, X_test_top, y_test_top)
    return svm_evaluation, model

## Model Result V3

In [None]:
lr_evaluation_v3, lr_model3 = lr_model_v3(ind_train_df)
rf_evaluation_v3, rf_model3 = rf_model_v3(ind_train_df)
xgb_evaluation_v3, xgb_model3 = xgb_model_v3(ind_train_df)
knn_evaluation_v3, knn_model3 = knn_model_v3(ind_train_df)
svm_evaluation_v3, svm_model3 = svm_model_v3(ind_train_df)

results_df_v3, result_features_v3 = show_model_results(lr_evaluation_v3, rf_evaluation_v3, 
                                                       xgb_evaluation_v3, knn_evaluation_v3, 
                                                       svm_evaluation_v3)

results_df_v3

## Comparison

In [None]:
metrics = ['accuracy', 'precision_sell', 'precision_buy', 'recall_sell', 
           'recall_buy', 'f1_sell', 'f1_buy', 'roc_auc']

models = results_df_v1.index.tolist()
x = np.arange(len(models))
width = 0.25

fig, axes = plt.subplots(len(metrics), 1, figsize=(14, 4 * len(metrics)), sharey=True)

for i, metric in enumerate(metrics):
    axes[i].bar(x - width, results_df_v1[metric], width, label='V1')
    axes[i].bar(x, results_df_v2[metric], width, label='V2')
    axes[i].bar(x + width, results_df_v3[metric], width, label='V3')
    axes[i].set_title(metric)
    axes[i].set_xticks(x)
    axes[i].set_xticklabels(models, rotation=20)
    axes[i].legend()
    
plt.tight_layout()
plt.show()

# 5. Picking Model

ทดสอบกับหลายๆ เหรียญ แล้วหา Model ที่ดีที่สุด

In [None]:
def create_train_test_ind_df(symbol):
    api = BinanceAPI(api_key, api_secret)
    train_df = collect_historical_data(api, symbol, interval="5m", days=4)
    test_df = collect_historical_data(api, symbol, interval="5m", days=1)
    test_df = test_df[test_df['timestamp'] > train_df['timestamp'].max()]
    
    ind_train_df = add_all_indicators(train_df)
    ind_train_df.set_index("timestamp", inplace=True)
    ind_train_df.dropna(subset=['bb_lower', 'bb_upper', 'bb_middle', 'bb_std', 'rsi'], inplace=True)
    ind_train_df['price_direction'] = np.where(
        ind_train_df['close'].shift(-1) < ind_train_df['close'], -1, 1
    )
    
    ind_test_df = add_all_indicators(test_df)
    ind_test_df.set_index("timestamp", inplace=True)
    ind_test_df.dropna(subset=['bb_lower', 'bb_upper', 'bb_middle', 'bb_std', 'rsi'], inplace=True)
    ind_test_df['price_direction'] = np.where(
        ind_test_df['close'].shift(-1) < ind_test_df['close'], -1, 1
    )
    
    return ind_train_df, ind_test_df

In [None]:
def choose_metric_weights(buy_prop):
    if buy_prop < 0.2:
        return {'f1_buy': 0.6, 'recall_buy': 0.25, 'precision_buy': 0.1, 'roc_auc': 0.05}
    elif buy_prop < 0.3:
        return {'f1_buy': 0.4, 'recall_buy': 0.2, 'f1_sell': 0.2, 'roc_auc': 0.2}
    elif buy_prop > 0.8:
        return {'f1_sell': 0.6, 'recall_sell': 0.25, 'precision_sell': 0.1, 'roc_auc': 0.05}
    elif buy_prop > 0.7:
        return {'f1_sell': 0.4, 'recall_sell': 0.2, 'f1_buy': 0.2, 'roc_auc': 0.2}
    else:
        return {'f1_sell': 0.3, 'f1_buy': 0.3, 'roc_auc': 0.4}

In [None]:
def select_best_model(result_df, train_df, 
                      imbalance_threshold=0.15, 
                      severe_imbalance_penalty=0.5, 
                      mild_imbalance_penalty=0.8):
    
    class_counts = train_df['price_direction'].value_counts(normalize=True).sort_index()
    buy_prop = class_counts.get(1, 0.0)
    metric_weights = choose_metric_weights(buy_prop)

    def score_row(row):
        if row['recall_sell'] == 0 or row['recall_buy'] == 0:
            return -999
        
        if (row['precision_sell'] < 0.1 or row['precision_buy'] < 0.1 or 
            row['recall_sell'] < 0.1 or row['recall_buy'] < 0.1):
            return -999
            
        score = sum(row[m] * w for m, w in metric_weights.items() if m in row)
        
        if buy_prop < imbalance_threshold or buy_prop > (1 - imbalance_threshold):
            score *= severe_imbalance_penalty
        elif buy_prop < 0.3 or buy_prop > 0.7:
            score *= mild_imbalance_penalty
            
        return score

    scores = result_df.apply(score_row, axis=1)
    
    if scores.max() == -999:
        return result_df['accuracy'].idxmax()
    
    return scores.idxmax()

In [None]:
def get_best_model(ind_train_df):
    
    if ind_train_df.shape[0] != 0:
        # --- V1 ---
        lr_eval_v1, lr_model_v1_ = lr_model_v1(ind_train_df)
        rf_eval_v1, rf_model_v1_ = rf_model_v1(ind_train_df)
        xgb_eval_v1, xgb_model_v1_ = xgb_model_v1(ind_train_df)
        knn_eval_v1, knn_model_v1_ = knn_model_v1(ind_train_df)
        svm_eval_v1, svm_model_v1_ = svm_model_v1(ind_train_df)

        result_df_v1, result_features_v1 = show_model_results(
            lr_eval_v1, rf_eval_v1, xgb_eval_v1, knn_eval_v1, svm_eval_v1
        )
        result_df_v1.index = [f"{name}_V1" for name in result_df_v1.index]

        # --- V2 ---
        lr_eval_v2, lr_model_v2_ = lr_model_v2(ind_train_df)
        rf_eval_v2, rf_model_v2_ = rf_model_v2(ind_train_df)
        xgb_eval_v2, xgb_model_v2_ = xgb_model_v2(ind_train_df)
        knn_eval_v2, knn_model_v2_ = knn_model_v2(ind_train_df)
        svm_eval_v2, svm_model_v2_ = svm_model_v2(ind_train_df)

        result_df_v2, result_features_v2 = show_model_results(
            lr_eval_v2, rf_eval_v2, xgb_eval_v2, knn_eval_v2, svm_eval_v2
        )
        result_df_v2.index = [f"{name}_V2" for name in result_df_v2.index]

        # --- V3 ---
        lr_eval_v3, lr_model_v3_ = lr_model_v3(ind_train_df)
        rf_eval_v3, rf_model_v3_ = rf_model_v3(ind_train_df)
        xgb_eval_v3, xgb_model_v3_ = xgb_model_v3(ind_train_df)
        knn_eval_v3, knn_model_v3_ = knn_model_v3(ind_train_df)
        svm_eval_v3, svm_model_v3_ = svm_model_v3(ind_train_df)

        result_df_v3, result_features_v3 = show_model_results(
            lr_eval_v3, rf_eval_v3, xgb_eval_v3, knn_eval_v3, svm_eval_v3
        )
        result_df_v3.index = [f"{name}_V3" for name in result_df_v3.index]

        result_df = pd.concat([result_df_v1, result_df_v2, result_df_v3])
        best_model = select_best_model(result_df, ind_train_df)
        
        map_model = {
            "LR_V1": [lr_model_v1_, result_features_v1["LR"]],
            "LR_V2": [lr_model_v2_, result_features_v2["LR"]],
            "LR_V3": [lr_model_v3_, result_features_v3["LR"]],
            "RF_V1": [rf_model_v1_, result_features_v1["RF"]],
            "RF_V2": [rf_model_v2_, result_features_v2["RF"]],
            "RF_V3": [rf_model_v3_, result_features_v3["RF"]],
            "XGB_V1": [xgb_model_v1_, result_features_v1["XGB"]],
            "XGB_V2": [xgb_model_v2_, result_features_v2["XGB"]],
            "XGB_V3": [xgb_model_v3_, result_features_v3["XGB"]],
            "KNN_V1": [knn_model_v1_, result_features_v1["KNN"]],
            "KNN_V2": [knn_model_v2_, result_features_v2["KNN"]],
            "KNN_V3": [knn_model_v3_, result_features_v3["KNN"]],
            "SVM_V1": [svm_model_v1_, result_features_v1["SVM"]],
            "SVM_V2": [svm_model_v2_, result_features_v2["SVM"]],
            "SVM_V3": [svm_model_v3_, result_features_v3["SVM"]],
        }
        
        return map_model[best_model]

    return None

# 6. Simulation

In [None]:
def simulate(symbol=None):
    api = BinanceAPI(api_key, api_secret)
    
    while not symbol:
        print("Available symbols (Some might not work...):")
        for s in api.get_n_symbol(10):
            print(s)
    
        symbol = input("Enter a symbol: ").strip().upper()
        print(f"\nSelected symbol: {symbol}")

        ind_train_df, ind_test_df = create_train_test_ind_df(symbol)
        if ind_train_df.shape[0] == 0 or ind_test_df.shape[0] == 0:
            print(f"{symbol} is invalid or has insufficient data. Try again")
            symbol = None
            
    ind_train_df, ind_test_df = create_train_test_ind_df(symbol)
    print(f"\nTrain set size: {ind_train_df.shape}, Test set size: {ind_test_df.shape}")
    
    print("\nTrain target distribution:")
    print(ind_train_df['price_direction'].value_counts(normalize=True))
    print("\nTest target distribution:")
    print(ind_test_df['price_direction'].value_counts(normalize=True))

    best_model, best_features = get_best_model(ind_train_df)

    x_test = ind_test_df[best_features]
    y_test = ind_test_df['price_direction']
    y_pred = best_model.predict(x_test)

    print("\n=== Test Result ===")
    print(f"Accuracy: {accuracy_score(y_test, y_pred):.4f}")
    print("Confusion Matrix:")
    print(confusion_matrix(y_test, y_pred))
    print("Classification Report:")
    print(classification_report(y_test, y_pred))
    
    # Simulation
    print("\n=== Simple Trading Strategy Simulation ===")
    initial_balance = 1000
    balance = initial_balance
    position = 0 
    
    for i in range(len(y_pred)):
        predicted_direction = y_pred[i]
        actual_direction = y_test.iloc[i]
        
        if predicted_direction == 1 and position <= 0:  # Buy signal
            position = 1
        elif predicted_direction == -1 and position >= 0:  # Sell signal
            position = -1
            
        if position == actual_direction:
            balance += 10
        else:
            balance -= 5
    
    total_return = (balance - initial_balance) / initial_balance * 100
    print(f"Initial balance: ${initial_balance}")
    print(f"Final balance: ${balance:.2f}")
    print(f"Total return: {total_return:.2f}%")

In [None]:
simulate("ETHBTC")