In [21]:
import yfinance as yf
import pandas as pd
import numpy as np
import ta  # Technical Analysis library
import lightgbm as lgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
import joblib 
import gc
from lightgbm import early_stopping, log_evaluation


# Goal: implement price prediction using LightGBM and LSTM.

# This section is the new LightGBM implementation.


# Print DataFrames in full: no column/row truncation and a very wide line.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
pd.set_option('display.width', 2000)  # total display width in characters

# -------------------------
# 1. Download Data
# -------------------------
# Download daily stock data (example: Apple).
ticker = "AAPL"
df = yf.download(ticker, start="2015-01-01", end="2023-12-31")
if df.empty:
    # Fail here with a clear message rather than with an obscure error in
    # the feature/training code further down.
    raise ValueError(f"No price data was downloaded for ticker {ticker!r}")
df = df.reset_index()

# Flatten the download into a plain single-level frame. The columns are
# addressed as df[<field>][<ticker>], i.e. this assumes yfinance returned
# two-level (field, ticker) columns. Keying by `ticker` (instead of a
# hard-coded 'AAPL') keeps the cell working when the symbol is changed.
new_df = pd.DataFrame()
for field in ['Close', 'High', 'Open', 'Low', 'Volume']:
    new_df[field] = df[field][ticker]
new_df['Date'] = pd.to_datetime(df['Date'])
df = new_df
# -------------------------
# 2. Basic Price Features
# -------------------------
close_px = df['Close']

# Day-over-day simple return and the log of the close-to-previous-close ratio.
df['return'] = close_px.pct_change()
df['log_return'] = np.log(close_px.div(close_px.shift(1)))
# Intraday spread (high minus low) and signed candle body (close minus open).
df['daily_range'] = df['High'].sub(df['Low'])
df['close_open_diff'] = close_px.sub(df['Open'])

# Rolling statistics of the close: simple mean, exponential mean, std-dev.
for span in (5, 10, 20, 50):
    close_roll = close_px.rolling(span)
    df[f'MA_{span}'] = close_roll.mean()
    df[f'EMA_{span}'] = close_px.ewm(span=span, adjust=False).mean()
    df[f'std_{span}'] = close_roll.std()


# -------------------------
# 3. Volume-Based Features
# -------------------------
volume = df['Volume']

for span in (5, 10, 20):
    df[f'vol_ma_{span}'] = volume.rolling(span).mean()
df['vol_change'] = volume.pct_change()

# Running ratio since the first row: cumulative (close * volume) divided by
# cumulative volume.
traded_value = close_px.mul(volume)
df['VWAP'] = traded_value.cumsum().div(volume.cumsum())


# -------------------------
# 4. Technical Indicators
# -------------------------
# All indicators below are produced by the `ta` library from the OHLCV columns.
close_s = df['Close']
high_s = df['High']
low_s = df['Low']
volume_s = df['Volume']

# RSI (window of 14 rows)
rsi_indicator = ta.momentum.RSIIndicator(close_s, window=14)
df['RSI_14'] = rsi_indicator.rsi()

# MACD line and its signal line (library default periods)
macd = ta.trend.MACD(close_s)
df['MACD'] = macd.macd()
df['MACD_signal'] = macd.macd_signal()

# Bollinger Bands (20-row window, 2 deviations); width = upper - lower band
bb = ta.volatility.BollingerBands(close_s, window=20, window_dev=2)
upper_band = bb.bollinger_hband()
lower_band = bb.bollinger_lband()
df['BB_high'] = upper_band
df['BB_low'] = lower_band
df['BB_width'] = upper_band - lower_band

# ATR: only computed when there are at least 14 rows, otherwise filled with NaN
if len(df) < 14:
    df['ATR_14'] = np.nan  # fallback for too-short datasets
else:
    atr_indicator = ta.volatility.AverageTrueRange(high_s, low_s, close_s, window=14)
    df['ATR_14'] = atr_indicator.average_true_range()

# OBV
obv_indicator = ta.volume.OnBalanceVolumeIndicator(close_s, volume_s)
df['OBV'] = obv_indicator.on_balance_volume()

# MFI (window of 14 rows)
mfi_indicator = ta.volume.MFIIndicator(high_s, low_s, close_s, volume_s, window=14)
df['MFI_14'] = mfi_indicator.money_flow_index()


# -------------------------
# 5. Lag Features
# -------------------------
# For each lag 1..10 add the close, volume and return from `lag` rows earlier.
# Column order per lag is Close, Volume, return.
for shift_by in range(1, 11):
    for base_col in ('Close', 'Volume', 'return'):
        df[f'{base_col}_lag_{shift_by}'] = df[base_col].shift(shift_by)

# Rolling max / min of the close and rolling close-volume correlation
for span in (5, 10, 20):
    close_window = df['Close'].rolling(span)
    df[f'rolling_max_{span}'] = close_window.max()
    df[f'rolling_min_{span}'] = close_window.min()
    df[f'rolling_corr_{span}'] = close_window.corr(df['Volume'])


# -------------------------
# 6. Date/Time Features
# -------------------------
date_parts = df['Date'].dt
df['day_of_week'] = date_parts.dayofweek
df['month'] = date_parts.month

# -------------------------
# 7. Advanced Features
# -------------------------
# Rate of Change: percentage change of the close over `span` rows
for span in (5, 10, 20):
    df[f'ROC_{span}'] = df['Close'].pct_change(periods=span)

# Z-score: distance of the close from its rolling mean, in rolling std-devs
for span in (5, 10, 20):
    close_roll = df['Close'].rolling(span)
    df[f'zscore_{span}'] = df['Close'].sub(close_roll.mean()).div(close_roll.std())

# Candlestick pattern example: Bullish Hammer (simplified).
# Flag = 1 when the candle closes above its open AND the high-low range is
# more than twice the close-open body; 0 otherwise.
candle_body = df['Close'] - df['Open']
candle_range = df['High'] - df['Low']
closes_up = df['Close'] > df['Open']
long_range = candle_range > 2 * candle_body
df['bullish_hammer'] = np.where(closes_up & long_range, 1, 0)


# -------------------------
# 8. Target Variable
# -------------------------
# Regression target: the close of the following row.
next_close = df['Close'].shift(-1)
df['target_close_next'] = next_close

# Classification target: 1 if the next close is above the current close, else 0.
df['target_up'] = next_close.gt(df['Close']).astype(int)


# Remove every row that has a NaN in any column (rolling/lag warm-up rows and
# the final row, which has no next close), then index the frame by date.
df.dropna(inplace=True)
df = df.set_index('Date')

print(df.head())


# -------------------------
# 9. Train / Evaluate / Persist LightGBM Regressor
# -------------------------
# Feature matrix: every column except the two targets.
drop_cols = ['target_close_next', 'target_up']  # remove targets from X
X = df.drop(columns=drop_cols)

# Regression target
y = df['target_close_next']

# Chronological hold-out: the last 20% of rows is the test set
# (shuffle=False keeps time order).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, shuffle=False
)

# Carve a validation set off the tail of the training rows. Early stopping
# must monitor data that is NOT the test set; otherwise the number of trees
# is tuned on the same rows the final RMSE/R2 is reported on (leakage).
X_train, X_val, y_train, y_val = train_test_split(
    X_train, y_train, test_size=0.2, shuffle=False
)

lgb_model = lgb.LGBMRegressor(
    n_estimators=1000,
    learning_rate=0.05,
    num_leaves=31,
    max_depth=-1,
    random_state=42
)

lgb_model.fit(
    X_train, y_train,
    eval_set=[(X_val, y_val)],
    eval_metric='rmse',
    callbacks=[early_stopping(stopping_rounds=50), log_evaluation(50)],
)

# Final evaluation on the untouched test set.
y_pred = lgb_model.predict(X_test)
rmse = np.sqrt(mean_squared_error(y_test.values, y_pred))
r2 = r2_score(y_test.values, y_pred)
print(f'RMSE: {rmse:.4f}, R2: {r2:.4f}')
print(y_pred)
print(y_test)


joblib.dump(lgb_model, 'lightgbm_stock_model.pkl')
print("Model saved as lightgbm_stock_model.pkl")

# Round-trip check: the reloaded model must reproduce the in-memory predictions.
loaded_model = joblib.load('lightgbm_stock_model.pkl')
pred = loaded_model.predict(X_test)
if not np.allclose(pred, y_pred):
    raise RuntimeError(
        "Predictions from the reloaded model differ from the in-memory model"
    )



# -----------------------------
# 16. Cleanup
# -----------------------------
del lgb_model
gc.collect()



  df = yf.download(ticker, start="2015-01-01", end="2023-12-31")
[*********************100%***********************]  1 of 1 completed


                Close       High       Open        Low     Volume    return  log_return  daily_range  close_open_diff       MA_5      EMA_5     std_5      MA_10     EMA_10    std_10      MA_20     EMA_20    std_20      MA_50     EMA_50    std_50     vol_ma_5    vol_ma_10    vol_ma_20  vol_change       VWAP     RSI_14      MACD  MACD_signal    BB_high     BB_low  BB_width    ATR_14         OBV     MFI_14  Close_lag_1  Volume_lag_1  return_lag_1  Close_lag_2  Volume_lag_2  return_lag_2  Close_lag_3  Volume_lag_3  return_lag_3  Close_lag_4  Volume_lag_4  return_lag_4  Close_lag_5  Volume_lag_5  return_lag_5  Close_lag_6  Volume_lag_6  return_lag_6  Close_lag_7  Volume_lag_7  return_lag_7  Close_lag_8  Volume_lag_8  return_lag_8  Close_lag_9  Volume_lag_9  return_lag_9  Close_lag_10  Volume_lag_10  return_lag_10  rolling_max_5  rolling_min_5  rolling_corr_5  rolling_max_10  rolling_min_10  rolling_corr_10  rolling_max_20  rolling_min_20  rolling_corr_20  day_of_week  month     ROC_5    ROC

37