In [24]:
import yfinance as yf
import pandas as pd
import numpy as np
import ta  # Technical Analysis library
import lightgbm as lgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
import joblib 
import gc
from lightgbm import early_stopping, log_evaluation


# Price prediction using LightGBM and LSTM.

# New implementation: LightGBM


# Console display settings for pandas: no cap on the number of columns or
# rows shown, and a total display width of 2000 characters.
for option_name, option_value in (
    ('display.max_columns', None),
    ('display.max_rows', None),
    ('display.width', 2000),
):
    pd.set_option(option_name, option_value)

# Download daily price history for a single ticker (example: Apple) and
# flatten it into a frame with plain column names:
# Close, High, Open, Low, Volume, Date.
ticker = "AAPL"
df = yf.download(ticker, start="2024-01-01", end="2024-12-10")
if df.empty:
    # An empty result would otherwise only surface further down as a less
    # helpful error, so stop here with an explicit message.
    raise ValueError(f"No price data downloaded for {ticker!r}")
df = df.reset_index()

# The downloaded frame is addressed as df[field][ticker]. Select by the
# `ticker` variable (not a hard-coded symbol) so that changing the ticker
# above is the only edit required. The field order is kept unchanged because
# it fixes the column order of the frame used from here on.
new_df = pd.DataFrame()
for field in ('Close', 'High', 'Open', 'Low', 'Volume'):
    new_df[field] = df[field][ticker]
new_df['Date'] = pd.to_datetime(df['Date'])
df = new_df
# -------------------------
# 2. Basic Price Features
# -------------------------
close_px = df['Close']

# One-day simple and log returns, plus intraday range and open-to-close move.
df['return'] = close_px.pct_change()
df['log_return'] = np.log(close_px / close_px.shift(1))
df['daily_range'] = df['High'].sub(df['Low'])
df['close_open_diff'] = close_px.sub(df['Open'])

# Rolling statistics of Close: simple mean, exponential mean and standard
# deviation for each look-back length.
for span in (5, 10, 20, 50):
    close_window = close_px.rolling(span)
    df[f'MA_{span}'] = close_window.mean()
    df[f'EMA_{span}'] = close_px.ewm(span=span, adjust=False).mean()
    df[f'std_{span}'] = close_window.std()


# -------------------------
# 3. Volume-Based Features
# -------------------------
traded_volume = df['Volume']

# Rolling mean of volume for each look-back length.
for span in (5, 10, 20):
    df[f'vol_ma_{span}'] = traded_volume.rolling(span).mean()

# Day-over-day relative change in volume.
df['vol_change'] = traded_volume.pct_change()

# Volume-weighted average of Close, accumulated from the first row onward.
close_times_volume = df['Close'] * traded_volume
df['VWAP'] = close_times_volume.cumsum() / traded_volume.cumsum()


# -------------------------
# 4. Technical Indicators
# -------------------------
close_px = df['Close']
high_px = df['High']
low_px = df['Low']
traded_volume = df['Volume']

# RSI (14 periods)
df['RSI_14'] = ta.momentum.RSIIndicator(close_px, window=14).rsi()

# MACD line and its signal line (library default periods)
macd = ta.trend.MACD(close_px)
df['MACD'] = macd.macd()
df['MACD_signal'] = macd.macd_signal()

# Bollinger Bands (20 periods, 2 standard deviations); the width is the
# distance between the upper and lower band.
bb = ta.volatility.BollingerBands(close_px, window=20, window_dev=2)
df['BB_high'] = bb.bollinger_hband()
df['BB_low'] = bb.bollinger_lband()
df['BB_width'] = df['BB_high'] - df['BB_low']

# ATR (14 periods); datasets shorter than the window get an all-NaN column
# instead of calling the indicator.
if len(df) < 14:
    df['ATR_14'] = np.nan
else:
    atr_indicator = ta.volatility.AverageTrueRange(high_px, low_px, close_px, window=14)
    df['ATR_14'] = atr_indicator.average_true_range()

# OBV
obv_indicator = ta.volume.OnBalanceVolumeIndicator(close_px, traded_volume)
df['OBV'] = obv_indicator.on_balance_volume()

# MFI (14 periods)
mfi_indicator = ta.volume.MFIIndicator(high_px, low_px, close_px, traded_volume, window=14)
df['MFI_14'] = mfi_indicator.money_flow_index()


# -------------------------
# 5. Lag Features
# -------------------------
# For each lag of 1..10 rows, add the lagged Close, Volume and return
# (in that order, so the resulting column order stays the same).
for lag in range(1, 11):
    for source_col in ('Close', 'Volume', 'return'):
        df[f'{source_col}_lag_{lag}'] = df[source_col].shift(lag)

# Rolling max/min of Close and rolling correlation of Close with Volume.
for span in (5, 10, 20):
    close_window = df['Close'].rolling(span)
    df[f'rolling_max_{span}'] = close_window.max()
    df[f'rolling_min_{span}'] = close_window.min()
    df[f'rolling_corr_{span}'] = close_window.corr(df['Volume'])


# -------------------------
# 6. Date/Time Features
# -------------------------
# Calendar features taken from the Date column via the datetime accessor.
date_parts = df['Date'].dt
df['day_of_week'] = date_parts.dayofweek
df['month'] = date_parts.month

# -------------------------
# 7. Advanced Features
# -------------------------
close_px = df['Close']
open_px = df['Open']

# Rate of Change: relative change of Close over each horizon.
for horizon in (5, 10, 20):
    df[f'ROC_{horizon}'] = close_px.pct_change(periods=horizon)

# Z-score of Close against its own rolling mean and standard deviation.
for span in (5, 10, 20):
    close_window = close_px.rolling(span)
    df[f'zscore_{span}'] = (close_px - close_window.mean()) / close_window.std()

# Candlestick pattern example: Bullish Hammer (simplified) — an up day whose
# high-low range is more than twice the open-to-close body.
candle_range = df['High'] - df['Low']
candle_body = close_px - open_px
hammer_mask = (close_px > open_px) & (candle_range > 2 * candle_body)
df['bullish_hammer'] = np.where(hammer_mask, 1, 0)


# -------------------------
# 8. Target Variable
# -------------------------
# Regression target: the following row's Close.
next_close = df['Close'].shift(-1)
df['target_close_next'] = next_close

# Classification target: 1 if the next Close is higher than today's, else 0.
df['target_up'] = next_close.gt(df['Close']).astype(int)

# Remove every row that still holds a NaN (rolling/lag warm-up rows and the
# final row, which has no next Close), then index the frame by Date.
df.dropna(inplace=True)
df = df.set_index('Date')


# Feature matrix: every column of df except the two target columns.
drop_cols = ['target_close_next', 'target_up']
X = df.drop(columns=drop_cols)

# Load the saved model.
# NOTE(review): joblib.load unpickles the file's contents — confirm the
# model file comes from a trusted source.
lgb_model = joblib.load("lightgbm_stock_model.pkl")
print("✅ Model loaded!")

# Predict the next-day Close for every row and show it next to the actuals.
y_pred = lgb_model.predict(X)
y_true = df['target_close_next'].to_numpy()
print("Predictions:", len(y_pred))
print(y_true)
print(y_pred)

# Regression metrics of the predictions against the actual next-day Close.
rmse = np.sqrt(mean_squared_error(y_true, y_pred))
r2 = r2_score(y_true, y_pred)
print(f'RMSE: {rmse:.4f}, R2: {r2:.4f}')




  df = yf.download(ticker, start="2024-01-01", end="2024-12-10")
[*********************100%***********************]  1 of 1 completed

✅ Model loaded!
Predictions: 187
[171.76652527 171.38920593 172.48138428 174.82458496 177.39608765
 170.14813232 171.051651   169.63186646 168.49998474 172.07429504
 170.25735474 168.81770325 167.63616943 168.44041443 167.61633301
 168.37091064 167.2489624  168.46025085 166.58374023 173.79196167
 175.29121399 171.45870972 168.17234802 166.80218506 165.84898376
 163.82357788 164.65756226 165.71002197 167.81491089 168.67871094
 168.09291077 172.26295471 169.11553955 168.09291077 171.79631042
 182.07252502 180.41442871 181.09950256 181.43707275 183.25404358
 181.99136353 185.20271301 186.34603882 188.62280273 188.74211121
 188.77192688 189.93515015 191.2375946  189.79597473 185.79922485
 188.88128662 188.89123535 189.1894989  190.18371582 191.13815308
 192.90788269 193.22602844 194.73721313 193.35525513 195.75132751
 192.00312805 205.95198059 211.83776855 213.00099182 211.26112366
 215.41694641 213.0506897  208.46736145 206.29005432 206.9362793
 207.86090088 212.01670837 212.86180115 209.


