In [None]:
import yfinance as yf
import pandas as pd
import numpy as np
import ta  # Technical Analysis library
import lightgbm as lgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
import joblib 
import gc
from lightgbm import early_stopping, log_evaluation


# Price prediction using LightGBM and LSTM.

# New implementation: LightGBM


# Notebook display settings: print DataFrames in full instead of truncating.
for _option, _value in (
    ('display.max_columns', None),  # no limit on the number of columns shown
    ('display.max_rows', None),     # no limit on the number of rows shown
    ('display.width', 2000),        # total display width, in characters
):
    pd.set_option(_option, _value)

# Download daily price history for a single ticker (example: Apple).
ticker = "AAPL"
df = yf.download(ticker, start="2015-01-01", end="2023-12-31")
if df.empty:
    # Stop here with a clear message if nothing came back.
    raise RuntimeError(f"No price data downloaded for {ticker!r}")
df = df.reset_index()

# The downloaded columns are indexed by (field, ticker); flatten them to one
# plain column per field. Selecting by `ticker` (rather than a hard-coded
# symbol) keeps this working when the ticker above is changed.
# Column order is kept as Close, High, Open, Low, Volume, Date.
new_df = pd.DataFrame({
    field: df[field][ticker]
    for field in ('Close', 'High', 'Open', 'Low', 'Volume')
})
new_df['Date'] = pd.to_datetime(df['Date'])
df = new_df
# -------------------------
# 2. Basic Price Features
# -------------------------
df['return'] = df['Close'].pct_change()             # day-over-day fractional change
df['daily_range'] = df['High'] - df['Low']          # intraday high-low spread
df['close_open_diff'] = df['Close'] - df['Open']    # intraday net move


# Lag features (past only): Close / Volume / return from 1..29 rows earlier.
# They are collected in a dict and attached with one concat, because inserting
# this many columns one at a time fragments the DataFrame and makes pandas
# emit a PerformanceWarning. Column order is the same as per-column insertion.
lag_features = {}
for lag in range(1, 30):
    lag_features[f'Close_lag_{lag}'] = df['Close'].shift(lag)
    lag_features[f'Volume_lag_{lag}'] = df['Volume'].shift(lag)
    lag_features[f'return_lag_{lag}'] = df['return'].shift(lag)
df = pd.concat([df, pd.DataFrame(lag_features, index=df.index)], axis=1)


# Rolling features (past only): the close is shifted by one row first so the
# current row's own close never enters its rolling statistics.
prev_close = df['Close'].shift(1)
for window in [5, 10, 20]:
    df[f'MA_{window}'] = prev_close.rolling(window).mean()
    df[f'std_{window}'] = prev_close.rolling(window).std()


# RSI (past only): computed on the same one-row-shifted close.
df['RSI_14'] = ta.momentum.RSIIndicator(prev_close, window=14).rsi()


# -------------------------
# 6. Date/Time Features
# -------------------------
df['day_of_week'] = df['Date'].dt.dayofweek
df['month'] = df['Date'].dt.month


# Target: the next row's close. The last row has no next close, and the first
# rows have incomplete lag/rolling windows; dropna removes all of those rows.
df['target_close_next'] = df['Close'].shift(-1)
df.dropna(inplace=True)

df = df.set_index('Date')


print(df.head())


# Regression target: the next row's closing price.
y = df['target_close_next']

# Every column except the target is used as a model input.
drop_cols = ['target_close_next']
X = df[[column for column in df.columns if column not in drop_cols]]

# Chronological split: with shuffle=False the first 80% of rows become the
# training set and the last 20% the test set, so time order is preserved.
split_parts = train_test_split(X, y, test_size=0.2, shuffle=False)
X_train, X_test, y_train, y_test = split_parts


# Hold out the most recent 20% of the training period as a validation set for
# early stopping. Monitoring the test set would tune the number of boosting
# rounds on the very data used for the final score, which makes the reported
# RMSE / R2 optimistically biased.
X_fit, X_val, y_fit, y_val = train_test_split(
    X_train, y_train, test_size=0.2, shuffle=False  # keep time order
)

lgb_model = lgb.LGBMRegressor(
    n_estimators=1000,   # upper bound; early stopping normally ends sooner
    learning_rate=0.05,
    num_leaves=31,
    max_depth=-1,        # no explicit depth limit
    random_state=42
)

# Stop once validation RMSE has not improved for 50 rounds; log every 50 rounds.
lgb_model.fit(
    X_fit, y_fit,
    eval_set=[(X_val, y_val)],
    eval_metric='rmse',
    callbacks=[early_stopping(stopping_rounds=50), log_evaluation(50)],
)

# Final evaluation on the test period, which played no part in training or
# in choosing the stopping round.
y_pred = lgb_model.predict(X_test)
rmse = np.sqrt(mean_squared_error(y_test.values, y_pred))
r2 = r2_score(y_test.values, y_pred)
print(f'RMSE: {rmse:.4f}, R2: {r2:.4f}')
print(y_pred)
print(y_test)


# Persist the fitted model to disk.
model_path = 'lightgbm_stock_model.pkl'
joblib.dump(lgb_model, model_path)
print("Model saved as lightgbm_stock_model.pkl")

# Reload the saved model and run it on the test features.
loaded_model = joblib.load(model_path)
pred = loaded_model.predict(X_test)


# -----------------------------
# 16. Cleanup
# -----------------------------
# Drop the in-memory reference to the original model and run a collection
# pass; `loaded_model` remains available.
del lgb_model
gc.collect()



  df = yf.download(ticker, start="2015-01-01", end="2023-12-31")
[*********************100%***********************]  1 of 1 completed


                Close       High       Open        Low     Volume    return  daily_range  close_open_diff  Close_lag_1  Volume_lag_1  return_lag_1  Close_lag_2  Volume_lag_2  return_lag_2  Close_lag_3  Volume_lag_3  return_lag_3  Close_lag_4  Volume_lag_4  return_lag_4  Close_lag_5  Volume_lag_5  return_lag_5       MA_5     std_5      MA_10    std_10      MA_20    std_20     RSI_14  day_of_week  month  target_close_next
Date                                                                                                                                                                                                                                                                                                                                                                                                                                   
2015-02-02  26.324780  26.444609  26.196075  25.758919  250956400  0.012547     0.685691         0.128705    25.998569   334982000.0     -0.014635    26

181