In [None]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor, VotingRegressor
from xgboost import XGBRegressor
# from sklearn.ensemble import VotingRegressor
# from custom_model import RandomForestRegressor, XGBoostRegressor
import yfinance as yf
import matplotlib.pyplot as plt

In [None]:
def add_lag(dataframe, days):
    """Return a copy of ``dataframe`` with lagged price/volume columns added.

    For each of 'Open', 'High', 'Low' and 'Volume' a new column named
    '<lowercase name>_lag' is added, holding that column shifted down by
    ``days`` rows. The input frame is not modified.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Frame containing the columns 'Open', 'High', 'Low' and 'Volume'.
    days : int
        Number of rows to shift by (rows, not calendar days).

    Returns
    -------
    pandas.DataFrame
        New frame with the four ``*_lag`` columns appended.
    """
    lag_sources = ('Open', 'High', 'Low', 'Volume')
    lagged_columns = {
        f'{name.lower()}_lag': dataframe[name].shift(periods=days)
        for name in lag_sources
    }
    # assign() works on a copy, so the caller's frame stays untouched.
    return dataframe.assign(**lagged_columns)

In [None]:
def prepare_future_data(df, days_to_predict):
    """Build the lagged feature rows for the business days that follow ``df``.

    The history is extended with ``days_to_predict`` empty business-day rows
    and passed through ``add_lag`` using a lag of ``days_to_predict`` rows, so
    every future row receives the Open/High/Low/Volume values found
    ``days_to_predict`` rows earlier in the extended frame.

    Parameters
    ----------
    df : pandas.DataFrame
        Price history indexed by date. It must contain the columns that
        ``add_lag`` reads ('Open', 'High', 'Low', 'Volume') and have unique
        index labels (required by ``reindex``).
    days_to_predict : int
        Number of future business days to generate. The same value is used
        as the lag, so it should match the lag the model was trained with.

    Returns
    -------
    tuple
        ``(features, future_dates)`` where ``features`` is a DataFrame with
        the columns 'open_lag', 'high_lag', 'low_lag', 'volume_lag' indexed
        by ``future_dates``, and ``future_dates`` is the generated
        business-day index.
    """
    last_date = pd.to_datetime(df.index.max())

    # freq='B' skips weekends, so starting one calendar day after the last
    # observation yields the next `days_to_predict` business days.
    future_dates = pd.date_range(start=last_date + pd.Timedelta(days=1),
                                 periods=days_to_predict,
                                 freq='B')

    # Extend the history with NaN rows for the future dates. reindex keeps the
    # columns numeric, whereas concatenating an all-NaN object-dtype
    # placeholder frame relies on dtype handling that pandas has deprecated.
    combined_df = df.reindex(df.index.append(future_dates))

    # Add lag features; the future rows pick up values from the known history.
    combined_df = add_lag(combined_df, days=days_to_predict)

    # Keep only the lag features on the future rows for prediction.
    features_for_prediction = ['open_lag', 'high_lag', 'low_lag', 'volume_lag']
    future_feature_df = combined_df.loc[future_dates, features_for_prediction]

    return future_feature_df, future_dates

In [None]:
# Download five years of BMRI.JK history and drop the corporate-action
# columns that are not used as features or target.
ticker_market = yf.Ticker('BMRI.JK')
df = ticker_market.history(period='5y').drop(columns=['Dividends', 'Stock Splits'])
df.index = pd.to_datetime(df.index, format='%Y-%m-%d')

In [None]:
# Display the price history after dropping the dividend and split columns.
df

In [None]:
# Lag every predictor by 30 rows and keep only the rows where all values
# exist. dropna() returns a new frame, so its result has to be assigned;
# otherwise the leading rows with NaN lag features stay in the training data.
df_with_lags = add_lag(df, days=30).dropna()
df_with_lags

In [None]:
# Column names of the lagged predictors and of the value to forecast.
features = ['open_lag', 'high_lag', 'low_lag', 'volume_lag']
target = ['Close']

# Plain NumPy arrays for the estimators: X is 2-D (rows x features),
# y is flattened to 1-D.
X = df_with_lags.loc[:, features].to_numpy()
y = df_with_lags.loc[:, target].to_numpy().ravel()

In [None]:
# Fixed seed so that re-running the notebook reproduces the same ensemble;
# both estimators are stochastic (bootstrap / row subsampling).
RANDOM_STATE = 42

rf = RandomForestRegressor(n_estimators=250,
                           max_depth=20,
                           max_features=4,
                           min_samples_leaf=2,
                           min_samples_split=2,
                           random_state=RANDOM_STATE)

# learning_rate is the scikit-learn wrapper's named parameter for what the
# native XGBoost API calls eta; using it avoids routing the value through
# **kwargs before VotingRegressor clones the estimator.
xgb = XGBRegressor(n_estimators=250,
                   learning_rate=0.01,
                   max_depth=12,
                   subsample=0.3,
                   random_state=RANDOM_STATE)

# weights=None averages the two regressors' predictions with equal weight.
model = VotingRegressor(estimators=[
            ('rf', rf),
            ('xgb', xgb)
        ], weights=None)

In [None]:
# Train the voting ensemble on the lagged features (X) against the close price (y).
model.fit(X, y)

In [None]:
# Most recent date in the downloaded history.
# NOTE(review): not referenced by any other visible cell — confirm whether it is still needed.
last_dates = df.index.max()

In [None]:
# Forecast horizon in business days. prepare_future_data also uses this value
# as the lag passed to add_lag, so it has to equal the lag the model was
# trained with (add_lag(df, days=30)).
days_to_predict = 30
future_features, future_dates = prepare_future_data(df, days_to_predict)

In [None]:
prediction_features = ['open_lag', 'high_lag', 'low_lag', 'volume_lag']

# Rebind to a converted frame instead of assigning into a frame that was
# derived from a slice, which can trigger pandas' chained-assignment warning.
future_features = future_features.astype(
    {column: 'float64' for column in prediction_features}
)

# The model was fitted on a bare ndarray, so predict on an ndarray as well;
# passing a DataFrame makes scikit-learn warn about unexpected feature names.
y_pred_future = model.predict(future_features[prediction_features].to_numpy())

In [None]:
# Inspect the predicted close prices for the future dates.
y_pred_future

In [None]:
# Forecast as a one-column frame indexed by the future business days.
future_predictions = pd.DataFrame({'Close': y_pred_future}, index=future_dates)

# Observed close prices, followed by the forecast, in a single frame.
historical_data = df['Close'].to_frame()
combined_df = pd.concat([historical_data, future_predictions])

# Axis data for plotting.
combined_dates = combined_df.index
combined_close = combined_df['Close'].to_numpy()

In [None]:
# # rmse = root_mean_squared_error(y_test, y_pred)
# # r2 = r2_score(y_test, y_pred)
# rmse = root_mean_squared_error(y, y_pred)
# r2 = r2_score(y, y_pred)

# print(f'Nilai RMSE: {rmse}')
# print(f'Nilai R2: {r2}')

In [None]:
# Plot the observed close prices followed by the forecast as one series,
# labelled so the figure can be read on its own.
fig, ax = plt.subplots(figsize=(15, 12))
ax.plot(combined_dates, combined_close)
ax.set_title('Close price: historical data followed by model forecast')
ax.set_xlabel('Date')
ax.set_ylabel('Close')
plt.show()

In [None]:
# ticker_market = yf.Ticker('BBCA.JK')

# df = ticker_market.history(period='5y')
# df.index = pd.to_datetime(df.index, format='%Y-%m-%d')

# dates = df.index.strftime('%Y-%m-%d').tolist()

# close_actual = df['Close'].values.tolist()

In [None]:
# df = df.drop(['Dividends','Stock Splits'], axis=1)
# df

In [None]:
# def add_lag(dataframe, n_past):
#     # period = round((days * 5)/7)
#     # dataframe['lag'] = dataframe['Close'].shift(periods=period, freq='B')
#     # target_map = dataframe['Close'].to_dict()
#     # dataframe['lag'] = (dataframe.index - pd.Timedelta('364 days')).map(target_map)
    
#     df_copy = dataframe.copy()
#     # lag_offset = pd.offsets.BDay(days)
#     # lagged_index = df_copy.index - lag_offset

#     # for feature in ['Open', 'High', 'Low', 'Volume']:
#     #     target_map = dict(zip(df_copy.index, df_copy[feature]))
#     #     df_copy[f'{feature.lower()}_lag'] = lagged_index.map(target_map)
#     df_copy = df_copy.dropna()
    
#     price_columns = ['Open', 'High', 'Low', 'Close', 'Volume']
#     for column in price_columns:
#         for lag in range(1, n_past + 1):
#             df_copy[f'{column}_lag{lag}'] = df_copy[column].shift(periods=lag, freq='B')
    
#     df_copy = df_copy.dropna()
    
#     return df_copy

In [None]:
# df_featured = add_lag(df,7)
# df_featured
# features = df_featured.drop(columns=['Close'], axis=1)
# target = df_featured[['Close']]

# X = features
# y = target.values.ravel()

In [None]:
# rf = RandomForestRegressor(n_estimators=100,
#                                 max_depth=20,
#                                 max_features=4,
#                                 min_samples_leaf=2,
#                                 min_samples_split=2)
# xgb = XGBRegressor(n_estimators=100,
#                 eta=0.2,
#                 max_depth=6,
#                 subsample=0.3)
        
# model = VotingRegressor(estimators=[
#             ('rf', rf),
#             ('xgb', xgb)
#         ], weights=None)

In [None]:
# NOTE(review): second fit of the same `model` on the same X, y as the earlier
# training cell. The model-definition cell preceding this one is commented
# out, so this retrains the ensemble after the forecast has already been
# produced — confirm whether this cell is still needed.
model.fit(X,y)

In [None]:
# n_past = 30
# days = 30

# last_data = df.tail(n_past + days).copy()
# predictions = []

# last_date = pd.to_datetime(df_featured.index[-1])
# dates_future = pd.date_range(start=last_date + pd.Timedelta(days=1), periods=days, freq='B')

# for _ in range(days):
#     features_df = add_lag(last_data, n_past)
#     last_features = features_df.iloc[[-1]]
    
    
#     # Make prediction
#     pred = model.predict(X)
#     predictions.append(pred)
    
#     # Update the data for the next prediction
#     new_row = last_data.iloc[-1].copy()
#     new_row['Close'] = pred
#     last_data = pd.concat([last_data, pd.DataFrame([new_row])])
    
# predictions = np.array(predictions)