In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the read-only Kaggle input tree and print the full path of every file found.
input_file_paths = (
    os.path.join(folder, file_name)
    for folder, _, file_names in os.walk('/kaggle/input')
    for file_name in file_names
)
for file_path in input_file_paths:
    print(file_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

**SUMMARY OF THE PROBLEM STATEMENT**

The JPX Tokyo Stock Exchange Prediction Hackathon is a competitive event where data scientists and financial experts come together to create predictive models for stock performance on the Tokyo Stock Exchange (TSE). Participants use historical stock data and other relevant information to develop algorithms and machine learning models that can forecast stock price movements. This hackathon fosters innovation, collaboration, and the development of valuable tools for investors and traders in the Japanese stock market. It offers participants the chance to win prizes and gain recognition for their predictive models and strategies.


**Exploratory Data Analysis**

Exploratory Data Analysis (EDA) for the JPX Tokyo Stock Exchange prediction task involves the systematic examination and visualization of historical stock market data from the Tokyo Stock Exchange (TSE), which is operated by the Japan Exchange Group (JPX). EDA techniques are employed to gain insights into patterns, trends, and potential predictors that can inform the development of predictive models for stock price movements. By examining variables like trading volume, price trends, volatility, and macroeconomic indicators, EDA aims to identify key factors influencing stock market behavior on the TSE and lay the groundwork for more advanced predictive modeling and trading strategies. This process helps analysts and investors better understand the dynamics of the Tokyo Stock Exchange and make informed decisions based on data-driven insights.

In [None]:
import warnings, gc
import numpy as np 
import pandas as pd
import matplotlib.colors
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from plotly.offline import init_notebook_mode
from datetime import datetime, timedelta
from sklearn.model_selection import TimeSeriesSplit
from sklearn.metrics import mean_squared_error,mean_absolute_error
from lightgbm import LGBMRegressor
from decimal import ROUND_HALF_UP, Decimal
# Suppress every warning category for the rest of the session (this also hides
# deprecation warnings raised by the libraries imported in this cell).
warnings.filterwarnings("ignore")
import plotly.figure_factory as ff
import tqdm
import random
import matplotlib.pyplot as plt
import matplotlib.tri as tri
import seaborn as sns
from scipy.optimize import minimize, Bounds, LinearConstraint, linprog

# Plotly setup: notebook rendering, a shared layout template and the default colour cycle.
init_notebook_mode(connected=True)
base_font = dict(family="Franklin Gothic", size=12)
temp = dict(layout=go.Layout(font=base_font, width=800))
colors = px.colors.qualitative.Plotly

# Competition inputs: daily price rows (Date parsed as datetime) and the security master list.
train = pd.read_csv(
    "../input/jpx-tokyo-stock-exchange-prediction/train_files/stock_prices.csv",
    parse_dates=['Date'],
)
stock_list = pd.read_csv("../input/jpx-tokyo-stock-exchange-prediction/stock_list.csv")

# Report the covered date range and show summary statistics with two decimals.
first_day, last_day = train.Date.min(), train.Date.max()
print("The training data begins on {} and ends on {}.\n".format(first_day, last_day))
display(train.describe().style.format('{:,.2f}'))

In [None]:
# Market-wide daily aggregates: mean target (scaled to percent), mean close and mean volume.
train_date = train.Date.unique()
per_day = train.groupby('Date')
returns = per_day['Target'].mean().mul(100).rename('Average Return')
close_avg = per_day['Close'].mean().rename('Closing Price')
vol_avg = per_day['Volume'].mean().rename('Volume')

# One stacked panel per aggregate, all sharing the date axis.
fig = make_subplots(rows=3, cols=1,
                    shared_xaxes=True)
for panel, daily_series in enumerate((returns, close_avg, vol_avg), start=1):
    line_trace = go.Scatter(x=train_date, y=daily_series, mode='lines',
                            name=daily_series.name, marker_color=colors[panel - 1])
    fig.add_trace(line_trace, row=panel, col=1)

# Quick-zoom buttons are attached to the top panel only.
zoom_buttons = [
    dict(count=6, label="6m", step="month", stepmode="backward"),
    dict(count=1, label="1y", step="year", stepmode="backward"),
    dict(count=2, label="2y", step="year", stepmode="backward"),
    dict(step="all"),
]
fig.update_xaxes(rangeslider_visible=False,
                 rangeselector=dict(buttons=zoom_buttons),
                 row=1, col=1)
fig.update_layout(template=temp,
                  title='JPX Market Average Stock Return, Closing Price, and Shares Traded',
                  hovermode='x unified', height=700,
                  yaxis1=dict(title='Stock Return', ticksuffix='%'),
                  yaxis2_title='Closing Price', yaxis3_title='Shares Traded',
                  showlegend=False)
fig.show()

In [None]:
# Tidy display text: strip trailing whitespace and keep only the first letter capitalised.
stock_list['SectorName'] = [i.rstrip().lower().capitalize() for i in stock_list['17SectorName']]
stock_list['Name'] = [i.rstrip().lower().capitalize() for i in stock_list['Name']]

# Attach company name and sector to every price row and derive the calendar year.
train_df = train.merge(stock_list[['SecuritiesCode', 'Name', 'SectorName']], on='SecuritiesCode', how='left')
train_df['Year'] = train_df['Date'].dt.year

# Mean target (in %) per sector, one Series per year, iterating the years in
# reverse order of first appearance in the data.
years = {}
for year in train_df.Year.unique()[::-1]:
    rows_in_year = train_df[train_df.Year == year]
    years[year] = (rows_in_year.groupby('SectorName')['Target']
                   .mean().mul(100).rename("Avg_return_{}".format(year)))
df = pd.concat((yearly.to_frame() for yearly in years.values()), axis=1)

# Order sectors by the most recent year present instead of a hard-coded year,
# so the cell keeps working when the data covers a different range.
latest_year = train_df.Year.max()
df = df.sort_values(by="Avg_return_{}".format(latest_year))

# One panel per year column (previously fixed at 5).
fig = make_subplots(rows=1, cols=len(df.columns), shared_yaxes=True)
for i, col in enumerate(df.columns):
    x = df[col]
    year_label = col[-4:]
    mask = x <= 0
    # Non-positive returns are drawn in red, everything else in green.
    for selector, bar_color in ((mask, 'red'), (~mask, 'green')):
        fig.add_trace(go.Bar(x=x[selector], y=df.index[selector], orientation='h',
                             text=x[selector], texttemplate='%{text:.2f}%', textposition='auto',
                             hovertemplate='Average Return in %{y} Stocks = %{x:.4f}%',
                             marker=dict(color=bar_color, opacity=0.7), name=year_label),
                      row=1, col=i+1)
    # Pad the axis slightly so the bar labels are not clipped.
    fig.update_xaxes(range=(x.min()-.15, x.max()+.15), title='{} Returns'.format(year_label),
                     showticklabels=False, row=1, col=i+1)
fig.update_layout(template=temp, title='Yearly Average Stock Returns by Sector',
                  hovermode='closest', margin=dict(l=250, r=50),
                  height=600, width=1000, showlegend=False)
fig.show()

In [None]:
# Restrict the analysis frame to rows dated after 2020-12-23 and report what is left.
is_recent = train_df.Date > '2020-12-23'
train_df = train_df[is_recent]
missing_targets = train_df['Target'].isna().sum()
print("New Train Shape {}.\nMissing values in Target = {}".format(train_df.shape, missing_targets))

In [None]:
# Histogram of the target scaled to percent, using 1-point-wide bins between -40 and 40.
x_hist = train_df['Target']
bar_style = dict(color=colors[0], opacity=0.7,
                 line=dict(width=1, color=colors[0]))
target_hist = go.Histogram(x=x_hist*100,
                           marker=bar_style,
                           xbins=dict(start=-40, end=40, size=1))

fig = go.Figure()
fig.add_trace(target_hist)
fig.update_layout(template=temp, title='Target Distribution',
                  xaxis=dict(title='Stock Return', ticksuffix='%'), height=450)
fig.show()

In [None]:
# 18 evenly spaced hues; one box per sector, taken in reverse order of the rows of `df`.
pal = ['hsl(' + str(hue) + ',50%' + ',50%)' for hue in np.linspace(0, 360, 18)]
fig = go.Figure()
sector_order = df.index[::-1]
for position, sector in enumerate(sector_order):
    sector_rows = train_df['SectorName'] == sector
    y_data = train_df.loc[sector_rows, 'Target']
    sector_box = go.Box(y=y_data*100, name=sector,
                        marker_color=pal[position], showlegend=False)
    fig.add_trace(sector_box)
fig.update_layout(template=temp, title='Target Distribution by Sector',
                  yaxis=dict(title='Stock Return', ticksuffix='%'),
                  margin=dict(b=150), height=750, width=900)
fig.show()

In [None]:
# Kept for any later cell that reads it; the traces below use each series' own Date index.
train_date = train_df.Date.unique()
# Dropdown entries: the whole market first, then every sector present in the data.
sectors = train_df.SectorName.unique().tolist()
sectors.insert(0, 'All')
ohlc_columns = ['Open', 'High', 'Low', 'Close']
buttons = []

fig = go.Figure()
# Iterate over the actual entries rather than a hard-coded range(18), so the
# cell still works when the number of sectors differs.
for i, sector in enumerate(sectors):
    # Entry 0 ('All') averages every row; the others average one sector only.
    rows = train_df if i == 0 else train_df[train_df.SectorName == sector]
    ohlc = rows.groupby('Date')[ohlc_columns].mean()

    # x comes from the aggregated index, so dates and prices stay aligned even
    # if a sector has no rows on some trading day.
    fig.add_trace(go.Candlestick(x=ohlc.index, open=ohlc['Open'], high=ohlc['High'],
                                 low=ohlc['Low'], close=ohlc['Close'], name=sector,
                                 visible=(i == 0)))

    # Each dropdown button shows exactly one trace and hides the rest.
    visibility = [False]*len(sectors)
    visibility[i] = True
    buttons.append(dict(label=sector,
                        method="update",
                        args=[{"visible": visibility}]))

fig.update_xaxes(rangeslider_visible=True,
                 rangeselector=dict(
                     buttons=list([
                         dict(count=3, label="3m", step="month", stepmode="backward"),
                         dict(count=6, label="6m", step="month", stepmode="backward"),
                         dict(step="all")]), xanchor='left', yanchor='bottom', y=1.16, x=.01))
fig.update_layout(template=temp, title='Stock Price Movements by Sector',
                  hovermode='x unified', showlegend=False, width=1000,
                  updatemenus=[dict(active=0, type="dropdown",
                                    buttons=buttons, xanchor='left',
                                    yanchor='bottom', y=1.01, x=.01)],
                  yaxis=dict(title='Stock Price'))
fig.show()

In [None]:
# Close prices of the five securities listed by code, pivoted to one column per company name.
selected_codes = [4169,7089,4582,2158,7036]
stocks = train_df[train_df.SecuritiesCode.isin(selected_codes)]
df_pivot = stocks.pivot_table(index='Date', columns='Name', values='Close').reset_index()
# One "coolwarm" colour per pivot row.
pal = ['rgb' + str(shade) for shade in sns.color_palette("coolwarm", len(df_pivot))]

# Skip the leading Date column; only the price columns go into the matrix.
price_columns = df_pivot.iloc[:, 1:]
fig = ff.create_scatterplotmatrix(price_columns, diag='histogram', name='')
marker_style = dict(color=pal, opacity=0.9, line_color='white', line_width=.5)
fig.update_traces(marker=marker_style)
fig.update_layout(template=temp, title='Scatterplots of Highest Performing Stocks',
                  height=1000, width=1000, showlegend=False)
fig.show()

In [None]:
# Per-security 2x2 correlation matrix of Target and Close; after unstacking, the
# second column is the (Target, Close) entry.
pairwise = train_df.groupby('SecuritiesCode')[['Target','Close']].corr()
corr = pairwise.unstack().iloc[:, 1]

# Ten largest correlations, labelled with the company name.
stocks = corr.nlargest(10).rename("Return").reset_index()
name_lookup = train_df[['Name','SecuritiesCode']]
stocks = stocks.merge(name_lookup, on='SecuritiesCode').drop_duplicates()

# Outline colours plus 70%-alpha fills derived from the same palette.
pal = sns.color_palette("magma_r", 14).as_hex()
rgb = ['rgba' + str(matplotlib.colors.to_rgba(hex_color, 0.7)) for hex_color in pal]

bar_marker = dict(color=rgb, line=dict(color=pal, width=1))
corr_bars = go.Bar(x=stocks.Name, y=stocks.Return, text=stocks.Return,
                   texttemplate='%{text:.2f}', name='', width=0.8,
                   textposition='outside', marker=bar_marker,
                   hovertemplate='Correlation of %{x} with target = %{y:.3f}')
fig = go.Figure()
fig.add_trace(corr_bars)
fig.update_layout(template=temp, title='Most Correlated Stocks with Target Variable',
                  yaxis=dict(title='Correlation', showticklabels=False),
                  xaxis=dict(title='Stock', tickangle=45), margin=dict(b=100),
                  width=800, height=500)
fig.show()

In [None]:
# Mean Close per date and sector. Date stays in the index (no reset_index) so
# corr() only sees the numeric sector columns; recent pandas versions no longer
# drop a datetime column silently and fail on it instead.
df_pivot = train_df.pivot_table(index='Date', columns='SectorName', values='Close')
corr = df_pivot.corr().round(2)

# Strictly-lower-triangle rows of the matrix: row r keeps its first r entries.
c = [corr.iloc[r, :r].tolist() for r in range(1, len(corr))]

# Rows are reversed here and the y axis is reversed again in the layout below.
cor = c[::-1]
x = corr.index.tolist()[:-1]
y = corr.columns.tolist()[1:][::-1]
fig = ff.create_annotated_heatmap(z=cor, x=x, y=y,
                                  hovertemplate='Correlation between %{x} and %{y} stocks = %{z}',
                                  colorscale='viridis', name='')
fig.update_layout(template=temp, title='Stock Correlation between Sectors',
                  margin=dict(l=250, t=270), height=800, width=900,
                  yaxis=dict(showgrid=False, autorange='reversed'),
                  xaxis=dict(showgrid=False))
fig.show()

**Feature Engineering**

Feature engineering for JPX Tokyo Stock Exchange prediction involves the careful selection and transformation of relevant data attributes to enhance the performance of predictive models. This process aims to create informative, meaningful features from raw market data, such as stock prices, trading volumes, and economic indicators. Techniques may include creating lag variables, rolling averages, and technical indicators like moving averages or relative strength indexes. Feature engineering is crucial for capturing hidden patterns and relationships in the data, improving the predictive accuracy of models, and ultimately assisting in making more accurate forecasts of stock price movements in the JPX. It plays a pivotal role in developing sophisticated trading strategies and risk management in the realm of stock market prediction.

In [None]:
def adjust_price(price):
    """
    Add split-adjusted closing prices to a stock price table.

    The input frame is left untouched; all work happens on a copy.

    Args:
        price (pd.DataFrame)  : stock prices with at least the columns
            Date, SecuritiesCode, Close and AdjustmentFactor
    Returns:
        price DataFrame (pd.DataFrame): copy of the input sorted by
            SecuritiesCode and Date (fresh 0..n-1 index) with two extra
            columns, CumulativeAdjustmentFactor and AdjustedClose
    """
    # Copy first so the caller's frame is never modified, then assign the parsed
    # dates as a whole column so the dtype really becomes datetime64.
    price = price.copy()
    price["Date"] = pd.to_datetime(price["Date"], format="%Y-%m-%d")

    def _round_half_up(value):
        # Round to one decimal place, halves away from zero, via Decimal.
        return float(
            Decimal(str(value)).quantize(Decimal('0.1'), rounding=ROUND_HALF_UP)
        )

    # Newest row first within each security, so the running product of
    # AdjustmentFactor accumulates from the latest date backwards.
    price = price.sort_values(["SecuritiesCode", "Date"], ascending=[True, False])
    price["CumulativeAdjustmentFactor"] = (
        price.groupby("SecuritiesCode")["AdjustmentFactor"].cumprod()
    )
    price["AdjustedClose"] = (
        price["CumulativeAdjustmentFactor"] * price["Close"]
    ).map(_round_half_up)

    # Back to chronological order within each security.
    price = price.sort_values(["SecuritiesCode", "Date"])
    # A zero adjusted close is treated as missing and filled from the previous
    # row of the same security.
    price.loc[price["AdjustedClose"] == 0, "AdjustedClose"] = np.nan
    price["AdjustedClose"] = price.groupby("SecuritiesCode")["AdjustedClose"].ffill()
    return price.reset_index(drop=True)

# errors='ignore' keeps this cell re-runnable: on a second run the column is
# already gone and a plain drop would raise KeyError.
# NOTE(review): fillna(0) replaces every remaining NaN with 0, in all columns —
# confirm this is intended for the Target column as well.
train = train.drop(columns='ExpectedDividend', errors='ignore').fillna(0)
prices = adjust_price(train)

In [None]:
def create_features(df):
    """
    Add per-security return, moving-average and volatility features.

    For each period in 5, 10, 20, 30 and 50 rows, four columns are added, all
    computed from AdjustedClose separately for every SecuritiesCode:
    Return_{p}Day (percentage change over p rows), MovingAvg_{p}Day (simple
    rolling mean), ExpMovingAvg_{p}Day (exponential mean with span p,
    adjust=False) and Volatility_{p}Day (rolling standard deviation of the
    row-to-row log returns).

    Every feature is aligned to its row through the index, so the result does
    not depend on the rows being grouped by SecuritiesCode beforehand. Rows are
    expected to be in chronological order within each security.

    Args:
        df (pd.DataFrame): prices with SecuritiesCode and AdjustedClose columns
    Returns:
        pd.DataFrame: copy of df with the feature columns appended
    """
    df = df.copy()
    col = 'AdjustedClose'
    periods = [5, 10, 20, 30, 50]

    codes = df["SecuritiesCode"]
    close_by_code = df[col].groupby(codes)
    # Row-to-row log return inside each security; the first row of every
    # security is NaN, so no return ever spans two different securities.
    log_return_by_code = np.log(df[col]).groupby(codes).diff().groupby(codes)

    for period in periods:
        df["Return_{}Day".format(period)] = close_by_code.pct_change(period)
        # transform() hands back values on the original index, unlike the
        # group-ordered output of groupby().rolling() that was previously
        # written back by position.
        df["MovingAvg_{}Day".format(period)] = close_by_code.transform(
            lambda s: s.rolling(window=period).mean())
        df["ExpMovingAvg_{}Day".format(period)] = close_by_code.transform(
            lambda s: s.ewm(span=period, adjust=False).mean())
        df["Volatility_{}Day".format(period)] = log_return_by_code.transform(
            lambda s: s.rolling(period).std())
    return df

# Build the feature table, then discard the bookkeeping columns that are not model inputs.
unused_columns = ['RowId','SupervisionFlag','AdjustmentFactor','CumulativeAdjustmentFactor','Close']
price_features = create_features(df=prices).drop(unused_columns, axis=1)

In [None]:
# Attach company name and sector to the engineered features and index the frame by Date.
price_names=price_features.merge(stock_list[['SecuritiesCode','Name','SectorName']], on='SecuritiesCode').set_index('Date')
# Keep rows dated 2020-12-29 or later, then replace every remaining NaN with 0.
price_names=price_names[price_names.index>='2020-12-29']
price_names.fillna(0, inplace=True)

# Column-name prefixes of the engineered features, paired with the labels used in trace names.
features=['MovingAvg','ExpMovingAvg','Return', 'Volatility']
names=['Average', 'Exp. Moving Average', 'Period', 'Volatility']
# One dropdown button per sector is collected here.
buttons=[]

# 2x2 grid: moving averages on the top row, return and volatility on the bottom row.
fig = make_subplots(rows=2, cols=2, 
                    shared_xaxes=True, 
                    vertical_spacing=0.1,
                    subplot_titles=('Adjusted Close Moving Average',
                                    'Exponential Moving Average',
                                    'Stock Return', 'Stock Volatility'))

for i, sector in enumerate(price_names.SectorName.unique()):
    
    sector_df=price_names[price_names.SectorName==sector]
    # Top-row defaults; the leading 0 is a placeholder slot used for the raw AdjustedClose line.
    periods=[0,10,30,50]
    # NOTE: this rebinds the notebook-level `colors` name set in the setup cell.
    colors=px.colors.qualitative.Vivid
    dash=['solid','dash', 'longdash', 'dashdot', 'longdashdot']
    row,col=1,1
    
    for j, (feature, name) in enumerate(zip(features, names)):
        # Features 2 and 3 (Return, Volatility) go on the bottom row, have no raw-price line
        # and use a different palette.
        if j>=2:
            row,periods=2,[10,30,50]
            colors=px.colors.qualitative.Bold[1:]
        # Even feature index -> left column, odd -> right column.
        if j%2==0:
            col=1
        else:
            col=2
        
        for k, period in enumerate(periods):
            if (k==0)&(j<2):
                # First top-row trace: the sector's daily mean AdjustedClose itself.
                plot_data=sector_df.groupby(sector_df.index)['AdjustedClose'].mean().rename('Adjusted Close')
            elif j>=2:
                # Bottom row: daily sector mean of the feature, scaled by 100 for the % axes.
                plot_data=sector_df.groupby(sector_df.index)['{}_{}Day'.format(feature,period)].mean().mul(100).rename('{}-day {}'.format(period,name))
            else:
                # Top row: daily sector mean of the moving-average feature, unscaled.
                plot_data=sector_df.groupby(sector_df.index)['{}_{}Day'.format(feature,period)].mean().rename('{}-day {}'.format(period,name))
            # Only the first sector's traces start visible; legend entries come from the
            # left-hand panels (j == 0 and j == 2) only.
            fig.add_trace(go.Scatter(x=plot_data.index, y=plot_data, mode='lines',
                                     name=plot_data.name, marker_color=colors[k+1],
                                     line=dict(width=2,dash=(dash[k] if j<2 else 'solid')), 
                                     showlegend=(True if (j==0) or (j==2) else False), legendgroup=row,
                                     visible=(False if i != 0 else True)), row=row, col=col)
            
    # 14 = traces added per sector above (4 + 4 on the top row, 3 + 3 on the bottom row);
    # the button shows this sector's block of 14 traces and hides all others.
    visibility=[False]*14*len(price_names.SectorName.unique())
    for l in range(i*14, i*14+14):
        visibility[l]=True
    button = dict(label = sector,
                  method = "update",
                  args=[{"visible": visibility}])
    buttons.append(button)

fig.update_layout(title='Stock Price Moving Average, Return,<br>and Volatility by Sector',
                  template=temp, yaxis3_ticksuffix='%', yaxis4_ticksuffix='%',
                  legend_title_text='Period', legend_tracegroupgap=250,
                  updatemenus=[dict(active=0, type="dropdown",
                                    buttons=buttons, xanchor='left',
                                    yanchor='bottom', y=1.105, x=.01)], 
                  hovermode='x unified', height=800,width=1200, margin=dict(t=150))
fig.show()

**STOCK PRICE PREDICTION**

Stock prediction for the JPX Tokyo Stock Exchange is a complex and data-driven process that involves using various techniques and models to forecast the future price movements of stocks traded on the exchange. The goal of stock prediction is to assist investors, traders, and financial analysts in making informed decisions and managing their portfolios more effectively.

Several methods can be employed for stock prediction on the JPX Tokyo Stock Exchange, including time series analysis, machine learning algorithms, and deep learning models. These approaches leverage historical stock price data, trading volumes, economic indicators, news sentiment analysis, and other relevant information to make predictions. Feature engineering and exploratory data analysis are crucial steps to extract valuable insights from the data.

The accuracy of stock prediction models varies, and it is important to understand that stock markets are influenced by a multitude of factors, making precise predictions challenging. Nevertheless, these models can provide valuable insights, helping stakeholders identify trends, risks, and opportunities in the market.

Investors and financial professionals often use stock predictions to inform their trading strategies, risk management decisions, and asset allocation. It is important to keep in mind that stock predictions are inherently uncertain and should be used in conjunction with thorough research and financial analysis to make well-informed investment choices.

In [None]:
def calc_spread_return_sharpe(df: pd.DataFrame, portfolio_size: int = 200, toprank_weight_ratio: float = 2) -> float:
    """
    Score ranked predictions by the Sharpe ratio of the daily spread return.

    Args:
        df (pd.DataFrame): predicted results with Date, Rank and Target columns
        portfolio_size (int): # of equities to buy/sell
        toprank_weight_ratio (float): the relative weight of the most highly ranked stock compared to the least.
    Returns:
        (float): mean of the daily spread returns divided by their standard deviation
    """
    def _daily_spread(day_df, size, ratio):
        """
        Weighted return of the `size` best-ranked rows minus that of the `size` worst-ranked rows.

        Args:
            day_df (pd.DataFrame): predicted results for one date
            size (int): # of equities to buy/sell
            ratio (float): weight of the first position relative to the last
        Returns:
            (float): spread return
        """
        ranks = day_df['Rank']
        # Ranks must start at 0 and end at (number of rows - 1).
        assert ranks.min() == 0
        assert ranks.max() == len(ranks) - 1

        # Linearly decaying weights, from `ratio` for the first position down to 1.
        tier_weights = np.linspace(start=ratio, stop=1, num=size)
        normaliser = tier_weights.mean()

        best_first = day_df.sort_values(by='Rank')['Target'].iloc[:size]
        worst_first = day_df.sort_values(by='Rank', ascending=False)['Target'].iloc[:size]

        long_leg = (best_first * tier_weights).sum() / normaliser
        short_leg = (worst_first * tier_weights).sum() / normaliser
        return long_leg - short_leg

    daily_spreads = df.groupby('Date').apply(_daily_spread, portfolio_size, toprank_weight_ratio)
    return daily_spreads.mean() / daily_spreads.std()

In [None]:
# Expanding-window cross-validation over rows sorted by Date (then SecuritiesCode):
# 10 splits, with 10000 rows left out between each training and validation block.
ts_fold = TimeSeriesSplit(n_splits=10, gap=10000)
# Rows containing any NaN feature are dropped before splitting.
prices=price_features.dropna().sort_values(['Date','SecuritiesCode'])
y=prices['Target'].to_numpy()
X=prices.drop(['Target'],axis=1)

# Per-fold feature importances (one column per fold) and per-fold Sharpe scores.
feat_importance=pd.DataFrame()
sharpe_ratio=[]
    
for fold, (train_idx, val_idx) in enumerate(ts_fold.split(X, y)):
    
    print("\n========================== Fold {} ==========================".format(fold+1))
    X_train, y_train = X.iloc[train_idx,:], y[train_idx]
    X_valid, y_val = X.iloc[val_idx,:], y[val_idx]
    
    print("Train Date range: {} to {}".format(X_train.Date.min(),X_train.Date.max()))
    print("Valid Date range: {} to {}".format(X_valid.Date.min(),X_valid.Date.max()))
    
    # Date and SecuritiesCode are identifiers, not model inputs; X_valid keeps them
    # because the per-date ranking below still needs Date.
    X_train.drop(['Date','SecuritiesCode'], axis=1, inplace=True)
    X_val=X_valid[X_valid.columns[~X_valid.columns.isin(['Date','SecuritiesCode'])]]
    # Drop the first and last date of the fold from the Sharpe evaluation
    # (presumably because a row-based split can cut those days in half — TODO confirm).
    val_dates=X_valid.Date.unique()[1:-1]
    print("\nTrain Shape: {} {}, Valid Shape: {} {}".format(X_train.shape, y_train.shape, X_val.shape, y_val.shape))
    
    # NOTE: `params` is rebound on every iteration and is read again by the final-fit cell
    # after the loop.
    params = {'n_estimators': 500,
              'num_leaves' : 100,
              'learning_rate': 0.1,
              'colsample_bytree': 0.9,
              'subsample': 0.8,
              'reg_alpha': 0.4,
              'metric': 'mae',
              'random_state': 21}
    
    # NOTE(review): the `verbose` argument of fit() is not accepted by newer LightGBM
    # releases — confirm the installed version, or move to a log_evaluation callback.
    gbm = LGBMRegressor(**params).fit(X_train, y_train, 
                                      eval_set=[(X_train, y_train), (X_val, y_val)],
                                      verbose=300, 
                                      eval_metric=['mae','mse'])
    # Error metrics are computed on the whole validation fold, including the two
    # edge dates that are excluded from the Sharpe score.
    y_pred = gbm.predict(X_val)
    rmse = np.sqrt(mean_squared_error(y_val, y_pred))
    mae = mean_absolute_error(y_val, y_pred)
    feat_importance["Importance_Fold"+str(fold)]=gbm.feature_importances_
    # Label the importance rows with the feature names (re-applied every fold).
    feat_importance.set_index(X_train.columns, inplace=True)
    
    # Rank the securities within each evaluation date: rank 0 = highest prediction.
    rank=[]
    X_val_df=X_valid[X_valid.Date.isin(val_dates)]
    for i in X_val_df.Date.unique():
        temp_df = X_val_df[X_val_df.Date == i].drop(['Date','SecuritiesCode'],axis=1)
        temp_df["pred"] = gbm.predict(temp_df)
        temp_df["Rank"] = (temp_df["pred"].rank(method="first", ascending=False)-1).astype(int)
        rank.append(temp_df["Rank"].values)

    # Flatten the per-date ranks and stitch them back to the features and targets by
    # position; this assumes X_val_df and the Target rows selected from `prices` are in
    # the same row order (both come from the Date-sorted frame).
    stock_rank=pd.Series([x for y in rank for x in y], name="Rank")
    df=pd.concat([X_val_df.reset_index(drop=True),stock_rank,
                  prices[prices.Date.isin(val_dates)]['Target'].reset_index(drop=True)], axis=1)
    sharpe=calc_spread_return_sharpe(df)
    sharpe_ratio.append(sharpe)
    print("Valid Sharpe: {}, RMSE: {}, MAE: {}".format(sharpe,rmse,mae))
    
    # Release the fold's arrays before the next iteration.
    del X_train, y_train,  X_val, y_val
    gc.collect()
    
print("\nAverage cross-validation Sharpe Ratio: {:.4f}, standard deviation = {:.2f}.".format(np.mean(sharpe_ratio),np.std(sharpe_ratio)))

In [None]:

# Average importance across folds. Any 'avg' column left from an earlier run of
# this cell is excluded first, so only the per-fold columns are averaged.
fold_importances = feat_importance.drop(columns='avg', errors='ignore')
feat_importance['avg'] = fold_importances.mean(axis=1)
feat_importance = feat_importance.sort_values(by='avg', ascending=True)
pal = sns.color_palette("plasma_r", 29).as_hex()[2:]
# Reversed palette, shared by the stems and the markers.
stem_colors = pal[::-1]

fig = go.Figure()
# Lollipop chart: one horizontal stem per feature, drawn at its row position.
# Iterating over the values avoids integer lookups on the label-indexed Series,
# which recent pandas versions deprecate.
for i, avg_importance in enumerate(feat_importance['avg']):
    fig.add_shape(dict(type="line", y0=i, y1=i, x0=0, x1=avg_importance,
                       line_color=stem_colors[i], opacity=0.7, line_width=4))
fig.add_trace(go.Scatter(x=feat_importance['avg'], y=feat_importance.index, mode='markers',
                         marker_color=stem_colors, marker_size=8,
                         hovertemplate='%{y} Importance = %{x:.0f}<extra></extra>'))
fig.update_layout(template=temp, title='Overall Feature Importance',
                  xaxis=dict(title='Average Importance', zeroline=False),
                  yaxis_showgrid=False, margin=dict(l=120, t=80),
                  height=700, width=800)
fig.show()

In [None]:
# Final model inputs: the three features with the highest average importance
# plus the raw Open/High/Low prices.
top_features = feat_importance.avg.nlargest(3).index.tolist()
# dict.fromkeys removes repeats while keeping order; without it, a top-3
# feature that is itself Open/High/Low would be selected twice and X_train
# would carry duplicate column names.
cols_fin = list(dict.fromkeys(top_features + ['Open', 'High', 'Low']))
X_train = prices[cols_fin]
y_train = prices['Target']
# `params` is the hyper-parameter dict last bound inside the cross-validation loop.
gbm = LGBMRegressor(**params).fit(X_train, y_train)