In [None]:
import gc
import os
import warnings
from datetime import datetime, timedelta
from decimal import ROUND_HALF_UP, Decimal

import matplotlib.colors
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.figure_factory as ff
import plotly.graph_objects as go
import seaborn as sns
from lightgbm import LGBMRegressor, log_evaluation
from plotly.offline import init_notebook_mode
from plotly.subplots import make_subplots
from sklearn.metrics import mean_squared_error,mean_absolute_error
from sklearn.model_selection import TimeSeriesSplit

warnings.filterwarnings("ignore")
pd.set_option('display.max_columns', 500)

# Plotly renders inline; `temp` is the shared layout template and `colors` the default palette.
init_notebook_mode(connected=True)
temp = {"layout": go.Layout(font={"family": "Franklin Gothic", "size": 12}, width=800)}
colors = px.colors.qualitative.Plotly

# Competition input folders (Kaggle layout).
TRAIN_DIR = "../input/jpx-tokyo-stock-exchange-prediction/train_files"
SUP_DIR = "../input/jpx-tokyo-stock-exchange-prediction/supplemental_files"

# Untouched copy of the primary training prices, used for a first look at the data.
train = pd.read_csv(f"{TRAIN_DIR}/stock_prices.csv", parse_dates=['Date'])

first_day, last_day = train.Date.min(), train.Date.max()
print(f"The training data begins on {first_day} and ends on {last_day}.\n")
display(train.describe().style.format('{:,.2f}'))

In [None]:
%%time

# Load the training tables; every table that carries a `Date` column is parsed to datetime.
date_kwargs = {"parse_dates": ["Date"]}

df_prices = pd.read_csv(os.path.join(TRAIN_DIR, 'stock_prices.csv'), **date_kwargs)
df_prices_sec = pd.read_csv(os.path.join(TRAIN_DIR, 'secondary_stock_prices.csv'), **date_kwargs)
#df_fins = pd.read_csv(os.path.join(TRAIN_DIR, 'financials.csv'))
df_opts = pd.read_csv(os.path.join(TRAIN_DIR, 'options.csv'), **date_kwargs)
#df_trades = pd.read_csv(os.path.join(TRAIN_DIR, 'trades.csv'))

# The stock list lives one level above the train/supplemental folders.
stock_list_path = os.path.join("../input/jpx-tokyo-stock-exchange-prediction/stock_list.csv")
stock_list = pd.read_csv(stock_list_path)

Supplemental files

In [None]:
# Supplemental counterparts of the training tables, read with the same date parsing.
sup_date_kwargs = {"parse_dates": ["Date"]}

supplemental_prices = pd.read_csv(os.path.join(SUP_DIR, 'stock_prices.csv'), **sup_date_kwargs)
supplemental_prices_sec = pd.read_csv(os.path.join(SUP_DIR, 'secondary_stock_prices.csv'), **sup_date_kwargs)
supplemental_opts = pd.read_csv(os.path.join(SUP_DIR, 'options.csv'), **sup_date_kwargs)


In [None]:
# Append the supplemental rows to each training table.
# ignore_index=True gives the combined frames a fresh 0..n-1 index; without it both
# inputs keep their own 0..n-1 labels and the result carries duplicated index labels.
# NOTE: this cell reassigns its own inputs, so running it twice appends the
# supplemental rows twice — re-run the loading cells first.
df_prices = pd.concat([df_prices, supplemental_prices], ignore_index=True)
df_prices_sec = pd.concat([df_prices_sec, supplemental_prices_sec], ignore_index=True)
df_opts = pd.concat([df_opts, supplemental_opts], ignore_index=True)

In [None]:
def summarize(df, file_name, n_rows_to_show=5):
    """Print a short profile of a DataFrame and render its NaN ratios and head.

    Parameters:
        df: pd.DataFrame, the frame to describe
        file_name: str, label printed in the summary header
        n_rows_to_show: int, how many leading rows to render

    Returns:
        None. Shape and dtypes go to stdout; the NaN-ratio table and the
        leading rows are rendered through ``display``.
    """
    overview = "\n".join([
        f"=====Summary of {file_name}=====",
        f"Shape: {df.shape}",
        f"The column data types are as follows: \n{df.dtypes}\n",
    ])
    print(overview)

    # Percentage of missing values per column, worst column first,
    # laid out as a single-row table so wide frames stay readable.
    missing_pct = df.isna().sum().div(len(df)).mul(100).sort_values(ascending=False)
    print("NaN ratio:")
    display(missing_pct.to_frame(name='NaN Ratio').T)

    display(df.head(n_rows_to_show))

In [None]:
summarize(df_prices, "stock_prices.csv")

Add a new **PriceDifference** column (Open minus Close)

In [None]:
# Intraday move (Open minus Close), computed column-wise: a row-wise
# `apply(axis=1)` builds one Series per row and is far slower on this table.
df_prices['PriceDifference'] = df_prices['Open'] - df_prices['Close']

Look at the expected dividend and the securities that have dividends paid out

In [None]:
# Rows that carry an expected dividend, and how many distinct securities they cover.
df_ExDiv = df_prices[df_prices["ExpectedDividend"].notnull()]
print(df_ExDiv.shape)
print(df_ExDiv["SecuritiesCode"].nunique())

# Only the last expression of a cell is rendered automatically, so the first
# table has to be displayed explicitly or it is computed and thrown away.
display(df_ExDiv.sort_values(by=['ExpectedDividend']).head(20))

# Smallest strictly positive expected dividends.
df_prices[df_prices["ExpectedDividend"] > 0].sort_values(by=['ExpectedDividend']).head(5)

In [None]:
# Target and expected dividend for a single security.
# The figure size must be configured BEFORE the figure is created; setting it
# after plotting only affects later figures, which is why the plot came out tiny.
plt.rcParams['figure.figsize'] = [20, 20]

# Dates and values must come from the same security, otherwise the x and y
# series describe different stocks (and may not even have the same length).
stock_1301 = df_prices[df_prices['SecuritiesCode'] == 1301]
x = stock_1301["Date"]
y = stock_1301["Target"]
# Dividend is divided by 1000 — presumably to bring it onto the Target scale; confirm.
y2 = stock_1301["ExpectedDividend"] / 1000

plt.plot(x, y, label="Target", linestyle="-")
plt.bar(x, y2, alpha=1, width=10, label="Expected Dividend", color="Orange")
plt.legend()
plt.show()

In [None]:
# Interactive view of Target and ExpectedDividend with a dropdown that switches
# between the market-wide median ('All') and individual securities.
#
# Every dropdown entry owns two traces (Target line + dividend bars). All traces
# are added to the figure up front and each button toggles the `visible` flag of
# the full trace list, so the mask length must equal the number of traces.

# Number of individual securities offered next to 'All'; kept small because each
# entry adds two full-length traces to the figure.
N_SECURITIES_IN_MENU = 4
TRACES_PER_ENTRY = 2

securities = df_prices.SecuritiesCode.unique().tolist()
securities.insert(0, 'All')
menu_entries = securities[:N_SECURITIES_IN_MENU + 1]

fig = make_subplots(rows=1, cols=1,
                    shared_xaxes=True, shared_yaxes=True)
buttons = []

for i, entry in enumerate(menu_entries):
    if i == 0:
        # 'All': per-date median across every security.
        by_date = df_prices.groupby('Date')[['Target', 'ExpectedDividend']].median()
        dates = by_date.index
        target = by_date['Target']
        expected_div = by_date['ExpectedDividend']
    else:
        # Single security: use that security's own dates so x and y line up.
        stock = df_prices[df_prices['SecuritiesCode'] == entry]
        dates = stock['Date']
        target = stock['Target']
        expected_div = stock['ExpectedDividend']

    shown_initially = (i == 0)
    fig.add_trace(go.Scatter(x=dates, y=target, mode="lines",
                             line=dict(color='green'), name="Target",
                             opacity=0.4, visible=shown_initially),
                  row=1, col=1)
    fig.add_trace(go.Bar(x=dates, y=expected_div, name='Expected Dividend',
                         marker_color='red', opacity=1,
                         visible=shown_initially),
                  row=1, col=1)

    # One flag per trace in the figure; only this entry's two traces are shown.
    visibility = [False] * (TRACES_PER_ENTRY * len(menu_entries))
    first_trace = TRACES_PER_ENTRY * i
    visibility[first_trace:first_trace + TRACES_PER_ENTRY] = [True] * TRACES_PER_ENTRY
    buttons.append(dict(label=str(entry),
                        method="update",
                        args=[{"visible": visibility}]))

fig.update_xaxes(rangeslider_visible=True,
                 rangeselector=dict(
                     buttons=list([
                         dict(count=6, label="6m", step="month", stepmode="backward"),
                         dict(count=1, label="1y", step="year", stepmode="backward"),
                         dict(count=2, label="2y", step="year", stepmode="backward"),
                         dict(step="all")]),
                  xanchor='left',yanchor='bottom', y=1.19, x=.01))

fig.update_layout(template=temp, title='Target and Expected Dividend by Security',
                  hovermode='x unified', showlegend=True, width=1000,
                  updatemenus=[dict(active=0, type="dropdown",
                                    buttons=buttons, xanchor='left',
                                    yanchor='bottom', y=1, x=.01)],
                  yaxis=dict(title='Target/Expected Dividend'))

fig.show()

In [None]:
# Target (top) and expected dividend (bottom) for one security on a shared date axis.

# Dates and values are taken from the SAME security; mixing the dates of one
# code with the values of another misaligns (or truncates) the series.
stock_1301 = df_prices[df_prices['SecuritiesCode'] == 1301]
x = stock_1301["Date"]
y = stock_1301["Target"]
# Dividend is divided by 1000 — presumably a display scaling; confirm before reading values off the axis.
y2 = stock_1301["ExpectedDividend"] / 1000
#sectors=train_df.SectorName.unique().tolist()

# Market-wide daily aggregates. They are not drawn in this figure; kept so the
# names remain available to any later cell that relies on them.
train_date=train.Date.unique()
returns=train.groupby('Date')['Target'].mean().mul(100).rename('Average Return')
close_avg=train.groupby('Date')['Close'].mean().rename('Closing Price')
vol_avg=train.groupby('Date')['Volume'].mean().rename('Volume')

fig = make_subplots(rows=2, cols=1,
                    shared_xaxes=True)
fig.add_trace(go.Scatter(x=x, y=y, mode='lines',
                         name=y.name, marker_color=colors[0]), row=1, col=1)
# Dividends exist only on a few isolated dates; a 'lines' trace cannot connect
# isolated points surrounded by NaN and renders nothing, so bars are used.
fig.add_trace(go.Bar(x=x, y=y2,
                     name=y2.name, marker_color=colors[1]), row=2, col=1)
fig.update_xaxes(rangeslider_visible=False,
                 rangeselector=dict(
                     buttons=list([
                         dict(count=6, label="6m", step="month", stepmode="backward"),
                         dict(count=1, label="1y", step="year", stepmode="backward"),
                         dict(count=2, label="2y", step="year", stepmode="backward"),
                         dict(step="all")])),
                 row=1,col=1)
# Target is a fractional return, so it is formatted as a percentage (x100)
# rather than having a '%' sign appended to the raw fraction.
fig.update_layout(template=temp,title='Target and Expected Divided ploted for the security',
                  hovermode='x unified', height=700,
                  yaxis1=dict(title='Target', tickformat='.2%'),
                  yaxis2_title='Expected Dividend',
                  showlegend=False)
fig.show()

Compute the max stock ratio: the percentage of stocks that have a record on the maximum number of dates, out of all stocks.

In [None]:
# Number of dated rows available for each security, smallest first.
DatesPerStock = df_prices.groupby("SecuritiesCode")["Date"].count().sort_values()

# Securities whose history is as long as the longest one, as a count and a percentage.
StocksWithMax = DatesPerStock.eq(DatesPerStock.max()).sum()
MaxStockRatio = StocksWithMax / DatesPerStock.size * 100

stock_report = (
    f"The number of stocks with the maximum dates are {StocksWithMax}.\n"
    f"The Max Stock Ratio is {MaxStockRatio}"
)
print(stock_report)


Compute the max date ratio: the percentage of dates on which the maximum number of stocks is listed, out of all dates.

In [None]:
# Number of securities with a row on each date, smallest first.
StocksPerDate = df_prices.groupby("Date")["SecuritiesCode"].count().sort_values()

most_listed, least_listed = StocksPerDate.max(), StocksPerDate.min()
DatesWithAll = StocksPerDate.eq(most_listed).sum()
DatesWithMin = StocksPerDate.eq(least_listed).sum()
MaxDatesRatio = DatesWithAll / StocksPerDate.size * 100

date_report = (
    f"The max number of stocks in a day is {most_listed}.\n"
    f"The min number of stocks in a day is {least_listed}.\n"
    f"The number of dates with the maximum stocks listed are {DatesWithAll}.\n"
    f"The Max Date Ratio is {MaxDatesRatio}"
)
print(date_report)

Get the number of samples without a Close price

In [None]:
# Rows that have no closing price at all.
close_is_missing = df_prices['Close'].isna()
df_no_close_prices = df_prices[close_is_missing]
print(f"Number of samples without prices: {len(df_no_close_prices)}")

Drop all rows without a Close price or a Target. Close prices are missing on Oct 1st, when the Japan Stock Exchange system was down and no trading took place.

In [None]:
# Keep only rows that have both a closing price and a target.
df_train = df_prices.dropna(subset=['Close', 'Target'])
# Rows are removed here, not columns — report how many so the effect is visible.
print(f"Dropped {len(df_prices) - len(df_train)} rows with a missing Close or Target")

In [None]:
summarize(df_train, "Train Data")

TODO: add code to view the effect of the supervision flag

In [None]:
#View effect of supervision flag

# **Stock List**

In [None]:
summarize(stock_list,"stock List")

In [None]:
sorted(stock_list['33SectorName'].unique())

Add the `Section/Products`, `33SectorName`, and `IssuedShares` columns from the stock list to `df_train`

In [None]:
# stock_list['SectorName']=[i.rstrip().lower().capitalize() for i in stock_list['17SectorName']]
# stock_list['Name']=[i.rstrip().lower().capitalize() for i in stock_list['Name']]

# Static per-security attributes, attached to every price row via SecuritiesCode.
stock_info_cols = ['SecuritiesCode', 'Section/Products', '33SectorName', 'IssuedShares']
df_train = pd.merge(df_train, stock_list[stock_info_cols], on='SecuritiesCode', how='left')

In [None]:
summarize(df_train,"new df_train")

# Options Table

In [None]:
df_opts[df_opts["Date"]>"2020-01-08"].head(3)

In [None]:
summarize(df_opts,"options")

In [None]:
# Market-wide option aggregates per date, then time-series features on top of them.
df_opts["DailyVolumeAverage"] = df_opts.groupby('Date')['TradingVolume'].transform('mean')
df_opts["DailyTheoPriceAvg"] = df_opts.groupby('Date')['TheoreticalPrice'].transform('mean')

# One row per date. The frame is sorted chronologically because every rolling /
# pct_change / ewm feature below is only meaningful over consecutive dates, and
# sort + reset_index yields an independent frame, so the column assignments
# below no longer write into a slice of df_opts.
df_optsT = (df_opts
            .drop_duplicates(subset=["Date"])
            .sort_values("Date")
            .reset_index(drop=True))

# Windowed features are computed directly on the Series; `Series.transform(lambda ...)`
# first tries the lambda on every scalar and only falls back to the whole Series
# after that fails.
daily_volume = df_optsT['DailyVolumeAverage']
df_optsT["5RollingVolume"] = daily_volume.rolling(5).mean()
df_optsT["VolumeChange"] = daily_volume.pct_change()
df_optsT["5ExpRolVolume"] = daily_volume.ewm(span=5, adjust=False).mean()

daily_theo_price = df_optsT['DailyTheoPriceAvg']
df_optsT["5RollingTheoPrice"] = daily_theo_price.rolling(5).mean()
df_optsT["25RollingTheoPrice"] = daily_theo_price.rolling(25).mean()
df_optsT["TheoPriceChange"] = daily_theo_price.pct_change()
df_optsT["5ExpTheoPrice"] = daily_theo_price.ewm(span=5, adjust=False).mean()
df_optsT["25ExpTheoPrice"] = daily_theo_price.ewm(span=25, adjust=False).mean()

df_optsT

#df_opts.loc[:,"Volume Change1"] = df_opts.groupby("Date")["DailyVolumeAverage"].pct_change()
#df_opts.loc[:,"5MovingAvg"] = df_opts.groupby("Date")["DailyVolumeAverage"].rolling(window=5).mean().values
#df.loc[:,f"ExpMovingAvg_{period}Day"] = df.groupby("SecuritiesCode")[col].ewm(span=period,adjust=False).mean().values

# Rolling mean calculation does not seem right at all!!!!!!!!
# Confirm if rolling first then average and average the roll give same value??????


#df_opts["5RollingVolMean"]=df_opts.groupby('OptionsCode')['TradingVolume'].transform(lambda x: x.rolling(5).mean())
#df_opts["Volume Change1"]=df_opts['DailyVolumeAverage'].transform(lambda x: x.rolling(5).mean())
#df_opts["AllRollVolumeAverage"]=df_opts.groupby('Date')['5RollingVolMean'].transform('mean')


In [None]:
summarize(df_optsT, "df_optsT")

In [None]:
# Per-date option features joined onto every price row of the same date.
option_feature_cols = [
    'Date',
    '25ExpTheoPrice', '5ExpTheoPrice', 'TheoPriceChange',
    '25RollingTheoPrice', '5RollingTheoPrice',
    '5ExpRolVolume', 'VolumeChange', '5RollingVolume',
    'DailyTheoPriceAvg', 'DailyVolumeAverage',
]
df_train = pd.merge(df_train, df_optsT[option_feature_cols], on='Date', how='left')

# Secondary Stocks

In [None]:
summarize(df_prices_sec, "secondary_stock_prices.csv")

In [None]:
# Date coverage of the secondary stock prices. Both ends of the range are read
# from df_prices_sec; taking the maximum from df_prices would report the
# primary table's last date instead.
n_dates = df_prices_sec['Date'].nunique()
date_min, date_max = df_prices_sec['Date'].min(), df_prices_sec['Date'].max()
print(f"Number of unique dates: {n_dates} ({date_min} ~ {date_max})")

In [None]:
def adjust_price(price):
    """Add split-adjusted closing prices to a stock price table.

    Args:
        price (pd.DataFrame): stock prices with at least the columns
            Date, SecuritiesCode, Close and AdjustmentFactor.
    Returns:
        pd.DataFrame: a new frame sorted by SecuritiesCode and Date (index
        reset) with Date converted to datetime and two extra columns,
        CumulativeAdjustmentFactor and AdjustedClose. The input frame is
        left unmodified.
    """
    def _round_half_up(value):
        # Round to one decimal with ROUND_HALF_UP; going through str() avoids
        # binary-float artefacts (e.g. 10.25 rounds to 10.3).
        return float(
            Decimal(str(value)).quantize(Decimal('0.1'), rounding=ROUND_HALF_UP)
        )

    # Work on a copy so the caller's frame is never modified, and replace the
    # whole column so it really ends up with a datetime dtype.
    price = price.copy()
    price["Date"] = pd.to_datetime(price["Date"], format="%Y-%m-%d")

    # Newest row first within each security: the cumulative product of the
    # adjustment factors is accumulated backwards in time.
    price = price.sort_values(
        ["SecuritiesCode", "Date"], ascending=[True, False]
    ).reset_index(drop=True)
    price["CumulativeAdjustmentFactor"] = (
        price.groupby("SecuritiesCode")["AdjustmentFactor"].cumprod()
    )
    price["AdjustedClose"] = (
        price["CumulativeAdjustmentFactor"] * price["Close"]
    ).map(_round_half_up)

    # Back to chronological order for the forward fill.
    price = price.sort_values(["SecuritiesCode", "Date"]).reset_index(drop=True)
    # A zero adjusted close is treated as missing and filled from the previous
    # row of the same security.
    price.loc[price["AdjustedClose"] == 0, "AdjustedClose"] = np.nan
    price["AdjustedClose"] = price.groupby("SecuritiesCode")["AdjustedClose"].ffill()
    return price


df_prices_sec=adjust_price(df_prices_sec)

In [None]:
df_prices_sec["DailyAverageClose"]=df_prices_sec.groupby('Date')['AdjustedClose'].transform('mean')

#df_prices_sec["DailyAverageTarget"]=df_prices_sec.groupby('Date')['AdjustedClose'].transform('median')

In [None]:
df_prices_secT = df_prices_sec.drop_duplicates(subset = ["Date"])

#df_prices_secT

In [None]:
df_prices_sec = df_prices_sec.dropna(subset=['Close', 'Target'])

In [None]:
def create_features(df, col="DailyAverageClose", periods=(5, 20, 50)):
    """Add moving-window features of one column to a copy of ``df``.

    For every window length ``p`` in ``periods`` four columns are added:
    ``Sec{p}MovingAvgClose`` (rolling mean), ``Sec{p}CloseChange`` (percentage
    change over ``p`` rows), ``Sec{p}ExpMovClose`` (exponential moving average
    with span ``p``) and ``Sec{p}MovingMedianClose`` (rolling median).

    Windows run over consecutive ROWS, so ``df`` should hold one row per
    period in chronological order.

    Args:
        df (pd.DataFrame): input frame; it is not modified.
        col (str): column the features are derived from.
        periods (iterable of int): window lengths.
    Returns:
        pd.DataFrame: copy of ``df`` with the feature columns appended.
    Raises:
        KeyError: if ``col`` is not a column of ``df``.
    """
    df = df.copy()
    source = df[col]
    for period in periods:
        # Computed directly on the Series: `Series.transform(lambda ...)` would
        # first try the lambda on every scalar before falling back to this.
        df[f"Sec{period}MovingAvgClose"] = source.rolling(period).mean()
        df[f"Sec{period}CloseChange"] = source.pct_change(period)
        df[f"Sec{period}ExpMovClose"] = source.ewm(span=period, adjust=False).mean()
        df[f"Sec{period}MovingMedianClose"] = source.rolling(period).median()

    return df

# Build the features on the one-row-per-date frame, in chronological order.
# Passing the full df_prices_sec here would roll the windows across rows of
# different securities and replace the de-duplicated frame with one that has
# many rows per date, which can no longer be merged on Date one-to-one.
df_prices_secT = create_features(df=df_prices_secT.sort_values("Date"))

In [None]:
df_prices_secT.columns

All duplicate-date records have already been dropped to allow for an easy merge

In [None]:
'''
df_train = df_train.merge(df_prices_secT[['Date','DailyAverageClose', 'Sec5MovingAvgClose', 'Sec5CloseChange',
       'Sec5ExpMovClose', 'Sec5MovingMedianClose', 'Sec20MovingAvgClose',
       'Sec20CloseChange', 'Sec20ExpMovClose', 'Sec20MovingMedianClose',
       'Sec50MovingAvgClose', 'Sec50CloseChange', 'Sec50ExpMovClose',
       'Sec50MovingMedianClose']], on='Date', how='left')
       
'''

In [None]:
price_features = df_train

# **Model Training**

In [None]:
def calc_spread_return_sharpe(df: pd.DataFrame, portfolio_size: int = 200, toprank_weight_ratio: float = 2) -> float:
    """Sharpe ratio of the daily long/short spread return implied by a ranking.

    Args:
        df (pd.DataFrame): predictions with the columns Date, Rank and Target
        portfolio_size (int): # of equities bought and # of equities sold per day
        toprank_weight_ratio (float): weight of the most highly ranked stock
            relative to the least highly ranked one inside a leg
    Returns:
        (float): mean of the daily spread returns divided by their standard deviation
    """
    def _calc_spread_return_per_day(df, portfolio_size, toprank_weight_ratio):
        """Spread return of one day: weighted long leg minus weighted short leg.

        Args:
            df (pd.DataFrame): that day's rows (Rank and Target)
            portfolio_size (int): # of equities per leg
            toprank_weight_ratio (float): weight of the first pick relative to the last
        Returns:
            (float): spread return
        """
        day_ranks = df['Rank']
        # Ranks are expected to start at 0 and end at (number of rows - 1).
        assert day_ranks.min() == 0
        assert day_ranks.max() == len(day_ranks) - 1

        # Linearly decaying weights, from toprank_weight_ratio down to 1.
        weights = np.linspace(start=toprank_weight_ratio, stop=1, num=portfolio_size)

        def _weighted_leg(best_first):
            # Targets of the first `portfolio_size` rows in the requested rank order.
            picked = df.sort_values(by='Rank', ascending=best_first)['Target'][:portfolio_size]
            return (picked * weights).sum() / weights.mean()

        return _weighted_leg(True) - _weighted_leg(False)

    daily_spread = df.groupby('Date').apply(_calc_spread_return_per_day, portfolio_size, toprank_weight_ratio)
    return daily_spread.mean() / daily_spread.std()

In [None]:
price_features=price_features[price_features.Date>"2020-12-23"]

In [None]:
# Columns that are not fed to the model. The result is assigned instead of
# dropping in place: price_features is a filtered slice of df_train, and
# mutating a slice in place is what pandas' SettingWithCopy check warns about.
# Each label is listed once.
unused_feature_cols = [
    '25ExpTheoPrice', '5ExpTheoPrice', 'TheoPriceChange',
    '25RollingTheoPrice', '5RollingTheoPrice',
    '5ExpRolVolume', 'VolumeChange', '5RollingVolume',
    'DailyTheoPriceAvg', 'DailyVolumeAverage',
    'Section/Products', '33SectorName', 'IssuedShares',
    'PriceDifference', 'RowId',
]
price_features = price_features.drop(columns=unused_feature_cols)


In [None]:
summarize(price_features,"test")

In [None]:
price_features['Date']=price_features['Date'].astype(str)

In [None]:
price_features.shape

In [None]:
# Time-series cross-validation of a LightGBM regressor, scored with the
# competition's spread-return Sharpe ratio.
ts_fold = TimeSeriesSplit(n_splits=10)
id_cols = ['Date', 'SecuritiesCode']

# ExpectedDividend is only populated on rows that announce a dividend, so a
# blanket dropna() would throw away every other row. It is filled with 0
# ("no dividend expected") before the remaining incomplete rows are dropped.
prices = (price_features
          .fillna({'ExpectedDividend': 0})
          .dropna()
          .sort_values(id_cols)
          .reset_index(drop=True))
y = prices['Target'].to_numpy()
X = prices.drop(['Target'], axis=1)

params = {'n_estimators': 500,
          'num_leaves' : 100,
          'learning_rate': 0.1,
          'colsample_bytree': 0.9,
          'subsample': 0.8,
          'reg_alpha': 0.4,
          'metric': 'mae',
          'random_state': 21}

# One row per model feature, one column per fold.
feat_importance = pd.DataFrame(index=X.columns.drop(id_cols))
sharpe_ratio = []

for fold, (train_idx, val_idx) in enumerate(ts_fold.split(X, y)):

    print("\n========================== Fold {} ==========================".format(fold+1))
    X_train_all, y_train = X.iloc[train_idx, :], y[train_idx]
    X_valid, y_val = X.iloc[val_idx, :], y[val_idx]

    print("Train Date range: {} to {}".format(X_train_all.Date.min(), X_train_all.Date.max()))
    print("Valid Date range: {} to {}".format(X_valid.Date.min(), X_valid.Date.max()))

    # Identifier columns are kept on X_valid for the per-date ranking below
    # but are not model features.
    X_train = X_train_all.drop(columns=id_cols)
    X_val = X_valid.drop(columns=id_cols)
    # The split is row-based, so the first and last validation dates can be
    # cut in the middle of a day; they are left out of the Sharpe evaluation.
    val_dates = X_valid.Date.unique()[1:-1]
    print("\nTrain Shape: {} {}, Valid Shape: {} {}".format(X_train.shape, y_train.shape, X_val.shape, y_val.shape))

    # Evaluation logging goes through a callback; the `verbose` argument of
    # fit() is not available in LightGBM 4.x.
    gbm = LGBMRegressor(**params).fit(X_train, y_train,
                                      eval_set=[(X_train, y_train), (X_val, y_val)],
                                      eval_metric=['mae','mse'],
                                      callbacks=[log_evaluation(period=300)])
    y_pred = gbm.predict(X_val)
    rmse = np.sqrt(mean_squared_error(y_val, y_pred))
    mae = mean_absolute_error(y_val, y_pred)
    feat_importance["Importance_Fold"+str(fold)] = gbm.feature_importances_

    # Rank the stocks within each complete validation date (0 = highest
    # prediction). Predictions and targets are attached row-by-row to X_valid,
    # so they stay aligned without a second filter on `prices`.
    scored = X_valid.assign(pred=y_pred, Target=y_val)
    scored = scored[scored.Date.isin(val_dates)]
    scored = scored.assign(
        Rank=scored.groupby('Date')['pred'].rank(method="first", ascending=False).sub(1).astype(int))

    sharpe = calc_spread_return_sharpe(scored)
    sharpe_ratio.append(sharpe)
    print("Valid Sharpe: {}, RMSE: {}, MAE: {}".format(sharpe,rmse,mae))

    del X_train_all, X_train, y_train, X_val, y_val
    gc.collect()

print("\nAverage cross-validation Sharpe Ratio: {:.4f}, standard deviation = {:.2f}.".format(np.mean(sharpe_ratio),np.std(sharpe_ratio)))

In [None]:
# Submission loop: rank the stocks of every test date with the trained model.
import jpx_tokyo_market_prediction
env = jpx_tokyo_market_prediction.make_env()
iter_test = env.iter_test()

# The model is fed exactly the columns it was fitted on, in the same order.
# These are raw same-day price columns, so no price history, price adjustment
# or rolling features are needed at inference time.
feature_cols = list(gbm.feature_name_)

counter = 0
for (prices, options, financials, trades, secondary_prices, sample_prediction) in iter_test:

    # A missing ExpectedDividend means "no dividend expected".
    feat = prices[feature_cols].fillna({'ExpectedDividend': 0})
    pred = pd.Series(gbm.predict(feat), index=prices["SecuritiesCode"].to_numpy())
    # 0 = highest predicted target; ties are broken by order of appearance.
    rank_by_code = (pred.rank(method="first", ascending=False) - 1).astype(int)

    # Ranks are matched to the submission rows by SecuritiesCode rather than by
    # position, so a different row order in sample_prediction cannot shuffle them.
    sample_prediction["Rank"] = sample_prediction["SecuritiesCode"].map(rank_by_code)
    display(sample_prediction.head())

    assert sample_prediction["Rank"].notna().all()
    assert sample_prediction["Rank"].min() == 0
    assert sample_prediction["Rank"].max() == len(sample_prediction["Rank"]) - 1

    env.predict(sample_prediction)
    counter += 1