In [1]:
import os
import shutil
import sys
import pickle

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

import lightgbm as lgbm

In [2]:
if not os.path.exists(r"./Features.py"):
    shutil.copyfile(r"../input/codejpx/Features.py", r"./Features.py")
if not os.path.exists(r"./Preprocessing.py"):
    shutil.copyfile(r"../input/codejpx/Preprocessing.py", r"./Preprocessing.py")
if not os.path.exists(r"./Trackers.py"):
    shutil.copyfile(r"../input/codejpx/Trackers.py", r"./Trackers.py")
if not os.path.exists(r"./Validation.py"):
    shutil.copyfile(r"../input/codejpx/Validation.py", r"./Validation.py")

In [3]:
import Features
from Preprocessing import StockDataPreprocessor
from Trackers import StateTracker

## Specify Features

In [4]:
features = [Features.Amplitude(), Features.OpenCloseReturn(), Features.Return(), 
            Features.Volatility(10), Features.Volatility(30), Features.Volatility(50), 
            Features.SMA("Close", 3), Features.SMA("Close", 5), Features.SMA("Close", 10),
            Features.SMA("Close", 30),
            Features.SMA("Return", 3), Features.SMA("Return", 5), 
            Features.SMA("Return", 10), Features.SMA("Return", 30),
           ]

In [5]:
st = StateTracker(features)

In [6]:
## Specify training columns and specify categorical columns
training_cols = ['SecuritiesCode', 'Open', 'High', 'Low', 'Close',
                 'Volume', 'AdjustmentFactor', 'ExpectedDividend', 
                 'SupervisionFlag']

for feature in features:
    training_cols.append(feature.name)

categorical_cols = ["SecuritiesCode", "SupervisionFlag"]
target_col = ["Target"]

In [7]:
with open(r"../input/lgbm001/lgbm.pickle", "rb") as file:
    model = pickle.load(file)

In [8]:
def get_ranks(prices):
    prices = st.online_update_apply(prices)[training_cols]
    if not prices["SecuritiesCode"].is_monotonic_increasing:
        prices = prices.sort_values(by="SecuritiesCode")
    predictions = model.predict(prices[training_cols])
    ranks = np.arange(1999, -1, -1)
    zipped = list(zip(predictions, ranks))
    zipped.sort(key=lambda x: -x[0])
    sorted_predictions, sorted_ranks = map(list, zip(*zipped))
    
    return sorted_predictions, sorted_ranks

In [9]:
class Algo:
    
    def __init__(self, model, state_tracker):
        self.model = model
        self.st = state_tracker
        self.cols = ['SecuritiesCode', 'Open', 'High', 'Low', 'Close',
                 'Volume', 'AdjustmentFactor', 'ExpectedDividend', 
                 'SupervisionFlag']
        
        for feature in self.st.local_features[1301]:
            self.cols.append(feature.name)
    
    def add_rank(self, df):
        df = df.sort_values(by = "Prediction", ascending=False)
        df['Rank'] = np.arange(len(df))
        df = df.sort_values(by = "SecuritiesCode", ascending=True)
        return df
    
    def predict_offline(self, prices):
        prices = prices[self.cols]
        prices["Prediction"] = self.model.predict(prices)
        return self.add_rank(prices)
    
    def predict_offline_full(self, prices):
        return prices.groupby("Date").apply(lambda subdf: self.predict_offline(subdf))
    
    def add_rank1(self, df):
        predictions = df["Prediction"]
        ranks = np.arange(2000)
        zipped = list(zip(predictions, ranks))
        zipped.sort(key=lambda x: -x[0])
        sorted_predictions, sorted_ranks = map(list, zip(*zipped))
        df["Rank"] = sorted_ranks
        return df
    
    def predict_online(self, prices, options, financials, trades, secondary_prices):
        prices = st.online_update_apply(prices)[self.cols]
        if not prices["SecuritiesCode"].is_monotonic_increasing:
            prices = prices.sort_values(by="SecuritiesCode")
        prices["Prediction"] = self.model.predict(prices)
        return self.add_rank1(prices)

In [10]:
algo = Algo(model, st)

In [11]:
import jpx_tokyo_market_prediction

In [12]:
env = jpx_tokyo_market_prediction.make_env()   # initialize the environment
iter_test = env.iter_test()    # an iterator which loops over the test files

for (prices, options, financials, trades, secondary_prices, sample_prediction) in iter_test:
    
    if not sample_prediction["SecuritiesCode"].is_monotonic_increasing:
        sample_prediction = sample_prediction.sort_values("SecuritiesCode")
    
    sample_prediction['Rank'] = algo.predict_online(prices, options, financials,
                                                    trades, secondary_prices)['Rank']  # make your predictions here
    env.predict(sample_prediction)   # register your predictions

This version of the API is not optimized and should not be used to estimate the runtime of your code on the hidden test set.


In [13]:
df_stocks

NameError: name 'df_stocks' is not defined