# Workflow

In [1]:
# Load IPython's autoreload extension so edits to the project modules are picked up without restarting the kernel.
%load_ext autoreload

# Mode 2: reload all imported modules before each cell execution.
%autoreload 2

In [2]:
import warnings

import pandas as pd

from scripts.data_repo import DataRepository
from scripts.transform import TransformData
from scripts.train import SimulationParams, TrainModel

Workflow configuration

In [3]:
# Local data folder: passed as `data_dir` to the load()/persist() calls in this notebook.
# The value below is a placeholder - replace it with a real path before running.
LOCAL_DATA_FOLDER = "<path to local folder>/data/"

In [4]:
# Data-loading mode for Step 1:
#   True  - fetch the datasets from the APIs (full load) and save them to LOCAL_DATA_FOLDER
#   False - load the existing files from LOCAL_DATA_FOLDER
FETCH_REPO = False

In [5]:
# Transformation mode for Step 2:
#   True  - transform the repository data into a single dataset and save it to LOCAL_DATA_FOLDER
#   False - load the previously saved dataset from LOCAL_DATA_FOLDER
TRANSFORM_DATA = False

In [6]:
# Training mode for Step 3 (the dataframes are prepared in both cases):
#   True  - train the model and save it to LOCAL_DATA_FOLDER
#   False - load the previously saved model from LOCAL_DATA_FOLDER
TRAIN_MODEL = False

## Step 1: Fetch data from APIs or load from disk

In [7]:
# Create the data repository, then fill it from disk or from the APIs.
repo = DataRepository()

if not FETCH_REPO:
    # Reuse the files already stored in the local data folder.
    repo.load(data_dir=LOCAL_DATA_FOLDER)
else:
    # Full load: fetch all 3 datasets for all dates from the APIs...
    repo.fetch()
    # ...and keep a local copy so later runs can skip the fetch.
    repo.persist(data_dir=LOCAL_DATA_FOLDER)

## Step 2: Transform data into one dataframe

In [8]:
# Wrap the repository in the transformer that produces the single combined dataset.
transformed = TransformData(repo=repo)

if not TRANSFORM_DATA:
    # Reuse the dataset saved by an earlier run.
    transformed.load(data_dir=LOCAL_DATA_FOLDER)
else:
    # Rebuild the dataset and save it to the local data folder.
    transformed.transform()
    transformed.persist(data_dir=LOCAL_DATA_FOLDER)

## Step 3: Train or load the model

In [9]:
# Installs a process-wide "ignore" filter, so warnings stay hidden in later cells too
# (not recommended in production unless necessary).
warnings.filterwarnings("ignore")

trained = TrainModel(transformed=transformed)

# The prepared dataframes are needed on both paths (training and inference), so build them once up front.
trained.prepare_dataframe()

if TRAIN_MODEL:
    # Train the random forest model and save it to disk.
    trained.train_random_forest()
    trained.persist(data_dir=LOCAL_DATA_FOLDER)
else:
    # Load the previously saved model instead of training.
    trained.load(data_dir=LOCAL_DATA_FOLDER)

Prepare the dataframe: define feature sets, add dummies, temporal split
length: X_train (310371, 352),  X_validation (76372, 352), X_test (76947, 352)
  X_train_valid = (386743, 352),  all combined: X_all (463690, 352)


## Step 4: Inference

Inference and simulation settings

In [10]:
# Inference and simulation settings.
# NOTE(review): stop_loss / take_profit / lower_entry are read here as multipliers of Adj.Close,
# based on the entry-price formula in the lower_entry comment - confirm against SimulationParams.
sim_params = SimulationParams(
    initial_capital = 10000,        # initial capital = $10k
    threshold = 0.55,               # select all binary predictions with probability>=0.55
    fees = 0.002,                   # trading fees = 0.2% (buy+sell)
    top_k = 5,                      # select top_k predictions
    portfolio_optimization=False,   # DOES NOT WORK now - keep False
    stop_loss = 0.8,                # automatic sell (with loss) if price (any of next 5 days) is lower than [Adj.Close] * 0.8, i.e. -20% (presumably - confirm)
    take_profit = 1.02,             # automatic sell (with profit) if price (any of next 5 days) is higher than [Adj.Close] * 1.02, i.e. +2% (presumably - confirm)
    lower_entry = 0.99       # buy next day with the price = [Adj.Close] * 0.99 (try to buy cheaper)
)

In [11]:
# Run the model's inference step, passing the simulation settings defined in sim_params.
trained.make_inference(sim_params)

Making inference


In [12]:
print('Results of the estimation (last 10):')

# Remove pandas' display limits so the table below is not truncated.
# These are global options: they stay in effect for every later cell.
for display_option in ('display.max_rows', 'display.max_columns', 'display.max_colwidth'):
    pd.set_option(display_option, None)

# Fetch the latest predicted signals; the trailing expression is rendered as the cell output.
predicted_signals = trained.get_last_signals(num=10)

predicted_signals.tail(10)

Results of the estimation (last 10):


Unnamed: 0,Adj Close,Ticker,Date,pred_rf_best,pred_rf_best_rank
5458,3531.600098,LT.NS,2024-06-24 00:00:00+00:00,1,4.0
6396,126.089996,NVDA,2024-06-25 00:00:00+00:00,1,1.0
9650,139.785324,ORCL,2024-06-27 00:00:00+00:00,1,1.0
9650,446.950012,MSFT,2024-06-28 00:00:00+00:00,1,1.0
7373,1022.72998,ASML,2024-06-28 00:00:00+00:00,1,2.0
6299,408.0,OR.PA,2024-07-04 00:00:00+00:00,1,1.0
5463,1423.050049,BHARTIARTL.NS,2024-07-04 00:00:00+00:00,1,2.0
7159,839.299988,SBIN.NS,2024-07-04 00:00:00+00:00,1,3.0
6305,724.799988,MC.PA,2024-07-12 00:00:00+00:00,1,1.0
8375,675.5,CDI.PA,2024-07-12 00:00:00+00:00,1,2.0


In [13]:
# Run the trading simulation with the settings in sim_params.
# simulate() returns two values; presumably `res` holds the simulation results and
# `capital` the capital figures - TODO confirm against TrainModel.simulate.
res, capital = trained.simulate(sim_params)

SIMULATION STARTED
Simulations params: SimulationParams(initial_capital=10000, threshold=0.55, fees=0.002, top_k=5, portfolio_optimization=False, stop_loss=0.8, take_profit=1.02, lower_entry=0.99)
 Count bids 2344 in total, avg.bids per day 4.0,  filled bids 556, fill bids percent = 0.23720136518771331
  Stop loss events: count = 5, net loss = -61.65552172436992 
  Take profit events: count = 408, net profit = 6326.845394636592 
  Start capital = 10000, Resulting capital: 14232.156879391594 
  CAGR in 4 years: 1.092 or 9.22 % of avg. growth per year
