# Paper Trading

This notebook is a skeleton for paper trading.  See issue https://github.com/CAMPSMITH/sabot/issues/10


## Assumptions
* there is an external wallet
* there is an external provider of data
* there is an external machine learning model (inference engine) that provides a prediction / decision from the supplied data
* this program will be periodically invoked by a scheduler

## Design
On invocation:

1. get data for prediction
2. get prediction / decision from machine learning model (inference engine)
3. if a trade is indicated
4. get balance from wallet
5. if funds are available, execute trade
6. if a purchase was executed, deduct from wallet
7. if a sale was executed, increment wallet

Open questions:

* How much of the wallet should be transacted in each trade (25%)?
* How should the amount be adjusted for a short position?

In [4]:
# TODO: refactor into library file
# TODO: implement wallet in DDB

# The following is a facade for the wallet
class Wallet():
    """In-memory facade for a multi-currency paper-trading wallet.

    Balances live in ``self.balances``, a dict that maps each currency code
    to a record of the form ``{"currency": <code>, "amount": <number>}``.
    """

    def __init__(self, initial_balances):
        """Build the wallet from an iterable of balance records.

        Args:
            initial_balances: iterable of dicts, each providing a
                ``'currency'`` key and an ``'amount'`` key.

        Raises:
            KeyError: if a record lacks ``'currency'`` or ``'amount'``.
        """
        self.balances = {}
        for initial_balance in initial_balances:
            currency = initial_balance['currency']
            # Keep the full record (not just the currency code) so that
            # add() / withdraw() can read and update ['amount'].  A new dict
            # is built so the caller's record is never mutated through us.
            self.balances[currency] = {
                "currency": currency,
                "amount": initial_balance['amount'],
            }

    def get_balances(self):
        """Return the dict of balance records keyed by currency code."""
        return self.balances

    def __str__(self):
        """Return one line per currency; an empty string for an empty wallet."""
        return "\n".join(
            f"Current {balance['currency']} balance is  {balance['amount']}"
            for balance in self.balances.values()
        )

    def add(self, currency, amount):
        """Increase the balance of ``currency`` by ``amount``.

        Raises:
            ValueError: if ``currency`` is not held by this wallet.
        """
        if currency not in self.balances:
            raise ValueError(f"{currency} is not a valid currency")
        self.balances[currency] = {
            "currency": currency,
            "amount": self.balances[currency]['amount'] + amount,
        }

    def withdraw(self, currency, amount):
        """Decrease the balance of ``currency`` by ``amount``.

        No funds check is performed here; the balance may go negative.

        Raises:
            ValueError: if ``currency`` is not held by this wallet.
        """
        if currency not in self.balances:
            raise ValueError(f"{currency} is not a valid currency")
        self.balances[currency] = {
            "currency": currency,
            "amount": self.balances[currency]['amount'] - amount,
        }


In [None]:
# TODO: implement this

def get_trading_data():
    """Return the input data required to request a prediction.

    Placeholder implementation: always returns an empty dict.  The real
    implementation is expected to return data that is already scaled
    appropriately for the inference engine.
    """
    trading_data = {}
    return trading_data

def get_prediction():
    """Return the prediction / decision for the current trading data.

    Placeholder implementation: always returns the constant 1.
    NOTE(review): presumably 1 mirrors the "buy" signal value used when
    building training signals -- confirm once the inference engine is wired in.
    """
    placeholder_prediction = 1
    return placeholder_prediction

In [None]:
# Simulated paper trade

def run():
    """Entry point called by the scheduler for one pass of the process.

    Skeleton only: it fetches the trading data needed for a prediction and
    does nothing further with it.  Returns None.
    """
    _ = get_trading_data()  # fetched but not consumed yet

In [None]:
# Reusable functions to create signals for ML training

# TODO: these will need to be refactored based on the shape of the dataframe
# TODO: refactor these functions to library file
# create helper methods to facilitate assessing permutations

def make_signals_df(short_window, long_window, ohlcv=None):
    """Build the signals DataFrame used for ML training and backtesting.

    Args:
        short_window: rolling window size for the fast simple moving average.
        long_window: rolling window size for the slow simple moving average.
        ohlcv: optional DataFrame with a ``"close"`` column.  When omitted,
            the module-level ``ohlcv_df`` is used, as before.

    Returns:
        A new DataFrame (the input is not modified) with the columns
        ``close``, ``Actual Returns``, ``SMA_Fast``, ``SMA_Slow``, ``Signal``
        and ``Strategy Returns``.  Rows that have a NaN in the returns or in
        either moving average are dropped.
    """
    source = ohlcv_df if ohlcv is None else ohlcv

    # Work on an explicit copy so the column assignments below never write
    # through to (or warn about) the source frame.
    signals_df = source.loc[:, ["close"]].copy()

    # Period-over-period returns of the close price
    signals_df["Actual Returns"] = signals_df["close"].pct_change()

    # Fast and slow simple moving averages over the requested windows
    signals_df['SMA_Fast'] = signals_df['close'].rolling(window=short_window).mean()
    signals_df['SMA_Slow'] = signals_df['close'].rolling(window=long_window).mean()

    # Discard the warm-up rows where returns or either average is undefined
    signals_df = signals_df.dropna()

    # Signal is +1 (buy long) for returns >= 0 and -1 (sell short) otherwise
    signals_df['Signal'] = 0.0
    signals_df.loc[(signals_df['Actual Returns'] >= 0), 'Signal'] = 1
    signals_df.loc[(signals_df['Actual Returns'] < 0), 'Signal'] = -1

    # Each row's return is multiplied by the previous row's signal, so the
    # first remaining row has no strategy return (NaN).
    signals_df['Strategy Returns'] = signals_df['Actual Returns'] * signals_df['Signal'].shift()

    return signals_df

def create_train_test_datasets(months):
    """Split the module-level ``X`` / ``y`` by time and scale the features.

    The training window runs from the earliest index label of ``X`` to that
    label plus ``months`` calendar months.  The testing window starts one
    hour after the end of the training window and runs to the end of the data.
    NOTE(review): the one-hour offset presumably matches hourly-indexed
    data -- confirm against the dataset being used.

    Args:
        months: length of the training window, in months.

    Returns:
        Tuple ``(X_train_scaled, y_train, X_test_scaled, y_test)`` where the
        scaled feature sets come from a StandardScaler fitted on the training
        features only.
    """
    # Boundaries of the training window and the first label of the test window
    first_label = X.index.min()
    last_train_label = first_label + DateOffset(months=months)
    first_test_label = last_train_label + DateOffset(hours=1)

    # Training features and classification labels
    X_train = X.loc[first_label:last_train_label]
    y_train = y.loc[first_label:last_train_label]

    # Testing features and classification labels (everything after training)
    X_test = X.loc[first_test_label:]
    y_test = y.loc[first_test_label:]

    # Fit the scaler on the training features only, then apply it to both sets
    fitted_scaler = StandardScaler().fit(X_train)
    return (
        fitted_scaler.transform(X_train),
        y_train,
        fitted_scaler.transform(X_test),
        y_test,
    )

def get_predictions(model, X_train_scaled, y_train, X_test_scaled):
    """Fit ``model`` on the training data and predict for the test features.

    Args:
        model: object exposing ``fit(X, y)`` and, on the object returned by
            ``fit``, ``predict(X)``.
        X_train_scaled: training features passed to ``fit``.
        y_train: training labels passed to ``fit``.
        X_test_scaled: features passed to ``predict``.

    Returns:
        Whatever ``predict`` returns for ``X_test_scaled``.
    """
    # predict() is invoked on the object returned by fit(), not on the
    # argument itself, in case fit() hands back a different object.
    fitted_model = model.fit(X_train_scaled, y_train)
    return fitted_model.predict(X_test_scaled)
    
def backtest_model(model_name, y_test, y_predictions, actual_returns):
    """Backtest a set of predictions against the actual returns.

    Args:
        model_name: label of the model permutation.  Not used in the
            computation; kept so existing callers keep working.
        y_test: pandas object whose index defines the rows of the backtest.
        y_predictions: predicted signal per row of ``y_test``.
        actual_returns: actual returns, aligned to ``y_test`` by index.

    Returns:
        DataFrame indexed like ``y_test`` (minus rows containing NaN) with
        the columns ``Predicted``, ``Actual Returns``, ``Strategy Returns``,
        ``strategy_cum_return`` and ``actual_cum_return``.
    """
    # No classification report is built here: its result was never used or
    # returned by this function.

    # One row per test observation: the prediction next to the actual return
    df = pd.DataFrame(index=y_test.index)
    df['Predicted'] = y_predictions
    df['Actual Returns'] = actual_returns

    # Rows without an actual return (or prediction) cannot be backtested
    df = df.dropna()

    # Return earned by following the predicted signal on each row
    df['Strategy Returns'] = df['Actual Returns'] * df['Predicted']

    # Cumulative growth of one unit for the strategy and for the asset itself
    df['strategy_cum_return'] = (1 + df['Strategy Returns']).cumprod()
    df['actual_cum_return'] = (1 + df['Actual Returns']).cumprod()

    # NOTE: per-model plotting used to be sketched here but was disabled; the
    # returned frame carries both cumulative curves for callers to plot.
    return df

In [None]:
# main loop
# TODO: Modify to fit data and looping needs

models = {}
# For every alternate model, evaluate each permutation of training months and
# fast / slow SMA window sizes, then plot and summarize that model family.
for name in alternate_models:
    returns = None
    max_return = 0
    selected_model = None
    for training_months in training_dataset_months:
        for short_window_size in short_window_sizes:
            for long_window_size in long_window_sizes:
                # key that uniquely identifies this model permutation
                model_key = f"{name}-tr({training_months})-sw({short_window_size})-lw({long_window_size})"

                # `entry` is the very dict stored under models[model_key], so
                # everything set on it below is visible through `models`
                entry = {
                    "model_name": name,
                    "training_months": training_months,
                    "short_window_size": short_window_size,
                    "long_window_size": long_window_size,
                    "model": alternate_models[name]["model"],
                }
                models[model_key] = entry

                # signals data set: actual returns, fast and slow SMA, signal and strategy returns
                entry['signals_df'] = make_signals_df(short_window_size, long_window_size)

                # training and testing datasets
                (
                    entry['X_train_scaled'],
                    entry['y_train'],
                    entry['X_test_scaled'],
                    entry['y_test'],
                ) = create_train_test_datasets(training_months)

                # fit the model and predict on the testing features
                entry['y_predictions'] = get_predictions(
                    entry['model'],
                    entry['X_train_scaled'],
                    entry['y_train'],
                    entry['X_test_scaled'],
                )

                # classification report for this permutation
                entry['classification_report'] = classification_report(
                    entry['y_test'],
                    entry['y_predictions'],
                )

                # print the classification report
                print(f"""
                {model_key} classification report: 
                ---------------------------------------------------------------------------
                {entry['classification_report']}
                ---------------------------------------------------------------------------
                """)

                # backtest using the actual returns from the start of the test window onward
                test_start = entry['y_test'].index.min()
                entry['backtest'] = backtest_model(
                    model_key,
                    entry['y_test'],
                    entry['y_predictions'],
                    entry['signals_df'].loc[test_start:, 'Actual Returns'],
                )

                # On the first permutation of this model, seed the returns
                # collection with the actual and signal cumulative returns
                if returns is None:
                    returns = {
                        "actual": (1 + entry['signals_df']['Actual Returns']).cumprod(),
                        "signal": (1 + entry['signals_df']['Strategy Returns']).cumprod(),
                    }

                # record this permutation's cumulative strategy return and
                # track the permutation with the highest final value
                returns[model_key] = entry['backtest']['strategy_cum_return']
                final_return = returns[model_key].iloc[-1]
                if final_return > max_return:
                    max_return = final_return
                    selected_model = model_key

    # plot the family of returns across training months and SMA window sizes
    returns_df = pd.DataFrame(returns)
    model_family_plot = returns_df.plot(
        figsize=(15, 15),
        title=f'{name} Cumulative Returns for various training and SMA window sizes',
    )

    # save the plot
    model_family_plot.figure.savefig(f'images/{name}_returns.png', bbox_inches='tight')

    # show the final returns for the family (last row, transposed)
    display(returns_df.tail(1).T)

    # report the best cumulative return achieved for this model
    print(f"maximum cumulative return for {name} models was {max_return} from model permutation {selected_model}")