In [1]:
from typing import List, Tuple, Optional, Mapping
import pandas as pd
import numpy as np

In [None]:
class IndicesSelector:
    """
    Placeholder for the step that picks which symbols to keep from a price table.

    Nothing is implemented yet: the selector stores no state and ``select``
    always returns None.
    """

    def __init__(self) -> None:
        """Create the selector; no attributes are set."""

    def select(self, df: pd.DataFrame) -> List[str]:
        """
        Not implemented yet.

        @param df price table to choose from (currently ignored)
        @return always None for now; the annotation describes the planned result
        """
        return None

In [None]:
class PairsSelector:
    """
    Placeholder for the step that groups symbols into tradable pairs.

    Nothing is implemented yet: the selector stores no state and ``select``
    always returns None.
    """

    def __init__(self) -> None:
        """Create the selector; no attributes are set."""

    def select(self, df: pd.DataFrame) -> List[List[str]]:
        """
        Not implemented yet.

        @param df price table to build pairs from (currently ignored)
        @return always None for now; the annotation describes the planned result
        """
        return None

In [None]:
class Predictor:
    """
    Base interface shared by all predictors.

    Every method here is an empty stub that returns None; subclasses are
    expected to override the ones they support.
    """

    def __init__(self) -> None:
        """Create the predictor; no attributes are set."""

    def train(self, data: pd.DataFrame, params: Optional[Mapping] = None):
        """
        Stub: does nothing.

        @param data training data (ignored here)
        @param params optional settings (ignored here)
        """
        return None

    def predict(self, data: pd.DataFrame, params: Optional[Mapping] = None) -> pd.DataFrame:
        """
        Stub: does nothing.

        @param data data to predict on (ignored here)
        @param params optional settings (ignored here)
        @return always None in this base class
        """
        return None

    def periodic_train_predict(self, data: pd.DataFrame, params: Optional[Mapping] = None) -> pd.DataFrame:
        """
        Stub: does nothing.

        @param data data to train and predict on (ignored here)
        @param params optional settings (ignored here)
        @return always None in this base class
        """
        return None

In [None]:
class NaivePredictor(Predictor):
    """
    Predictor that forecasts by shifting the input forward by a fixed number of rows.

    No model is fitted; ``train`` is a no-op and the periodic variant simply
    delegates to ``predict``.
    """

    # Number of rows to shift by when the caller does not supply 'period'.
    DEFAULT_PERIOD = 5

    def __init__(self):
        super().__init__()

    def train(self, data, params: Optional[Mapping] = None):
        """
        No-op: the naive predictor has nothing to fit.

        @param data training data (ignored)
        @param params optional settings (ignored)
        """
        pass

    def predict(self, data: pd.DataFrame, params: Optional[Mapping] = None) -> pd.DataFrame:
        """
        Return ``data`` shifted by ``params['period']`` rows.

        @param data values to shift
        @param params optional mapping; key 'period' gives the shift and
               falls back to DEFAULT_PERIOD when params or the key is missing
        @return the result of ``data.shift(period)``
        """
        # params is a Mapping, so the value must be read by key; attribute
        # access (params.period) raises AttributeError on a plain dict.
        if params is None:
            period = self.DEFAULT_PERIOD
        else:
            period = params.get('period', self.DEFAULT_PERIOD)
        return data.shift(period)

    def periodic_train_predict(self, data: pd.DataFrame, params: Optional[Mapping] = None) -> pd.DataFrame:
        """
        Same as ``predict``; there is no training step to repeat.

        @param data values to shift
        @param params optional mapping forwarded to ``predict``
        @return the shifted data
        """
        return self.predict(data, params)
    

In [None]:
class SignalGenerator:
    """
    Base interface shared by all signal generators.

    ``generate`` is an empty stub here and returns None; subclasses override it.
    """

    def __init__(self) -> None:
        """Create the generator; no attributes are set."""

    def generate(self, pairs: List[str], price_df: pd.DataFrame, predict_df: pd.DataFrame, params: Optional[Mapping] = None) -> pd.DataFrame:
        """
        Stub: does nothing.

        @param pairs pairs to generate signals for (ignored here)
        @param price_df price table (ignored here)
        @param predict_df prediction table (ignored here)
        @param params optional settings (ignored here)
        @return always None in this base class
        """
        return None

In [None]:
class PercentileCurrent(SignalGenerator):
    """
    Signal generator based on where each pair's current spread sits within
    its own recent distribution.

    For every pair the spread ``price[first] - price[second]`` is ranked inside
    a rolling look-back window; the percentile rank minus 0.5 is the signal.
    The signal is sampled once per holding period, and at each sample the pair
    with the largest absolute signal is selected.
    """

    # rolling() only accepts fixed-frequency offsets, so the look-back window is
    # given in days ('2M' is a month-end offset and is rejected by rolling()).
    # 'holding_period' is a resample rule; recent pandas spells month-end 'ME'.
    DEFAULT_PARAMS = {
        'holding_period': '1M',
        'distribution_period': '60D',
    }

    def __init__(self):
        super().__init__()

    def generate(self, pairs: List[str], price_df: pd.DataFrame, predict_df: pd.DataFrame, params: Optional[Mapping] = None) -> pd.DataFrame:
        """
        Build a per-asset position table from the strongest pair signal.

        @param pairs iterable of pairs; each pair is indexed as pair[0] / pair[1]
               and both entries must be string column names of price_df
        @param price_df prices, one column per symbol; time-based rolling and
               resampling are applied, so a datetime-like index is required
        @param predict_df currently unused by this generator
        @param params optional mapping overriding 'holding_period' (resample
               rule) and/or 'distribution_period' (rolling window offset);
               missing keys fall back to DEFAULT_PARAMS
        @return DataFrame with one row per sampled holding period and one column
                per asset of price_df. For the selected pair, pair[0] is True and
                pair[1] is False when the signal is positive, and the reverse
                otherwise; every other asset is None.
        """
        settings = dict(self.DEFAULT_PARAMS)
        if params is not None:
            settings.update(params)
        holding_period = settings['holding_period']
        distribution_period = settings['distribution_period']

        assets = list(price_df.columns)
        asset_position = {asset: pos for pos, asset in enumerate(assets)}

        signals = {}
        legs = {}
        for pair in pairs:
            label = ','.join(pair)
            spread = price_df[pair[0]] - price_df[pair[1]]
            # Percentile rank of the newest observation inside its window,
            # centred on zero so the sign tells which side of the median it is.
            centred_rank = spread.rolling(distribution_period).apply(
                lambda window: pd.Series(window).rank(pct=True).iloc[-1]
            ) - 0.5
            # Keep one observation per holding period.
            signals[label] = centred_rank.resample(holding_period).first()
            # Remember the legs directly instead of re-splitting the label.
            legs[label] = (pair[0], pair[1])

        if not signals:
            return pd.DataFrame(columns=assets, index=price_df.index[:0])

        # Periods with no signal for any pair cannot select a pair; drop them
        # so idxmax never sees an all-NaN row.
        signal_df = pd.DataFrame(signals).dropna(how='all')
        strongest = signal_df.abs().idxmax(axis=1)

        # NOTE(review): one row is emitted per sampled period and a position is
        # assumed to hold until the next row - confirm against the consumer.
        rows = []
        for (_, period_signals), label in zip(signal_df.iterrows(), strongest):
            first_leg, second_leg = legs[label]
            is_positive = bool(period_signals[label] > 0)
            row = [None] * len(assets)
            row[asset_position[first_leg]] = is_positive
            row[asset_position[second_leg]] = not is_positive
            rows.append(row)

        return pd.DataFrame(rows, columns=assets, index=signal_df.index)

In [None]:
class PairTradingPipeline():
    """
    Chains index selection, pair selection, prediction and signal generation
    over a single price table.

    The first ``int(len(price_df) * training_ratio)`` rows are stored as
    ``index_selection_df`` and the remaining rows as ``pair_selection_df``.

    @param price_df DataFrame with symbol columns and price values
    @param training_ratio ratio of the subset of the price_df to be used to select pairs
    """
    def __init__(self, price_df: pd.DataFrame, training_ratio: float=0.5):
        self.price_df = price_df.copy()
        training_idx = int(len(price_df) * training_ratio)
        self.index_selection_df = self.price_df[:training_idx]
        self.pair_selection_df = self.price_df[training_idx:]
        self.selected_indices = None
        # Initialised here so create_signal can test it before any selection ran.
        self.selected_pairs = None
        self.indicies_selector = IndicesSelector()
        self.pairs_selector = PairsSelector()
        self.predictors = {
            'naive': NaivePredictor()
        }
        self.predict_result = {
            'naive': None
        }
        self.signal_generator = {
            'percentile_current': PercentileCurrent()
        }

    def select_indicies(self) -> List[str]:
        """
        Run the index selector on ``index_selection_df`` and cache the result.

        @return the selected symbols, also stored in ``selected_indices``
        """
        self.selected_indices = self.indicies_selector.select(self.index_selection_df)
        return self.selected_indices

    def select_pairs(self) -> List[List[str]]:
        """
        Run the pair selector on the selected symbol columns and cache the result.

        Index selection is triggered first when it has not been run yet.

        @return the selected pairs, also stored in ``selected_pairs``
        """
        if self.selected_indices is None:
            self.select_indicies()
        # selected_indices is already a list of column names; wrapping it in
        # another list would produce a nested, invalid column selector.
        # NOTE(review): this reads index_selection_df, not pair_selection_df - confirm.
        self.selected_pairs = self.pairs_selector.select(self.index_selection_df[self.selected_indices])
        return self.selected_pairs

    def predict(self, predictor: str, pair: List[str], params: Optional[Mapping] = None, periodic: Optional[bool] = False):
        """
        Run the named predictor on the columns of ``pair`` in ``pair_selection_df``.

        @param predictor key into ``self.predictors``
        @param pair column names to predict
        @param params optional settings forwarded to the predictor
        @param periodic when true, use ``periodic_train_predict``; otherwise
               ``train`` on the full ``pair_selection_df`` and then ``predict``
        @return the prediction, also stored in ``predict_result[predictor]``
        """
        model = self.predictors[predictor]
        pair_df = self.pair_selection_df[pair]
        if periodic:
            result = model.periodic_train_predict(pair_df, params)
        else:
            model.train(self.pair_selection_df, params)
            result = model.predict(pair_df, params)
        self.predict_result[predictor] = result
        return result

    def create_signal(self, signalGenerator: str, predict_df: pd.DataFrame, params: Optional[Mapping] = None):
        """
        Generate signals for the selected pairs with the named generator.

        Pair selection is triggered first when it has not been run yet.

        @param signalGenerator key into ``self.signal_generator``
        @param predict_df predictions passed through to the generator
        @param params optional settings forwarded to the generator
        @return whatever the generator's ``generate`` returns
        """
        if self.selected_pairs is None:
            self.select_pairs()
        # Generators live in self.signal_generator, not in self.predictors.
        generator = self.signal_generator[signalGenerator]
        return generator.generate(self.selected_pairs, self.price_df, predict_df, params)
        
