In [1]:

import warnings
from datetime import datetime

import numpy as np
import pandas as pd
import plotly.express as px
from numpy.linalg import inv
from scipy.sparse.linalg import eigsh
from tqdm import tqdm

warnings.filterwarnings("ignore")


In [2]:
# ============================================================================ #
#                                     Data                                     #
# ============================================================================ #
# Load the dataset from the CSV file `data_reduced.csv` (relative path).
X = pd.read_csv(filepath_or_buffer="data_reduced.csv")

In [3]:
# Discard every row containing a missing value, then parse the `date` column
# into pandas datetimes; the last line displays the cleaned frame.
X = X.dropna().assign(date=lambda frame: pd.to_datetime(frame["date"]))
X

Unnamed: 0.1,Unnamed: 0,order_book_id,date,AT,dolvol,D2A,Free_CF,OL,ATO,egr,...,Mdp,FC2Y,roa,r_12_7,Lturnover,operprof,saleinv,PM,S2P,exret
0,0.0,000001.XSHE,2006-12-29,0.994030,0.984080,-0.964179,0.219900,-0.005970,-0.996020,-0.351244,...,0.000995,0.021891,-0.430846,-0.755224,0.787065,0.381095,0.003980,0.397015,-0.329353,0.320314
1,1.0,000001.XSHE,2007-01-31,0.994042,0.986097,-0.964250,0.219464,-0.012910,-0.996028,-0.346574,...,0.000993,0.012910,-0.424032,-0.809335,0.751738,0.368421,-0.001986,0.384310,-0.386296,-0.005894
2,2.0,000001.XSHE,2007-02-28,0.994030,0.986070,-0.958209,0.211940,-0.007960,-0.994030,-0.375124,...,0.000995,0.015920,-0.430846,-0.779104,0.615920,0.353234,-0.009950,0.367164,-0.297512,-0.010693
3,3.0,000001.XSHE,2007-03-30,0.990225,0.974585,-0.962854,0.216031,-0.002933,-0.990225,0.859238,...,0.000978,0.004888,-0.448680,-0.442815,0.298143,0.108504,-0.001955,0.362659,-0.436950,0.372638
4,4.0,000001.XSHE,2008-03-31,0.990645,0.985033,-0.977549,0.438728,-0.004677,-0.985033,0.900842,...,0.000935,0.014032,-0.519177,0.711880,0.745557,0.111319,-0.005613,0.610851,-0.438728,0.047803
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2300,2300.0,000020.XSHE,2022-07-29,-0.951958,-0.767687,0.056269,0.929582,0.874959,-0.560382,-0.291872,...,0.000329,-0.611714,0.213557,-0.323462,-0.262915,0.690688,0.397170,0.612372,-0.254360,-0.016480
2301,2301.0,000020.XSHE,2022-08-31,-0.949325,-0.362290,0.113524,0.944719,0.861139,-0.591971,-0.252386,...,0.000329,-0.630800,-0.214873,0.262915,0.313590,0.692662,0.398486,0.570911,-0.288582,-0.123864
2302,2302.0,000020.XSHE,2022-09-30,-0.948785,-0.269862,0.112935,0.944846,0.860144,-0.590282,-0.250821,...,0.000328,-0.630335,-0.216678,0.192383,0.499015,0.690742,0.396586,0.568615,-0.261983,0.150320
2303,2303.0,000020.XSHE,2022-10-31,-0.954023,-0.681445,0.111330,0.918555,0.856158,-0.569787,-0.245320,...,0.000328,-0.561905,-0.531691,0.184893,-0.084401,0.691297,0.397701,0.555337,-0.398358,0.036083


In [7]:
# Distinct observation dates found in X, in ascending order.
dates = sorted(set(X["date"]))

# Fraction of the dates (the earliest ones) assigned to the training window.
train_test_split = 0.8

# Derive the cut index once from the configured fraction, so that editing
# `train_test_split` actually moves the train/test boundary (the fraction used
# to be hard-coded a second and third time in the slices below).
split_index = int(len(dates) * train_test_split)

train_dates = dates[:split_index]
test_dates = dates[split_index:]

In [118]:

class StrategyNFixedBaskets:
    """Equal-weight strategy: on every date, hold each stock listed on that date.

    For a given date the position is ``{order_book_id: 1 / N}``, where ``N`` is
    the number of distinct ``order_book_id`` values that ``source_data``
    contains for that date.
    """

    def __init__(self, args=None) -> None:
        """Create the strategy.

        Parameters
        ----------
        args : object, optional
            Not used by this strategy. It is accepted (and stored) so that the
            class can be constructed both as ``StrategyNFixedBaskets()`` and as
            ``StrategyNFixedBaskets(args)``, which is how ``Portfolio.run``
            instantiates strategies.
        """
        self.args = args

    @staticmethod
    def execute(source_data, dates):
        """Build the equal-weight position for every requested date.

        Declared as a static method so that it can be called either on the
        class (``StrategyNFixedBaskets.execute(data, dates)``) or on an
        instance (``strategy.execute(data, dates)``).

        Parameters
        ----------
        source_data : pandas.DataFrame
            Must contain the columns ``'date'`` and ``'order_book_id'``.
        dates : iterable
            Dates to build positions for; each is compared against the
            ``'date'`` column with ``==``.

        Returns
        -------
        dict
            ``{date: {order_book_id: weight}}``. A date with no rows in
            ``source_data`` maps to an empty dict.
        """
        positions = {}
        # Narrow the frame once so the per-date masks scan fewer rows.
        in_window = source_data[source_data['date'].isin(dates)]
        for date in dates:
            on_date = in_window.loc[in_window['date'] == date, 'order_book_id']
            # De-duplicate while preserving order: a repeated id would
            # otherwise inflate N while collapsing to a single dict key, so
            # the weights would no longer sum to 1.
            available_stocks = list(dict.fromkeys(on_date.tolist()))
            positions[date] = {stock: 1 / len(available_stocks) for stock in available_stocks}
        return positions


In [124]:

class Portfolio:
    """Holds dated portfolio weights and scores them against the ``exret`` column.

    Attributes
    ----------
    dates : list
        Dates for which a position is expected, in the order they are scored.
    positions : dict
        ``{date: {order_book_id: weight}}`` for every accepted position.
    valid_stocks : dict
        ``{date: [order_book_id, ...]}`` — ids that may be held on each date.
    stats : dict
        Summary statistics filled in by :meth:`activate`.
    source_data : pandas.DataFrame or None
        Frame supplied to :meth:`run`; must contain ``'order_book_id'``,
        ``'date'`` and ``'exret'``.
    is_well_defined : bool
        Result of the last :meth:`well_defined` check.
    data : pandas.DataFrame
        One row per date with the columns ``date``, ``positions``,
        ``n_positions``, ``return`` and, after :meth:`activate`, ``std`` and
        ``Sharpe``.
    """

    def __init__(self):
        self.dates = []
        self.positions = {}
        self.valid_stocks = {}
        self.stats = {}
        # Initialised here so that activate() can test them before run() or
        # well_defined() has been called, instead of raising AttributeError.
        self.source_data = None
        self.is_well_defined = False
        self.data = pd.DataFrame(columns=['date', 'positions', 'n_positions', 'return'])

    def set_valid_stocks(self, valid_stocks):
        """Store the ``{date: [order_book_id, ...]}`` mapping of holdable ids."""
        self.valid_stocks = valid_stocks

    def change_position(self, date, position, error=10e-6):
        """Register ``position`` as the weights held on ``date``.

        The date is always added to ``self.dates``. The position itself is
        stored only if every id in it is valid on that date; otherwise a
        message is printed for each invalid id carrying a positive weight and
        the position is left undefined (``well_defined`` will report it).

        Parameters
        ----------
        date : hashable
            Date the position applies to.
        position : dict
            ``{order_book_id: weight}``.
        error : float, optional
            Allowed absolute deviation of the weight sum from 1.
            Note that the default ``10e-6`` equals ``1e-5``.

        Raises
        ------
        ValueError
            If the weights do not sum to 1 within ``error``.
        """
        if abs(sum(position.values()) - 1) >= error:
            # `raise print(...)` used to raise a TypeError, because print()
            # returns None; raise a real exception that carries the message.
            raise ValueError("The position weighting does not add to 1.")

        if date not in self.dates:
            self.dates.append(date)

        # A date with no entry in valid_stocks has no holdable ids.
        tradable = set(self.valid_stocks.get(date, ()))
        unknown = [stock for stock in position if stock not in tradable]
        if not unknown:
            self.positions[date] = position
            return

        for stock in unknown:
            if position[stock] > 0:
                print(f'Stock {stock} not defined at date {date}.')

    def well_defined(self):
        """Check that every registered date has a non-empty position.

        Prints one message per date that lacks a position, or a confirmation
        when none is missing, and records the outcome in
        ``self.is_well_defined``.

        Returns
        -------
        bool
            ``True`` when no date is missing a position.
        """
        # Cover self.dates as well as the stored positions: a date whose
        # position was rejected by change_position() is present only in
        # self.dates and would otherwise surface as a KeyError in activate().
        to_check = list(dict.fromkeys([*self.dates, *self.positions]))
        missing = [date for date in to_check if not self.positions.get(date)]

        self.is_well_defined = not missing
        if missing:
            for date in missing:
                print(f"Please define a portfolio position for {date}")
        else:
            print("Portfolio well defined.")
        return self.is_well_defined

    def activate(self):
        """Compute the per-date portfolio return table and summary statistics.

        Rebuilds ``self.data`` from scratch (so repeated calls do not
        accumulate rows) and fills ``self.stats["Portfolio Return"]``.

        Raises
        ------
        RuntimeError
            If the portfolio is well defined but no source data has been set.
        KeyError
            If a held id has no ``exret`` row on the corresponding date.
        """
        if not self.is_well_defined:
            print("Please make sure the portfolio is well defined first by calling Portfolio.well_defined().")
            return
        if self.source_data is None:
            raise RuntimeError("No source data set; call Portfolio.run() before Portfolio.activate().")

        # Build the (order_book_id, date) -> exret lookup once instead of
        # re-grouping the whole frame for every stock on every date.
        # min_count=1 keeps an all-missing group as NaN rather than 0.
        exret = self.source_data.groupby(['order_book_id', 'date'])['exret'].sum(min_count=1)

        # Collect plain records and build the frame once: DataFrame.append was
        # removed in pandas 2.0 and copied the frame on every call.
        records = []
        for date in self.dates:
            weights = self.positions[date]
            total_return = float(sum(weight * exret.loc[(stock, date)] for stock, weight in weights.items()))
            records.append({
                'date': date,
                'positions': weights,
                'n_positions': len(weights),
                'return': total_return,
            })

        self.data = pd.DataFrame(records, columns=['date', 'positions', 'n_positions', 'return'])
        # Force a numeric dtype so the rolling statistics also work on an empty table.
        self.data['return'] = self.data['return'].astype(float)
        self.data['std'] = self.data['return'].rolling(12).std()
        self.data['Sharpe'] = self.data['return'].div(self.data['std'])
        # The original statement here only looked the key up (a KeyError on the
        # empty dict). NOTE(review): the compounded return over all dates is
        # stored as the statistic — confirm this is the intended definition.
        self.stats["Portfolio Return"] = float((1 + self.data['return']).prod() - 1)
        print("Activated.")

    def implement_strategy(self, positions):
        """Register every ``{date: position}`` pair via :meth:`change_position`."""
        for date, position in positions.items():
            self.change_position(date, position)

    def run(self, strategy_method, source_data, dates, args=None):
        """Instantiate a strategy, apply its positions and score the portfolio.

        Parameters
        ----------
        strategy_method : type
            Strategy class; its instances must provide
            ``execute(source_data, dates)`` returning ``{date: position}``.
        source_data : pandas.DataFrame
            Must contain ``'order_book_id'``, ``'date'`` and ``'exret'``.
        dates : iterable
            Dates to trade on.
        args : object, optional
            Passed to the strategy constructor.
        """
        # Copy so that change_position() never appends to the caller's list.
        self.dates = list(dates)
        self.source_data = source_data
        # One pass over the grouped frame instead of one groupby per date.
        self.set_valid_stocks({date: list(ids) for date, ids in source_data.groupby("date")["order_book_id"]})
        try:
            strategy = strategy_method(args)
        except TypeError:
            # Strategies whose constructor takes no arguments cannot be called
            # with the default args=None; retry without it in that case only.
            if args is not None:
                raise
            strategy = strategy_method()
        self.implement_strategy(strategy.execute(source_data, dates))
        self.well_defined()
        self.activate()


In [125]:
# Backtest the equal-weight strategy on the held-out (test) dates.
portfolio_1 = Portfolio()
portfolio_1.run(
    strategy_method=StrategyNFixedBaskets,
    source_data=X,
    dates=test_dates,
)



Portfolio well defined.
Activated.


In [127]:
# Per-date backtest table filled in by Portfolio.activate(): the weights held,
# the number of positions, the portfolio return, and the `std` / `Sharpe` columns.
portfolio_1.data

Unnamed: 0,date,positions,n_positions,return,std,Sharpe
0,2019-09-30 00:00:00,"{'000001.XSHE': 0.2, '000007.XSHE': 0.2, '0000...",5,-0.017063,,
1,2019-10-31 00:00:00,"{'000001.XSHE': 0.2, '000007.XSHE': 0.2, '0000...",5,-0.004602,,
2,2019-11-29 00:00:00,"{'000001.XSHE': 0.2, '000007.XSHE': 0.2, '0000...",5,0.036659,,
3,2019-12-31 00:00:00,"{'000001.XSHE': 0.2, '000007.XSHE': 0.2, '0000...",5,-0.056746,,
4,2020-01-23 00:00:00,"{'000001.XSHE': 0.2, '000007.XSHE': 0.2, '0000...",5,-0.071952,,
5,2020-02-28 00:00:00,"{'000001.XSHE': 0.2, '000007.XSHE': 0.2, '0000...",5,-0.066987,,
6,2020-03-31 00:00:00,"{'000001.XSHE': 0.14285714285714285, '000002.X...",7,0.021614,,
7,2020-04-30 00:00:00,"{'000001.XSHE': 0.07692307692307693, '000002.X...",13,-0.010763,,
8,2020-05-29 00:00:00,"{'000001.XSHE': 0.08333333333333333, '000002.X...",12,0.108469,,
9,2020-06-30 00:00:00,"{'000001.XSHE': 0.08333333333333333, '000002.X...",12,0.120542,,


In [128]:
# `px` (plotly.express) is imported in the notebook's import cell rather than here,
# so that all dependencies are declared in one place.
# The `Sharpe` column is the period return divided by its 12-period rolling
# standard deviation, so the first 11 points are NaN and are not drawn.
px.line(
    portfolio_1.data,
    x='date',
    y=['Sharpe'],
    title='Portfolio Sharpe (period return / 12-period rolling std)',
)

In [17]:
# NOTE(review): `parser` is not defined in any cell visible above — presumably an
# earlier name for an object exposing `valid_stocks` (e.g. a Portfolio instance).
# Confirm and rename, or remove this cell, so the notebook survives Restart & Run All.
parser.valid_stocks[pd.Timestamp('2006-12-29 00:00:00')]

['000001.XSHE',
 '000004.XSHE',
 '000005.XSHE',
 '000007.XSHE',
 '000008.XSHE',
 '000009.XSHE',
 '000010.XSHE',
 '000011.XSHE',
 '000012.XSHE',
 '000016.XSHE',
 '000017.XSHE',
 '000019.XSHE',
 '000020.XSHE']

In [94]:
# Display the training dates. `train_dates` is a slice of the already-sorted
# `dates` list, so sorted() does not change the order here.
sorted(train_dates)

[Timestamp('2006-12-29 00:00:00'),
 Timestamp('2007-01-31 00:00:00'),
 Timestamp('2007-02-28 00:00:00'),
 Timestamp('2007-03-30 00:00:00'),
 Timestamp('2008-03-31 00:00:00'),
 Timestamp('2008-04-30 00:00:00'),
 Timestamp('2008-07-31 00:00:00'),
 Timestamp('2008-10-31 00:00:00'),
 Timestamp('2008-11-28 00:00:00'),
 Timestamp('2008-12-31 00:00:00'),
 Timestamp('2009-01-23 00:00:00'),
 Timestamp('2009-02-27 00:00:00'),
 Timestamp('2010-03-31 00:00:00'),
 Timestamp('2010-05-31 00:00:00'),
 Timestamp('2010-06-30 00:00:00'),
 Timestamp('2010-08-31 00:00:00'),
 Timestamp('2010-09-30 00:00:00'),
 Timestamp('2010-10-29 00:00:00'),
 Timestamp('2010-11-30 00:00:00'),
 Timestamp('2010-12-31 00:00:00'),
 Timestamp('2011-02-28 00:00:00'),
 Timestamp('2011-04-29 00:00:00'),
 Timestamp('2011-05-31 00:00:00'),
 Timestamp('2011-06-30 00:00:00'),
 Timestamp('2011-07-29 00:00:00'),
 Timestamp('2011-08-31 00:00:00'),
 Timestamp('2011-09-30 00:00:00'),
 Timestamp('2011-10-31 00:00:00'),
 Timestamp('2011-11-

In [34]:
# Spot-check: fetch the `exret` value(s) of one stock on one date through an
# (order_book_id, date) group key — the same lookup Portfolio.activate() performs.
X.groupby(['order_book_id', 'date'])['exret'].get_group(('000001.XSHE', pd.Timestamp('2006-12-29 00:00:00')))

0    0.320314
Name: exret, dtype: float64

## Introduction and discussion of models

Following Connor and Linton (2007), we assume that returns are generated by a characteristic-based factor model:

\begin{equation}
    r_{it} = f_{ut} + \sum_{j=1}^J g_j(C_{ij}) f_{jt} + \epsilon_{it}
\end{equation}

where

- $r_{it}$ is the return to security $i$ at time $t$,
- $f_{ut}$ and $f_{jt}$ are the factor returns,
- $g_j(C_{ij})$ are the factor betas,
- $C_{ij}$ is the $j$-th characteristic of security $i$.
  