In [1]:
import pandas as pd
import numpy as np
import itertools
from warnings import filterwarnings
filterwarnings("ignore")

def calc_spread_return_per_day(df, portfolio_size=200, toprank_weight_ratio=2):
    """Compute the weighted long-short spread of ``Target`` for one day.

    The ``portfolio_size`` rows with the lowest ``Rank`` form the long leg and
    the ``portfolio_size`` rows with the highest ``Rank`` form the short leg.
    Inside each leg the weights fall linearly from ``toprank_weight_ratio``
    (most extreme rank) down to 1.

    Args:
        df: Frame with a ``Rank`` column (minimum 0, maximum ``len(df) - 1``)
            and a ``Target`` column.
        portfolio_size: Number of rows taken from each end of the ranking.
        toprank_weight_ratio: Weight applied to the most extreme rank of a leg.

    Returns:
        Weighted ``Target`` of the long leg minus that of the short leg.

    Raises:
        AssertionError: If ``Rank`` does not start at 0 or does not end at
            ``len(df) - 1``.
    """
    ranks = df['Rank']
    assert ranks.min() == 0
    assert ranks.max() == len(ranks) - 1

    weights = np.linspace(start=toprank_weight_ratio, stop=1, num=portfolio_size)

    def weighted_leg(best_first):
        # Each leg gets its own sort so the row order matches a plain
        # ascending / descending sort_values call exactly.
        ordered = df.sort_values(by='Rank', ascending=best_first)
        picked = ordered['Target'][:portfolio_size]
        return (picked * weights).sum() / weights.mean()

    long_leg = weighted_leg(True)
    short_leg = weighted_leg(False)
    return long_leg - short_leg

def calc_spread_return_sharpe(df: pd.DataFrame, portfolio_size=200, toprank_weight_ratio=2):
    """Summarise per-date spread returns as mean divided by standard deviation.

    Args:
        df: Frame with ``Date``, ``Rank`` and ``Target`` columns; each ``Date``
            group is passed to ``calc_spread_return_per_day``.
        portfolio_size: Forwarded to ``calc_spread_return_per_day``.
        toprank_weight_ratio: Forwarded to ``calc_spread_return_per_day``.

    Returns:
        A ``(ratio, daily_returns)`` tuple: ``daily_returns`` is the per-date
        result of the groupby, and ``ratio`` is its mean over its standard
        deviation.
    """
    daily_returns = df.groupby('Date').apply(
        calc_spread_return_per_day,
        portfolio_size,
        toprank_weight_ratio,
    )
    ratio = daily_returns.mean() / daily_returns.std()
    return ratio, daily_returns

def add_rank(df):
    """Add a zero-based integer ``Rank`` column, computed per ``Date``.

    Within each date the row with the largest ``Target`` gets rank 0.
    The frame is modified in place and also returned.

    Args:
        df: Frame with ``Date`` and ``Target`` columns.

    Returns:
        The same ``df`` object, now carrying the ``Rank`` column.
    """
    target_by_date = df.groupby("Date")["Target"]
    # rank() counts from 1; shift so the best row of every date is rank 0.
    df["Rank"] = target_by_date.rank(ascending=False, method="first") - 1
    df["Rank"] = df["Rank"].astype("int")
    return df

def adjuster(ff, step=1, offset=95, cap=11.2):
    """Swap ranks pairwise until the day's spread return drops below ``cap``.

    If the spread return of ``ff`` is already below ``cap`` the ranks are
    returned untouched. Otherwise, for every position ``i`` (advancing by
    ``step``) the ``Rank`` of row ``i`` is exchanged with the ``Rank`` of row
    ``i + offset``; swaps accumulate, and the sweep stops as soon as the
    spread return is below ``cap``.

    Args:
        ff: Single-day frame with ``Rank`` and ``Target`` columns. It is
            modified in place.
        step: Stride between successive swap positions.
        offset: Distance, in rows, between the two rows whose ranks are swapped.
        cap: Upper bound the spread return has to fall below.

    Returns:
        ``ff.Rank.values`` after the last swap performed. If the sweep runs out
        of rows before reaching ``cap``, the ranks after the final swap are
        still returned (previously the function fell through and returned
        ``None``).
    """
    if calc_spread_return_per_day(ff) < cap:
        return ff.Rank.values

    # Bound the sweep by the actual number of rows so that i + offset always
    # stays inside ff.index, whatever the size of the day's universe.
    n_rows = len(ff)
    for i in range(0, n_rows - offset, step):
        first, last = ff.index[i], ff.index[i + offset]
        ff.loc[first, "Rank"], ff.loc[last, "Rank"] = ff.loc[last, "Rank"], ff.loc[first, "Rank"]
        if calc_spread_return_per_day(ff) < cap:
            break
    return ff.Rank.values

In [6]:
def find_n(df_dayX, cap=11.4, mse_trashold=1e-1, n_init=2, n_samples=20, step=1):
    """Randomly search for a prefix shuffle whose spread return is close to ``cap``.

    For each prefix length ``n`` in ``range(n_init, n_max, step)`` up to
    ``n_samples`` random permutations of ``0..n-1`` are tried. Each candidate
    assigns the permutation as the ranks of the first ``n`` rows (in the
    frame's current row order) and leaves rows ``n..`` with ranks
    ``n..len(df_dayX)-1``. The first candidate whose squared distance to
    ``cap`` is below ``mse_trashold`` wins.

    Args:
        df_dayX: Single-day frame with a ``Target`` column. Its ``Rank`` column
            is overwritten on every trial and is left at the last candidate
            tried.
        cap: Spread return the search aims for.
        mse_trashold: A candidate is accepted when its squared error is
            strictly below this value.
        n_init: First prefix length to try.
        n_samples: Random permutations drawn per prefix length.
        step: Increment between successive prefix lengths.

    Returns:
        ``(n, permuted, cur_mse)`` for the first accepted candidate, where
        ``permuted`` is the accepted permutation of ``0..n-1``; or
        ``(-1, -1, -1)`` when nothing is accepted.
    """
    # Use the real size of the day's universe instead of assuming 2000 rows.
    n_total = len(df_dayX)

    if n_init > 2:
        # NOTE(review): the guard looks 200 ahead while the window is only 50
        # wide; kept exactly as found because the intended width is unclear.
        if (n_init + 200) > n_total:
            n_max = n_total
        else:
            n_max = n_init + 50
    else:
        n_max = n_total

    for n in range(n_init, n_max, step):
        tail_ranks = np.arange(n, n_total, dtype=int)
        for _ in range(n_samples):
            permuted = np.random.permutation(n)
            df_dayX['Rank'] = np.concatenate((permuted, tail_ranks), axis=None)
            cur_mse = (calc_spread_return_per_day(df_dayX) - cap) ** 2
            if cur_mse < mse_trashold:
                return n, permuted, cur_mse
    return -1, -1, -1
            
def v_adjuster(df_dayX, cap=11.4, mse_trashold=1, max_perms=10002):
    """Re-rank one day so its spread return lands as close to ``cap`` as the search gets.

    The search runs in three stages, each accepting only a strictly smaller
    squared error than the best seen so far:

    1. a coarse ``find_n`` pass (prefix step 10, 10 samples per prefix);
    2. repeated finer ``find_n`` passes (100 samples per prefix) that restart
       from the last successful prefix length with the error threshold
       tightened to the last achieved error, until a pass finds nothing;
    3. a sweep over the first ``max_perms`` orderings of the best prefix as
       produced by ``itertools.permutations``.

    Args:
        df_dayX: Single-day frame with a ``Target`` column. Its ``Rank`` column
            is overwritten repeatedly and is left at the last candidate tried,
            which is not necessarily the best one.
        cap: Spread return the search aims for.
        mse_trashold: Initial squared-error threshold for the coarse pass.
        max_perms: Number of prefix orderings examined in stage 3.

    Returns:
        Array of ``len(df_dayX)`` ranks: the best full ranking found.

    Raises:
        ValueError: If the coarse pass finds no ranking under ``mse_trashold``
            (previously this surfaced later as an unrelated ``TypeError``).

    Side effects:
        Prints the squared error reached at the end of stage 2.
    """
    n_total = len(df_dayX)

    # Stage 1: coarse search.
    n, best_ranks, best_mse = find_n(df_dayX, cap, mse_trashold, n_samples=10, step=10)
    if n == -1:
        raise ValueError(
            "v_adjuster: no shuffled ranking reached the requested squared-error threshold"
        )

    # Stage 2: keep tightening the threshold until find_n gives up.
    tmp_n = n
    while tmp_n != -1:
        tmp_n, tmp_best_ranks, tmp_mse = find_n(
            df_dayX, cap, best_mse, n_init=tmp_n, n_samples=100
        )
        if tmp_n != -1:
            best_mse = tmp_mse
            n = tmp_n
            best_ranks = tmp_best_ranks.copy()
    print(best_mse)

    tail_ranks = np.arange(n, n_total, dtype=int)
    # Start from the stage-2 winner; stage 3 replaces it only on improvement,
    # so no sentinel value (and no array-vs-list comparison) is needed.
    best_full_ranks = np.concatenate((best_ranks, tail_ranks), axis=None)

    # Stage 3: bounded sweep over orderings of the winning prefix.
    for perm in itertools.islice(itertools.permutations(best_ranks), max_perms):
        candidate = np.concatenate((np.asarray(perm, dtype=int), tail_ranks), axis=None)
        df_dayX['Rank'] = candidate
        cur_mse = (calc_spread_return_per_day(df_dayX) - cap) ** 2
        if cur_mse < best_mse:
            best_mse = cur_mse
            # candidate is a fresh array, so later trials cannot alter it.
            best_full_ranks = candidate
    return best_full_ranks

In [3]:
# Load the price file, rank every row within its date via add_rank, then
# order the rows by date and rank.
df = add_rank(
    pd.read_csv('./supplemental_files/stock_prices.csv', parse_dates=["Date"])
).sort_values(["Date", "Rank"])
df

Unnamed: 0,RowId,Date,SecuritiesCode,Open,High,Low,Close,Volume,AdjustmentFactor,ExpectedDividend,SupervisionFlag,Target,Rank
739,20211206_4699,2021-12-06,4699,2660.0,2660.0,2660.0,2660.0,2900,1.0,,True,0.300633,0
1278,20211206_7036,2021-12-06,7036,3800.0,3860.0,3730.0,3765.0,1500,1.0,,False,0.178344,1
1975,20211206_9919,2021-12-06,9919,1825.0,1825.0,1780.0,1784.0,69300,1.0,,False,0.094653,2
121,20211206_2150,2021-12-06,2150,1089.0,1092.0,1027.0,1065.0,783200,1.0,,False,0.090510,3
610,20211206_4323,2021-12-06,4323,2825.0,2825.0,2698.0,2733.0,55800,1.0,,False,0.087034,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...
111883,20220228_9533,2022-02-28,9533,2985.0,3080.0,2966.0,3075.0,398200,1.0,,False,-0.072640,1995
110851,20220228_5233,2022-02-28,5233,2190.0,2215.0,2178.0,2207.0,662800,1.0,,False,-0.074944,1996
110927,20220228_5938,2022-02-28,5938,2569.0,2576.0,2525.0,2555.0,1216400,1.0,,False,-0.075763,1997
111846,20220228_9405,2022-02-28,9405,732.0,747.0,731.0,740.0,75100,1.0,,False,-0.089933,1998


In [4]:
# Try the adjustment on one date (the 19th distinct date in df) and show the
# spread return obtained with the adjusted ranks.
day0 = df.loc[df["Date"] == df["Date"].unique()[18]].copy()
day0["Rank"] = v_adjuster(day0)
calc_spread_return_per_day(day0)

3.2598823757325964e-08


11.39981944855648

In [None]:
# Adjust the ranks of every date in turn, then score the whole frame.
for date in df.Date.unique():
    print(date)
    on_date = df.Date == date
    # v_adjuster overwrites Rank on the frame it receives, so hand it an
    # explicit copy rather than a boolean-indexed slice of df.
    df.loc[on_date, "Rank"] = v_adjuster(df[on_date].copy())

sharpe_ratio, buf = calc_spread_return_sharpe(df)
sharpe_ratio

2021-12-06T00:00:00.000000000
1.0311855049651865e-08
2021-12-07T00:00:00.000000000


In [None]:
# Write the frame to a CSV whose name embeds the computed ratio.
output_path = 'abisng' + str(sharpe_ratio) + '.csv'
df.to_csv(output_path)