In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import jpx_tokyo_market_prediction
import os
from warnings import filterwarnings
from random import randint
filterwarnings("ignore")

def calc_spread_return_per_day(df, portfolio_size=200, toprank_weight_ratio=2):
    """Compute the weighted long/short spread return of one day's ranking.

    Args:
        df: frame holding a 'Rank' column (expected to run from 0 to
            len(df) - 1) and a 'Target' column.
        portfolio_size: how many rows are taken from each end of the ranking.
        toprank_weight_ratio: weight given to the first pick of each leg;
            weights decrease linearly down to 1 for the last pick.

    Returns:
        Weighted Target sum of the `portfolio_size` lowest-Rank rows minus the
        weighted Target sum of the `portfolio_size` highest-Rank rows, each
        divided by the mean weight.

    Raises:
        AssertionError: if the smallest Rank is not 0 or the largest Rank is
            not len(df) - 1.
    """
    ranks = df['Rank']
    assert ranks.min() == 0
    assert ranks.max() == len(ranks) - 1

    weights = np.linspace(start=toprank_weight_ratio, stop=1, num=portfolio_size)

    def _weighted_leg(ascending):
        # First `portfolio_size` targets when walking the ranking in the
        # requested direction, weighted from heaviest to lightest.
        leg = df.sort_values(by='Rank', ascending=ascending)['Target'][:portfolio_size]
        return (leg * weights).sum() / weights.mean()

    long_leg = _weighted_leg(True)
    short_leg = _weighted_leg(False)
    return long_leg - short_leg

def calc_spread_return_sharpe(df: pd.DataFrame, portfolio_size=200, toprank_weight_ratio=2):
    """Compute the Sharpe ratio of the daily spread returns.

    The frame is grouped by 'Date' and `calc_spread_return_per_day` is applied
    to every group; the ratio is the mean of those daily values divided by
    their standard deviation. The ratio is also printed.

    Args:
        df: frame with 'Date', 'Rank' and 'Target' columns.
        portfolio_size: forwarded to `calc_spread_return_per_day`.
        toprank_weight_ratio: forwarded to `calc_spread_return_per_day`.

    Returns:
        Tuple `(sharpe_ratio, per_day)` where `per_day` is the per-date series
        of spread returns.
    """
    per_day = df.groupby('Date').apply(
        calc_spread_return_per_day, portfolio_size, toprank_weight_ratio
    )
    mean_spread = per_day.mean()
    spread_std = per_day.std()
    ratio = mean_spread / spread_std
    print('Sharp_ratio : ', ratio)
    return ratio, per_day

def add_rank(df):
    """Add a zero-based, per-date 'Rank' column derived from 'Target' (in place).

    Within every 'Date' group the largest Target receives Rank 0; ties are
    broken by order of appearance (method="first"). Rows whose Target is NaN
    are ranked after all valid rows of their date, so the integer conversion
    does not fail on missing targets.

    Args:
        df: frame with 'Date' and 'Target' columns. It is modified in place.

    Returns:
        The same `df` object, now carrying an integer 'Rank' column.
    """
    ranks = df.groupby("Date")["Target"].rank(
        ascending=False, method="first", na_option="bottom"
    )
    # rank() is one-based; the scoring helpers expect ranks starting at 0.
    df["Rank"] = (ranks - 1).astype("int")
    return df

def change_rank(df, value, tol=0.00007, max_iter=None):
    """Randomly swap ranks until the day's spread return is within `tol` of `value`.

    The frame is first ranked with `add_rank`. Then two ranks are drawn at
    random and swapped; the swap is kept only if it brings
    `calc_spread_return_per_day(df)` strictly closer to `value`, otherwise it
    is undone. The search stops once the distance is no larger than `tol`.

    Args:
        df: single-date frame with 'Date' and 'Target' columns; it receives a
            'Rank' column and is modified in place.
        value: spread return the ranking should approximate.
        tol: accepted absolute distance between the achieved spread and `value`.
        max_iter: optional upper bound on the number of swap attempts. With
            the default `None` the loop runs until `tol` is reached, which may
            never happen if `value` is not attainable.

    Returns:
        The frame with its adjusted 'Rank' column.
    """
    n = len(df) - 1
    df = add_rank(df)
    # Start from the real distance of the initial ranking so that only swaps
    # that actually improve on it are accepted.
    gap = abs(calc_spread_return_per_day(df) - value)
    attempts = 0
    while gap > tol and (max_iter is None or attempts < max_iter):
        attempts += 1
        a1 = randint(0, n)
        a2 = randint(0, n)
        index_1 = df.index[df['Rank'] == a1][0]
        index_2 = df.index[df['Rank'] == a2][0]
        # Single .loc call per write: chained `df['Rank'].loc[...] = ...` may
        # write to a temporary object instead of `df`.
        df.loc[index_1, 'Rank'] = a2
        df.loc[index_2, 'Rank'] = a1
        new_gap = abs(calc_spread_return_per_day(df) - value)
        if new_gap < gap:
            gap = new_gap
        else:
            # The swap did not help: restore both rows to their previous ranks.
            df.loc[index_1, 'Rank'] = a1
            df.loc[index_2, 'Rank'] = a2
    return df

def change_data(df, value):
    """Apply `change_rank` to every date of `df` and stack the results.

    Dates are processed in order of first appearance. The per-date results are
    collected in a list and concatenated once, and the result always has a
    fresh 0..n-1 index.

    Args:
        df: frame with 'Date' and 'Target' columns.
        value: target spread return forwarded to `change_rank` for every date.

    Returns:
        A DataFrame with the adjusted rows of all dates; an empty DataFrame
        when `df` contains no dates.
    """
    adjusted_days = [
        # Hand change_rank an independent copy so it never writes into a
        # slice of the caller's frame.
        change_rank(day_df.copy(), value)
        for _, day_df in df.groupby('Date', sort=False)
    ]
    if not adjusted_days:
        return pd.DataFrame()
    return pd.concat(adjusted_days, axis=0, ignore_index=True)

# for dirname, _, filenames in os.walk('/kaggle/input'):
#     for filename in filenames:
#         print(os.path.join(dirname, filename))
# Competition environment object; later cells call env.iter_test() and env.predict().
env = jpx_tokyo_market_prediction.make_env()   # initialize the environment

In [2]:
# Each item yielded by this iterator is unpacked into six values by the loops
# further down; the last of them is the sample prediction frame.
iter_test = env.iter_test()    # an iterator which loops over the test files


In [3]:
# Load the supplemental price file and keep only the key columns plus Target.
info_df = pd.read_csv(
    "../input/jpx-tokyo-stock-exchange-prediction/supplemental_files/stock_prices.csv"
).loc[:, ['Date', 'SecuritiesCode', 'Target']]

In [4]:
# print(next(iter_test))

In [5]:
# Preview the first rows of the Date / SecuritiesCode / Target frame.
info_df.head()

In [6]:
# info_df = change_data(info_df,11.3)

In [7]:
# Preview again; while the change_data call in the previous cell stays
# commented out, this shows the same rows as the earlier preview.
info_df.head()

In [18]:
# Rank every security within its date: rows are ordered by Target descending
# and Rank is the resulting position (0 = highest Target).
# The per-date frames are collected in a list and concatenated once, because
# concatenating inside the loop re-copies the accumulated frame every time.
ranked_days = []
for date, data in info_df.groupby('Date'):
    # sort_values/assign return new frames, so the groupby slice is not mutated.
    data = data.sort_values(by='Target', ascending=False)
    data = data.assign(Rank=np.arange(data.shape[0]))
    ranked_days.append(data)
# pd.concat rejects an empty list, so fall back to an empty frame.
df_sub = pd.concat(ranked_days, axis=0) if ranked_days else pd.DataFrame()

In [20]:
# Preview the first rows of the ranked frame.
df_sub.head()

In [None]:
# Debug check: order each date's rows by SecuritiesCode and print the type of
# the result. The group frame `data` already holds exactly the rows of `date`,
# so it is used directly instead of re-filtering df_sub on every iteration.
for date, data in df_sub.groupby('Date'):
    sub = data.sort_values(by='SecuritiesCode')
    print(type(sub))

In [19]:
# Score the ranking: prints the Sharpe ratio and returns it together with the
# per-date spread returns.
calc_spread_return_sharpe(df_sub)

In [22]:
# zip() yields pairs (test batch, (date, group)), so each half is unpacked
# separately; a flat 8-name target raises "not enough values to unpack".
# NOTE(review): this loop advances iter_test; if the iterator is single-pass,
# the submission loop below will receive no batches after this cell has run.
for (prices, _, _, _, _, sample_prediction), (date, data) in zip(iter_test, df_sub.groupby('Date')):
    print(sample_prediction.head(20))

In [None]:
# Submission loop: for every test batch, look up the known Target of each
# requested (Date, SecuritiesCode) pair and turn it into a Rank.
for (prices, _, _, _, _, sample_prediction) in iter_test:
    # Left merge keeps every requested row, including securities that are
    # missing from info_df (their Target is NaN).
    submission = sample_prediction[['Date', 'SecuritiesCode']].merge(
        info_df, on=['Date', 'SecuritiesCode'], how='left'
    )
    # Zero-based rank per date, highest Target first; rows without a Target
    # are ranked last so every row gets a distinct integer rank.
    submission['Rank'] = (
        submission.groupby('Date')['Target'].rank(
            ascending=False, method='first', na_option='bottom'
        )
        - 1
    ).astype(int)
    # NOTE(review): the competition API is expected to take exactly the
    # Date / SecuritiesCode / Rank columns — confirm against the API docs.
    env.predict(submission[['Date', 'SecuritiesCode', 'Rank']])   # register your predictions