## Imports

In [1]:
import os
import random

import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam

ModuleNotFoundError: No module named 'tensorflow'

In [2]:
def seed_everything(seed: int) -> None:
    """Seed the random number generators used by this notebook.

    Seeds Python's built-in ``random`` module and NumPy's legacy global RNG,
    and exports the seed as the ``PYTHONHASHSEED`` environment variable.

    Args:
        seed: Integer seed applied to every generator.

    Note:
        Relies on ``os`` and ``random`` being imported in the notebook's
        import cell; without them this function raises ``NameError``.
        NOTE(review): Python reads ``PYTHONHASHSEED`` at interpreter start-up,
        so setting it here presumably only affects processes launched
        afterwards, not the hashing of the running kernel - confirm.
    """
    random.seed(seed)
    # Stored as a string because environment variables must be text.
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)

seed_everything(42) # Fix the seed

## Data Load

In [22]:
# Load the price table and give it English column names.
# parse_dates=True asks pandas to parse the index column ("일자") as dates.
train = (
    pd.read_csv("./train.csv", parse_dates=True, index_col="일자")
    # Bring the date index back as the first column so all headers can be
    # replaced positionally in one go.
    .reset_index()
    .set_axis(['date', 'ticker', 'firm', 'volume', 'open', 'high', 'low', 'close'], axis=1)
    # Re-index by the renamed date column.
    .set_index('date')
)
train

Unnamed: 0_level_0,ticker,firm,volume,open,high,low,close
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2021-06-01,A060310,3S,166690,2890,2970,2885,2920
2021-06-01,A095570,AJ네트웍스,63836,5860,5940,5750,5780
2021-06-01,A006840,AK홀딩스,103691,35500,35600,34150,34400
2021-06-01,A054620,APS,462544,14600,14950,13800,14950
2021-06-01,A265520,AP시스템,131987,29150,29150,28800,29050
...,...,...,...,...,...,...,...
2023-05-30,A189980,흥국에프엔비,272284,3005,3035,2955,2980
2023-05-30,A000540,흥국화재,50218,3250,3255,3195,3215
2023-05-30,A003280,흥아해운,130664,1344,1395,1340,1370
2023-05-30,A037440,희림,141932,9170,9260,9170,9200


In [43]:
# Reshape the long price table into a wide matrix of closing prices:
# one row per date, one column per ticker.
# `train` is indexed by date, so the index is turned back into a column first
# so that pivot_table can find 'date' by name.
# Note: pivot_table aggregates with the mean by default, so duplicate
# (date, ticker) rows would be averaged rather than reported.
pivot_df = pd.pivot_table(train.reset_index(), values='close', index='date', columns='ticker')

# Make sure the ticker column labels are plain strings
pivot_df.columns = pivot_df.columns.astype(str)

# Report the matrix size as (number of dates, number of tickers)
print(pivot_df.shape)

(494, 2000)


## Model Definition, Training, and Inference

In [47]:
from pypfopt.efficient_frontier import EfficientFrontier
from pypfopt import risk_models
from pypfopt import expected_returns

# Closing-price matrix (dates x tickers) that feeds the optimiser
prices = pivot_df

# Optimiser inputs: expected returns estimated from the price history and a
# Ledoit-Wolf shrunk covariance matrix
mu = expected_returns.mean_historical_return(prices)
shrinkage = risk_models.CovarianceShrinkage(prices)
S = shrinkage.ledoit_wolf()

# Maximum-Sharpe portfolio; each weight is bounded to [-1, 1], so negative
# (short) weights are permitted
ef = EfficientFrontier(mu, S, weight_bounds=(-1, 1))
raw_weights = ef.max_sharpe()

# clean_weights() must run after the optimisation above has been solved
cleaned_weights = ef.clean_weights()

In [49]:
# Order the (ticker, weight) pairs from the largest weight to the smallest
_weight_pairs = sorted(cleaned_weights.items(), key=lambda pair: pair[1], reverse=True)
sorted_weights = dict(_weight_pairs)

# The first 200 entries (largest weights) form the long side
long_stocks = dict(_weight_pairs[:200])

# The last 200 entries (smallest weights) form the short side
short_stocks = dict(_weight_pairs[-200:])

print("Long these stocks: ", long_stocks)
print("Short these stocks: ", short_stocks)

Long these stocks:  {'A101670': 1.0, 'A086520': 0.69296, 'A073570': 0.65318, 'A001570': 0.60918, 'A114190': 0.56709, 'A002710': 0.49498, 'A005070': 0.4774, 'A006110': 0.46005, 'A035290': 0.44577, 'A010580': 0.41329, 'A002900': 0.40145, 'A009730': 0.40007, 'A277810': 0.38058, 'A108860': 0.37246, 'A003280': 0.36514, 'A042600': 0.34355, 'A095500': 0.34082, 'A222810': 0.34009, 'A234920': 0.33763, 'A007660': 0.33492, 'A121600': 0.33136, 'A029480': 0.32062, 'A281740': 0.31987, 'A005420': 0.30351, 'A317770': 0.30293, 'A336570': 0.29897, 'A085670': 0.2989, 'A178780': 0.29445, 'A106080': 0.28515, 'A182360': 0.27726, 'A003620': 0.27539, 'A096610': 0.27227, 'A005950': 0.24823, 'A006740': 0.2477, 'A007120': 0.24698, 'A035900': 0.24613, 'A064550': 0.24085, 'A001080': 0.23879, 'A214370': 0.23738, 'A011300': 0.22908, 'A173130': 0.22719, 'A003610': 0.22536, 'A093640': 0.22397, 'A035080': 0.22282, 'A145720': 0.21821, 'A058610': 0.21388, 'A335890': 0.21241, 'A093520': 0.20933, 'A041510': 0.20761, 'A0520

## Submit

In [54]:
# Rank 1 goes to the first key of sorted_weights (the largest weight),
# rank 2 to the next, and so on
ranked_weights = dict(zip(sorted_weights, range(1, len(sorted_weights) + 1)))

# Two-column table: ticker code ('종목코드') and rank ('순위')
ranking_rows = list(ranked_weights.items())
df_ranked = pd.DataFrame(ranking_rows, columns=['종목코드', '순위'])

# Persist the ranking without the row index, then display it
df_ranked.to_csv('ranked_weights.csv', index=False)
df_ranked

Unnamed: 0,종목코드,순위
0,A101670,1
1,A086520,2
2,A073570,3
3,A001570,4
4,A114190,5
...,...,...
1995,A016590,1996
1996,A183490,1997
1997,A005800,1998
1998,A181340,1999


In [55]:
# Load the submission template; the displayed output shows the columns
# '종목코드' (ticker code) and '순위' (rank).
sample_submission = pd.read_csv('./sample_submission.csv')
sample_submission

Unnamed: 0,종목코드,순위
0,A000020,1
1,A000040,2
2,A000050,3
3,A000070,4
4,A000080,5
...,...,...
1995,A375500,1996
1996,A378850,1997
1997,A383220,1998
1998,A383310,1999


In [56]:
# Attach each ticker's rank to the submission template. The left join keeps
# every template row, in the template's order.
baseline_submission = sample_submission[['종목코드']].merge(
    df_ranked[['종목코드', '순위']], on='종목코드', how='left'
)

# A left join leaves NaN (and turns the rank column into floats) for any
# template ticker that has no rank; fail loudly here instead of writing an
# incomplete submission file.
missing_ranks = baseline_submission['순위'].isna()
assert not missing_ranks.any(), (
    f"{int(missing_ranks.sum())} ticker(s) in sample_submission have no rank in df_ranked"
)
baseline_submission

Unnamed: 0,종목코드,순위
0,A000020,922
1,A000040,1314
2,A000050,1005
3,A000070,1278
4,A000080,1487
...,...,...
1995,A375500,1917
1996,A378850,931
1997,A383220,1952
1998,A383310,1940


In [57]:
# Write the final submission file, leaving out the DataFrame's row index
baseline_submission.to_csv('baseline_submission.csv', index=False)