In [1]:
import pandas as pd
import numpy as np

# Load File

In [2]:
raw = pd.read_csv("data/price_2024.csv", header=None)

# Data Preprocessing

In [3]:
# Extract rows for symbols and data
symbol_row = raw.iloc[7].tolist()  # Symbol codes
data = raw.iloc[13:].reset_index(drop=True)

dates = pd.to_datetime(data.iloc[:,0], errors="coerce")
symbols = symbol_row[1:]

prices = data.iloc[:,1:].copy()
prices.index = dates
prices.columns = symbols

In [4]:
# Clean numeric strings
prices = prices.replace({",":""}, regex=True)
prices = prices.apply(pd.to_numeric, errors="coerce")

In [5]:
# Reset index and melt to long-form
price_wide_reset = prices.reset_index().rename(columns={"index":"Date"})
price_wide_reset = price_wide_reset.rename(columns={price_wide_reset.columns[0]:"Date"})
price_long = price_wide_reset.melt(id_vars="Date", var_name="Symbol", value_name="Price")
price_long = price_long.dropna().sort_values(["Symbol","Date"])

In [6]:
# Compute monthly returns
price_long["Return"] = price_long.groupby("Symbol")["Price"].pct_change()

In [7]:
price_long.head(20)

Unnamed: 0,Date,Symbol,Price,Return
13078,2024-01-31,A000020,9650.0,
13079,2024-02-29,A000020,9720.0,0.007254
13080,2024-03-29,A000020,9400.0,-0.032922
13081,2024-04-30,A000020,8720.0,-0.07234
13082,2024-05-31,A000020,8280.0,-0.050459
13083,2024-06-28,A000020,8060.0,-0.02657
13084,2024-07-31,A000020,8340.0,0.034739
13085,2024-08-30,A000020,8070.0,-0.032374
13086,2024-09-30,A000020,7760.0,-0.038414
13087,2024-10-31,A000020,7520.0,-0.030928


# Test

In [8]:
# Reconstruct weights
months = pd.date_range("2024-01-31", "2024-12-31", freq="M")

def make_weight_df(symbols):
    return pd.DataFrame(1.0/len(symbols), index=months, columns=symbols)

In [9]:
# Recommended top20/bottom20 lists
top20 = pd.read_csv("outputs/recommend_top20.csv")
bot20 = pd.read_csv("outputs/recommend_bottom20.csv")
top20_symbols = top20["Symbol"].astype(str).tolist()
bot20_symbols = bot20["Symbol"].astype(str).tolist()
top10_symbols = top20_symbols[:10]

weights_long10 = make_weight_df(top10_symbols)
weights_long20 = make_weight_df(top20_symbols)
weights_short20 = make_weight_df(bot20_symbols) * (-1.0)
weights_ls = pd.concat([weights_long20, weights_short20], axis=1).fillna(0.0)

In [10]:
# Portfolio return calculation function
def portfolio_returns(weights: pd.DataFrame, returns_longform: pd.DataFrame):
    ret_wide = returns_longform.pivot_table(index="Date", columns="Symbol", values="Return", aggfunc="mean").sort_index()
    aligned = []
    for dt in weights.index:
        if dt not in ret_wide.index:
            aligned.append(np.nan)
            continue
        r_row = ret_wide.loc[dt]
        w_row = weights.loc[dt].reindex(r_row.index).fillna(0.0)
        mask = r_row.notna() & (w_row != 0)
        if mask.sum() == 0:
            aligned.append(np.nan)
            continue
        w_active = w_row[mask]
        r_active = r_row[mask]
        long_mask = w_active > 0
        short_mask = w_active < 0
        w_norm = pd.Series(0.0, index=w_active.index)
        if long_mask.any():
            w_norm[long_mask] = w_active[long_mask] / w_active[long_mask].sum()
        if short_mask.any():
            w_norm[short_mask] = w_active[short_mask] / abs(w_active[short_mask]).sum()
        port_ret = (w_norm * r_active).sum()
        aligned.append(port_ret)
    return pd.Series(aligned, index=weights.index)

In [11]:
# Compute returns
ret_long10 = portfolio_returns(weights_long10, price_long)
ret_ls = portfolio_returns(weights_ls, price_long)

In [12]:
# Compute performance stats
def compute_perf_stats(monthly_returns: pd.Series):
    r = monthly_returns.dropna()
    if r.empty:
        return None
    cum = (1 + r).cumprod()
    ann_return = cum.iloc[-1] - 1.0
    ann_vol = r.std(ddof=0) * np.sqrt(12)
    sharpe = ann_return / ann_vol if ann_vol != 0 else np.nan
    running_max = cum.cummax()
    max_dd = (cum / running_max - 1).min()
    hit_rate = (r > 0).mean()
    return dict(ann_return=ann_return, ann_vol=ann_vol, sharpe=sharpe, max_dd=max_dd, hit_rate=hit_rate)


In [15]:
stats_long10 = compute_perf_stats(ret_long10)
stats_ls = compute_perf_stats(ret_ls)

print("========long10========")
for key, value in stats_long10.items():
    print(f"{key}: {value}")
print("\n========longshort20========")
for key, value in stats_ls.items():
    print(f"{key}: {value}")

ann_return: 0.4938997798225242
ann_vol: 0.37737257839919724
sharpe: 1.3087855559554213
max_dd: -0.08333833646560274
hit_rate: 0.6666666666666666

ann_return: 0.3907462857808779
ann_vol: 0.22755657919703934
sharpe: 1.7171390392651928
max_dd: 0.0
hit_rate: 1.0


In [17]:
import matplotlib.pyplot as plt

# 누적 수익률 계산
cum_long10 = (1 + ret_long10.dropna()).cumprod()
cum_ls = (1 + ret_ls.dropna()).cumprod()

# Drawdown 계산 함수
def compute_drawdown(cum_returns):
    running_max = cum_returns.cummax()
    drawdown = cum_returns / running_max - 1
    return drawdown

dd_long10 = compute_drawdown(cum_long10)
dd_ls = compute_drawdown(cum_ls)

# Plot cumulative returns
plt.figure(figsize=(8,5))
plt.plot(cum_long10.index, cum_long10, label="Top10 Long-only")
plt.plot(cum_ls.index, cum_ls, label="Top20/Bottom20 Long/Short")
plt.title("2024 Cumulative Return")
plt.xlabel("Date")
plt.ylabel("Cumulative Return")
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.savefig("outputs/cumulative_returns_2024.png")
plt.close()

# Plot drawdowns
plt.figure(figsize=(8,5))
plt.plot(dd_long10.index, dd_long10, label="Top10 Long-only")
plt.plot(dd_ls.index, dd_ls, label="Top20/Bottom20 Long/Short")
plt.title("2024 Drawdowns")
plt.xlabel("Date")
plt.ylabel("Drawdown")
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.savefig("outputs/drawdowns_2024.png")
plt.close()

"outputs/cumulative_returns_2024.png", "outputs/drawdowns_2024.png"


('outputs/cumulative_returns_2024.png', 'outputs/drawdowns_2024.png')