# Read the cluster 7 symbol data

In [1]:
import os
import pandas as pd
from pypfopt import EfficientFrontier, risk_models, expected_returns
import warnings

warnings.filterwarnings(action="ignore", category=FutureWarning)


DATA_DIRECTORY = "./data/cluster_7"

# Map each symbol (the CSV file name minus its ".csv" suffix) to the DataFrame
# read from that file. Directory entries are visited in sorted order, so the
# symbol ordering is deterministic across runs.
symbol_dfs = {
    filename[:-4]: pd.read_csv(os.path.join(DATA_DIRECTORY, filename))
    for filename in sorted(os.listdir(DATA_DIRECTORY))
    if filename.endswith(".csv")
}

# Symbol names in the same order as the keys of symbol_dfs.
symbols = list(symbol_dfs)

print("Loaded {} symbols.".format(len(symbols)))


Loaded 19 symbols.


In [2]:
# Build one wide table: a shared 'Time' column plus one 'Close' price column per symbol.
if not symbol_dfs:
    # Without this guard an empty input leaves merged_df as None and the
    # datetime conversion below fails with an unhelpful TypeError.
    raise ValueError("No symbol data loaded; cannot build the merged price table.")

merged_df = None

# Loop over each symbol and its corresponding DataFrame
for symbol, df in symbol_dfs.items():
    # Keep only the 'Time' and 'Close' columns and rename 'Close' to the symbol name
    df = df[['Time', 'Close']].rename(columns={'Close': symbol})

    if merged_df is None:
        # First iteration: the first symbol's frame seeds the merged table
        merged_df = df
    else:
        # Inner join on 'Time' keeps only timestamps present for every symbol merged so far
        merged_df = pd.merge(merged_df, df, on='Time', how='inner')

# Parse 'Time' into datetimes so it can be compared against Timestamp boundaries
merged_df["Time"] = pd.to_datetime(merged_df["Time"])


# Get the optimized weights

In [3]:
def get_optimized_weights(data):
    """Return cleaned maximum-Sharpe portfolio weights for a price table.

    Args:
        data: Price DataFrame passed straight to PyPortfolioOpt's
            ``mean_historical_return`` and ``sample_cov``; the caller in this
            notebook supplies a Time-indexed frame with one column per symbol.

    Returns:
        The mapping produced by ``EfficientFrontier.clean_weights()`` after a
        max-Sharpe optimisation with a risk-free rate of 0.
    """
    mu = expected_returns.mean_historical_return(data)
    S = risk_models.sample_cov(data)

    ef = EfficientFrontier(mu, S)  # Create Efficient Frontier object
    # Run the optimisation for its effect on `ef`; the raw weights it returns
    # are not needed because the cleaned weights are read back below.
    ef.max_sharpe(risk_free_rate=0)

    return ef.clean_weights()


# Calculate the time boundaries at 3-month intervals

In [4]:
# The boundary range starts on 2015-01-01 and its upper limit is three months
# after the latest timestamp found in merged_df.
start_date = pd.Timestamp('2015-01-01')
end_date = merged_df["Time"].max() + pd.DateOffset(months=3)

# '3MS' steps by three month-starts at a time; keep both the DatetimeIndex
# and a plain Python list of its timestamps.
time_boundaries = pd.date_range(start_date, end_date, freq='3MS')
time_boundaries_list = time_boundaries.to_list()

# Calculate the weights

In [7]:
# One record per boundary: the boundary timestamp plus the optimised weight of every symbol.
weights_list = []
for time_boundary in time_boundaries_list:
    # Expanding window: all rows at or before the boundary, indexed by Time.
    # A new frame is built rather than calling set_index(inplace=True) on a
    # filtered slice of merged_df.
    # NOTE(review): if no rows fall at or before an early boundary the optimiser
    # receives an empty frame — confirm the data starts before the first boundary.
    data = merged_df.loc[merged_df["Time"] <= time_boundary].set_index("Time")

    weights = get_optimized_weights(data)
    weights_list.append({"Timestamp": time_boundary, **weights})

In [8]:
# One row per time boundary, one column per symbol (plus "Timestamp").
weights_df = pd.DataFrame(weights_list)

WEIGHTS_DIRECTORY = "./weights"
# to_csv does not create missing parent directories, so make sure the
# output directory exists before writing.
os.makedirs(WEIGHTS_DIRECTORY, exist_ok=True)
weights_df.to_csv(os.path.join(WEIGHTS_DIRECTORY, "cluster_7_weights.csv"), index=False)