In [3]:
from google.colab import drive

# Attach Google Drive to the Colab runtime so files stored there can be read.
DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

# Project folder on the mounted drive; the price CSVs listed and loaded below live here.
folder_path = "/content/drive/MyDrive/DSE4211/"

Mounted at /content/drive


In [4]:
import os

# Show what is in the project folder as a quick check that the drive mounted
# and the expected CSV files are present.
project_files = os.listdir("/content/drive/MyDrive/DSE4211/")
print(project_files)


['btc_price_data.csv', 'apple_price_data.csv', 'JNJ_price_data.csv', 'Xiaomi_price_data.csv', 'BNB_price_data.csv', 'USDT_price_data.csv', 'Gold(Tether)_price_data.csv', 'Xiaomi_price_data.gsheet']


In [12]:
# Import libraries
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy.optimize as sco
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Load data: map each asset label to the location of its price CSV.
# The dict keeps insertion order, so the order below is the order in which
# the assets are iterated later on.
_data_dir = "/content/drive/MyDrive/DSE4211/"
_csv_by_asset = [
    ("Apple", "apple_price_data.csv"),
    ("BNB", "BNB_price_data.csv"),
    ("Bitcoin", "btc_price_data.csv"),
    ("JNJ", "JNJ_price_data.csv"),
    ("USDT", "USDT_price_data.csv"),  # held at a fixed 5% weight, left out of the optimization
    ("Xiaomi", "Xiaomi_price_data.csv"),
    ("Gold_Tether", "Gold(Tether)_price_data.csv"),
]
file_paths = {label: _data_dir + csv_name for label, csv_name in _csv_by_asset}

# Read and preprocess data
# NOTE(review): every CSV is given a synthetic daily index that starts on
# 2023-01-01 and is assigned purely by row position, so the assets are lined up
# by row number rather than by any dates stored in the files. If the files
# follow different calendars (e.g. exchange trading days vs. 7-day markets),
# a merged row will mix prices from different real days — confirm, and
# consider indexing each frame on its own date column instead.
dfs = {}
for asset, path in file_paths.items():
    df = pd.read_csv(path)
    df.index = pd.date_range(start="2023-01-01", periods=len(df), freq="D")
    if "Close" in df.columns:
        # Keep only the closing price and label the column with the asset name.
        df = df[["Close"]].rename(columns={"Close": asset})
    dfs[asset] = df

merged_df = pd.concat(dfs.values(), axis=1)  # Merge all assets side by side
# Drop the first two rows (presumably non-price header rows from the CSV
# export — TODO confirm), then coerce every column to numeric; cells that
# cannot be parsed become NaN.
merged_df = merged_df.iloc[2:].apply(pd.to_numeric, errors='coerce')
# Forward-fill missing values. DataFrame.ffill() replaces the deprecated
# fillna(method='ffill'); assigning the result avoids in-place mutation.
merged_df = merged_df.ffill()

print("Cleaned Data (First 5 Rows):")
print(merged_df.head())

# Compute daily returns (the first row has no previous price and is dropped)
returns = merged_df.pct_change().dropna()

# Exclude USDT from optimization (fixed at 5%); errors="ignore" keeps this a
# no-op if the column is absent.
returns_ex_usdt = returns.drop(columns=["USDT"], errors="ignore")

# Compute covariance matrix (used for risk calculation)
cov_matrix = returns_ex_usdt.cov().values
num_assets = len(returns_ex_usdt.columns)  # Number of assets to optimize

# Objective function for the minimum-variance optimization
def portfolio_volatility(weights, cov_matrix):
    """Return the portfolio volatility sqrt(w^T Σ w).

    This is the square root of the portfolio variance, so minimizing it also
    minimizes the variance.

    Parameters
    ----------
    weights : numpy.ndarray
        Portfolio weights w, one entry per asset.
    cov_matrix : numpy.ndarray
        Covariance matrix Σ of the asset returns, ordered like ``weights``.

    Returns
    -------
    The square root of the quadratic form w^T Σ w.
    """
    quadratic_form = weights.T @ cov_matrix @ weights
    return np.sqrt(quadratic_form)

# Optimization settings
RISKY_BUDGET = 0.95        # total weight shared by the optimized assets
USDT_ALLOCATION_PCT = 5.0  # USDT is held at a fixed weight outside the optimizer

# Per-asset weight limits, required by the minimize() call below.
# NOTE(review): the 1% floor is inferred from the saved output, where one asset
# sits at exactly 1.000000%; the upper limit is simply the whole risky budget.
# Confirm the intended limits.
MIN_WEIGHT = 0.01
bounds = [(MIN_WEIGHT, RISKY_BUDGET)] * num_assets

# Initial equal weight allocation, scaled so the starting point already
# satisfies the budget constraint (weights sum to RISKY_BUDGET, not 1).
init_weights = np.ones(num_assets) * (RISKY_BUDGET / num_assets)

# Equality constraint: the optimized weights must add up to the risky budget.
constraints = {'type': 'eq', 'fun': lambda w: np.sum(w) - RISKY_BUDGET}

# Optimize for minimum variance
opt_result = sco.minimize(portfolio_volatility, init_weights, args=(cov_matrix,),
                          method='SLSQP', bounds=bounds, constraints=constraints)

# Do not report weights from a failed solve as "optimal".
if not opt_result.success:
    raise RuntimeError(
        f"Minimum-variance optimization did not converge: {opt_result.message}"
    )

# Extract optimized weights
allocation_weights = opt_result.x

# Create allocation DataFrame (weights expressed as percentages)
allocation_df = pd.DataFrame({"Asset": returns_ex_usdt.columns, "Optimal Allocation (%)": allocation_weights * 100})

# Append the fixed USDT sleeve so the table covers the whole portfolio.
usdt_allocation = pd.DataFrame({"Asset": ["USDT"], "Optimal Allocation (%)": [USDT_ALLOCATION_PCT]})
allocation_df = pd.concat([allocation_df, usdt_allocation], ignore_index=True)

print("\nOptimal Portfolio Allocation (Minimum Variance Portfolio):")
print(allocation_df)


Cleaned Data (First 5 Rows):
                 Apple         BNB       Bitcoin         JNJ      USDT  \
2023-01-03  123.632515  244.136978  16625.080078  166.303696  0.999692   
2023-01-04  124.907715  245.535904  16688.470703  168.114288  0.999771   
2023-01-05  123.583092  246.133362  16679.857422  166.873016  0.999759   
2023-01-06  128.130234  259.119690  16863.238281  168.226303  0.999763   
2023-01-07  128.654144  256.422852  16836.736328  163.867798  0.999697   

            Xiaomi  Gold_Tether  
2023-01-03   11.22  1817.210083  
2023-01-04   11.50  1817.010986  
2023-01-05   11.62  1833.518555  
2023-01-06   11.24  1850.594238  
2023-01-07   12.10  1830.700317  

Optimal Portfolio Allocation (Minimum Variance Portfolio):
         Asset  Optimal Allocation (%)
0        Apple               14.992168
1          BNB                1.000000
2      Bitcoin                1.353552
3          JNJ               33.545858
4       Xiaomi                4.910711
5  Gold_Tether              

  merged_df.fillna(method='ffill', inplace=True)  # Forward-fill missing values
