In [20]:
# Import necessary libraries
import pandas as pd
from bs4 import BeautifulSoup

# Path to the HTML file whose <tr> rows are parsed into a DataFrame in the next cell
# (presumably a locally saved copy of the Yahoo Finance history page for SSSYX — confirm)
html_file = 'sssyx.html'


In [21]:
# Read the HTML file.
# The encoding is given explicitly so decoding does not depend on the platform's locale default.
with open(html_file, 'r', encoding='utf-8') as file:
    html_content = file.read()

# Parse HTML content
soup = BeautifulSoup(html_content, 'lxml')

# Extract every table row in the document
table_rows = soup.find_all('tr')

# Collect the stripped text of each cell, one list per row
data = []
for row in table_rows:
    row_data = [cell.text.strip() for cell in row.find_all(['td', 'th'])]
    if row_data:  # Skip rows that contain no cells
        data.append(row_data)

# Create DataFrame with generic column names (the cell text is not used as a header).
# The column count is taken from the widest row rather than from data[0]:
#   - rows can be ragged, and a row wider than the first would make pandas raise;
#     shorter rows are padded with missing values;
#   - with no rows at all this yields an empty DataFrame instead of an IndexError
#     (the original `if data` filter ran only after data[0] had been evaluated).
n_columns = max((len(row_data) for row_data in data), default=0)
columns = [f'Column_{i+1}' for i in range(n_columns)]
df = pd.DataFrame(data, columns=columns)

# Display the DataFrame
df


Unnamed: 0,Column_1,Column_2,Column_3,Column_4,Column_5,Column_6,Column_7
0,"Jun 25, 2025",457.43,457.43,457.43,457.43,457.43,-
1,"Jun 24, 2025",452.39,452.39,452.39,452.39,452.39,-
2,"Jun 23, 2025",452.39,452.39,452.39,452.39,452.39,-
3,"Jun 20, 2025",448.07,448.07,448.07,448.07,448.07,-
4,"Jun 18, 2025",449.03,449.03,449.03,449.03,449.03,-
...,...,...,...,...,...,...,...
2722,"Sep 24, 2014",169.80,169.80,169.80,169.80,125.19,-
2723,"Sep 23, 2014",168.50,168.50,168.50,168.50,124.23,-
2724,"Sep 22, 2014",169.40,169.40,169.40,169.40,124.90,-
2725,"Sep 19, 2014",170.80,170.80,170.80,170.80,125.93,-


In [22]:
# Every column holds the scraped cell text as strings, so describe() reports
# count / unique / top / freq instead of numeric statistics.
df.describe()

Unnamed: 0,Column_1,Column_2,Column_3,Column_4,Column_5,Column_6,Column_7
count,2727,2727.0,2708.0,2708.0,2708.0,2708.0,2708
unique,2708,1868.0,1850.0,1850.0,1850.0,2055.0,1
top,"Dec 28, 2021",228.6,228.6,228.6,228.6,132.24,-
freq,3,11.0,11.0,11.0,11.0,10.0,2708


In [None]:
# Write the scraped table to disk, leaving out the positional row index
csv_file = 'output.csv'
df.to_csv(path_or_buf=csv_file, index=False)
print(f'CSV file saved as: {csv_file}')


# Final Project

# Final Project

## Available Assets for Portfolio Optimization
Here is a list of the available assets to consider in the portfolio:

<table>
  <thead>
    <tr>
      <th>Fund Name</th>
      <th>Ticker</th>
      <th>Inception Date</th>
    </tr>
  </thead>
  <tbody>
    <tr>
      <td>JPMorgan Equity Income Fund - Class R6</td>
      <td><a href="https://finance.yahoo.com/quote/OIEJX/history/" target="_blank">OIEJX</a></td>
      <td>Jan 31, 2012</td>
    </tr>
    <tr>
      <td>State Street Equity 500 Index K</td>
      <td><a href="https://finance.yahoo.com/quote/SSSYX/history/" target="_blank">SSSYX</a></td>
      <td>Sep 18, 2014</td>
    </tr>
    <tr>
      <td>T. Rowe Price Dividend Growth</td>
      <td><a href="https://finance.yahoo.com/quote/PRDGX/history/" target="_blank">PRDGX</a></td>
      <td>Dec 31, 1992</td>
    </tr>
    <tr>
      <td>American Funds Growth Fund of Amer R6</td>
      <td><a href="https://finance.yahoo.com/quote/RGAGX/history/" target="_blank">RGAGX</a></td>
      <td>May 1, 2009</td>
    </tr>
    <tr>
      <td>Vanguard Mid Cap Index Admiral</td>
      <td><a href="https://finance.yahoo.com/quote/VIMAX/history/" target="_blank">VIMAX</a></td>
      <td>Nov 12, 2001</td>
    </tr>
    <tr>
      <td>Vanguard Small Cap Value Index Admiral</td>
      <td><a href="https://finance.yahoo.com/quote/VSIAX/history/" target="_blank">VSIAX</a></td>
      <td>Sep 27, 2011</td>
    </tr>
  </tbody>
</table>

Historical data for these assets can be found on the [Yahoo Finance website](https://finance.yahoo.com).

Since the portfolio optimization problem requires stacking the return time series of all assets into a single matrix $R$, every series must cover the same time period. For this reason, only returns from the latest inception date among the funds are considered, i.e., from Sep 18, 2014 (SSSYX) until today (Jun 25, 2025).

Data sources:
<table>
  <thead>
    <tr>
      <th>Ticker</th>
    </tr>
  </thead>
  <tbody>
    <tr>
      <td><a href="https://finance.yahoo.com/quote/OIEJX/history/?period1=1410998400&period2=1750917797" target="_blank">OIEJX</a></td>
    </tr>
    <tr>
      <td><a href="https://finance.yahoo.com/quote/SSSYX/history/?period1=1411047000&period2=1750917799" target="_blank">SSSYX</a></td>
    </tr>
    <tr>
      <td><a href="https://finance.yahoo.com/quote/PRDGX/history/?period1=1410998400&period2=1750917878" target="_blank">PRDGX</a></td>
    </tr>
    <tr>
      <td><a href="https://finance.yahoo.com/quote/RGAGX/history/?period1=1410998400&period2=1750917509" target="_blank">RGAGX</a></td>
    </tr>
    <tr>
      <td><a href="https://finance.yahoo.com/quote/VIMAX/history/?period1=1410998400&period2=1750917512" target="_blank">VIMAX</a></td>
    </tr>
    <tr>
      <td><a href="https://finance.yahoo.com/quote/VSIAX/history/?period1=1410998400&period2=1750917515" target="_blank">VSIAX</a></td>
    </tr>
  </tbody>
</table>
The service has been behind a paywall since early 2025, but the data is still accessible by inspecting the HTML elements of the page.

In [3]:
import numpy as np
import cvxpy as cp

# Mock return matrix: 5 assets, 10 time periods
np.random.seed(0)  # fixed seed so the mock data (and the printed results) are reproducible
T, n = 10, 5
R = np.random.randn(T, n) * 0.01  # Simulated daily returns: zero mean, 1% standard deviation

# Compute average return per asset
mu = np.mean(R, axis=0)  # Shape: (n,)

# Set a mock target return: 10% per year spread evenly over 250 periods
# (simple division, no compounding)
rho = 0.1 / 250

# Define optimization variable: one weight per asset
w = cp.Variable(n)

# Define constraints
constraints = [
    cp.sum(w) == 1,       # weights sum to 1
    mu @ w == rho         # target average return
]

# Define the objective (least squares formulation):
# keep every period's portfolio return as close as possible to the target rho
objective = cp.Minimize(cp.norm(R @ w - rho, 2))

# Solve the problem
problem = cp.Problem(objective, constraints)
problem.solve()

# w.value is None when the solver does not reach a solution (e.g. infeasible
# constraints); fail with a clear message instead of a TypeError in the prints below.
if problem.status not in ("optimal", "optimal_inaccurate"):
    raise RuntimeError(f"Portfolio optimization failed with status: {problem.status}")

# Output results
print("Optimal weights w:", w.value)
print("Achieved average return:", mu @ w.value)
print("Portfolio risk (std dev of returns):", np.std(R @ w.value))


Optimal weights w: [0.35218793 0.10524019 0.20972512 0.11947077 0.21337598]
Achieved average return: 0.0003999999999999991
Portfolio risk (std dev of returns): 0.006035366033597584
