## Optimization Model

#### Import and Data Loading

In [6]:
import numpy as np
import pandas as pd
import yfinance as yf
from typing import Dict, List, Optional
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LassoCV, Lasso
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import TimeSeriesSplit

In [2]:
INDEX_CONSTITUENTS = {
    'CAC40': {
        'ticker': '^FCHI',
        'constituents': [
            'AIR.PA', 'AI.PA', 'MT.AS', 'CS.PA', 'BNP.PA', 'EN.PA', 
            'CAP.PA', 'CA.PA', 'ACA.PA', 'BN.PA', 'DSY.PA', 'ENGI.PA',
            'EL.PA', 'ERF.PA', 'RMS.PA', 'KER.PA', 'LR.PA', 'OR.PA',
            'MC.PA', 'ML.PA', 'ORA.PA', 'RI.PA', 'PUB.PA', 'RNO.PA',
            'SAF.PA', 'SGO.PA', 'SAN.PA', 'SU.PA', 'GLE.PA', 'STLAP.PA',
            'STMPA.PA', 'TEP.PA', 'HO.PA', 'FP.PA', 'URW.AS', 'VIE.PA',
            'DG.PA', 'VIV.PA', 'WLN.PA', 'ALO.PA'
        ]
    }
}

In [3]:
def get_index_data(index_name: str,
                   period: str = '1y',
                   constituents: Optional[List[str]] = None) -> pd.DataFrame:
    """
    Fetch daily OHLCV data for an index and its constituents, and return a single DataFrame:
    columns = ['name', 'date', 'open', 'high', 'low', 'close', 'volume']
    """
    index_name = index_name.upper()

    if index_name not in INDEX_CONSTITUENTS:
        raise ValueError(f"Index '{index_name}' not supported. "
                         f"Available: {list(INDEX_CONSTITUENTS.keys())}")

    index_ticker = INDEX_CONSTITUENTS[index_name]['ticker']
    if constituents is None:
        constituents = INDEX_CONSTITUENTS[index_name]['constituents']

    all_rows = []

    def fetch_and_append(ticker, label):
        try:
            df = yf.Ticker(ticker).history(period=period)[['Open','High','Low','Close','Volume']]
            df = df.reset_index().rename(columns=str.lower)
            df['name'] = label
            all_rows.append(df[['name','date','open','high','low','close','volume']])
            print(f"✓ {label}")
        except Exception as e:
            print(f"✗ {label} - {e}")

    # Index itself
    print(f"Fetching index: {index_name}")
    fetch_and_append(index_ticker, index_name)

    # Constituents
    print(f"Fetching {len(constituents)} constituents...")
    for t in constituents:
        fetch_and_append(t, t)

    # Combine all
    big_df = pd.concat(all_rows, ignore_index=True)
    print(f"\nFinal dataset: {big_df['name'].nunique()} assets, {len(big_df):,} rows")

    return big_df

value_df = get_index_data('CAC40', period='1y')
value_df['daily_return_pct'] = value_df.groupby('name')['close'].pct_change() * 100

Fetching index: CAC40
✓ CAC40
Fetching 40 constituents...
✓ AIR.PA
✓ AI.PA
✓ MT.AS
✓ CS.PA
✓ BNP.PA
✓ EN.PA
✓ CAP.PA
✓ CA.PA
✓ ACA.PA
✓ BN.PA
✓ DSY.PA
✓ ENGI.PA
✓ EL.PA
✓ ERF.PA
✓ RMS.PA
✓ KER.PA
✓ LR.PA
✓ OR.PA
✓ MC.PA
✓ ML.PA
✓ ORA.PA
✓ RI.PA
✓ PUB.PA
✓ RNO.PA
✓ SAF.PA
✓ SGO.PA
✓ SAN.PA
✓ SU.PA
✓ GLE.PA
✓ STLAP.PA
✓ STMPA.PA
✓ TEP.PA
✓ HO.PA


$FP.PA: possibly delisted; no price data found  (period=1y) (Yahoo error = "No data found, symbol may be delisted")


✓ FP.PA


$URW.AS: possibly delisted; no price data found  (period=1y) (Yahoo error = "No data found, symbol may be delisted")


✓ URW.AS
✓ VIE.PA
✓ DG.PA
✓ VIV.PA
✓ WLN.PA
✓ ALO.PA

Final dataset: 39 assets, 9,984 rows


#### 2

In [None]:
def sparse_portfolio_selection(data: pd.DataFrame):
    """

Unnamed: 0,name,date,open,high,low,close,volume,daily_return_pct
0,CAC40,2024-10-10 00:00:00+02:00,7543.399902,7563.359863,7515.899902,7541.589844,47654900.0,
1,CAC40,2024-10-11 00:00:00+02:00,7530.879883,7587.520020,7504.040039,7577.890137,52952500.0,0.481335
2,CAC40,2024-10-14 00:00:00+02:00,7583.479980,7613.149902,7544.540039,7602.060059,43689100.0,0.318953
3,CAC40,2024-10-15 00:00:00+02:00,7595.060059,7601.529785,7511.240234,7521.970215,73146200.0,-1.053528
4,CAC40,2024-10-16 00:00:00+02:00,7420.850098,7511.100098,7420.850098,7492.000000,64497000.0,-0.398436
...,...,...,...,...,...,...,...,...
9979,ALO.PA,2025-10-06 00:00:00+02:00,22.309999,22.370001,21.469999,22.350000,1811806.0,-0.178647
9980,ALO.PA,2025-10-07 00:00:00+02:00,22.330000,22.940001,22.290001,22.480000,700551.0,0.581652
9981,ALO.PA,2025-10-08 00:00:00+02:00,22.320000,23.020000,22.290001,22.700001,1030198.0,0.978653
9982,ALO.PA,2025-10-09 00:00:00+02:00,22.870001,23.080000,22.469999,22.600000,1102181.0,-0.440530
