In [1]:
# project context and imports

import os
import pandas as pd
import numpy as np

# Notebook-wide pandas display settings: no cap on the number of columns
# shown, and a display width of 120 characters.
pd.options.display.max_columns = None
pd.options.display.width = 120



In [2]:
# Project layout, resolved from the kernel's current working directory.
# PROJECT_ROOT is the parent of that directory, so the notebook is expected
# to be run from a direct subfolder of the project.

PROJECT_ROOT = os.path.abspath(os.pardir)

# Both data folders live under <PROJECT_ROOT>/data.
DATA_RAW, DATA_PROCESSED = (
    os.path.join(PROJECT_ROOT, "data", stage) for stage in ("raw", "processed")
)

(DATA_RAW, DATA_PROCESSED)



('c:\\Users\\pawan\\Desktop\\Notes\\CDAC_AI\\Hybrid AI–Driven Quantum Optimization Framework for Intelligent Portfolio Construction\\hybrid-quantum-portfolio\\data\\raw',
 'c:\\Users\\pawan\\Desktop\\Notes\\CDAC_AI\\Hybrid AI–Driven Quantum Optimization Framework for Intelligent Portfolio Construction\\hybrid-quantum-portfolio\\data\\processed')

In [3]:
# Fixed problem statement for the study, kept in one mapping.
# The limits that apply to a portfolio sit in the nested "constraints" mapping.

PROBLEM_DEFINITION = dict(
    use_case="Monthly portfolio rebalancing",
    asset_count=25,
    portfolio_type="Long-only",
    rebalance_frequency="Monthly",
    lookback_years=3,
    constraints=dict(
        cardinality_max=10,
        turnover_max=0.20,
        short_selling=False,
    ),
)

PROBLEM_DEFINITION


{'use_case': 'Monthly portfolio rebalancing',
 'asset_count': 25,
 'portfolio_type': 'Long-only',
 'rebalance_frequency': 'Monthly',
 'lookback_years': 3,
 'constraints': {'cardinality_max': 10,
  'turnover_max': 0.2,
  'short_selling': False}}

In [4]:
# Asset universe: the 25 tickers used throughout the notebook.
# The list order is kept exactly as written here.

ASSETS = [
    *("AAPL", "MSFT", "GOOGL", "AMZN", "META"),
    *("TSLA", "NVDA", "JPM", "V", "MA"),
    *("UNH", "PG", "KO", "PEP", "XOM"),
    *("CVX", "JNJ", "WMT", "HD", "COST"),
    *("BAC", "DIS", "NFLX", "ADBE", "CRM"),
]


In [5]:
# Write the asset universe to <PROJECT_ROOT>/data/asset_list.csv as a
# single "ticker" column, without the index.
asset_df = pd.DataFrame({"ticker": ASSETS})

# DataFrame.to_csv does not create missing parent folders, so make sure the
# data directory exists first (a no-op when it is already there).
asset_list_dir = os.path.join(PROJECT_ROOT, "data")
os.makedirs(asset_list_dir, exist_ok=True)

asset_df.to_csv(os.path.join(asset_list_dir, "asset_list.csv"), index=False)


In [6]:

# Download historical price data for every ticker in ASSETS.
#
# ASSETS is the list defined in the asset-universe cell above; it is reused
# here rather than re-declared so the downloaded tickers cannot drift from the
# list that was written to asset_list.csv.

import yfinance as yf
from datetime import datetime

START_DATE = "2021-01-01"
# Today's date, so the sample grows every time the notebook is re-run.
# NOTE(review): yfinance documents `end` as exclusive - confirm whether the
# current day is meant to be left out.
END_DATE = datetime.today().strftime("%Y-%m-%d")  # e.g., "2026-01-21"

# Explicitly disable auto_adjust to preserve 'Adj Close'
data = yf.download(
    tickers=ASSETS,
    start=START_DATE,
    end=END_DATE,
    auto_adjust=False,   # ← critical
    progress=False
)

if data.empty:
    raise ValueError("Downloaded data is empty. Check tickers or internet connection.")

# Extract adjusted close prices (one column per ticker)
prices = data["Adj Close"]

# Fail fast when a ticker has no usable data. The cleaning step that follows
# forward-fills and then drops every row containing a NaN, so a single
# all-NaN column would silently empty the whole price table.
missing_tickers = [
    ticker for ticker in ASSETS
    if ticker not in prices.columns or prices[ticker].isna().all()
]
if missing_tickers:
    raise ValueError(f"No price data returned for tickers: {missing_tickers}")

# Last expression of the cell, so the frame is shown with the rich notebook
# display rather than as printed text.
prices.head()

Ticker            AAPL        ADBE        AMZN        BAC        COST         CRM        CVX         DIS      GOOGL  \
Date                                                                                                                  
2021-01-04  125.974480  485.339996  159.331497  26.613651  359.688507  217.665329  68.492668  173.592010  85.659294   
2021-01-05  127.531960  485.690002  160.925507  26.817488  355.515900  218.860794  70.344269  174.334534  86.350060   
2021-01-06  123.239059  466.309998  156.919006  28.492472  350.103729  213.555237  72.608223  174.998886  85.497993   
2021-01-07  127.444397  477.739990  158.108002  29.121698  348.116852  215.363266  72.947823  174.471298  88.051704   
2021-01-08  128.544388  485.100006  159.134995  28.829239  350.028046  219.374527  73.643166  174.578766  89.217400   

Ticker              HD         JNJ         JPM         KO          MA        META        MSFT       NFLX       NVDA  \
Date                                           

In [7]:
# Clean and align prices in one pass:
#   1. order the rows by date index,
#   2. carry the last known price forward over gaps,
#   3. drop every row that still contains a missing value (after a forward
#      fill these can only be rows before a ticker's first observation).

prices = prices.sort_index().ffill().dropna()

prices.shape


(1270, 25)

In [8]:
# Save the price table to <DATA_RAW>/prices.csv.
# Note: by this point `prices` has already been sorted, forward-filled and had
# incomplete rows dropped, so the file holds the cleaned table.

# DataFrame.to_csv does not create missing parent folders, so make sure the
# target directory exists first (a no-op when it is already there).
os.makedirs(DATA_RAW, exist_ok=True)

prices.to_csv(os.path.join(DATA_RAW, "prices.csv"))


In [9]:
# Daily simple returns: relative change of each price versus the previous row.
# The first row has no predecessor and comes out as NaN; dropna() removes
# every row containing a NaN, which includes that one.

daily_pct_change = prices.pct_change()
returns = daily_pct_change.dropna()

returns.head()


Ticker,AAPL,ADBE,AMZN,BAC,COST,CRM,CVX,DIS,GOOGL,HD,JNJ,JPM,KO,MA,META,MSFT,NFLX,NVDA,PEP,PG,TSLA,UNH,V,WMT,XOM
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1
2021-01-05,0.012363,0.000721,0.010004,0.007659,-0.011601,0.005492,0.027034,0.004277,0.008064,0.008071,0.011757,0.005441,-0.010993,-0.011579,0.007548,0.000965,-0.00394,0.02221,0.00298,0.006385,0.007317,-0.013448,-0.014925,-0.005323,0.048193
2021-01-06,-0.033661,-0.039902,-0.024897,0.062459,-0.015223,-0.024242,0.032184,0.003811,-0.009868,0.005713,0.00941,0.046956,-0.031813,0.000374,-0.028269,-0.025929,-0.038998,-0.058953,-0.012232,0.010526,0.02839,0.041966,-0.008811,0.006244,0.025517
2021-01-07,0.034123,0.024512,0.007577,0.022084,-0.005675,0.008466,0.004677,-0.003015,0.029869,-0.004896,0.003378,0.032839,-0.011085,0.007922,0.020622,0.028457,0.016784,0.05783,-0.003218,-0.009346,0.079447,0.016032,0.005597,-6.9e-05,0.007846
2021-01-08,0.008631,0.015406,0.006496,-0.010043,0.00549,0.018626,0.009532,0.000616,0.013239,0.010628,-0.002058,0.001104,0.022418,0.011405,-0.004353,0.006093,0.002967,-0.00504,0.012003,-0.000432,0.078403,-0.004492,0.00767,-0.000136,0.011121
2021-01-11,-0.023249,-0.022387,-0.021519,0.0166,-0.016029,-0.017069,0.005709,0.002239,-0.023106,0.007953,-0.004186,0.014924,-0.017228,-0.015967,-0.040102,-0.009698,-0.02214,0.025967,-0.014496,-0.006773,-0.078214,-0.007265,-0.011882,0.004501,0.030356


In [10]:
# Save the daily returns to <DATA_PROCESSED>/returns.csv.

# DataFrame.to_csv does not create missing parent folders, so make sure the
# target directory exists first (a no-op when it is already there).
os.makedirs(DATA_PROCESSED, exist_ok=True)

returns.to_csv(os.path.join(DATA_PROCESSED, "returns.csv"))


In [11]:
# Rolling volatility: standard deviation of daily returns over a 60-row
# window, scaled by sqrt(252) (presumably the trading-days-per-year
# convention, to annualise). Rows before the first full window are NaN.

window_std = returns.rolling(window=60).std()
rolling_vol = window_std * np.sqrt(252)

rolling_vol.tail()


Ticker,AAPL,ADBE,AMZN,BAC,COST,CRM,CVX,DIS,GOOGL,HD,JNJ,JPM,KO,MA,META,MSFT,NFLX,NVDA,PEP,PG,TSLA,UNH,V,WMT,XOM
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1
2026-01-16,0.142341,0.293336,0.330555,0.189021,0.188413,0.328048,0.231158,0.255256,0.287643,0.25698,0.175382,0.237701,0.159665,0.198572,0.329504,0.19799,0.340008,0.347615,0.168619,0.180115,0.432137,0.269016,0.209173,0.223409,0.218853
2026-01-20,0.155306,0.294963,0.336043,0.191406,0.187831,0.329842,0.230691,0.254813,0.293726,0.258205,0.175483,0.245697,0.163677,0.200488,0.332929,0.198367,0.276277,0.359249,0.169784,0.183532,0.440291,0.272659,0.209427,0.223987,0.216607
2026-01-21,0.155237,0.296814,0.334912,0.189651,0.191624,0.330271,0.230953,0.26032,0.295437,0.262697,0.175339,0.245767,0.161649,0.200646,0.334802,0.202808,0.278659,0.363695,0.169062,0.183939,0.441887,0.278747,0.209364,0.223913,0.22047
2026-01-22,0.152974,0.300093,0.334749,0.187516,0.190837,0.336511,0.230424,0.259073,0.291736,0.263288,0.173234,0.242627,0.161533,0.201974,0.355833,0.205581,0.279647,0.361156,0.172062,0.191186,0.444381,0.281394,0.20929,0.225312,0.219946
2026-01-23,0.1448,0.298903,0.33637,0.188848,0.191152,0.336416,0.230336,0.262076,0.2851,0.263584,0.17343,0.244668,0.163618,0.204027,0.355903,0.215006,0.288717,0.357869,0.171357,0.191766,0.435621,0.280877,0.209242,0.222294,0.220412


In [12]:
# Write the rolling volatility table to <DATA_PROCESSED>/volatility.csv.

volatility_path = os.path.join(DATA_PROCESSED, "volatility.csv")
rolling_vol.to_csv(volatility_path)


In [13]:
# Pairwise correlation between the tickers' daily returns, computed over all
# rows of `returns`. "pearson" is pandas' default method, spelled out here.

corr_matrix = returns.corr(method="pearson")
corr_matrix


Ticker,AAPL,ADBE,AMZN,BAC,COST,CRM,CVX,DIS,GOOGL,HD,JNJ,JPM,KO,MA,META,MSFT,NFLX,NVDA,PEP,PG,TSLA,UNH,V,WMT,XOM
Ticker,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1
AAPL,1.0,0.516796,0.557505,0.34072,0.467038,0.46645,0.233486,0.404187,0.578014,0.432085,0.15337,0.343239,0.266709,0.504609,0.48767,0.629304,0.408096,0.520267,0.279363,0.241799,0.500377,0.138463,0.482255,0.28854,0.187764
ADBE,0.516796,1.0,0.533435,0.244084,0.427808,0.613113,0.1307,0.353472,0.50177,0.404335,0.067043,0.263631,0.192955,0.464597,0.471482,0.595392,0.418179,0.508774,0.202815,0.174533,0.353941,0.135694,0.450599,0.18729,0.094016
AMZN,0.557505,0.533435,1.0,0.343214,0.427143,0.547078,0.178951,0.443831,0.615984,0.403782,0.010444,0.344451,0.105591,0.435191,0.601072,0.654076,0.478547,0.555291,0.107257,0.083281,0.448102,0.090847,0.40216,0.241102,0.119795
BAC,0.34072,0.244084,0.343214,1.0,0.252542,0.317494,0.43561,0.488234,0.311319,0.384021,0.169298,0.813549,0.205899,0.443223,0.286157,0.291225,0.224286,0.274703,0.149328,0.180736,0.306071,0.174686,0.447171,0.22983,0.400513
COST,0.467038,0.427808,0.427143,0.252542,1.0,0.372247,0.137861,0.259384,0.356759,0.496156,0.203159,0.271667,0.385425,0.40184,0.358341,0.464255,0.330727,0.373058,0.353786,0.382162,0.337985,0.188476,0.388415,0.554678,0.107131
CRM,0.46645,0.613113,0.547078,0.317494,0.372247,1.0,0.164438,0.390647,0.46315,0.341851,0.040018,0.310172,0.14492,0.449033,0.460277,0.554507,0.423453,0.487728,0.15366,0.120401,0.375181,0.14425,0.418226,0.16925,0.132732
CVX,0.233486,0.1307,0.178951,0.43561,0.137861,0.164438,1.0,0.311725,0.178817,0.200626,0.155271,0.400636,0.163054,0.267881,0.125531,0.137548,0.104156,0.144365,0.133965,0.074926,0.136963,0.137425,0.248991,0.146495,0.854865
DIS,0.404187,0.353472,0.443831,0.488234,0.259384,0.390647,0.311725,1.0,0.350553,0.392727,0.119708,0.463607,0.210029,0.504077,0.368294,0.389315,0.401379,0.35302,0.174788,0.171154,0.336463,0.106676,0.473994,0.220564,0.288436
GOOGL,0.578014,0.50177,0.615984,0.311319,0.356759,0.46315,0.178817,0.350553,1.0,0.323745,0.070407,0.317439,0.138036,0.414737,0.560668,0.637585,0.386429,0.527311,0.130091,0.111874,0.417003,0.119028,0.385479,0.196016,0.119625
HD,0.432085,0.404335,0.403782,0.384021,0.496156,0.341851,0.200626,0.392727,0.323745,1.0,0.253537,0.353176,0.329292,0.421103,0.33922,0.38881,0.22883,0.310043,0.322208,0.332391,0.267624,0.193435,0.395158,0.352377,0.159649


In [14]:
# Write the correlation matrix to <DATA_PROCESSED>/correlations.pkl as a
# pickle (the other saved tables in this notebook are written as CSV).

correlations_path = os.path.join(DATA_PROCESSED, "correlations.pkl")
corr_matrix.to_pickle(correlations_path)


In [15]:
# Baseline portfolio (equal weight)
#
# Volatility of a portfolio holding every asset in `returns` with the same
# weight: sqrt(w' * Sigma * w * 252), where Sigma is the daily covariance.

cov_daily = returns.cov()

# Size the weight vector from the covariance matrix itself rather than from
# len(ASSETS): the product below only works when both have the same length,
# and the columns of `returns` are what is actually being weighted.
n_assets = cov_daily.shape[0]
equal_weights = np.ones(n_assets) / n_assets

# equal_weights is 1-D, so no transpose is needed for the quadratic form.
portfolio_variance = equal_weights @ cov_daily.values @ equal_weights
# Scale the daily variance by 252 before the square root (presumably the
# trading-days-per-year convention, to annualise).
portfolio_volatility = np.sqrt(portfolio_variance * 252)

portfolio_volatility


np.float64(0.17199499517188938)

In [16]:
# Metrics of the equal-weight baseline. Turnover and transaction cost are
# hard-coded to 0.0 here; only the volatility is a computed value.
baseline_metrics = dict(
    portfolio_volatility=portfolio_volatility,
    turnover=0.0,
    transaction_cost=0.0,
)

baseline_metrics


{'portfolio_volatility': np.float64(0.17199499517188938),
 'turnover': 0.0,
 'transaction_cost': 0.0}

In [17]:
# Data setup summary: ticker list, first and last date in `returns`, and the
# baseline volatility. This cell only builds and displays the mapping;
# nothing is written to disk here.

summary = dict(
    assets=ASSETS,
    data_start=returns.index.min(),
    data_end=returns.index.max(),
    baseline_volatility=portfolio_volatility,
)

summary


{'assets': ['AAPL',
  'MSFT',
  'GOOGL',
  'AMZN',
  'META',
  'TSLA',
  'NVDA',
  'JPM',
  'V',
  'MA',
  'UNH',
  'PG',
  'KO',
  'PEP',
  'XOM',
  'CVX',
  'JNJ',
  'WMT',
  'HD',
  'COST',
  'BAC',
  'DIS',
  'NFLX',
  'ADBE',
  'CRM'],
 'data_start': Timestamp('2021-01-05 00:00:00'),
 'data_end': Timestamp('2026-01-23 00:00:00'),
 'baseline_volatility': np.float64(0.17199499517188938)}