In [1]:
using CSV, DataFrames, Dates, TSFrames, Statistics, PortfolioAnalytics, LinearAlgebra, TimerOutputs, NearestCorrelationMatrix

# Create a TimerOutput, this will store all our timing information
#const to = TimerOutput();

In [2]:
function load_stocks(data_dir::String, stock_files::Vector{String})
    combined_df = DataFrame()  # Create an empty DataFrame to store the combined data
    
    # Iterate over each stock file
    for file in stock_files
        path = joinpath(data_dir, file)  # Get the full path of the file
        
        if isfile(path)
            # Load the CSV file into a DataFrame
            df = CSV.read(path, DataFrame)
            
            # Select required columns (including the ticker symbol)
            select!(df, [:Date, :Ticker, :Close, :Sector])
            
            # Append the DataFrame to the combined DataFrame
            append!(combined_df, df)
        else
            println("File not found: $file")  # Print an error if file is missing
        end
    end
    
    return combined_df
end

load_stocks (generic function with 1 method)

In [3]:
# Specify the data directory and list of stock files to load
data_dir = "C:/Users/Z2005081/Downloads/COPY FROM SCHOOL PC/DATA_MINNER/FULL_2020_2025_11_SECTION"
stock_files = [
    "Consumer_Discretionary.csv", "Consumer_Staples.csv", "Energy.csv",
    "Financials.csv", "Health_Care.csv", "Industrials.csv",         # Note: It must be UFT-8 CSV
    "Information_Technology.csv", "Materials.csv", "Real_Estate.csv", "Communication_Services.csv", "Utilities.csv"
]

# Call the function to load the stocks
combined_df = load_stocks(data_dir, stock_files)

# Display the combined DataFrame
println("\nStock Data:\n")
show(combined_df)


Stock Data:

[1m716100×4 DataFrame[0m
[1m    Row [0m│[1m Date                      [0m[1m Ticker  [0m[1m Close   [0m[1m Sector                 [0m
        │[90m String31                  [0m[90m String7 [0m[90m Float64 [0m[90m String31               [0m
────────┼─────────────────────────────────────────────────────────────────────
      1 │ 2020-01-02 00:00:00-05:00  AMZN     94.9005  Consumer Discretionary
      2 │ 2020-01-03 00:00:00-05:00  AMZN     93.7485  Consumer Discretionary
      3 │ 2020-01-06 00:00:00-05:00  AMZN     95.144   Consumer Discretionary
      4 │ 2020-01-07 00:00:00-05:00  AMZN     95.343   Consumer Discretionary
      5 │ 2020-01-08 00:00:00-05:00  AMZN     94.5985  Consumer Discretionary
      6 │ 2020-01-09 00:00:00-05:00  AMZN     95.0525  Consumer Discretionary
      7 │ 2020-01-10 00:00:00-05:00  AMZN     94.158   Consumer Discretionary
      8 │ 2020-01-13 00:00:00-05:00  AMZN     94.565   Consumer Discretionary
      9 │ 2020-01-14 0

In [4]:
df = DataFrame(combined_df)

# Group by sector and count the number of stocks in each sector
sector_counts = combine(groupby(df, :Sector), DataFrames.nrow => :Count)

# Display the count of stocks in each sector
println("\nNumber of Stocks in Each Sector:\n")
show(sector_counts, allrows=true, allcols=true)



Number of Stocks in Each Sector:

[1m11×2 DataFrame[0m
[1m Row [0m│[1m Sector                 [0m[1m Count [0m
     │[90m String31               [0m[90m Int64 [0m
─────┼───────────────────────────────
   1 │ Consumer Discretionary  65100
   2 │ Consumer Staples        65100
   3 │ Energy                  65100
   4 │ Financials              65100
   5 │ Health Care             65100
   6 │ Industrials             65100
   7 │ Information Technology  65100
   8 │ Materials               65100
   9 │ Real Estate             65100
  10 │ Communication Services  65100
  11 │ Utilities               65100

In [5]:
df = DataFrame(combined_df)

# Group by sector and count the number of unique tickers in each sector
sector_ticker_counts = combine(groupby(df, :Sector), :Ticker => (tickers -> length(unique(tickers))) => :Ticker_Count)

# Display the count of unique tickers in each sector
println("\nNumber of Unique Tickers in Each Sector:\n")
show(sector_ticker_counts, allrows=true, allcols=true)



Number of Unique Tickers in Each Sector:

[1m11×2 DataFrame[0m
[1m Row [0m│[1m Sector                 [0m[1m Ticker_Count [0m
     │[90m String31               [0m[90m Int64        [0m
─────┼──────────────────────────────────────
   1 │ Consumer Discretionary            50
   2 │ Consumer Staples                  50
   3 │ Energy                            50
   4 │ Financials                        50
   5 │ Health Care                       50
   6 │ Industrials                       50
   7 │ Information Technology            50
   8 │ Materials                         50
   9 │ Real Estate                       50
  10 │ Communication Services            50
  11 │ Utilities                         50

In [6]:
# Count the number of unique stocks (replace "Ticker" with the actual column name)
num_stocks = length(unique(combined_df.Ticker))
println("\nNumber of unique stocks: ", num_stocks)


550ber of unique stocks: 


In [7]:
println("Unique tickers in daily_df: ", length(unique(combined_df.Ticker)))

Unique tickers in daily_df: 550


In [8]:
# Extract and print all unique tickers (replace "Ticker" with your actual column name)
unique_tickers = sort(unique(combined_df.Ticker))  # Sorting for easier reading
println("\nUnique Stock Tickers:")
for ticker in unique_tickers
    println(ticker)
end

# Print the total number of unique tickers
println("\nTotal number of unique stocks: ", length(unique_tickers))


Unique Stock Tickers:
A
AA
AAL
AAP
AAPL
ABBV
ABT
ACN
ADBE
ADC
ADI
ADM
ADP
AEE
AEP
AES
AFL
AIG
AJG
AKAM
ALB
ALE
ALGN
ALL
ALLE
ALLY
AMAT
AMD
AMGN
AMP
AMT
AMX
AMZN
ANSS
AOS
APA
APD
APH
AR
ARE
ARTNA
ASH
ATI
ATO
ATUS
AVA
AVB
AVGO
AVT
AVY
AWK
AWR
AXP
AZO
BA
BAC
BALL
BBY
BCPC
BDX
BEN
BG
BHP
BIDU
BIIB
BILI
BIO
BK
BKH
BKNG
BKR
BLK
BMY
BP
BR
BRX
BSX
BURL
BXP
C
CABO
CACC
CAG
CAH
CASY
CAT
CB
CBT
CC
CCI
CCOI
CDNS
CE
CF
CFG
CHD
CHTR
CHWY
CHX
CI
CL
CLF
CLX
CMC
CMCSA
CME
CMG
CMI
CMS
CNC
CNP
CNQ
COF
COP
COST
COTY
CPB
CPT
CRM
CSCO
CSX
CTAS
CTRA
CTSH
CTVA
CUBE
CVE
CVS
CVX
CWT
CZR
D
DAL
DAR
DD
DE
DEA
DEI
DEO
DFS
DG
DGX
DHI
DHR
DINO
DIS
DLR
DNP
DOC
DOV
DOW
DOYU
DRI
DTE
DUK
DVA
DVN
DXC
E
EA
EBAY
EC
ECL
ED
EFX
EGP
EIX
EL
ELV
EMN
EMR
ENB
EOG
EQIX
EQNR
EQR
EQT
ES
ESS
ETN
ETSY
EVRG
EW
EXC
EXP
EXPD
EXR
F
FANG
FAST
FCX
FDP
FDS
FDX
FE
FIS
FITB
FIZZ
FLO
FLS
FMC
FND
FOX
FR
FRT
FTI
FTNT
FUL
FWONA
GD
GE
GILD
GIS
GLW
GM
GMRE
GNE
GOOG
GPC
GPN
GRMN
GS
GWW
HAIN
HAL
HAS
HBAN
HCA
HD
HE
HES
HII
HIW
HLT
HOLX
HON
HP
HPQ
HR
HR

In [60]:
# Extract unique tickers from the combined DataFrame
tickers = unique(combined_df.Ticker)

# Filter the combined DataFrame by the extracted tickers
filtered_df = filter(:Ticker => t -> t in tickers, combined_df)

# Select only the required columns
filtered_df = filtered_df[:, ["Date", "Ticker", "Close"]]

# Correctly parse the DateTime with the specified format
filtered_df[!, "Date"] = DateTime.(filtered_df.Date, "yyyy-mm-dd HH:MM:SS-HH:MM")

# Define the date range for filtering
start_date = DateTime("2024-01-02")
end_date = DateTime("2025-03-08")


# Filter the DataFrame for the specified date range
daily_df = filter(row -> start_date <= row.Date <= end_date, filtered_df)

# Sort the DataFrame by Date and Ticker
sort!(daily_df, [:Date, :Ticker])


Row,Date,Ticker,Close
Unnamed: 0_level_1,DateTime,String7,Float64
1,2024-01-02T05:00:00,A,137.794
2,2024-01-02T05:00:00,AA,32.816
3,2024-01-02T05:00:00,AAL,13.44
4,2024-01-02T05:00:00,AAP,60.5056
5,2024-01-02T05:00:00,AAPL,184.532
6,2024-01-02T05:00:00,ABBV,152.743
7,2024-01-02T05:00:00,ABT,107.138
8,2024-01-02T05:00:00,ACN,341.431
9,2024-01-02T05:00:00,ADBE,580.07
10,2024-01-02T05:00:00,ADC,60.2293


In [61]:
# Pivot the DataFrame to get the desired format, handling duplicates by taking the mean
pivoted_df = unstack(daily_df, :Date, :Ticker, :Close, combine = mean)

# Sort by Date (Index)
sort!(pivoted_df, :Date)

# Convert the DataFrame to a TSFrame
tsframe = TSFrame(pivoted_df, :Date)


[1m296×550 TSFrame with DateTime Index[0m
[1m Index               [0m[1m A        [0m[1m AA       [0m[1m AAL      [0m[1m AAP      [0m[1m AAPL     [0m[1m ABBV  [0m ⋯
[90m DateTime            [0m[90m Float64? [0m[90m Float64? [0m[90m Float64? [0m[90m Float64? [0m[90m Float64? [0m[90m Float6[0m ⋯
────────────────────────────────────────────────────────────────────────────────
 2024-01-02T05:00:00   137.794   32.816      13.44   60.5056   184.532   152.7 ⋯
 2024-01-03T05:00:00   130.256   31.023      12.95   59.5778   183.15    153.3
 2024-01-04T05:00:00   130.097   30.6684     13.09   59.0797   180.824   154.3
 2024-01-05T05:00:00   129.66    31.6535     13.6    60.0856   180.099   154.9
 2024-01-08T05:00:00   132.461   31.2595     14.58   60.4665   184.453   154.2 ⋯
 2024-01-09T05:00:00   129.779   31.4663     14.38   59.9587   184.035   155.1
 2024-01-10T05:00:00   130.187   31.1806     14.35   59.5582   185.079   157.5
 2024-01-11T05:00:00   128.786   30.4

In [62]:
all_tickers = unique(daily_df.Ticker)
pivoted_df = unstack(daily_df, :Date, :Ticker, :Close, combine = mean)
for ticker in setdiff(all_tickers, names(pivoted_df)[2:end])
    pivoted_df[!, ticker] = fill(NaN, nrow(pivoted_df))
end

In [63]:
# Calculate the log returns for each stock
asset_returns = asset_return(tsframe, method = "log")

# Extract returns as vectors for correlation analysis
returns_vectors = Dict(ticker => collect(skipmissing(asset_returns[:, ticker])) for ticker in names(asset_returns)[1:end])

# Convert the returns vectors to a DataFrame for better visualization
returns_df = DataFrame(returns_vectors)


Row,A,AA,AAL,AAP,AAPL,ABBV,ABT,ACN,ADBE,ADC,ADI,ADM,ADP,AEE,AEP,AES,AFL,AIG,AJG,AKAM,ALB,ALE,ALGN,ALL,ALLE,ALLY,AMAT,AMD,AMGN,AMP,AMT,AMX,AMZN,ANSS,AOS,APA,APD,APH,AR,ARE,ARTNA,ASH,ATI,ATO,ATUS,AVA,AVB,AVGO,AVT,AVY,AWK,AWR,AXP,AZO,BA,BAC,BALL,BBY,BCPC,BDX,BEN,BG,BHP,BIDU,BIIB,BILI,BIO,BK,BKH,BKNG,BKR,BLK,BMY,BP,BR,BRX,BSX,BURL,BXP,C,CABO,CACC,CAG,CAH,CASY,CAT,CB,CBT,CC,CCI,CCOI,CDNS,CE,CF,CFG,CHD,CHTR,CHWY,CHX,CI,⋯
Unnamed: 0_level_1,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,⋯
1,-0.0562558,-0.0561878,-0.0371395,-0.0154538,-0.00751575,0.00399652,-0.00300854,-0.0262849,-0.014377,-0.0157108,-0.0241554,0.00507235,-0.00390627,-0.00446931,0.00240154,-0.0214274,-0.00108134,-0.00334663,8.8906e-5,-0.00488872,-0.0467567,0.00863182,-0.0483775,0.00824065,-0.0222991,-0.0202835,-0.0190969,-0.0238054,0.0110354,0.00397584,-0.0190086,-0.014177,-0.00978554,-0.0280964,-0.0340964,0.0175683,-0.00958977,-0.0182383,0.00660939,-0.0311029,-0.02457,-0.0306934,-0.0358494,0.00927849,-0.0413414,0.00746176,-0.0262761,-0.0250018,-0.0164005,-0.0148337,0.0,-0.0117515,-0.0106238,-0.000354321,-0.0316769,-0.0109744,-0.0200308,-0.0239713,-0.0240208,-0.0203757,-0.0354449,-0.0121922,-0.0108077,0.0201698,-0.0161166,0.0388562,-0.028231,-0.0111346,-0.00108754,-0.0206949,-0.000294885,-0.0203863,-0.00875685,0.0139863,-0.0139546,-0.0239716,-0.00415804,-0.0366376,-0.0387436,0.0112486,-0.0223403,-0.0337029,-0.00850196,0.0203528,-0.00512884,-0.0291523,-0.0041432,-0.0514456,-0.0264641,-0.0272061,-0.0136899,-0.0216087,-0.0293828,0.0132678,-0.0417425,-0.00657757,-0.0124394,-0.0680346,-0.00346616,-0.0209863,⋯
2,-0.00122057,-0.0114979,0.0107528,-0.00839572,-0.0127816,0.00621273,0.0132427,-0.00245939,-0.00832429,0.00701315,-0.0154125,-0.0190508,0.00493388,0.00325255,0.000120046,-0.00317472,-0.0107559,0.00029151,0.0039031,-0.0145495,-0.0307873,-0.00382721,0.0198623,0.0238513,0.00623517,-0.0110275,-0.0142307,0.00508597,0.00821408,-0.00394949,0.00358002,-0.00164872,-0.0266191,0.00273503,0.00847634,-0.0763083,-0.00225461,-0.00474406,-0.0168295,0.0025497,-0.0205617,-0.0217954,0.021755,-0.003565,-0.0363676,-0.00330939,0.00163539,-0.00908154,-0.0154139,-0.00535155,-0.0131943,-0.0158442,0.00761686,-0.00397276,0.00421397,0.00802029,0.0142225,-0.00172491,0.00461339,0.0100956,0.00704968,-0.0132519,-0.00882127,0.00533875,-0.0120297,-0.0319893,-0.00749201,0.00404582,-0.00308782,0.00312469,-0.0199501,0.00229277,0.00655488,-0.00668902,-0.000152205,0.00308313,0.00984719,0.00159568,-0.00173163,0.00242066,-0.0178359,-0.00203189,-0.019311,0.00789384,0.00987292,0.00631146,0.00392323,-0.014444,-0.0103932,0.00448382,-0.0158977,-0.00530009,-0.0032669,-0.0190436,0.00558485,0.00604872,-0.0286476,-0.00720982,-0.0345443,0.00839511,⋯
3,-0.00336453,0.0316182,0.0382212,0.0168841,-0.00402098,0.0042027,-0.00162319,-0.00139523,-0.00432999,0.00285491,0.00257639,-0.0131877,0.00661176,0.00189226,0.0081213,-0.00371659,0.00509033,0.00552159,-0.00274839,-0.00262051,-0.00073578,-0.00432308,0.0126228,0.00991699,0.00416247,0.0262075,-0.00207833,0.0187194,-0.00056102,0.00804055,-0.00549167,0.00548561,0.00462371,-0.000232527,-0.00429243,0.00848091,-0.000370232,-0.00179803,0.0376907,0.00666193,-0.0183778,0.00427201,-0.0110498,0.00322614,0.0133781,0.000275996,-0.00502386,0.000257359,-0.00315344,0.0020931,0.000986337,-0.00937053,0.0102074,-0.00253702,0.0164396,0.0184675,0.0129781,0.00701392,-0.0227121,-0.00338139,0.0142983,-0.00944318,-0.00859673,-0.0014378,-0.00926359,-0.0499885,-0.000316045,0.00727969,0.00435654,-0.00345624,0.00509965,-0.00397771,0.00364425,0.00752194,-0.00376158,-0.00706106,-0.000171896,0.0100477,0.0160461,0.0103609,0.011833,0.0248634,-0.0179172,0.00151464,0.0193173,0.00984291,0.0017582,-0.00206201,0.0209991,-0.00201971,0.00309256,-0.0126223,0.00253436,0.0,0.031971,-0.00690049,-0.00949265,-0.0145775,0.00539279,0.0237827,⋯
4,0.0213694,-0.0125276,0.0695809,0.00631949,0.0238872,-0.00438862,0.0143372,0.0110198,0.0278584,0.00898689,0.0129419,0.00352437,0.0065682,0.00820406,0.00522009,0.0142595,0.00181181,-0.0115136,0.015679,0.0144149,0.00550645,0.00272413,0.0343876,-0.00400846,0.00770765,-0.0131619,0.0170351,0.0533909,0.0256742,0.00610538,0.00836515,0.0016397,0.0262297,0.0131371,0.00290574,-0.0179231,0.00990817,0.0221952,-0.0103762,0.0252882,0.0137504,0.0178931,0.00715025,0.00734699,0.048632,0.00879866,0.010185,0.0240769,0.0104735,-0.00357628,0.00868139,0.00758019,0.000793198,0.00117535,-0.0837309,-0.00787307,-0.0038408,-0.00383173,0.0160649,0.0230298,0.0154617,0.0115633,0.0,-0.0021182,0.000387726,0.0,0.0175454,0.00589982,0.0149216,0.0159224,-0.0297621,0.0181776,-0.00845992,-0.016793,0.0184186,0.00662107,0.00446044,0.0203132,0.0103218,-0.00590735,0.034215,0.00953753,0.0182653,-0.00521597,0.00438654,0.0114253,-0.00660942,-0.0032306,0.00255428,0.00866423,-0.00107455,0.0357583,-0.00648197,-0.0243259,0.00686889,0.00531242,0.0177617,0.0126461,-0.00791936,0.000127525,⋯
5,-0.0204507,0.00659655,-0.0138124,-0.00843479,-0.00226591,0.00543637,0.00293157,0.00702673,0.00968514,-0.00251458,0.00411879,-0.00550344,-0.00652577,-0.00860924,-0.00450651,-0.011073,-0.0283951,-0.00573321,-0.0118688,-0.00250335,-0.0215365,-0.00449076,0.0129838,0.0106539,0.00258287,-0.0174322,-0.00350302,0.020851,-0.0117128,-0.0224379,-0.019723,-0.0181873,0.0151099,0.0151445,0.00716476,-0.0227895,-0.0175627,-0.00758719,0.0185151,-0.0109267,-0.0224084,-0.00160227,-0.0336559,-0.00413136,-0.131725,-0.00879866,-0.00777909,0.00708282,-0.013637,5.12687e-5,-0.00225737,-0.00217817,-0.0129797,-0.0128666,-0.0142495,-0.0156369,-0.00175069,-0.00504309,-0.00902691,-0.0407412,-0.00718574,-0.0254316,-0.0217432,-0.014524,-0.00832987,-0.0245802,-0.0131616,0.00473268,-0.0104037,0.00759206,-0.0256024,-0.00335488,-0.00989633,-0.0153586,0.0,-0.0017612,0.0137699,0.0140285,0.00323017,-0.00986152,0.00247502,-0.0101179,0.0203281,-0.0172633,-0.00903965,0.000136826,-0.00394235,-0.0164431,-0.0209449,-0.00875197,-0.024901,-0.000611098,-0.0105136,-0.0102814,-0.00806818,0.0169166,-0.027454,0.0219894,-0.0293383,-0.00242615,⋯
6,0.00313257,-0.00912101,-0.00208839,-0.00670101,0.00565535,0.0157707,0.0147056,0.00749917,0.00820578,-0.0171404,-0.00735205,-0.0191433,0.000297485,-0.00324771,-0.0190803,0.00105974,0.0256157,-0.00354468,0.017577,0.00327902,-0.00547617,-0.0100163,-0.00263384,-0.0044476,-0.0158444,-0.00470036,-0.00811066,-0.00483548,-0.00879325,0.00551785,-0.013735,0.00443939,0.0154707,0.00256761,0.00798419,-0.0122066,-0.00422395,0.00374877,-0.0237439,0.00613681,0.0169828,0.00688378,0.014864,-0.00270731,-0.0293061,-0.00331935,0.0032714,-0.00177531,0.000844424,0.00347392,0.00660751,0.0116051,-0.00968415,0.00627859,0.00917115,-0.000892451,0.00402204,0.00808315,0.00162805,0.00239369,-0.00137458,-0.018374,-0.0145009,-0.0035346,-0.00631288,-0.00647257,0.00936539,-0.00416359,-0.00270836,0.00910647,-0.0124106,-0.00272237,-0.0121641,-0.00546068,0.00980177,0.0113936,0.00471617,0.00106706,0.0104611,-0.00863859,-0.00737186,0.0076491,-0.00307445,0.00790136,-0.00014123,0.00361998,-0.000266306,0.0107318,-0.00817387,-0.00308147,0.0265123,0.00575232,0.00937326,-0.00383473,0.00119928,0.00519577,-0.00437729,-0.0589121,-0.0180253,-0.0165976,⋯
7,-0.0108142,-0.0246295,0.0165864,0.0276081,-0.00322781,-0.00675463,-0.00307889,0.00761589,0.0108707,0.00239838,0.00312744,0.00201717,0.000212653,-0.0216476,-0.0262689,-0.046066,0.000604888,-0.00444825,0.0080061,0.0102846,-0.0328024,-0.011759,-0.00155482,0.000399137,-0.0117808,-0.0112526,0.0141836,-0.00350681,-0.00483822,0.000740564,-0.00633681,0.021365,0.0093879,0.00822294,0.00223411,-0.00338308,-0.00891734,0.00218038,-0.00613496,-0.0255701,-0.0193571,0.00561938,-0.000937229,-0.0227057,-0.030191,-0.0170468,-0.00523941,0.0178034,0.00210836,0.00325878,-0.0157651,-0.0190726,0.000324321,0.00373342,-0.0229977,-0.0134832,-0.0115852,-0.0148919,-0.0073115,-0.0143638,-0.00967537,0.00431054,-0.00283098,0.013127,-0.0280011,0.0110702,0.000218136,0.0,-0.0250815,0.00880212,-0.00192315,0.000315483,-0.00872325,-0.00259682,-0.00377,-0.00611893,0.00167902,-0.00995285,-0.0301431,-0.0178881,0.00889345,-0.00439575,-0.0165577,0.0146726,0.014524,-0.00440734,0.010862,-0.00902274,-0.00791045,-0.00610265,-0.0244478,0.0133197,-0.0108649,0.00892519,-0.0230383,0.0109269,0.00168793,0.00450339,0.00980398,-0.00107282,⋯
8,0.00660986,-0.0279117,-0.0993623,-0.0182661,0.00177655,0.00104739,0.00369356,0.023741,-0.00159127,0.00382508,-0.00318039,0.00129461,0.00258863,-0.00430277,0.0134662,0.0224854,0.000604427,0.00311598,0.0119967,0.00696748,-0.0204168,0.000985221,-0.0172266,0.00437961,-0.00740278,-0.0165144,-0.00461741,-0.00991254,0.0111878,-0.00495589,0.00853565,0.0182605,-0.00361522,-0.0321211,0.00556339,0.00246156,-0.00174,0.000829284,0.0303054,0.00301445,0.00421735,-0.00979322,0.00584319,0.00346051,-0.0510916,0.00590148,-0.00114978,0.00697566,-0.00803553,0.00203115,0.00515687,0.0157706,-0.0162916,0.00877695,-0.022528,-0.0106143,-0.00495569,-0.010639,0.00341388,0.00534718,-0.0139861,-0.00452137,-0.00362932,-0.0725487,0.00367428,-0.0232029,-0.00710051,0.0394177,-0.00837758,-0.0141413,0.0111661,0.00878016,0.00179051,0.00719747,0.00688454,0.00306414,0.00552068,-0.0164985,0.000428885,0.0103153,-0.0138866,-0.0221017,0.00693242,0.000567309,0.00139213,-0.00566551,-0.000702907,0.00144389,-0.000993153,-8.88466e-5,0.0112116,0.00638905,-6.80463e-5,-0.0203893,-0.0301968,0.00112705,-0.00742646,-0.0454508,-0.00112627,-0.00777286,⋯
9,-7.66262e-5,-0.0746461,-0.00151519,0.00163002,-0.0123936,-0.00568123,-0.0014055,-0.00497962,0.00190922,-0.00750507,-0.00303077,-0.00663479,-0.00284364,-0.00950409,-0.00627809,-0.0302804,-0.00254126,-0.0041568,-0.00568043,0.00312806,-0.000793721,-0.0192218,-0.0155077,0.00310715,-0.00309372,0.000302492,0.0164588,0.0798328,-0.00993471,-0.00781463,-0.00661109,-0.0236953,-0.00948731,-0.0565813,-0.00160387,-0.0398109,-0.0140305,-0.00373757,-0.0629194,0.00245234,-0.0331583,-0.0240286,-0.0129004,-0.0148782,0.00402415,-0.0511605,-0.0131795,0.00655091,-0.00532201,-0.00478011,-0.0137085,-0.000509073,-0.00589512,0.038358,-0.0822039,-0.0209497,-0.0157371,-0.0274517,-0.0240006,-0.0053897,-0.0199154,-0.000948959,-0.0278909,-0.0431695,-0.00780887,-0.0589827,-0.01054,0.00979681,-0.0363607,0.000371231,-0.0273372,-0.0103332,-0.00618079,-0.016778,-0.00217423,-0.0158593,0.00515853,0.00836087,-0.00688469,-0.0143557,-0.0187839,-0.00524281,-0.0128631,0.00556151,0.00367992,-0.0110105,0.000790702,-0.0244303,-0.0200742,-0.0168206,0.0133704,0.0331862,-0.0192518,-0.0133022,-0.0140041,0.0052092,0.00938309,-0.0195205,-0.0216451,0.00630777,⋯
10,-0.0186358,-0.0250695,-0.0199087,-0.00588045,-0.00518682,0.003462,0.00149324,0.00107503,-0.00264708,-0.0164838,-0.0126469,-0.00260839,0.000467347,-0.0125768,-0.0155563,-0.0277719,0.00362839,-0.00701662,0.00222486,-0.00643581,-0.0425769,-0.00637802,-0.0299821,0.00690669,-0.00579517,-0.0149414,-0.00253947,0.00896806,0.00197478,-0.00378258,-0.0150125,-0.016484,-0.0095123,0.00857584,-0.0106768,-0.0148199,-0.00119094,-0.00846081,0.00407887,-0.0269885,0.0019013,-0.0116604,-0.0147446,-0.0118158,-0.0969179,0.000589535,-0.00987378,-0.0101776,-0.0139711,0.000968151,-0.0153771,-0.0197951,0.000165715,-0.000131153,0.0125875,-0.0100126,-0.0154394,-0.0100702,-0.00320516,-0.00298318,-0.0170262,-0.00316949,-0.0180422,-0.0222554,-0.00374394,-0.0231748,-0.0358485,-0.00961461,-0.00466385,-0.00394658,0.00390499,-0.00500376,-0.000400213,-0.0111471,-0.00103926,-0.00847285,0.0117153,0.0133562,-0.0375411,-0.00988083,-0.0364921,-0.00961924,0.0114804,-0.00868584,-0.00535082,-0.0303984,0.00228084,-0.00904606,-0.02602,-0.0212473,-0.0130986,-0.00784879,-0.0121794,-0.0125505,0.0168445,0.00548609,-0.0250158,0.0116526,0.00497802,0.00500471,⋯


In [64]:
# Convert the DataFrame to a matrix
returns_matrix = Matrix(returns_df)


295×550 Matrix{Float64}:
 -0.0562558    -0.0561878    -0.0371395   …  -0.0290514    -0.0186913
 -0.00122057   -0.0114979     0.0107528      -0.0040277     0.00573702
 -0.00336453    0.0316182     0.0382212       0.000896423   0.00416562
  0.0213694    -0.0125276     0.0695809       0.0211312     0.00664964
 -0.0204507     0.00659655   -0.0138124      -0.00117047   -0.00107122
  0.00313257   -0.00912101   -0.00208839  …  -0.00190486    0.0143399
 -0.0108142    -0.0246295     0.0165864       0.00468247   -0.0104178
  0.00660986   -0.0279117    -0.0993623       0.0282114     0.0112727
 -7.66262e-5   -0.0746461    -0.00151519     -0.0194905    -0.0303648
 -0.0186358    -0.0250695    -0.0199087      -0.0167826    -0.0108888
  0.0180995    -0.0159469     0.0665666   …  -0.00931473    0.000167991
  0.00580839    0.0225476    -0.011645        0.0252287    -0.0111909
  0.0100847     0.0094581    -0.00293255      0.00793477    0.0148588
  ⋮                                       ⋱                

In [67]:
A = cor(returns_matrix)


550×550 Matrix{Float64}:
 1.0         0.180324    0.179886    …   0.309042    0.228446    0.283332
 0.180324    1.0         0.238562        0.234882    0.119067    0.137479
 0.179886    0.238562    1.0             0.191537    0.149666    0.113521
 0.171505    0.169227    0.215886        0.188025    0.00192242  0.139253
 0.109031    0.139368    0.146015        0.232247    0.17866     0.127991
 0.174613   -0.0151612   0.0291155   …   0.0699976  -0.0361176   0.141966
 0.226759   -0.0430993   0.0168183       0.0571697   0.117557    0.293217
 0.258958    0.127923    0.149801        0.116319    0.288045    0.173855
 0.138474    0.0399881   0.147147        0.234428    0.210228    0.086072
 0.178138    0.0714902   0.0277416       0.164553    0.0471774   0.317152
 0.377385    0.31267     0.253275    …   0.251523    0.196444    0.177118
 0.0706985   0.13969     0.0587523       0.0187841   0.0255106   0.032967
 0.180588    0.0412681   0.142261        0.151448    0.181324    0.0823327
 ⋮          

In [68]:
eigvals(A)

550-element Vector{Float64}:
  -9.790324997736502e-15
  -8.887280656269296e-15
  -4.711371334814133e-15
  -3.865521854499892e-15
  -3.6031778059943086e-15
  -3.407822472397583e-15
  -3.167020226034407e-15
  -2.796474980776787e-15
  -2.669356124742055e-15
  -2.565714648039858e-15
  -2.2271619451824195e-15
  -2.2030497917076423e-15
  -2.103563099610509e-15
   ⋮
   5.332937181563127
   5.762241144572178
   6.093873520391833
   6.461197532053829
   6.990073749550197
   8.636126735811605
  10.5992666758315
  14.03573532157519
  16.29948868153619
  23.426399745610723
  43.41436704988023
 110.1124918636684

In [69]:
using LinearAlgebra
using Random
using TimerOutputs

In [70]:
# Function to project onto the PSD matrix
function Project_onto_PSD(A_symm::Matrix{Float64})::Matrix{Float64}
    λ, V = eigen(Symmetric(A_symm))  # Eigenvalue decomposition
    λ[λ .< 0] .= 0.0  # Set all negative eigenvalues exactly to zero
    return V * Diagonal(λ) * V'  # Reconstruct PSD matrix
end

Project_onto_PSD (generic function with 1 method)

In [71]:
# Function to project onto the unit diagonal (in-place modification)
function project_onto_UD!(A_symm::Matrix{Float64})::Matrix{Float64}
    for i in 1:size(A_symm,1)
        A_symm[i,i] = 1.0
    end
    return A_symm
end

project_onto_UD! (generic function with 1 method)

In [75]:
# Function for POCS with Dykstra's Correction
function nearest_corr_dykstra(A_symm::Matrix{Float64}, tol::Float64=1e-8, max_iter::Int=1000000)
    Y_k = copy(A_symm)
    ΔS_k = zeros(size(A_symm))

    for k in 1:max_iter
        R_k = Y_k - ΔS_k
        X_k = Project_onto_PSD(R_k)
        ΔS_k = X_k - R_k
        Y_k = project_onto_UD!(copy(X_k))  # Ensure no in-place modification to X_k

        # Convergence check (relative error)
        if norm(Y_k - X_k, Inf) / norm(Y_k, Inf) <= tol
            println("Converged after $k iterations")
            return Y_k, k
        end
    end
    println("Reached maximum iterations ($max_iter) without convergence")
    return Y_k, max_iter
end

nearest_corr_dykstra (generic function with 3 methods)

In [76]:
# ===== Example Usage ===== #
# Pass in the symmetric matrix (not necessarily a valid correlation matrix)


A_symm = (A + A') / 2  # Make it symmetric

# Run the nearest correlation matrix function
to = TimerOutput()  # Initialize timing
println("=== POCS with Dykstra ===")

@timeit to "POCS with Dykstra" begin
    result, iterations = nearest_corr_dykstra(A_symm)
end

show(to)

# Verify the result is PSD
min_eigenval = minimum(eigen(Symmetric(result)).values)
println("")
println("Minimum Eigenvalue after projection: ", min_eigenval)
result

=== POCS with Dykstra ===
Converged after 1 iterations
[0m[1m──────────────────────────────────────────────────────────────────────────────[22m
[0m[1m                            [22m         Time                    Allocations      
                            ───────────────────────   ────────────────────────
     Tot / % measured:           197ms /  97.8%           26.3MiB /  99.9%    

Section             ncalls     time    %tot     avg     alloc    %tot      avg
──────────────────────────────────────────────────────────────────────────────
POCS with Dykstra        1    193ms  100.0%   193ms   26.3MiB  100.0%  26.3MiB
[0m[1m──────────────────────────────────────────────────────────────────────────────[22m
Minimum Eigenvalue after projection: -1.499094601010266e-14


550×550 Matrix{Float64}:
 1.0         0.180324    0.179886    …   0.309042    0.228446    0.283332
 0.180324    1.0         0.238562        0.234882    0.119067    0.137479
 0.179886    0.238562    1.0             0.191537    0.149666    0.113521
 0.171505    0.169227    0.215886        0.188025    0.00192242  0.139253
 0.109031    0.139368    0.146015        0.232247    0.17866     0.127991
 0.174613   -0.0151612   0.0291155   …   0.0699976  -0.0361176   0.141966
 0.226759   -0.0430993   0.0168183       0.0571697   0.117557    0.293217
 0.258958    0.127923    0.149801        0.116319    0.288045    0.173855
 0.138474    0.0399881   0.147147        0.234428    0.210228    0.086072
 0.178138    0.0714902   0.0277416       0.164553    0.0471774   0.317152
 0.377385    0.31267     0.253275    …   0.251523    0.196444    0.177118
 0.0706985   0.13969     0.0587523       0.0187841   0.0255106   0.032967
 0.180588    0.0412681   0.142261        0.151448    0.181324    0.0823327
 ⋮          

In [77]:
eigvals(result)

550-element Vector{ComplexF64}:
  -3.974064830718866e-15 + 0.0im
  -3.326034694031045e-15 + 0.0im
 -2.8202128197104143e-15 - 1.117833780699321e-16im
 -2.8202128197104143e-15 + 1.117833780699321e-16im
 -2.6183202381707636e-15 + 0.0im
 -2.4665477637545403e-15 - 1.303550826005758e-16im
 -2.4665477637545403e-15 + 1.303550826005758e-16im
  -2.452370325024852e-15 + 0.0im
  -2.264588873868899e-15 - 1.783599720252432e-16im
  -2.264588873868899e-15 + 1.783599720252432e-16im
 -2.1208980729590884e-15 - 3.6692652240909735e-16im
 -2.1208980729590884e-15 + 3.6692652240909735e-16im
 -2.1012025747906937e-15 - 1.5269203745016495e-16im
                         ⋮
       5.332937181563107 + 0.0im
      5.7622411445722195 + 0.0im
       6.093873520391853 + 0.0im
       6.461197532053897 + 0.0im
       6.990073749550237 + 0.0im
        8.63612673581165 + 0.0im
      10.599266675831528 + 0.0im
      14.035735321575276 + 0.0im
       16.29948868153617 + 0.0im
      23.426399745610738 + 0.0im
       43.4143670

In [23]:

# Enforce small eigenvalues to be a small positive value
function enforce_small_eigenvalues_positive!(matrix::Matrix{Float64}, threshold::Float64=1e-8, small_positive::Float64=1e-6)
    λ, V = eigen(Symmetric(matrix))
    λ[λ .< threshold] .= small_positive
    matrix .= V * Diagonal(λ) * V'
    return Symmetric(matrix)
end

enforce_small_eigenvalues_positive! (generic function with 3 methods)

In [24]:
# Function to project onto the unit diagonal (in-place modification)
function project_onto_UD!(A::Matrix{Float64})::Matrix{Float64}
    for i in 1:size(A, 1)
        A[i, i] = 1.0
    end
    return A
end

project_onto_UD! (generic function with 1 method)

In [25]:
# Function to project onto the PSD cone
function Project_onto_PSD(matrix::Matrix{Float64})::Matrix{Float64}
    λ, V = eigen(Symmetric(matrix))
    λ[λ .< 0] .= 0
    return V * Diagonal(λ) * V'
end

Project_onto_PSD (generic function with 1 method)

In [26]:
# Function for POCS with Dykstra's Correction
function nearest_corr_dykstra(A::Matrix{Float64}, tol::Float64=1e-8, max_iter::Int=10000)
    Y_k = copy(A)
    ΔS_k = zeros(size(A))

    for k in 1:max_iter
        R_k = Y_k - ΔS_k
        X_k = @timeit to "proj_PSD" Project_onto_PSD(R_k)
        ΔS_k = X_k - R_k
        Y_k = @timeit to "proj_UD" project_onto_UD!(copy(X_k))  # Ensure no in-place modification to X_k

        # Convergence check: Ensure changes are small
        if norm(Y_k - X_k, Inf) <= tol * max(norm(Y_k, Inf), eps(Float64))
            println("Converged after $k iterations")
            return Y_k, k
        end
    end
    println("Reached maximum iterations ($max_iter) without convergence")
    return Y_k, max_iter
end

nearest_corr_dykstra (generic function with 3 methods)

In [27]:
# Define Matrix
@time Returns_Matrix  # Define a symmetric matrix

to = TimerOutput()  # Initialize timing
println("=== POCS with Dykstra ===")
@timeit to "POCS with Dykstra" begin
    result, iterations = nearest_corr_dykstra(Returns_Matrix)
end

show(to) 

# Verify the result is PSD
min_eigenval = minimum(eigen(Symmetric(result)).values);


println(" ");
 println("Minimum Eigenvalue after projection: ", min_eigenval);

# Apply the function to the result matrix
result = enforce_small_eigenvalues_positive!(result);
#println("Eigenvalues after enforcing small positive values: ", eigvals(result));

  0.000002 seconds
=== POCS with Dykstra ===
Converged after 1 iterations
[0m[1m──────────────────────────────────────────────────────────────────────────────[22m
[0m[1m                            [22m         Time                    Allocations      
                            ───────────────────────   ────────────────────────
     Tot / % measured:           530ms /  99.1%           38.8MiB /  99.9%    

Section             ncalls     time    %tot     avg     alloc    %tot      avg
──────────────────────────────────────────────────────────────────────────────
POCS with Dykstra        1    525ms  100.0%   525ms   38.8MiB  100.0%  38.8MiB
  proj_PSD               1    134ms   25.4%   134ms   11.7MiB   30.3%  11.7MiB
  proj_UD                1   1.02ms    0.2%  1.02ms   2.31MiB    6.0%  2.31MiB
[0m[1m──────────────────────────────────────────────────────────────────────────────[22m 
Minimum Eigenvalue after projection: 0.017080992878056752


In [28]:
result

550×550 Symmetric{Float64, Matrix{Float64}}:
 1.0       0.341684   0.291481   …  0.398537   0.222342    0.588853
 0.341684  1.0        0.453693      0.315123   0.0304738   0.291048
 0.291481  0.453693   1.0           0.296137   0.0164447   0.227405
 0.313961  0.313672   0.280062      0.255597   0.0435602   0.312147
 0.48978   0.306628   0.286348      0.405074   0.283077    0.53075
 0.348814  0.175168   0.136149   …  0.154286  -0.0349751   0.395757
 0.555384  0.185457   0.176073      0.278671   0.140008    0.563973
 0.563609  0.389523   0.343428      0.372454   0.2045      0.55957
 0.482745  0.227748   0.211223      0.417845   0.353948    0.478927
 0.351186  0.303158   0.307349      0.365073   0.0140673   0.427558
 0.554107  0.443891   0.36101    …  0.465851   0.226888    0.481578
 0.347397  0.435498   0.315121      0.166931  -0.0538514   0.300916
 0.511121  0.357705   0.362572      0.345502   0.0797917   0.548638
 ⋮                               ⋱                         
 0.514498  0.

In [29]:
eigvals(result)

550-element Vector{Float64}:
   0.017080992878057265
   0.01871712060762831
   0.01954749645360226
   0.019978980549153515
   0.02067697190946732
   0.02109590639371449
   0.021421645017632187
   0.02269889510960254
   0.022971009107125653
   0.023621000389374716
   0.02407213901508395
   0.02450544899786704
   0.02535173213479776
   ⋮
   3.9087487060765866
   4.125967269853987
   4.510806169979932
   4.898846527791914
   5.58734725499009
   6.033147897610605
   7.712303265236992
   9.471092905317402
  12.330409298835317
  24.930980682859936
  32.72933160863599
 211.12389412400785

In [30]:
rank(result)

550

In [31]:
# Checking Frobenius norm
frobenius_norm = norm(Returns_Matrix-result)  
println(frobenius_norm)

1.1532233022175007e-12
