In [1]:
using CSV, DataFrames, Statistics, LinearAlgebra, MultivariateStats

function factominer_cov(df::DataFrame; scale::Bool = true, dropmissing_flag::Bool = true)
    # Garde uniquement les colonnes numériques
    numcols = [name for name in names(df) if eltype(skipmissing(df[!, name])) <: Number]
    num_df = df[:, numcols]

    # Supprime les lignes avec missing si demandé
    if dropmissing_flag
        num_df = dropmissing(num_df)
    end

    # Convertir en matrice
    X = Matrix(num_df)

    # Matrice de covariance ou corrélation
    cov_mat = scale ? cor(X) : cov(X)
    println("Matrice de covariance/corrélation :")
    println(round.(cov_mat, digits=4))
 # Centrage et standardisation
    Xc = X .- mean(X, dims=1)
    if scale
        sds = std(X, dims=1, corrected=true)
        sds[sds .== 0.0] .= 1.0
        Xc .= Xc ./ sds
    end

    # PCA
    pca_model = fit(PCA, Xc; maxoutdim=size(Xc, 2))

    # Scores
    scores = MultivariateStats.transform(pca_model, Xc)

    # Valeurs propres
    eigvals = principalvars(pca_model)

    # Loadings (vecteurs propres)
    loadings = pca_model.proj

    println("\nValeurs propres :\n", round.(eigvals, digits=4))
    println("\nLoadings (vecteurs propres) :\n", round.(loadings, digits=4))
    println("\nScores (coordonnées des individus) :\n", round.(scores, digits=4))

    return (cov_mat=cov_mat, model=pca_model, scores=scores, loadings=loadings)
end

# Exemple d'utilisation
df = CSV.read("C:/Users/Lenovo/Downloads/student_exam_scores.csv", DataFrame)
result = factominer_cov(df, scale=true, dropmissing_flag=true)


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39mPrecompiling DataFrames [a93c6f00-e57d-5684-b7b6-d8193f3e46c0] 
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39mPrecompiling MultivariateStats [6f286f6a-111f-5878-ab1e-185364afe411] (cache misses: wrong dep version loaded (2))


Matrice de covariance/corrélation :
[1.0 0.0779 -0.0313 0.0691 0.7768; 0.0779 1.0 0.0006 -0.1943 0.1882; -0.0313 0.0006 1.0 0.052 0.2257; 0.0691 -0.1943 0.052 1.0 0.4311; 0.7768 0.1882 0.2257 0.4311 1.0]

Valeurs propres :
[61.2838, 57.465, 40.4224, 8.0743]

Loadings (vecteurs propres) :
0.0155 -0.0807; 0.115 -0.0019 -0.1123 0.0011; 0.0133 0.0318 -0.1159 0.0687; -0.0217 0.1114 0.0794 0.0757; -0.0263 0.0079 0.0721 0.0289; -0.097 -0.0515 -0.0156 -0.0338; -0.1164 -0.0866 -0.0634 -0.0864; 0.0086 0.0956 0.0274 -0.1696; -0.0508 0.0603 -0.0204 -0.0038; 0.1001 0.0375 -0.1472 -0.0415; -0.0307 0.1289 0.0011 -0.0489; 0.0121 0.0488 -0.0429 -0.0321; 0.0185 0.1004 -0.0291 0.081; -0.0121 0.0092 -0.0658 0.0234; -0.0216 -0.0702 -0.1365 0.0545; 0.1009 -0.1435 -0.0408 0.0534; 0.0626 -0.0282 -0.0102 0.0391; 0.038 -0.0451 0.0152 0.0327; -0.0299 0.0089 0.168 -0.0586; 0.0073 0.0431 -0.1061 -0.1743; -0.0163 -0.0784 -0.0625 0.1077; 0.0262 -0.0319 0.0736 -0.0066; 0.0196 -0.0304 -0.0447 -0.0145; 0.0236 0.0796 -0

(cov_mat = [1.0 0.07786430142964218 … 0.06907820106185657 0.7767514349789607; 0.07786430142964218 1.0 … -0.19425104117971997 0.1882219847044782; … ; 0.06907820106185657 -0.19425104117971997 … 1.0 0.4311047112479793; 0.7767514349789607 0.1882219847044782 … 0.4311047112479793 1.0], model = PCA(indim = 200, outdim = 4, principalratio = 1.0), scores = [-3.3437944119983896 11.64641705271435 … -8.185819014222602 -4.014458007164156; -8.394254934554814 -3.343742644088582 … 5.422452885478385 -3.873221789430506; 4.671178219203071 -5.658648377457481 … -7.649524156533638 1.865393172941796; -3.1789154692470194 -0.002348712358230552 … -1.0907220602961825 4.572921009637001], loadings = [0.11452458521880182 -0.07211461769862519 0.015515822276268087 -0.08069491788038956; 0.1149999256879469 -0.0018521973931939482 -0.11232845787649047 0.001105185288641439; … ; -0.030259985660515965 -0.17969855224111086 0.003690710896143732 0.029557153879470166; -0.013243211135712751 0.02175772263335847 0.1114520735402369