# In [1]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from math import sqrt

class ACPClass:
    """Standardize a labelled table and project it onto its leading principal components.

    The last column of the input frame is treated as the label column; every
    other column is standardized and fed to scikit-learn's ``PCA``. The number
    of components kept is the smallest count whose cumulative explained
    variance ratio reaches ``variance_threshold``.

    Attributes set by ``fit``:
        datas: the frame passed to ``fit``.
        X: standardized feature matrix returned by ``StandardScaler``.
        Y: the last column of ``datas`` (original index preserved).
        explained_variance_ratio_: copied from the fitted ``PCA`` object.
        n_components: number of principal components kept.
        principalDf: kept components concatenated with ``Y``.
    """

    def __init__(self, variance_threshold: float = 0.9):
        """Create an unfitted reducer.

        Args:
            variance_threshold: share of the total explained variance that the
                kept components must reach. Defaults to 0.9.
        """
        self.pca = PCA()
        self.variance_threshold = variance_threshold
        self.n_components = 0
        self.principalDf = None
        self.explained_variance_ratio_ = None

    def fit(self, datas: pd.DataFrame) -> pd.DataFrame:
        """Fit the PCA on ``datas`` and return the reduced frame.

        Args:
            datas: frame whose last column is the label and whose remaining
                columns are the features to standardize and project.

        Returns:
            A frame indexed like ``datas`` holding the columns
            ``"principal component 1" .. "principal component k"`` followed by
            the label column.

        Raises:
            KeyError, TypeError, AttributeError, IndexError: re-raised with the
                same type and a prefixed message when splitting or
                standardizing ``datas`` fails; the original exception is
                chained as ``__cause__``.
        """
        self.datas = datas
        # Reset so that a second call never reuses the count of a previous fit.
        self.n_components = 0

        try:
            # Resolve the label first: this fails fast on inputs that are not
            # a frame or that have no column at all.
            label = datas.columns[-1]
            features = datas.drop(columns=label)
            self.X = StandardScaler(with_mean=True, with_std=True).fit_transform(features)
            self.Y = datas[label]
        except (KeyError, TypeError, AttributeError, IndexError) as e:
            raise type(e)(f"Erreur lors du traitement des données : {e}") from e

        scores = self.pca.fit_transform(self.X)
        self.explained_variance_ratio_ = self.pca.explained_variance_ratio_
        self.n_components = self._components_for_threshold()

        if self.n_components == 0:
            # The threshold was never reached (e.g. the ratios are NaN), so
            # fall back to the original heuristic.
            # NOTE(review): X and Y come from the same frame, so their lengths
            # look identical and this evaluates to 3 whenever there is more
            # than one row - confirm that this is the intended rule.
            try:
                self.n_components = int(1 + 2 * sqrt(float(len(self.X) - 1) / (len(self.Y) - 1)))
            except ZeroDivisionError:
                self.n_components = len(self.X[0])

        # Never advertise more components than the PCA actually produced,
        # otherwise the column names and the sliced data disagree.
        self.n_components = min(self.n_components, scores.shape[1])

        col = [f"principal component {i}" for i in range(1, self.n_components + 1)]
        # Reuse the caller's index: ``concat(axis=1)`` aligns on index labels,
        # so a fresh RangeIndex would misalign the components and the labels
        # whenever ``datas`` is not indexed 0..n-1.
        principalDf = pd.DataFrame(
            data=scores[:, :self.n_components],
            columns=col,
            index=datas.index,
        )
        self.principalDf = pd.concat([principalDf, self.Y], axis=1)
        return self.principalDf

    def _components_for_threshold(self) -> int:
        """Return the smallest component count reaching the variance threshold, or 0."""
        ratios = self.explained_variance_ratio_
        total = sum(ratios)
        cumulative = 0
        for position, ratio in enumerate(ratios):
            cumulative += ratio
            if cumulative / total >= self.variance_threshold:
                return position + 1
        return 0