In [None]:
# Librairie
import pandas as pd
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

In [None]:
class DataManager:
    """Load a CSV file and turn it into a fully numeric, scaled table.

    Attributes:
        df: The current data. Raw CSV content after ``__init__``; replaced by
            the transformed table once ``preprocess`` has run.
        feature_names: Output column names, filled in by ``preprocess``.
        preprocessor: The fitted ``ColumnTransformer`` (``None`` until
            ``preprocess`` has run), kept so the same transformation can be
            inspected or re-applied later.
    """

    def __init__(self, filepath, **csv_options):
        """Read the data.

        Args:
            filepath: Path (or file-like object) handed to ``pd.read_csv``.
            **csv_options: Extra keyword arguments forwarded to ``pd.read_csv``.
        """
        self.df = pd.read_csv(filepath, **csv_options)
        self.feature_names = []
        self.preprocessor = None

    def preprocess(self):
        """Impute, encode and scale ``self.df`` in place.

        ``int64``/``float64`` columns are median-imputed then standardised.
        ``object`` columns have missing values replaced by the constant
        ``'missing'``, are one-hot encoded, then scaled without centering.
        Columns of any other dtype are dropped by the ``ColumnTransformer``
        (its default ``remainder`` behaviour).

        After the call, ``self.df`` holds the transformed values with
        ``self.feature_names`` as column labels and the original row index.
        """
        num_features = self.df.select_dtypes(include=['int64', 'float64']).columns
        cat_features = self.df.select_dtypes(include=['object']).columns

        num_transformer = Pipeline(steps=[
            ('imputer', SimpleImputer(strategy='median')),
            ('scaler', StandardScaler()),
        ])

        cat_transformer = Pipeline(steps=[
            ('imputer', SimpleImputer(strategy='constant', fill_value='missing')),
            ('onehot', OneHotEncoder(handle_unknown='ignore')),
            # with_mean=False: centering is not supported on sparse one-hot output.
            ('scaler', StandardScaler(with_mean=False)),
        ])

        preprocessor = ColumnTransformer(
            transformers=[
                ('num', num_transformer, num_features),
                ('cat', cat_transformer, cat_features),
            ],
            # Always return a dense array: a SciPy sparse matrix cannot be
            # wrapped directly by the DataFrame constructor below.
            sparse_threshold=0,
        )

        transformed = preprocessor.fit_transform(self.df)

        # Names must be computed before building the frame so that the columns
        # of the result are labelled.
        self.update_feature_names(preprocessor, num_features, cat_features)
        self.df = pd.DataFrame(
            transformed, columns=self.feature_names, index=self.df.index
        )
        self.preprocessor = preprocessor

    def update_feature_names(self, preprocessor, num_features, cat_features):
        """Rebuild ``self.feature_names`` from a fitted preprocessor.

        Args:
            preprocessor: Fitted transformer exposing
                ``named_transformers_['cat']['onehot']``.
            num_features: Names of the numeric input columns.
            cat_features: Names of the categorical input columns.

        The result is the numeric names followed by the one-hot encoded names,
        i.e. the same order as the transformers passed to the
        ``ColumnTransformer`` in ``preprocess``.
        """
        if len(cat_features) > 0:
            encoder = preprocessor.named_transformers_['cat']['onehot']
            categ_features = list(encoder.get_feature_names_out(cat_features))
        else:
            # Without categorical columns the 'cat' pipeline is never fitted,
            # so there is nothing to ask the encoder.
            categ_features = []
        self.feature_names = list(num_features) + categ_features

    def get_data(self):
        """Return the current DataFrame (transformed once ``preprocess`` ran)."""
        return self.df

    def get_feature_names(self):
        """Return the output feature names computed by ``preprocess``."""
        return self.feature_names

In [None]:
# Usage example: load a CSV, preprocess it, then retrieve the results.

# Path of the CSV file read by DataManager (resolved against the working directory).
file_path = "data.csv"
# sep and header are forwarded unchanged to pd.read_csv.
data_manager = DataManager(file_path, sep=',', header=0)
# Replaces the manager's DataFrame with its imputed / encoded / scaled version.
data_manager.preprocess()
data = data_manager.get_data()
feature_names = data_manager.get_feature_names()

In [None]:
import pandas as pd
import numpy as np
from sklearn.decomposition import PCA


class ACP:
    """Principal component analysis (ACP) helper built on scikit-learn's PCA.

    Attributes:
        n_components: Value handed to ``PCA(n_components=...)``.
        pca: The underlying scikit-learn ``PCA`` estimator.
        components_: Principal axes copied from ``pca`` after ``fit``
            (one row per component), ``None`` before.
        explained_variance_: Eigenvalues copied from ``pca`` after ``fit``,
            ``None`` before.
        feature_names_: Variable names used as row labels of the result
            tables, or ``None`` when no names are known.
        feature_std_: Per-variable sample standard deviation (``ddof=1``) of
            the fitted data, needed to turn loadings into correlations.
    """

    def __init__(self, n_components=4):
        self.n_components = n_components
        self.pca = PCA(n_components=n_components)
        self.components_ = None
        self.explained_variance_ = None
        self.feature_names_ = None
        self.feature_std_ = None

    def fit(self, data, feature_names=None):
        """Fit the PCA on ``data``.

        Args:
            data: Table of observations (rows) by variables (columns).
            feature_names: Optional variable names. When omitted and ``data``
                is a DataFrame, its column labels are used.

        Raises:
            ValueError: If ``feature_names`` does not have one entry per
                variable.
        """
        self.pca.fit(data)
        self.components_ = self.pca.components_
        self.explained_variance_ = self.pca.explained_variance_

        if feature_names is None and isinstance(data, pd.DataFrame):
            feature_names = data.columns
        if feature_names is not None:
            feature_names = list(feature_names)
            n_features = self.components_.shape[1]
            if len(feature_names) != n_features:
                raise ValueError(
                    f"feature_names has {len(feature_names)} entries "
                    f"but the data has {n_features} variables"
                )
        self.feature_names_ = feature_names

        # ddof=1 matches the n-1 normalisation of PCA's explained_variance_.
        self.feature_std_ = np.asarray(data, dtype=float).std(axis=0, ddof=1)

    def _component_labels(self):
        """Return ``['PC1', 'PC2', ...]`` for the components actually fitted."""
        # Use the fitted shape, not n_components: the latter may be None, a
        # float or 'mle', or larger than what the data allows.
        return [f"PC{i + 1}" for i in range(self.components_.shape[0])]

    def get_eigenvalues(self):
        """Return the variance explained by each component (``None`` before ``fit``)."""
        return self.explained_variance_

    def get_contributions(self):
        """Return the contribution (in %) of each variable to each component.

        Each component is a unit vector, so the squared coefficients of one
        column sum to 100.
        """
        contributions = np.square(self.components_) * 100
        return pd.DataFrame(
            contributions.T,
            index=self.feature_names_,
            columns=self._component_labels(),
        )

    def get_circle_of_correlations(self):
        """Return the correlation between every variable and every component.

        For variable ``j`` and component ``k`` the correlation is
        ``components_[k, j] * sqrt(explained_variance_[k]) / std_j``; the raw
        ``components_`` are eigenvector coefficients, not correlations.
        Variables with zero (or undefined) standard deviation get 0.
        """
        loadings = self.components_.T * np.sqrt(self.explained_variance_)
        std = np.asarray(self.feature_std_, dtype=float)[:, np.newaxis]
        correlations = np.divide(
            loadings, std, out=np.zeros_like(loadings), where=std > 0
        )
        return pd.DataFrame(
            correlations,
            index=self.feature_names_,
            columns=self._component_labels(),
        )

    def select_variables_for_circle(self, variables, df_circle):
        """Return the rows of ``df_circle`` whose index label is in ``variables``."""
        return df_circle[df_circle.index.isin(variables)]


In [None]:
# Exe