In [None]:
import pandas as pd
import numpy as np
from describe import custom_mean, custom_std

In [10]:
df = pd.read_csv('./datasets/dataset_train.csv')
print(df.shape)

(1600, 19)


In [11]:
class CustomLabelEncoder:
    def __init__(self):
        self.mapping = {}
        self.inverse_mapping = {}
    
    def fit(self, data):
        unique_values = np.unique(data)
        for i, value in enumerate(unique_values):
            self.mapping[value] = i
            self.inverse_mapping[i] = value
    
    def transform(self, data):
        return np.array([self.mapping[value] for value in data])
    
    def inverse_transform(self, data):
        return np.array([self.inverse_mapping[value] for value in data])
    
    def fit_transform(self, data):
        self.fit(data)
        return self.transform(data)
    
    def save(self, filename):
        df = pd.DataFrame(self.mapping.items(), columns=['value', 'label'])
        df.to_csv(filename, index=False)

    def load(self, filename):
        df = pd.read_csv(filename)
        self.mapping = dict(zip(df['value'], df['label']))
        self.inverse_mapping = dict(zip(df['label'], df['value']))

In [14]:
# CustomLabelEncoder usage example
dict_data = {'A': [1, 2, 3, 4, 5], 'B': ['a', 'b', 'b', 'a', 'c']}
df = pd.DataFrame(dict_data)
print(df)
encoder = CustomLabelEncoder()
df['B'] = encoder.fit_transform(df['B'])
print(encoder.mapping)
print(encoder.inverse_mapping)
print(df)


   A  B
0  1  a
1  2  b
2  3  b
3  4  a
4  5  c
{'a': 0, 'b': 1, 'c': 2}
{0: 'a', 1: 'b', 2: 'c'}
   A  B
0  1  0
1  2  1
2  3  1
3  4  0
4  5  2


In [66]:
class CustomStandardScaler:
    def __init__(self):
        self.data_dict = {'column': [], 'mean': [], 'std': []}

    def fit(self, df, except_columns=[]):
        """Calcule la moyenne et l'écart-type pour chaque colonne de X"""
        numerial_columns = df.select_dtypes(include=['Int64', 'float64']).columns
        numerial_columns = [col for col in numerial_columns if col not in except_columns]
        for col in numerial_columns:
            self.data_dict['column'].append(col)
            self.data_dict['mean'].append(custom_mean(df[col]))
            self.data_dict['std'].append(custom_std(df[col]))
        return self

    def transform(self, df):
        """Applique la transformation standardisée"""
        for i in range(len(self.data_dict['column'])):
            col = self.data_dict['column'][i]
            mean = self.data_dict['mean'][i]
            std = self.data_dict['std'][i]
            df[col] = (df[col] - mean) / std
        return df

    def fit_transform(self, df, except_columns=[]):
        """Combine fit() et transform()"""
        return self.fit(df, except_columns).transform(df)

    def save(self, filename):
        """Sauvegarde les paramètres dans un fichier texte"""
        df = pd.DataFrame(self.data_dict)
        df.to_csv(filename, index=False)

    def load(self, filename):
        """Charge les paramètres depuis un fichier texte"""
        df = pd.read_csv(filename)
        self.data_dict = df.to_dict(orient='list')


In [67]:
scaler = CustomStandardScaler()
new_scaler = CustomStandardScaler()

In [68]:
scaler.fit_transform(df, except_columns=['Index'])

Unnamed: 0,Index,Hogwarts House,First Name,Last Name,Birthday,Best Hand,Arithmancy,Astronomy,Herbology,Defense Against the Dark Arts,Divination,Muggle Studies,Ancient Runes,History of Magic,Transfiguration,Potions,Care of Magical Creatures,Charms,Flying
0,0,Ravenclaw,Tamara,Hsu,2000-03-30,Left,0.524552,-1.014194,0.878628,1.010346,0.377371,1.021139,0.345639,0.512444,0.219633,-0.686183,0.791972,1.204553,-0.500330
1,1,Slytherin,Erich,Paredes,1999-10-14,Right,1.055434,-1.137535,-1.365690,1.133455,-2.109573,-0.540256,-1.204191,0.258503,0.653769,0.412462,0.149365,-1.002983,-1.386928
2,2,Ravenclaw,Stephany,Braun,1999-11-03,Left,-1.554729,-0.780078,1.261379,0.776671,0.718622,1.828915,1.005195,0.133871,1.314249,0.882556,-0.475471,1.825184,0.086673
3,3,Gryffindor,Vesta,Mcmichael,2000-08-19,Left,-1.017252,1.264555,-1.463352,-1.264114,0.209874,-0.642366,0.265645,-1.756242,-2.486237,-1.629193,0.040544,-1.533799,1.830165
4,4,Gryffindor,Gaston,Gibbs,1998-09-27,Left,0.630908,0.762982,-1.716894,,-0.220901,-0.451681,0.974516,-1.447763,-2.099988,-0.520770,-0.216832,-1.481492,1.393217
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1595,1595,Gryffindor,Jung,Blank,2001-09-14,Right,-0.037505,0.604428,-1.088736,-0.605230,0.613214,-0.560600,1.152299,-1.851612,-1.492961,-0.813661,-0.614084,-0.799168,1.678473
1596,1596,Slytherin,Shelli,Lock,1998-03-12,Left,0.819040,0.629896,0.942595,-0.630650,-0.336175,-0.860873,-0.469684,-0.163809,0.590376,-0.039745,-0.288777,-0.347553,0.233961
1597,1597,Gryffindor,Benjamin,Christensen,1999-10-24,Right,0.855551,0.969101,-0.832290,-0.969217,0.700573,-0.330137,1.312164,-2.021646,-1.727593,-1.347129,0.128669,-0.940656,1.807939
1598,1598,Hufflepuff,Charlotte,Dillon,2001-09-21,Left,1.983142,0.795465,0.440987,-0.795907,0.862534,-1.248397,-1.056620,0.192060,1.311096,-0.650053,-0.492505,-0.320631,-1.011640


In [69]:
print(scaler.data_dict)

{'column': ['Arithmancy', 'Astronomy', 'Herbology', 'Defense Against the Dark Arts', 'Divination', 'Muggle Studies', 'Ancient Runes', 'History of Magic', 'Transfiguration', 'Potions', 'Care of Magical Creatures', 'Charms', 'Flying'], 'mean': [49634.57024265645, 39.797130890164766, 1.1410195296768058, -0.38786349744178583, 3.1539096732863516, -224.5899148634645, 495.747970059158, 2.9630946151165927, 1030.0969463871315, 5.9503729927800775, -0.0534271367004963, -243.3744090125, 21.958012499999985], 'std': [np.float64(16679.806035559308), np.float64(520.298267605171), np.float64(5.219681993531829), np.float64(5.212793707585852), np.float64(4.155300897977581), np.float64(486.3448396520667), np.float64(106.28516457845276), np.float64(4.425774656123574), np.float64(44.12511586678027), np.float64(3.14785425039337), np.float64(0.971456967725625), np.float64(8.783639876017116), np.float64(97.63160206806155)]}


In [70]:
scaler.save('scaler.csv')
new_scaler.load('scaler.csv')
new_scaler.transform(df)

Unnamed: 0,Index,Hogwarts House,First Name,Last Name,Birthday,Best Hand,Arithmancy,Astronomy,Herbology,Defense Against the Dark Arts,Divination,Muggle Studies,Ancient Runes,History of Magic,Transfiguration,Potions,Care of Magical Creatures,Charms,Flying
0,0,Ravenclaw,Tamara,Hsu,2000-03-30,Left,-2.975697,-0.078438,-0.050270,0.268226,-0.668192,0.463891,-4.661068,-0.553722,-23.339934,-2.108279,0.870238,27.844830,-0.230031
1,1,Slytherin,Erich,Paredes,1999-10-14,Right,-2.975665,-0.078675,-0.480242,0.291843,-1.266691,0.460681,-4.675649,-0.611100,-23.330096,-1.759266,0.208750,27.593507,-0.239113
2,2,Ravenclaw,Stephany,Braun,1999-11-03,Left,-2.975821,-0.077988,0.023059,0.223399,-0.586068,0.465552,-4.654862,-0.639261,-23.315127,-1.609927,-0.434444,27.915488,-0.224019
3,3,Gryffindor,Vesta,Mcmichael,2000-08-19,Left,-2.975789,-0.074059,-0.498952,-0.168096,-0.708501,0.460471,-4.661820,-1.066330,-23.401257,-2.407852,0.096732,27.533074,-0.206161
4,4,Gryffindor,Gaston,Gibbs,1998-09-27,Left,-2.975690,-0.075023,-0.547526,,-0.812170,0.460863,-4.655151,-0.996629,-23.392504,-2.055731,-0.168206,27.539029,-0.210637
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1595,1595,Gryffindor,Jung,Blank,2001-09-14,Right,-2.975731,-0.075327,-0.427182,-0.041699,-0.611435,0.460639,-4.653478,-1.087879,-23.378747,-2.148776,-0.577130,27.616711,-0.207715
1596,1596,Slytherin,Shelli,Lock,1998-03-12,Left,-2.975679,-0.075278,-0.038015,-0.046575,-0.839911,0.460021,-4.668739,-0.706521,-23.331532,-1.902921,-0.242265,27.668126,-0.222510
1597,1597,Gryffindor,Benjamin,Christensen,1999-10-24,Right,-2.975677,-0.074626,-0.378052,-0.111524,-0.590411,0.461113,-4.651974,-1.126298,-23.384064,-2.318246,0.187446,27.600603,-0.206389
1598,1598,Hufflepuff,Charlotte,Dillon,2001-09-21,Left,-2.975609,-0.074960,-0.134114,-0.078277,-0.551434,0.459225,-4.674261,-0.626113,-23.315199,-2.096802,-0.451979,27.671191,-0.235269


In [63]:
print(new_scaler.data_dict)

{'column': ['Arithmancy', 'Astronomy', 'Herbology', 'Defense Against the Dark Arts', 'Divination', 'Muggle Studies', 'Ancient Runes', 'History of Magic', 'Transfiguration', 'Potions', 'Care of Magical Creatures', 'Charms', 'Flying'], 'mean': [-2.122831852121912e-16, -5.4519880673556786e-17, -2.23249058748811e-16, -1.1590473641402208e-16, 7.76160401520457e-16, 6.771739718490421e-16, -7.543841305983332e-16, 1.9116942382916151e-16, -2.441846391612364e-14, 3.725540753461895e-15, 4.412424841458955e-18, -7.711279531585901e-16, 1.5695778010638151e-16], 'std': [0.9999999999999992, 1.0000000000000009, 0.9999999999999988, 1.0000000000000009, 1.0000000000000004, 1.0000000000000002, 0.9999999999999998, 0.9999999999999996, 0.9999999999999986, 0.9999999999999998, 0.9999999999999996, 1.0000000000000002, 0.9999999999999998]}


In [25]:
scaler.save('scaler.csv')

In [26]:
new_scaler = CustomStandardScaler()

In [27]:
new_scaler.load('scaler.csv')
print(new_scaler.data_dict)

{'column': ['Arithmancy', 'Astronomy', 'Herbology', 'Defense Against the Dark Arts', 'Divination', 'Muggle Studies', 'Ancient Runes', 'History of Magic', 'Transfiguration', 'Potions', 'Care of Magical Creatures', 'Charms', 'Flying'], 'mean': [49634.57024265645, 39.797130890164766, 1.1410195296768058, -0.3878634974417858, 3.153909673286352, -224.5899148634645, 495.747970059158, 2.9630946151165927, 1030.0969463871315, 5.9503729927800775, -0.0534271367004963, -243.3744090125, 21.958012499999985], 'std': [16679.806035559308, 520.298267605171, 5.219681993531829, 5.212793707585852, 4.155300897977581, 486.3448396520667, 106.28516457845276, 4.425774656123574, 44.12511586678027, 3.14785425039337, 0.971456967725625, 8.783639876017116, 97.63160206806155]}


In [28]:
new_scaler.transform(df)

Unnamed: 0,Index,Hogwarts House,First Name,Last Name,Birthday,Best Hand,Arithmancy,Astronomy,Herbology,Defense Against the Dark Arts,Divination,Muggle Studies,Ancient Runes,History of Magic,Transfiguration,Potions,Care of Magical Creatures,Charms,Flying
0,0,Ravenclaw,Tamara,Hsu,2000-03-30,Left,-2.975697,-0.078438,-0.050270,0.268226,-0.668192,0.463891,-4.661068,-0.553722,-23.339934,-2.108279,0.870238,27.844830,-0.230031
1,1,Slytherin,Erich,Paredes,1999-10-14,Right,-2.975665,-0.078675,-0.480242,0.291843,-1.266691,0.460681,-4.675649,-0.611100,-23.330096,-1.759266,0.208750,27.593507,-0.239113
2,2,Ravenclaw,Stephany,Braun,1999-11-03,Left,-2.975821,-0.077988,0.023059,0.223399,-0.586068,0.465552,-4.654862,-0.639261,-23.315127,-1.609927,-0.434444,27.915488,-0.224019
3,3,Gryffindor,Vesta,Mcmichael,2000-08-19,Left,-2.975789,-0.074059,-0.498952,-0.168096,-0.708501,0.460471,-4.661820,-1.066330,-23.401257,-2.407852,0.096732,27.533074,-0.206161
4,4,Gryffindor,Gaston,Gibbs,1998-09-27,Left,-2.975690,-0.075023,-0.547526,,-0.812170,0.460863,-4.655151,-0.996629,-23.392504,-2.055731,-0.168206,27.539029,-0.210637
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1595,1595,Gryffindor,Jung,Blank,2001-09-14,Right,-2.975731,-0.075327,-0.427182,-0.041699,-0.611435,0.460639,-4.653478,-1.087879,-23.378747,-2.148776,-0.577130,27.616711,-0.207715
1596,1596,Slytherin,Shelli,Lock,1998-03-12,Left,-2.975679,-0.075278,-0.038015,-0.046575,-0.839911,0.460021,-4.668739,-0.706521,-23.331532,-1.902921,-0.242265,27.668126,-0.222510
1597,1597,Gryffindor,Benjamin,Christensen,1999-10-24,Right,-2.975677,-0.074626,-0.378052,-0.111524,-0.590411,0.461113,-4.651974,-1.126298,-23.384064,-2.318246,0.187446,27.600603,-0.206389
1598,1598,Hufflepuff,Charlotte,Dillon,2001-09-21,Left,-2.975609,-0.074960,-0.134114,-0.078277,-0.551434,0.459225,-4.674261,-0.626113,-23.315199,-2.096802,-0.451979,27.671191,-0.235269
