In [1]:
import pandas as pd
import numpy as np
import os

from sklearn.base import BaseEstimator, TransformerMixin


class CustomMinMax(BaseEstimator, TransformerMixin):
    """Min-max scaler that learns per-feature bounds in ``fit`` and applies them in ``transform``.

    Every value ``x`` is mapped to ``(x - min_val) / (max_val - min_val)`` using the
    bounds learned from the data given to ``fit``. Values outside those bounds are
    not clipped, so they land outside ``[0, 1]``.

    Attributes:
        min_val: per-feature minimum of the fitted data (``np.nan`` until ``fit`` runs).
        max_val: per-feature maximum of the fitted data (``np.nan`` until ``fit`` runs).
    """

    def __init__(self):
        # NaN ("not a number") placeholders: transforming numeric data before
        # fitting produces NaN everywhere instead of using stale bounds.
        self.min_val = np.nan
        self.max_val = np.nan

    def fit(self, X, y=None):
        """Learn the per-feature minimum and maximum of ``X``.

        Args:
            X: data exposing ``min(axis=0)`` / ``max(axis=0)`` (pandas DataFrame or
                Series, NumPy array).
            y: ignored; accepted for scikit-learn API compatibility.

        Returns:
            self, so calls can be chained.
        """
        # axis=0 keeps one bound per feature for 2-D NumPy input as well; without
        # it ndarray.min()/max() collapse the whole array to a single scalar.
        self.min_val = X.min(axis=0)
        self.max_val = X.max(axis=0)
        # The learned attributes have no trailing underscore, so scikit-learn's
        # default fitted-check cannot see them; record the fitted state explicitly.
        self._is_fitted = True
        return self

    def transform(self, X, y=None):
        """Rescale ``X`` with the bounds learned by ``fit``.

        Args:
            X: data with the same features as the data passed to ``fit``.
            y: ignored; accepted for scikit-learn API compatibility.

        Returns:
            The rescaled data, of whatever type the arithmetic on ``X`` yields.
        """
        value_range = self.max_val - self.min_val
        # A constant feature has a zero range, which would divide by zero. Adding
        # the boolean mask turns each 0 into 1 and leaves every other entry
        # (including NaN) untouched, so constant features scale to 0.
        safe_range = value_range + (value_range == 0)
        return (X - self.min_val) / safe_range

    def __sklearn_is_fitted__(self):
        """Tell scikit-learn's ``check_is_fitted`` whether ``fit`` has been called."""
        return getattr(self, "_is_fitted", False)


In [2]:
# Load the Titanic passenger table from a CSV file in the working directory.
# NOTE(review): the recorded previews show an "Unnamed: 0" column, which suggests
# the file was saved together with its index; index_col=0 may be wanted — confirm.
titanic_df = pd.read_csv(filepath_or_buffer="titanic.csv")

In [3]:
# Positional hold-out split: the first 214 rows are used for fitting,
# every remaining row is kept aside for evaluation.
train = titanic_df.iloc[:214]
test = titanic_df.iloc[214:]

In [4]:
# Preview the first five rows of the training split.
train.head(n=5)

Unnamed: 0,PassengerId,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,892,3,"Kelly, Mr. James",male,34.5,0,0,330911,7.8292,,Q
1,893,3,"Wilkes, Mrs. James (Ellen Needs)",female,47.0,1,0,363272,7.0,,S
2,894,2,"Myles, Mr. Thomas Francis",male,62.0,0,0,240276,9.6875,,Q
3,895,3,"Wirz, Mr. Albert",male,27.0,0,0,315154,8.6625,,S
4,896,3,"Hirvonen, Mrs. Alexander (Helga E Lindqvist)",female,22.0,1,1,3101298,12.2875,,S


In [5]:
# Preview the first five rows of the held-out split.
test.head(n=5)

Unnamed: 0,PassengerId,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
214,1106,3,"Andersson, Miss. Ida Augusta Margareta",female,38.0,4,2,347091,7.775,,S
215,1107,1,"Head, Mr. Christopher",male,42.0,0,0,113038,42.5,B11,S
216,1108,3,"Mahon, Miss. Bridget Delia",female,,0,0,330924,7.8792,,Q
217,1109,1,"Wick, Mr. George Dennick",male,57.0,1,1,36928,164.8667,,S
218,1110,1,"Widener, Mrs. George Dunton (Eleanor Elkins)",female,50.0,1,1,113503,211.5,C80,C


In [6]:
from sklearn.pipeline import Pipeline

# Wrap the custom scaler in a one-step pipeline, learn the bounds from the
# training rows only, then rescale the held-out rows with those same bounds.
pipeline = Pipeline(steps=[('scaler', CustomMinMax())])
norm = pipeline.fit(train[['Age', 'Fare']]).transform(test[['Age', 'Fare']])



In [7]:
from sklearn.preprocessing import MinMaxScaler, KBinsDiscretizer
from sklearn.impute import SimpleImputer

# Numeric preprocessing pipeline: impute -> rescale -> discretize.
numeric_pipeline = Pipeline(steps=[
    # Replace missing values with the column mean.
    ('imputer', SimpleImputer(strategy='mean')),
    # Put every numeric column on the same 0-to-1 scale.
    ('minmaxscaler', MinMaxScaler()),
    # Cut each column into groups (bins):
    #   n_bins=5            -> number of bins
    #   encode='ordinal'    -> one integer per bin (e.g. 0, 1, 2, 3, 4)
    #   strategy='quantile' -> bin edges chosen so each bin holds roughly as many values
    ('discretizer', KBinsDiscretizer(strategy='quantile', encode='ordinal', n_bins=5)),
])

# Fit on the numeric columns of the training rows, then transform the held-out rows.
new_df = numeric_pipeline.fit(train[['Age', 'Fare']]).transform(test[['Age', 'Fare']])

In [8]:
# Show the per-column minima and maxima stored on the fitted min-max scaling step.
fitted_scaler = numeric_pipeline.named_steps['minmaxscaler']
print(fitted_scaler.data_min_, fitted_scaler.data_max_)






[0.33   3.1708] [ 76. 263.]
