In [13]:
import pandas as pd
import random
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder
import category_encoders as ce

# Création de la dataframe initiale
data = pd.DataFrame({
    'Animaux': [random.choice(['Chien', 'Chat', 'Lion']) for _ in range(10)],
})

print("DataFrame d'origine :")
display(data)

# Transformation 1 : Label Encoding
le = LabelEncoder()
data_label_encoded = data.copy()
data_label_encoded['Animaux'] = le.fit_transform(data['Animaux'])

# Transformation 2 : One-Hot Encoding
data_one_hot_encoded = pd.get_dummies(data, columns=['Animaux'])

# Transformation 3 : Count Encoding
encoder = ce.CountEncoder(cols=['Animaux'])
data_count_encoded = encoder.fit_transform(data)

# Transformation 4 : Target Encoding (Supposons une variable cible 'Target')
data_target_encoded = data.copy()
target_variable = [random.choice([0, 1]) for _ in range(10)]
encoder = ce.TargetEncoder(cols=['Animaux'])
data_target_encoded['Animaux'] = encoder.fit_transform(data['Animaux'], target_variable)

# Transformation 5 : Feature Hashing (Hachage de caractéristiques)
from sklearn.feature_extraction import FeatureHasher
hasher = FeatureHasher(n_features=5, input_type='string')
hashed_features = hasher.transform(data['Animaux']).toarray()
data_feature_hashed = pd.DataFrame(hashed_features, columns=['Feature_0', 'Feature_1', 'Feature_2', 'Feature_3', 'Feature_4'])

# Affichage des dataframes transformées
print("\nDataFrame après Label Encoding :")
display(data_label_encoded)

print("\nDataFrame après One-Hot Encoding :")
display(data_one_hot_encoded)

print("\nDataFrame après Count Encoding :")
display(data_count_encoded)

print("\nDataFrame après Target Encoding :")
display(data_target_encoded)

print("\nDataFrame après Feature Hashing :")
display(data_feature_hashed)


DataFrame d'origine :


Unnamed: 0,Animaux
0,Lion
1,Chat
2,Lion
3,Chat
4,Chat
5,Chat
6,Chien
7,Lion
8,Chien
9,Chat



DataFrame après Label Encoding :


Unnamed: 0,Animaux
0,2
1,0
2,2
3,0
4,0
5,0
6,1
7,2
8,1
9,0



DataFrame après One-Hot Encoding :


Unnamed: 0,Animaux_Chat,Animaux_Chien,Animaux_Lion
0,0,0,1
1,1,0,0
2,0,0,1
3,1,0,0
4,1,0,0
5,1,0,0
6,0,1,0
7,0,0,1
8,0,1,0
9,1,0,0



DataFrame après Count Encoding :


Unnamed: 0,Animaux
0,3
1,5
2,3
3,5
4,5
5,5
6,2
7,3
8,2
9,5



DataFrame après Target Encoding :


Unnamed: 0,Animaux
0,0.389702
1,0.436485
2,0.389702
3,0.436485
4,0.436485
5,0.436485
6,0.34326
7,0.389702
8,0.34326
9,0.436485



DataFrame après Feature Hashing :


Unnamed: 0,Feature_0,Feature_1,Feature_2,Feature_3,Feature_4
0,0.0,-2.0,0.0,1.0,-1.0
1,0.0,-1.0,0.0,0.0,-1.0
2,0.0,-2.0,0.0,1.0,-1.0
3,0.0,-1.0,0.0,0.0,-1.0
4,0.0,-1.0,0.0,0.0,-1.0
5,0.0,-1.0,0.0,0.0,-1.0
6,-1.0,-2.0,0.0,0.0,0.0
7,0.0,-2.0,0.0,1.0,-1.0
8,-1.0,-2.0,0.0,0.0,0.0
9,0.0,-1.0,0.0,0.0,-1.0
