In [4]:
import pandas as pd
from sklearn.preprocessing import OneHotEncoder

# Sample data
data = {'Color': ['Red', 'Green', 'Blue', 'Green', 'Red']}
df = pd.DataFrame(data)

# One-Hot Encoding
encoder = OneHotEncoder(sparse_output=False)
one_hot_encoded = encoder.fit_transform(df[['Color']])

# Create a DataFrame with one-hot encoded columns
one_hot_df = pd.DataFrame(one_hot_encoded, columns=encoder.get_feature_names_out(['Color']))
print(one_hot_df)

   Color_Blue  Color_Green  Color_Red
0         0.0          0.0        1.0
1         0.0          1.0        0.0
2         1.0          0.0        0.0
3         0.0          1.0        0.0
4         0.0          0.0        1.0


In [5]:
from sklearn.preprocessing import LabelEncoder

# Sample data
data = {'Color': ['Red', 'Green', 'Blue', 'Green', 'Red']}
df = pd.DataFrame(data)

# Label Encoding
label_encoder = LabelEncoder()
df['Color_Label'] = label_encoder.fit_transform(df['Color'])

print(df)

   Color  Color_Label
0    Red            2
1  Green            1
2   Blue            0
3  Green            1
4    Red            2


In [6]:
from sklearn.preprocessing import OrdinalEncoder

# Sample data
data = {'Size': ['Small', 'Medium', 'Large', 'Medium', 'Small']}
df = pd.DataFrame(data)

# Ordinal Encoding
ordinal_encoder = OrdinalEncoder()
df['Size_Ordinal'] = ordinal_encoder.fit_transform(df[['Size']])

print(df)

     Size  Size_Ordinal
0   Small           2.0
1  Medium           1.0
2   Large           0.0
3  Medium           1.0
4   Small           2.0


In [7]:
# Sample data
data = {'Color': ['Red', 'Green', 'Blue', 'Green', 'Red']}
df = pd.DataFrame(data)

# Frequency Encoding
frequency_encoding = df['Color'].value_counts()
df['Color_Frequency'] = df['Color'].map(frequency_encoding)

print(df)

   Color  Color_Frequency
0    Red                2
1  Green                2
2   Blue                1
3  Green                2
4    Red                2


In [8]:
# Sample data with target variable
data = {'Color': ['Red', 'Green', 'Blue', 'Green', 'Red'], 'Target': [1, 0, 0, 1, 1]}
df = pd.DataFrame(data)

# Target Encoding
target_means = df.groupby('Color')['Target'].mean()
df['Color_Target'] = df['Color'].map(target_means)

print(df)

   Color  Target  Color_Target
0    Red       1           1.0
1  Green       0           0.5
2   Blue       0           0.0
3  Green       1           0.5
4    Red       1           1.0


In [11]:
from category_encoders import BinaryEncoder

# Sample data
data = {'Color': ['Red', 'Green', 'Blue', 'Yellow']}
df = pd.DataFrame(data)

# Binary Encoding
binary_encoder = BinaryEncoder()
df_binary_encoded = binary_encoder.fit_transform(df['Color'])

print(df_binary_encoded)

   Color_0  Color_1  Color_2
0        0        0        1
1        0        1        0
2        0        1        1
3        1        0        0


In [13]:
import pandas as pd
from sklearn.feature_extraction import FeatureHasher

# Sample data
data = {'Color': ['Red', 'Green', 'Blue', 'Yellow']}
df = pd.DataFrame(data)

# Hashing Encoding
# Convert each category into a list
df['Color'] = df['Color'].apply(lambda x: [x])

# Initialize the hasher
hasher = FeatureHasher(input_type='string', n_features=2)
hashed_features = hasher.transform(df['Color'])

# Convert hashed features to DataFrame for better visualization
hashed_df = pd.DataFrame(hashed_features.toarray(), columns=['Bucket 1', 'Bucket 2'])

print(hashed_df)

   Bucket 1  Bucket 2
0       0.0       1.0
1       0.0       1.0
2      -1.0       0.0
3      -1.0       0.0
