In [1]:
# Feature transformation is a technique used in data preprocessing, particularly in the fields of machine learning and statistics, 
# to modify or create new features from existing data.
# This process can help improve the performance of machine learning models by making the data more suitable for analysis and 
# by enhancing the ability of algorithms to detect underlying patterns.


In [None]:
# Label Encoding:
# Label encoding assigns each unique category value a numerical code. It is straightforward but introduces a new 
# problem: the model might infer a natural ordering in categories, which might not be intended. 
# For example: ["red", "blue", "green"] -> [0, 1, 2], which implicitly suggests "red" < "blue" < "green".
# • Advantages:
#     Simple to implement and keeps the dataset's dimensionality unchanged.
#     Useful for ordinal data or tree-based models that can handle ordinality.
# • Disadvantages:
#     Imposes an ordinal relationship where it might not exist, potentially leading to poor model performance for non-ordinal data.
#     Not suitable for linear models unless the data is ordinal.

In [2]:
# Known categories; a label's position in this list is used as its integer code
# by the manual encoding that follows.
classes = [
    "ClassA",
    "ClassB",
    "ClassC",
    "ClassD",
]

# Ten sample observations that cycle through the categories in order.
instances = [
    "ClassA", "ClassB", "ClassC", "ClassD",
    "ClassA", "ClassB", "ClassC", "ClassD",
    "ClassA", "ClassB",
]

In [3]:
# Give each category an integer code equal to its position in `classes`.
label_to_int = dict(zip(classes, range(len(classes))))
# Look up the code for every observation, keeping the original order.
encoded_labels = [label_to_int[name] for name in instances]
print("Label Encoding: ", encoded_labels)

Label Encoding:  [0, 1, 2, 3, 0, 1, 2, 3, 0, 1]


In [4]:
# Show the integer codes that are about to be decoded.
print("Label Encoding: ", encoded_labels)
# Invert the lookup table: integer code -> category label.
int_to_label = dict(zip(label_to_int.values(), label_to_int.keys()))
# Translate every code back into its label, keeping the original order.
decoded_labels = [int_to_label[code] for code in encoded_labels]
print("Label Decoding: ", decoded_labels)

Label Encoding:  [0, 1, 2, 3, 0, 1, 2, 3, 0, 1]
Label Decoding:  ['ClassA', 'ClassB', 'ClassC', 'ClassD', 'ClassA', 'ClassB', 'ClassC', 'ClassD', 'ClassA', 'ClassB']


In [5]:
from sklearn.preprocessing import LabelEncoder

In [8]:
# Learn the label vocabulary from the observations, then encode those same
# observations with the fitted encoder.
label_encoder = LabelEncoder().fit(instances)
# NOTE: this rebinds `encoded_labels`; it now holds the encoder's output
# instead of the Python list produced by the manual encoding.
encoded_labels = label_encoder.transform(instances)
print("Label Encoding: ", encoded_labels)

Label Encoding:  [0 1 2 3 0 1 2 3 0 1]


In [9]:
# Show the integer codes that are about to be decoded.
print("Label Encoding: ", encoded_labels)
# Ask the fitted encoder to map the codes back to the labels it learned.
original_labels = label_encoder.inverse_transform(encoded_labels)
print("Label Decoding: ", original_labels)


Label Encoding:  [0 1 2 3 0 1 2 3 0 1]
Label Decoding:  ['ClassA' 'ClassB' 'ClassC' 'ClassD' 'ClassA' 'ClassB' 'ClassC' 'ClassD'
 'ClassA' 'ClassB']
