In [15]:
import pandas as pd
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OrdinalEncoder

In [10]:
# Toy frame with a single nominal (unordered) 'Color' column; later cells
# one-hot encode it.
data = pd.DataFrame(
    ['Red', 'Blue', 'Green', 'Green', 'Red', 'Blue'],
    columns=['Color'],
)
print("Original DataFrame:", data)

Original DataFrame:    Color
0    Red
1   Blue
2  Green
3  Green
4    Red
5   Blue


In [4]:
# One-hot encode the nominal 'Color' column: each distinct colour becomes its
# own 0/1 indicator column (Color_Blue, Color_Green, Color_Red).
# sparse_output=False makes fit_transform return a dense array that can be
# wrapped in a DataFrame directly.
encoder = OneHotEncoder(sparse_output=False)
encoded_colors = encoder.fit_transform(data[['Color']])

# Reuse the source index so the encoded rows stay aligned with `data` when the
# two frames are concatenated column-wise (pd.concat aligns on index labels,
# not on row position).
encoded_data = pd.DataFrame(
    encoded_colors,
    columns=encoder.get_feature_names_out(['Color']),
    index=data.index,
)

In [5]:
# Display the one-hot encoded frame: one 0/1 indicator column per colour.
encoded_data

Unnamed: 0,Color_Blue,Color_Green,Color_Red
0,0.0,0.0,1.0
1,1.0,0.0,0.0
2,0.0,1.0,0.0
3,0.0,1.0,0.0
4,0.0,0.0,1.0
5,1.0,0.0,0.0


In [6]:
# Encode unseen rows with the already-fitted encoder (no refit), so the output
# columns keep the fitted order: Color_Blue, Color_Green, Color_Red.
# The rows are passed as a DataFrame with the same 'Color' column the encoder
# was fitted on; a bare nested list makes scikit-learn warn that X does not
# have valid feature names.
encoder.transform(pd.DataFrame({'Color': ['Red', 'Green']}))  # for new data



array([[0., 0., 1.],
       [0., 1., 0.]])

In [7]:
# Put the original column next to its indicator columns (column-wise concat,
# rows matched on index labels).
pd.concat(objs=[data, encoded_data], axis='columns')

Unnamed: 0,Color,Color_Blue,Color_Green,Color_Red
0,Red,0.0,0.0,1.0
1,Blue,1.0,0.0,0.0
2,Green,0.0,1.0,0.0
3,Green,0.0,1.0,0.0
4,Red,0.0,0.0,1.0
5,Blue,1.0,0.0,0.0


In [9]:
# Separate copy of the same colour data, used for the label-encoding demo.
data1 = pd.Series(
    ['Red', 'Blue', 'Green', 'Green', 'Red', 'Blue'],
    name='Color',
).to_frame()
print("Original DataFrame:", data1)

Original DataFrame:    Color
0    Red
1   Blue
2  Green
3  Green
4    Red
5   Blue


In [11]:
# Integer-code the colour labels. In the recorded output the codes follow
# alphabetical order: Blue -> 0, Green -> 1, Red -> 2.
# NOTE(review): scikit-learn's docs describe LabelEncoder as an encoder for
# target labels (y) - confirm that using it on a feature column is intended.
lbl_encoder = LabelEncoder()
lbl_encoder.fit_transform(data1.loc[:, 'Color'])

array([2, 0, 1, 1, 2, 0])

In [12]:
# Reuse the fitted encoder (no refit) so new rows get the same codes as the
# training data: Red -> 2, Green -> 1.
lbl_encoder.transform(['Red', 'Green']) # for new data

array([2, 1])

In [13]:
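### Ordinal encoding: categories with a natural order (Small < Medium < Large) are mapped to increasing integers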

In [16]:
# Toy frame with one ordered categorical column ('size') for the ordinal
# encoding demo.
data2 = pd.DataFrame(
    ['Small', 'Medium', 'Large', 'Medium', 'Small', 'Large'],
    columns=['size'],
)

data2

Unnamed: 0,size
0,Small
1,Medium
2,Large
3,Medium
4,Small
5,Large


In [18]:
# Spell out the category order so the codes respect Small < Medium < Large
# (0, 1, 2) rather than whatever order the encoder would infer on its own.
# Note: this rebinds `encoder`, which previously held the OneHotEncoder.
size_order = ['Small', 'Medium', 'Large']
encoder = OrdinalEncoder(categories=[size_order])
encoder.fit_transform(data2[['size']])

array([[0.],
       [1.],
       [2.],
       [1.],
       [0.],
       [2.]])

In [19]:
# Encode unseen rows with the already-fitted ordinal encoder (no refit):
# Small -> 0, Large -> 2.
# The rows are passed as a DataFrame with the same 'size' column the encoder
# was fitted on; a bare nested list makes scikit-learn warn that X does not
# have valid feature names.
encoder.transform(pd.DataFrame({'size': ['Small', 'Large']}))  # for new data



array([[0.],
       [2.]])

In [22]:
# Toy (city, price) rows for the target / mean-encoding demo.
# Note: this rebinds `data2`, which previously held the 'size' frame.
# NOTE(review): 'NewYork' and 'New York' are different strings, so they are
# treated as two separate cities by the groupby below - confirm whether the
# missing space is a typo.
data2 = pd.DataFrame(
    [
        ('NewYork', 200),
        ('London', 150),
        ('Paris', 300),
        ('Tokyo', 250),
        ('New York', 180),
        ('Paris', 320),
    ],
    columns=['city', 'price'],
)

In [23]:
# Display the raw city/price frame before any encoding is added.
data2

Unnamed: 0,city,price
0,NewYork,200
1,London,150
2,Paris,300
3,Tokyo,250
4,New York,180
5,Paris,320


In [26]:
# Per-city mean price as a plain {city: mean} dict, used as the lookup table
# for target (mean) encoding.
mean_price = (
    data2
    .groupby('city')['price']
    .mean()
    .to_dict()
)

In [28]:
# Target (mean) encoding: replace each city with its group's mean price from
# `mean_price`. This adds the 'city_encoded' column to data2 in place.
# NOTE(review): the means come from the same rows they are mapped back onto,
# so a city with a single row simply echoes its own price - confirm this is
# meant purely as an illustration.
data2['city_encoded'] = data2['city'].map(mean_price)

In [29]:
# Display the frame again, now including the 'city_encoded' column.
data2

Unnamed: 0,city,price,city_encoded
0,NewYork,200,200.0
1,London,150,150.0
2,Paris,300,310.0
3,Tokyo,250,250.0
4,New York,180,180.0
5,Paris,320,310.0


In [30]:
# Side-by-side view of the raw price and its mean-encoded city value.
data2.loc[:, ['price', 'city_encoded']]

Unnamed: 0,price,city_encoded
0,200,200.0
1,150,150.0
2,300,310.0
3,250,250.0
4,180,180.0
5,320,310.0
