## Data Encoding
1. Nominal / One-Hot Encoding (OHE)
2. Label and Ordinal Encoding
3. Target Guided Ordinal Encoding

### Nominal or One Hot Encoding

In [None]:
import pandas as pd
from sklearn.preprocessing import OneHotEncoder

In [None]:
## Build a one-column frame of colour labels to encode
df = pd.DataFrame(
    ['red', 'green', 'blue', 'red', 'blue', 'blue', 'green', 'red'],
    columns=['color'],
)

In [None]:
## Preview the first rows of the colour frame
df.head()

Unnamed: 0,color
0,red
1,green
2,blue
3,red
4,blue


In [None]:
## Create a OneHotEncoder instance with its default settings
encoder = OneHotEncoder()

In [None]:
## Learn the categories of 'color' and encode the column in one step
encoded = (
    encoder
    .fit_transform(df[['color']])  # double brackets keep the input 2-D
    .toarray()                     # convert the encoder output to a dense array
)

In [None]:
import pandas as pd

In [None]:
## Wrap the encoded array in a frame labelled with the encoder's feature names
encoder_df = pd.DataFrame(
    data=encoded,
    columns=encoder.get_feature_names_out(),
)

In [None]:
## Preview the one-hot columns (one indicator column per colour)
encoder_df.head()

Unnamed: 0,color_blue,color_green,color_red
0,0.0,0.0,1.0
1,0.0,1.0,0.0
2,1.0,0.0,0.0
3,0.0,0.0,1.0
4,1.0,0.0,0.0


In [None]:
# For new data: wrap the value in a DataFrame that carries the same column
# name the encoder was fitted on. Passing a bare nested list makes
# scikit-learn warn that X has no valid feature names.
encoder.transform(pd.DataFrame({'color': ['blue']})).toarray()



array([[1., 0., 0.]])

In [None]:
## Place the original labels next to their one-hot columns (joined on the row index)
pd.concat(
    [df, encoder_df],
    axis="columns",
)

Unnamed: 0,color,color_blue,color_green,color_red
0,red,0.0,0.0,1.0
1,green,0.0,1.0,0.0
2,blue,1.0,0.0,0.0
3,red,0.0,0.0,1.0
4,blue,1.0,0.0,0.0
5,blue,1.0,0.0,0.0
6,green,0.0,1.0,0.0
7,red,0.0,0.0,1.0


### Label Encoding

In [None]:
## Same colour frame as above, reused for label encoding
df.head()

Unnamed: 0,color
0,red
1,green
2,blue
3,red
4,blue


In [None]:
from sklearn.preprocessing import LabelEncoder

In [None]:
## Create a LabelEncoder instance
lbl_encoder = LabelEncoder()

In [None]:
## LabelEncoder works on a 1-D sequence of labels, so pass the Series
## (single brackets); a one-column DataFrame triggers a DataConversionWarning.
lbl_encoder.fit_transform(df["color"])

  y = column_or_1d(y, warn=True)


array([2, 1, 0, 2, 0, 0, 1, 2])

In [None]:
## Pass a flat list: LabelEncoder expects 1-D input and warns on a nested list
lbl_encoder.transform(['red'])

  y = column_or_1d(y, dtype=self.classes_.dtype, warn=True)


array([2])

In [None]:
## Pass a flat list: LabelEncoder expects 1-D input and warns on a nested list
lbl_encoder.transform(['blue'])

  y = column_or_1d(y, dtype=self.classes_.dtype, warn=True)


array([0])

In [None]:
## Pass a flat list: LabelEncoder expects 1-D input and warns on a nested list
lbl_encoder.transform(['green'])

  y = column_or_1d(y, dtype=self.classes_.dtype, warn=True)


array([1])

### Ordinal Encoding

In [None]:
import pandas as pd
from sklearn.preprocessing import OrdinalEncoder

In [None]:
## Build a one-column frame of size labels
df = pd.DataFrame(
    dict(size=['small', 'medium', 'large', 'medium', 'small', 'large'])
)

In [None]:
## Preview the first rows of the size frame
df.head()

Unnamed: 0,size
0,small
1,medium
2,large
3,medium
4,small


In [None]:
# Build an OrdinalEncoder with an explicit category order, so that
# small < medium < large map to 0, 1 and 2 respectively.
od_encoder = OrdinalEncoder(
    categories=[
        ['small', 'medium', 'large'],
    ],
)

In [None]:
# Fit on the 'size' column and return its ordinal codes
# (double brackets keep the input 2-D, one column per feature)
od_encoder.fit_transform(df[['size']])

array([[0.],
       [1.],
       [2.],
       [1.],
       [0.],
       [2.]])

In [None]:
# Encode a new value: wrap it in a DataFrame carrying the fitted column name,
# because a bare nested list makes scikit-learn warn that X has no valid
# feature names.
od_encoder.transform(pd.DataFrame({'size': ['small']}))



array([[0.]])

### Target Guided Ordinal Encoding

In [None]:
# Toy city/price data; several cities appear more than once
data = dict(
    city=['New York', 'Los Angeles', 'Chicago', 'San Francisco', 'Miami', 'Chicago', 'Miami', 'New York'],
    price=[500000, 450000, 350000, 700000, 400000, 360000, 420000, 520000],
)

# Turn the dictionary into a DataFrame (one column per key)
df = pd.DataFrame(data)

In [None]:
# Display the full city/price frame
df

Unnamed: 0,city,price
0,New York,500000
1,Los Angeles,450000
2,Chicago,350000
3,San Francisco,700000
4,Miami,400000
5,Chicago,360000
6,Miami,420000
7,New York,520000


In [None]:
# Average price for each city
(
    df
    .groupby(by='city')['price']
    .mean()
)

Unnamed: 0_level_0,price
city,Unnamed: 1_level_1
Chicago,355000.0
Los Angeles,450000.0
Miami,410000.0
New York,510000.0
San Francisco,700000.0


In [None]:
# Collapse the per-city average prices into a plain {city: mean price} lookup
mean_price = (
    df.groupby('city')['price']
    .mean()
    .to_dict()
)

In [None]:
# Inspect the city -> mean price lookup
mean_price

{'Chicago': 355000.0,
 'Los Angeles': 450000.0,
 'Miami': 410000.0,
 'New York': 510000.0,
 'San Francisco': 700000.0}

In [None]:
# Replace each city label with that city's mean price from the lookup
# and store the result in a new 'city_encoded' column.
df['city_encoded'] = df['city'].map(mean_price)

In [None]:
# Display the frame, now including the 'city_encoded' column
df

Unnamed: 0,city,price,city_encoded
0,New York,500000,510000.0
1,Los Angeles,450000,450000.0
2,Chicago,350000,355000.0
3,San Francisco,700000,700000.0
4,Miami,400000,410000.0
5,Chicago,360000,355000.0
6,Miami,420000,410000.0
7,New York,520000,510000.0


In [None]:
# Compare the encoded feature with the target, side by side
df.loc[:, ['city_encoded', 'price']]

Unnamed: 0,city_encoded,price
0,510000.0,500000
1,450000.0,450000
2,355000.0,350000
3,700000.0,700000
4,410000.0,400000
5,355000.0,360000
6,410000.0,420000
7,510000.0,520000


In [None]:
# The original selection df[['']] raises KeyError because the frame has no
# column named ''. Show each city next to its encoded value instead.
# NOTE(review): the intended columns are a guess; adjust if another view was meant.
df[['city', 'city_encoded']]