In [1]:
import numpy as np
import pandas as pd

In [2]:
# Load the customer dataset from 'customer.csv' (path is relative to the working directory).
df = pd.read_csv('customer.csv')

In [3]:
# Preview the first rows to see which columns are categorical.
df.head()

Unnamed: 0,age,gender,review,education,purchased
0,30,Female,Average,School,No
1,68,Female,Poor,UG,No
2,70,Female,Good,PG,No
3,72,Female,Good,PG,No
4,16,Female,Average,UG,No


In [4]:
# Count the rows in each `gender` category (the column encoded in the sections below).
df['gender'].value_counts()

Unnamed: 0_level_0,count
gender,Unnamed: 1_level_1
Female,29
Male,21


## 1. OneHotEncoding using Pandas

In [5]:
# Replace `gender` with one indicator column per category (gender_<category>);
# all other columns are carried over unchanged.
one_hot_encoded_data = pd.get_dummies(
    data=df,
    columns=['gender'],
)
print(one_hot_encoded_data)

    age   review education purchased  gender_Female  gender_Male
0    30  Average    School        No           True        False
1    68     Poor        UG        No           True        False
2    70     Good        PG        No           True        False
3    72     Good        PG        No           True        False
4    16  Average        UG        No           True        False
5    31  Average    School       Yes           True        False
6    18     Good    School        No          False         True
7    60     Poor    School       Yes           True        False
8    65  Average        UG        No           True        False
9    74     Good        UG       Yes          False         True
10   98     Good        UG       Yes           True        False
11   74     Good        UG       Yes          False         True
12   51     Poor    School        No          False         True
13   57  Average    School        No           True        False
14   15     Poor        P

In [6]:
# Cast only the gender indicator columns to integers; every other column keeps its dtype.
gender_dummy_cols = [name for name in one_hot_encoded_data.columns if name.startswith('gender_')]
one_hot_encoded_data = one_hot_encoded_data.astype(dict.fromkeys(gender_dummy_cols, int))

print("\nFinal DataFrame with Integer Values for One-Hot Encoded Columns:\n", one_hot_encoded_data)



Final DataFrame with Integer Values for One-Hot Encoded Columns:
     age   review education purchased  gender_Female  gender_Male
0    30  Average    School        No              1            0
1    68     Poor        UG        No              1            0
2    70     Good        PG        No              1            0
3    72     Good        PG        No              1            0
4    16  Average        UG        No              1            0
5    31  Average    School       Yes              1            0
6    18     Good    School        No              0            1
7    60     Poor    School       Yes              1            0
8    65  Average        UG        No              1            0
9    74     Good        UG       Yes              0            1
10   98     Good        UG       Yes              1            0
11   74     Good        UG       Yes              0            1
12   51     Poor    School        No              0            1
13   57  Average    Sch

## 2. K-1 OneHotEncoding

In [7]:
# K-1 encoding: drop the first gender category so K categories yield K-1 indicator columns.
one_hot_encoded_data = pd.get_dummies(
    data=df,
    columns=['gender'],
    drop_first=True,
)

In [8]:
# Cast only the remaining gender indicator column(s) to integers; other columns keep their dtype.
gender_dummy_cols = [name for name in one_hot_encoded_data.columns if name.startswith('gender_')]
one_hot_encoded_data = one_hot_encoded_data.astype(dict.fromkeys(gender_dummy_cols, int))

print("\nFinal DataFrame with Integer Values for One-Hot Encoded Columns:\n", one_hot_encoded_data)



Final DataFrame with Integer Values for One-Hot Encoded Columns:
     age   review education purchased  gender_Male
0    30  Average    School        No            0
1    68     Poor        UG        No            0
2    70     Good        PG        No            0
3    72     Good        PG        No            0
4    16  Average        UG        No            0
5    31  Average    School       Yes            0
6    18     Good    School        No            1
7    60     Poor    School       Yes            0
8    65  Average        UG        No            0
9    74     Good        UG       Yes            1
10   98     Good        UG       Yes            0
11   74     Good        UG       Yes            1
12   51     Poor    School        No            1
13   57  Average    School        No            0
14   15     Poor        PG       Yes            1
15   75     Poor        UG        No            1
16   59     Poor        UG       Yes            1
17   22     Poor        UG       

## 3. OneHotEncoding using Sklearn

In [None]:
from sklearn.model_selection import train_test_split

# Features are the first four columns, the target is the last column.
# 20% of the rows are held out for testing; random_state fixes the shuffle for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(
    df.iloc[:, :4],
    df.iloc[:, -1],
    test_size=0.2,
    random_state=2,
)

In [None]:
# Preview the training features produced by the split.
X_train.head()

Unnamed: 0,age,gender,review,education
24,16,Female,Average,PG
48,39,Female,Good,UG
17,22,Female,Poor,UG
12,51,Male,Poor,School
27,69,Female,Poor,PG


In [None]:
from sklearn.preprocessing import OneHotEncoder

In [None]:
# K-1 encoder: drops the first category of each feature and emits int32 values.
# `sparse_output=False` returns a dense NumPy array rather than a SciPy sparse matrix.
# It replaces the `sparse` keyword, which scikit-learn deprecated in 1.2 and removed
# in 1.4, so this cell requires scikit-learn >= 1.2.
ohe = OneHotEncoder(drop='first', sparse_output=False, dtype=np.int32)

In [None]:
# Fit the encoder on the training split's `gender` column and encode it in the same step.
# Double brackets keep the selection two-dimensional (a one-column DataFrame).
X_train_new = ohe.fit_transform(X_train[['gender']])



In [None]:
# Encode the test split with the already-fitted encoder (transform only, no refit).
X_test_new = ohe.transform(X_test[['gender']])

In [None]:
# Check the encoded shape: (number of training rows, number of indicator columns kept).
X_train_new.shape

(40, 1)

In [None]:
# Place each original gender label next to its encoded value to check the mapping by eye.
np.concatenate(
    [X_train[['gender']].to_numpy(), X_train_new],
    axis=1,
)

array([['Female', 0],
       ['Female', 0],
       ['Female', 0],
       ['Male', 1],
       ['Female', 0],
       ['Female', 0],
       ['Male', 1],
       ['Female', 0],
       ['Female', 0],
       ['Male', 1],
       ['Male', 1],
       ['Female', 0],
       ['Male', 1],
       ['Female', 0],
       ['Female', 0],
       ['Male', 1],
       ['Male', 1],
       ['Male', 1],
       ['Male', 1],
       ['Female', 0],
       ['Female', 0],
       ['Male', 1],
       ['Female', 0],
       ['Female', 0],
       ['Female', 0],
       ['Female', 0],
       ['Male', 1],
       ['Female', 0],
       ['Female', 0],
       ['Male', 1],
       ['Female', 0],
       ['Female', 0],
       ['Male', 1],
       ['Male', 1],
       ['Male', 1],
       ['Female', 0],
       ['Female', 0],
       ['Male', 1],
       ['Male', 1],
       ['Male', 1]], dtype=object)