## Ordinal encoder

In [1]:
# Ordinal encoding demo: turn each category label into an integer code
import pandas as pd
from numpy import asarray
from sklearn.preprocessing import OrdinalEncoder
# Toy input: one categorical column holding three colour labels
data = asarray([['red'], ['green'], ['blue']])
print(f'Initial data\n-----------\n{data}')
# Build the encoder and let it learn the categories present in the column
encoder = OrdinalEncoder()
encoder.fit(data)
# Swap every label for its code, then cast the codes to integers
transformed_data = encoder.transform(data).astype('int')
print(f'Encoded data\n-----------\n{transformed_data}')

Initial data
-----------
[['red']
 ['green']
 ['blue']]
Encoded data
-----------
[[2]
 [1]
 [0]]


In [2]:
# Sample dataframe: one free-text column and one ordered categorical column
data_values = {'name': ['Jugurtha', 'Saran', 'Milton', 'Simeon'],
               'degree': ['Master', 'Phd', 'Bachelor', 'Phd']}
print(data_values)
df = pd.DataFrame(data_values)
print(f'Original dataframe\n---------------------\n{df}')
# Hand-written ranking of the degree labels (higher code = higher rank)
ordinal_degree = {'Phd':3, 'Master':2, 'Bachelor':1, 'aec': 0}
# Use Series.map rather than Series.replace: map builds a new integer column
# directly, whereas replace relies on implicit object->int downcasting, which
# pandas has deprecated (the column would stay `object` on newer versions).
degree_codes = df['degree'].map(ordinal_degree)
# map yields NaN for labels missing from the dictionary; fail loudly instead of
# letting missing codes leak into the encoded column.
assert degree_codes.notna().all(), 'degree label without an ordinal code'
df['degree'] = degree_codes
print(f'Ordinal dataframe\n---------------------\n{df}')


{'name': ['Jugurtha', 'Saran', 'Milton', 'Simeon'], 'degree': ['Master', 'Phd', 'Bachelor', 'Phd']}
Original dataframe
---------------------
       name    degree
0  Jugurtha    Master
1     Saran       Phd
2    Milton  Bachelor
3    Simeon       Phd
Ordinal dataframe
---------------------
       name  degree
0  Jugurtha       2
1     Saran       3
2    Milton       1
3    Simeon       3


## One-Hot Encoding

In [3]:
# Example of one-hot encoding
import pandas as pd
from numpy import asarray
from sklearn.preprocessing import OneHotEncoder
# Create data: a single categorical column with three colour labels
data = asarray([['red'], ['green'], ['blue']])
print(f'Initial data\n-----------\n{data}')
# Define one-hot encoding.
# The `sparse` keyword was renamed to `sparse_output` in scikit-learn 1.2 and
# removed in 1.4, so neither spelling works on every version. Keep the default
# (sparse matrix) output and densify it explicitly instead.
encoder = OneHotEncoder()
# Transform each label into a row of 0/1 indicators, one column per category
transformed_data = encoder.fit_transform(data).toarray()
print(f'Encoded data\n-----------\n{transformed_data}')

Initial data
-----------
[['red']
 ['green']
 ['blue']]
Encoded data
-----------
[[0. 0. 1.]
 [0. 1. 0.]
 [1. 0. 0.]]


## Dummy variable Encoding

In [4]:
# Example of dummy variable encoding (one-hot with one category dropped)
import pandas as pd
from numpy import asarray
from sklearn.preprocessing import OneHotEncoder
# Create data: a single categorical column with three colour labels
data = asarray([['red'], ['green'], ['blue']])
print(f'Initial data\n-----------\n{data}')
# Define dummy variable encoding: drop='first' removes the indicator column of
# the first category, which is then represented by an all-zero row.
# The `sparse` keyword was renamed to `sparse_output` in scikit-learn 1.2 and
# removed in 1.4, so neither spelling works on every version. Keep the default
# (sparse matrix) output and densify it explicitly instead.
encoder = OneHotEncoder(drop='first')
# Transform each label into a row of 0/1 indicators for the remaining categories
transformed_data = encoder.fit_transform(data).toarray()
print(f'Encoded data\n-----------\n{transformed_data}')

Initial data
-----------
[['red']
 ['green']
 ['blue']]
Encoded data
-----------
[[0. 1.]
 [1. 0.]
 [0. 0.]]


In [5]:
# Train Model: load the dataset and prepare the encoder
from pandas import read_csv
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier as DTC
from sklearn.model_selection import train_test_split
filename = 'heart.csv'
try:
    dataframe = read_csv(filename)
    print('Data loaded')
except Exception as e:
    print(f'Error: {e}')
    # Re-raise after reporting: without `dataframe` nothing below can run, and
    # swallowing the error would only surface later as an unrelated NameError.
    raise

# Data preprocessing
# Define encoder (dummy variable encoding: first category of each feature dropped).
# The `sparse` keyword was renamed to `sparse_output` in scikit-learn 1.2 and
# removed in 1.4, so it is not passed here; the encoder returns a sparse matrix
# by default -- call .toarray() on the result when a dense array is needed.
encoder = OneHotEncoder(drop='first')

Data loaded


In [6]:
# Features are every column but the last; the target is the last column cast to int
X, Y = dataframe.values[ : , : -1],dataframe.values[ : , -1].astype('int')
# Define dummy variable encoding (first category of each feature dropped).
# The `sparse` keyword was renamed to `sparse_output` in scikit-learn 1.2 and
# removed in 1.4, so keep the default sparse output and densify it explicitly.
# NOTE(review): every feature column is encoded as categorical, including any
# numeric ones the CSV may contain, and the encoder is fitted before the
# train/test split -- confirm both are intended.
encoder = OneHotEncoder(drop='first')
# Transform the features into 0/1 indicator columns
transformed_X = encoder.fit_transform(X).toarray()
print(transformed_X)
# Fix the seed so the split, the fitted tree and the printed score are
# reproducible across runs.
SEED = 42
X_tr, X_te, Y_tr, Y_te = train_test_split(transformed_X, Y, test_size=0.20, random_state=SEED)

# Alternative model kept for reference:
# model = LogisticRegression(solver='newton-cg')
model = DTC(random_state=SEED)
model.fit(X_tr, Y_tr)
# Mean accuracy on the held-out 20 %
result = model.score(X_te, Y_te)
print(result)

[[0. 0. 0. ... 0. 0. 1.]
 [0. 0. 0. ... 0. 1. 0.]
 [0. 0. 0. ... 0. 0. 1.]
 ...
 [0. 0. 0. ... 0. 1. 0.]
 [0. 0. 0. ... 0. 1. 0.]
 [0. 0. 0. ... 0. 0. 1.]]
0.8152173913043478
