In [37]:
# Binning Predictor Values 

In [38]:
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt
from sklearn import preprocessing 
from sklearn.preprocessing import KBinsDiscretizer
from sklearn.preprocessing import OneHotEncoder
%matplotlib inline
plt.style.use('ggplot')

In [39]:
# Using Target table with "Y" values beacuse we are dealing with categorical data 
df = pd.read_csv('ReadyDF1', index_col=0)
df.head(2)

Unnamed: 0,Y1,Y2,C1,C2,C4,C7,C3',C5',C6',T4',T3',T5',S1',S2',S3'
0,0,1,2.08636,1.0,0.029074,1.710498,1,1.351792,1.306758,0.640426,0.97966,-1.241958,0.172661,-2.012621,-1.937792
1,1,0,2.4133,0.0,-0.013352,1.413903,0,2.487306,0.0,0.644753,1.275142,-1.031589,0.180641,-1.696991,-1.706058


In [40]:
predictors = df.filter(['C4', "C6'", "T4'", "S2'"])

In [41]:
# converting to an array 
dfarray = predictors.values 
X = dfarray[:, 0:4]
X

array([[ 0.02907427,  1.30675833,  0.64042553, -2.01262094],
       [-0.01335164,  0.        ,  0.64475348, -1.69699144],
       [ 0.020715  ,  0.        ,  0.63681592, -2.56229286],
       ...,
       [ 0.03042531,  0.        ,  0.64776119, -2.16699295],
       [ 0.0322843 ,  1.38498346,  0.7109375 , -2.50102321],
       [ 0.02468735,  1.23465442,  0.67070218, -2.06796958]])

In [42]:
OHE = KBinsDiscretizer(n_bins=4, encode='ordinal', strategy='uniform')
OHE.fit(X)
OHE_trans = OHE.transform(X)
OHE_trans
#my_C4 = OHE.transform(df.C4)
#df["C4_BIN"] = OHE.fit_transform(df[['C4']]) 

array([[2., 3., 2., 2.],
       [1., 0., 2., 3.],
       [2., 0., 2., 0.],
       ...,
       [2., 0., 2., 1.],
       [2., 3., 2., 0.],
       [2., 2., 2., 2.]])

In [43]:
OHE_trans = OHE_trans.astype(dtype='int64')

In [44]:
OHE_trans.dtype

dtype('int64')

In [45]:
ENC = OneHotEncoder(handle_unknown='ignore')
features = OHE_trans 
ENC.fit(features)

OneHotEncoder(categorical_features=None, categories=None,
       dtype=<class 'numpy.float64'>, handle_unknown='ignore',
       n_values=None, sparse=True)

In [46]:
names = ENC.get_feature_names()
names

array(['x0_0', 'x0_1', 'x0_2', 'x0_3', 'x1_0', 'x1_2', 'x1_3', 'x2_0',
       'x2_1', 'x2_2', 'x2_3', 'x3_0', 'x3_1', 'x3_2', 'x3_3'],
      dtype=object)

In [47]:
OHE_labels = ENC.transform(features).toarray()
OHE_labels.shape

(682, 15)

In [48]:
OHE_labels

array([[0., 0., 1., ..., 0., 1., 0.],
       [0., 1., 0., ..., 0., 0., 1.],
       [0., 0., 1., ..., 0., 0., 0.],
       ...,
       [0., 0., 1., ..., 1., 0., 0.],
       [0., 0., 1., ..., 0., 0., 0.],
       [0., 0., 1., ..., 0., 1., 0.]])

In [49]:
ENC_DF = pd.DataFrame(OHE_labels, columns=['C4_0','C4_1', 'C4_2', 'C4_3', "C6'_0", "C6'_1", "C6'_2", "T4'_0",  "T4'_1",  "T4'_2",  "T4'_3",  "S2'_0",  "S2'_1",  "S2'_2",  "S2'_3"])
ENC_DF.head()

Unnamed: 0,C4_0,C4_1,C4_2,C4_3,C6'_0,C6'_1,C6'_2,T4'_0,T4'_1,T4'_2,T4'_3,S2'_0,S2'_1,S2'_2,S2'_3
0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0
1,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0
2,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0
3,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0
4,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0


In [51]:
df = pd.concat([ENC_DF,df], axis=1)

In [52]:
# Exporting DataFrame as CSV 
df.to_csv('BinDF1', ',')