In [2]:
import pandas as pd
from IPython.display import display

In [4]:
df = pd.DataFrame([
    ["green", "M", 10.1, "class1"],
    ["red", "L", 13.5, "class2"],
    ["blue", "XL", 15.3, "class1"]
], columns=["color", "size", "prize", "class label"])
display(df)

Unnamed: 0,color,size,prize,class label
0,green,M,10.1,class1
1,red,L,13.5,class2
2,blue,XL,15.3,class1


In [5]:
df.dtypes

color           object
size            object
prize          float64
class label     object
dtype: object

In [6]:
class_mapping = {label:idx for idx, label in enumerate(set(df["class label"]))}
df["class label"] = df["class label"].map(class_mapping)
df

Unnamed: 0,color,size,prize,class label
0,green,M,10.1,1
1,red,L,13.5,0
2,blue,XL,15.3,1


In [7]:
size_mapping = {"XL":3, "L":2, "M":1}
df["size"] = df["size"].map(size_mapping)
df

Unnamed: 0,color,size,prize,class label
0,green,1,10.1,1
1,red,2,13.5,0
2,blue,3,15.3,1


In [8]:
color_mapping = {
    "green":(0, 0, 1),
    "red":(0, 1, 0),
    "blue":(1,0, 0)
}
df["color"] = df["color"].map(color_mapping)
df

Unnamed: 0,color,size,prize,class label
0,"(0, 0, 1)",1,10.1,1
1,"(0, 1, 0)",2,13.5,0
2,"(1, 0, 0)",3,15.3,1


In [9]:
import numpy as np

In [10]:
y = df["class label"].values
X = df.iloc[:, :-1].values

X = np.apply_along_axis(func1d=lambda x:np.array(list(x[0]) + list(x[1:])), axis=1, arr=X)

print("Class Labels:", y)
print("\nFeatures:\n", X)

Class Labels: [1 0 1]

Features:
 [[ 0.   0.   1.   1.  10.1]
 [ 0.   1.   0.   2.  13.5]
 [ 1.   0.   0.   3.  15.3]]


In [11]:

inv_color_mapping = {v:k for k, v in color_mapping.items()}
inv_size_mapping = {v:k for k, v in size_mapping.items()}
inv_class_mapping = {v:k for k, v in class_mapping.items()}

df["color"] = df["color"].map(inv_color_mapping)
df["size"] = df["size"].map(inv_size_mapping)
df["class label"] = df["class label"].map(inv_class_mapping)
df

Unnamed: 0,color,size,prize,class label
0,green,M,10.1,class1
1,red,L,13.5,class2
2,blue,XL,15.3,class1


In [12]:
from sklearn.preprocessing import LabelEncoder

In [13]:
class_le= LabelEncoder()
df["class label"] = class_le.fit_transform(df["class label"])

size_mapping = {
    "XL":3,
    "L":2,
    "M":1
}
df["size"] = df["size"].map(size_mapping)
df

Unnamed: 0,color,size,prize,class label
0,green,1,10.1,0
1,red,2,13.5,1
2,blue,3,15.3,0


In [14]:
class_le.inverse_transform(df["class label"])

array(['class1', 'class2', 'class1'], dtype=object)

In [25]:
df.transpose()

Unnamed: 0,0,1,2
color,green,red,blue
size,1,2,3
prize,10.1,13.5,15.3
class label,0,1,0


In [28]:
df.transpose().to_dict().values()

dict_values([{'color': 'green', 'size': 1, 'prize': 10.1, 'class label': 0}, {'color': 'red', 'size': 2, 'prize': 13.5, 'class label': 1}, {'color': 'blue', 'size': 3, 'prize': 15.3, 'class label': 0}])

In [29]:
from sklearn.feature_extraction import DictVectorizer

In [30]:
dvec = DictVectorizer(sparse=False)

X = dvec.fit_transform(df.transpose().to_dict().values())
X

array([[ 0. ,  0. ,  1. ,  0. , 10.1,  1. ],
       [ 1. ,  0. ,  0. ,  1. , 13.5,  2. ],
       [ 0. ,  1. ,  0. ,  0. , 15.3,  3. ]])

In [32]:
pd.DataFrame(X, columns=dvec.feature_names_)

Unnamed: 0,class label,color=blue,color=green,color=red,prize,size
0,0.0,0.0,1.0,0.0,10.1,1.0
1,1.0,0.0,0.0,1.0,13.5,2.0
2,0.0,1.0,0.0,0.0,15.3,3.0


In [33]:
color_le = LabelEncoder()
df["color"] = color_le.fit_transform(df["color"])
df

Unnamed: 0,color,size,prize,class label
0,1,1,10.1,0
1,2,2,13.5,1
2,0,3,15.3,0


In [34]:
from sklearn.preprocessing import OneHotEncoder

In [38]:
ohe = OneHotEncoder(sparse=False)
X = ohe.fit_transform(df[["color"]].values)
X

array([[0., 1., 0.],
       [0., 0., 1.],
       [1., 0., 0.]])

In [39]:
import pandas as pd

In [40]:
df = pd.DataFrame([
    ["green", "M", 10.1, "class1"],
    ["red", "L", 13.5, "class2"],
    ["blue", "XL", 15.3, "class1"]
], columns=["color", "size", "prize", "class label"])

size_mapping = {
    "XL":3,
    "L":2,
    "M":1
}
df["size"] = df["size"].map(size_mapping)
class_mapping = {label:idx for idx, label in enumerate(set(df["class label"]))}
df["class label"] = df["class label"].map(class_mapping)

df

Unnamed: 0,color,size,prize,class label
0,green,1,10.1,1
1,red,2,13.5,0
2,blue,3,15.3,1


In [41]:
pd.get_dummies(df)

Unnamed: 0,size,prize,class label,color_blue,color_green,color_red
0,1,10.1,1,0,1,0
1,2,13.5,0,0,0,1
2,3,15.3,1,1,0,0


In [42]:
df = pd.DataFrame([
    ["green", "M", 10.1, "class1"],
    ["red", "L", 13.5, "class2"],
    ["blue", "XL", 15.3, "class1"]
], columns=["color", "size", "prize", "class label"])
df

Unnamed: 0,color,size,prize,class label
0,green,M,10.1,class1
1,red,L,13.5,class2
2,blue,XL,15.3,class1


In [43]:
pd.get_dummies(df)

Unnamed: 0,prize,color_blue,color_green,color_red,size_L,size_M,size_XL,class label_class1,class label_class2
0,10.1,0,1,0,0,1,0,1,0
1,13.5,0,0,1,1,0,0,0,1
2,15.3,1,0,0,0,0,1,1,0
