### Data Standardization

In [5]:
import numpy as np
import pandas as pd
# Toy data: three short integer feature vectors, one per future column.
V1 = np.array([1, 3, 6, 5, 7])
V2 = np.array([7, 7, 5, 8, 12])
V3 = np.array([6, 12, 5, 6, 14])

# Stack the vectors column-wise and cast every column to float in one
# chained expression, so the frame is created in its final dtype.
df = pd.DataFrame({"V1": V1, "V2": V2, "V3": V3}).astype(float)
df

Unnamed: 0,V1,V2,V3
0,1.0,7.0,6.0
1,3.0,7.0,12.0
2,6.0,5.0,5.0
3,5.0,8.0,6.0
4,7.0,12.0,14.0


### Standardization

In [8]:
from sklearn import preprocessing

In [9]:
# Standardize column-wise (z-score): each column is centred and scaled
# independently. The result is a new NumPy array, not a DataFrame.
preprocessing.scale(df)

array([[-1.57841037, -0.34554737, -0.70920814],
       [-0.64993368, -0.34554737,  0.92742603],
       [ 0.74278135, -1.2094158 , -0.98198051],
       [ 0.27854301,  0.08638684, -0.70920814],
       [ 1.2070197 ,  1.81412369,  1.47297076]])

In [10]:
# Re-display the frame: scale() returned a new array, so df is unchanged.
df

Unnamed: 0,V1,V2,V3
0,1.0,7.0,6.0
1,3.0,7.0,12.0
2,6.0,5.0,5.0
3,5.0,8.0,6.0
4,7.0,12.0,14.0


### Normalization

In [15]:
# Unlike scale(), normalize() works per ROW (sample): each row is divided by
# its L2 norm, e.g. row 0 -> [1, 7, 6] / sqrt(1 + 49 + 36).
preprocessing.normalize(df)

array([[0.10783277, 0.75482941, 0.64699664],
       [0.21107926, 0.49251828, 0.84431705],
       [0.64699664, 0.53916387, 0.53916387],
       [0.4472136 , 0.71554175, 0.53665631],
       [0.35491409, 0.60842415, 0.70982818]])

### Min-Max Scaling

In [18]:
# Transformer object that linearly rescales each column into the range [10, 20].
scaler = preprocessing.MinMaxScaler(feature_range = (10,20))

In [20]:
# Learn each column's min/max and rescale in one step: the column minimum
# maps to 10 and the column maximum to 20.
scaler.fit_transform(df)

array([[10.        , 12.85714286, 11.11111111],
       [13.33333333, 12.85714286, 17.77777778],
       [18.33333333, 10.        , 10.        ],
       [16.66666667, 14.28571429, 11.11111111],
       [20.        , 20.        , 20.        ]])

### Binarize

In [23]:
# Threshold transformer: values strictly greater than 5 become 1, all others 0.
binarizer = preprocessing.Binarizer(threshold = 5).fit(df)

In [25]:
# Apply the threshold; a value equal to 5 maps to 0 (see row 2: [6, 5, 5] -> [1, 0, 0]).
binarizer.transform(df)

array([[0., 1., 1.],
       [0., 1., 1.],
       [1., 0., 0.],
       [0., 1., 1.],
       [1., 1., 1.]])

### 0-1 Transform


In [31]:
import seaborn as sns
# Load seaborn's "tips" example dataset.
tips = sns.load_dataset("tips")
# NOTE: `df` is rebound here and no longer refers to the V1/V2/V3 toy frame.
# It stays as the untouched reference copy of tips from this point on.
df = tips.copy()
# Working copy that receives the label-encoded columns added below.
df_l = df.copy()

In [33]:
# Preview the working copy before any encoded columns are added.
df_l.head()

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3
2,21.01,3.5,Male,No,Sun,Dinner,3
3,23.68,3.31,Male,No,Sun,Dinner,2
4,24.59,3.61,Female,No,Sun,Dinner,4


In [51]:
# Integer codes of the categorical `sex` column. Codes start at 0 (not 1) and
# follow the category order, so here Male -> 0 and Female -> 1.
df_l["yeni_sex"] = df_l["sex"].cat.codes

In [53]:
# Preview the new `yeni_sex` column.
# NOTE(review): the saved output also lists `daha_yeni-sex`, which is only
# created in a later cell. The output is stale, so re-run top to bottom to refresh it.
df_l.head()

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size,yeni_sex,daha_yeni-sex
0,16.99,1.01,Female,No,Sun,Dinner,2,1,0
1,10.34,1.66,Male,No,Sun,Dinner,3,0,1
2,21.01,3.5,Male,No,Sun,Dinner,3,0,1
3,23.68,3.31,Male,No,Sun,Dinner,2,0,1
4,24.59,3.61,Female,No,Sun,Dinner,4,1,0


In [55]:
# Encoder that maps each distinct label to an integer code.
lbe = preprocessing.LabelEncoder()

In [57]:
# Fit on `sex` and store the codes. The saved previews show Female -> 0 and
# Male -> 1, i.e. the opposite of the `.cat.codes` mapping in `yeni_sex`.
df_l["daha_yeni-sex"] = lbe.fit_transform(df_l["sex"])

### One-vs-Rest ("1 and Others") Transform

In [64]:
# The reference copy is unaffected by the columns added to df_l.
df.head()

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3
2,21.01,3.5,Male,No,Sun,Dinner,3
3,23.68,3.31,Male,No,Sun,Dinner,2
4,24.59,3.61,Female,No,Sun,Dinner,4


In [66]:
# The working copy now carries both sex encodings (`yeni_sex`, `daha_yeni-sex`).
df_l.head()

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size,yeni_sex,daha_yeni-sex
0,16.99,1.01,Female,No,Sun,Dinner,2,1,0
1,10.34,1.66,Male,No,Sun,Dinner,3,0,1
2,21.01,3.5,Male,No,Sun,Dinner,3,0,1
3,23.68,3.31,Male,No,Sun,Dinner,2,0,1
4,24.59,3.61,Female,No,Sun,Dinner,4,1,0


In [78]:
# "One vs. the rest" flag: 1 when `day` contains the substring "Sun", else 0.
# NOTE(review): the saved output also shows an ASCII-named `yeni_gun` column
# that no cell visible here creates. It is presumably left over from an earlier run.
df_l["yeni_gün"] = np.where(df_l["day"].str.contains("Sun"),1,0)
df_l.head()

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size,yeni_sex,daha_yeni-sex,yeni_gun,yeni_gün
0,16.99,1.01,Female,No,Sun,Dinner,2,1,0,1,1
1,10.34,1.66,Male,No,Sun,Dinner,3,0,1,1,1
2,21.01,3.5,Male,No,Sun,Dinner,3,0,1,1,1
3,23.68,3.31,Male,No,Sun,Dinner,2,0,1,1,1
4,24.59,3.61,Female,No,Sun,Dinner,4,1,0,1,1


### Multi Class Transformation 

In [83]:
# Fresh encoder for the multi-class `day` column (rebinds the earlier `lbe`).
lbe = preprocessing.LabelEncoder()

In [89]:
# One integer per distinct day (Sun -> 2 in the preview that follows).
# NOTE(review): integer codes imply an ordering between days; for nominal
# features one-hot encoding is usually the safer choice.
df_l["daha_yeni_gun"] = lbe.fit_transform(df_l["day"])

In [91]:
# Preview the new `daha_yeni_gun` column next to the binary Sunday flag.
df_l.head()

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size,yeni_sex,daha_yeni-sex,yeni_gun,yeni_gün,daha_yeni_gun
0,16.99,1.01,Female,No,Sun,Dinner,2,1,0,1,1,2
1,10.34,1.66,Male,No,Sun,Dinner,3,0,1,1,1,2
2,21.01,3.5,Male,No,Sun,Dinner,3,0,1,1,1,2
3,23.68,3.31,Male,No,Sun,Dinner,2,0,1,1,1,2
4,24.59,3.61,Female,No,Sun,Dinner,4,1,0,1,1,2


### One-Hot Transform and the Dummy Variable Trap

In [110]:
# Separate copy of the reference tips frame for the one-hot demos.
df_one_hot = df.copy()

In [118]:
# prefix "sex" yields the indicator columns `sex_Male` and `sex_Female`, which
# replace the original `sex` column. Keeping both columns is the dummy-variable
# trap (each is the complement of the other); passing drop_first=True keeps k-1.
pd.get_dummies(df_one_hot, columns = ["sex"], prefix = ["sex"]).head()

Unnamed: 0,total_bill,tip,smoker,day,time,size,sex_Male,sex_Female
0,16.99,1.01,No,Sun,Dinner,2,False,True
1,10.34,1.66,No,Sun,Dinner,3,True,False
2,21.01,3.5,No,Sun,Dinner,3,True,False
3,23.68,3.31,No,Sun,Dinner,2,True,False
4,24.59,3.61,No,Sun,Dinner,4,False,True


In [124]:
# Same for `day`: one boolean indicator column per category
# (day_Thur, day_Fri, day_Sat, day_Sun) replaces the original column.
pd.get_dummies(df_one_hot , columns = ["day"], prefix = ["day"]).head()

Unnamed: 0,total_bill,tip,sex,smoker,time,size,day_Thur,day_Fri,day_Sat,day_Sun
0,16.99,1.01,Female,No,Dinner,2,False,False,False,True
1,10.34,1.66,Male,No,Dinner,3,False,False,False,True
2,21.01,3.5,Male,No,Dinner,3,False,False,False,True
3,23.68,3.31,Male,No,Dinner,2,False,False,False,True
4,24.59,3.61,Female,No,Dinner,4,False,False,False,True


### Converting a continuous variable into a categorical variable

In [127]:
# get_dummies returned new frames; `df` still has its original seven columns.
df.head()

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3
2,21.01,3.5,Male,No,Sun,Dinner,3
3,23.68,3.31,Male,No,Sun,Dinner,2
4,24.59,3.61,Female,No,Sun,Dinner,4


In [139]:
# Keep only the float64/int64 columns (presumably total_bill, tip and size);
# the discretizer below cannot handle the categorical columns.
dff= df.select_dtypes(include = ["float64" , "int64"])

In [143]:
# Bin each numeric column into ordinal, quantile-based buckets.
# `n_bins` needs exactly one entry per column of the fitted data, so fit on the
# numeric-only `dff` (expected: total_bill, tip, size -> 3, 2, 2 bins).
# Fitting on the full 7-column `df` raised
# "ValueError: n_bins must be a scalar or array of shape (n_features,)."
ex = preprocessing.KBinsDiscretizer(n_bins = [3,2,2], encode = "ordinal" ,strategy = "quantile").fit(dff)