In [1]:
# Code adapted from: https://stackoverflow.com/questions/24645153/pandas-dataframe-columns-scaling-with-sklearn

import pandas as pd
import numpy as np

# Toy frame: two numeric columns to rescale plus one categorical column.
df = pd.DataFrame(
    {
        'A': [14.00, 90.20, 90.95, 96.27, 91.21],
        'B': [103.02, 107.26, 110.35, 114.23, 114.68],
        'C': ['big', 'small', 'big', 'small', 'small'],
    }
)
df

Unnamed: 0,A,B,C
0,14.0,103.02,big
1,90.2,107.26,small
2,90.95,110.35,big
3,96.27,114.23,small
4,91.21,114.68,small


In [2]:
# Shift column A so that its smallest value becomes 0.
df["A"].sub(df["A"].min())

0     0.00
1    76.20
2    76.95
3    82.27
4    77.21
Name: A, dtype: float64

In [3]:
# Min-max normalisation of column A: (x - min) / (max - min).
df["A"].sub(df["A"].min()).div(df["A"].max() - df["A"].min())

0    0.000000
1    0.926219
2    0.935335
3    1.000000
4    0.938495
Name: A, dtype: float64

In [4]:
# Min-max rescale column A onto the range [1, 5]:
# normalise to [0, 1] first, then stretch by (5 - 1) and shift by 1.
df["A"] = (
    (df["A"] - df["A"].min())
    / (df["A"].max() - df["A"].min())
    * (5 - 1)
    + 1
)
df

Unnamed: 0,A,B,C
0,1.0,103.02,big
1,4.704874,107.26,small
2,4.741339,110.35,big
3,5.0,114.23,small
4,4.753981,114.68,small


In [5]:
# Mean and standard deviation (pandas default, ddof=1) of column B, shown as a tuple.
(df["B"].mean(), df["B"].std())

(109.90799999999999, 4.9016191202499639)

In [6]:
# Standardise column B to z-scores: subtract the mean, then divide by the standard deviation.
df["B"] = (df["B"] - df["B"].mean()) / df["B"].std()

In [7]:
# Display the frame now that column A has been min-max rescaled and column B standardised.
df

Unnamed: 0,A,B,C
0,1.0,-1.40525,big
1,4.704874,-0.54023,small
2,4.741339,0.090174,big
3,5.0,0.881749,small
4,4.753981,0.973556,small


In [8]:
def feture_scaling(df, scaling_strategy="min-max", column=None):
    """Rescale columns of ``df`` in place and return the same DataFrame.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose selected columns are overwritten with their scaled values.
    scaling_strategy : {"min-max", "z-score"}, default "min-max"
        ``"min-max"`` replaces each column with ``(x - min) / (max - min)``;
        ``"z-score"`` replaces each column with ``(x - mean) / std``, where
        ``std`` is pandas' default ``Series.std()``.
    column : str or iterable of str, optional
        Name(s) of the column(s) to scale. A single name may be passed as a
        plain string. When omitted, every numeric column is scaled and
        non-numeric columns are left untouched.

    Returns
    -------
    pandas.DataFrame
        The very same object that was passed in as ``df``.

    Raises
    ------
    ValueError
        If ``scaling_strategy`` is not one of the supported names. The check
        runs before any column is modified.

    Notes
    -----
    A constant column has zero range and zero standard deviation, so both
    strategies yield NaN for it (0 / 0).
    """
    supported = ("min-max", "z-score")
    # Fail loudly up front: an unrecognised strategy used to return the frame
    # unscaled without any warning.
    if scaling_strategy not in supported:
        raise ValueError(
            "Unknown scaling_strategy %r; expected one of %s"
            % (scaling_strategy, ", ".join(supported))
        )

    if column is None:
        # Only numeric columns can be rescaled arithmetically.
        column = df.select_dtypes(include="number").columns
    elif isinstance(column, str):
        # A bare string would otherwise be iterated character by character.
        column = [column]

    for column_name in column:
        values = df[column_name]
        if scaling_strategy == "min-max":
            low = values.min()
            df[column_name] = (values - low) / (values.max() - low)
        else:  # "z-score" (validated above)
            df[column_name] = (values - values.mean()) / values.std()
    return df

In [9]:
# Code adapted from: http://sebastianraschka.com/Articles/2014_about_feature_scaling.html

import pandas as pd
import numpy as np

# Load the first three columns of the wine data set from GitHub.
# Uses the public ``pd.read_csv`` entry point rather than reaching into
# the internal ``pd.io.parsers`` module path.
df = pd.read_csv(
    'https://raw.githubusercontent.com/rasbt/pattern_classification/master/data/wine_data.csv',
    header=None,        # treat the first line as data, not as column names
    usecols=[0, 1, 2],  # keep only the first three columns
)

# Give the positional columns descriptive names.
df.columns = ['Class label', 'Alcohol', 'Malic acid']

df.head()

Unnamed: 0,Class label,Alcohol,Malic acid
0,1,14.23,1.71
1,1,13.2,1.78
2,1,13.16,2.36
3,1,14.37,1.95
4,1,13.24,2.59


In [10]:
# Min-max scale the two feature columns; 'Class label' is not in the list and stays as loaded.
df = feture_scaling(
    df,
    scaling_strategy="min-max",
    column=['Alcohol', 'Malic acid'],
)
df.head()

Unnamed: 0,Class label,Alcohol,Malic acid
0,1,0.842105,0.1917
1,1,0.571053,0.205534
2,1,0.560526,0.320158
3,1,0.878947,0.23913
4,1,0.581579,0.365613


In [11]:
from sklearn import preprocessing

# Reload the raw data: ``df`` was overwritten with scaled values in an earlier cell.
# Uses the public ``pd.read_csv`` entry point rather than ``pd.io.parsers``.
df = pd.read_csv(
    'https://raw.githubusercontent.com/rasbt/pattern_classification/master/data/wine_data.csv',
    header=None,        # treat the first line as data, not as column names
    usecols=[0, 1, 2],  # keep only the first three columns
)
df.columns = ['Class label', 'Alcohol', 'Malic acid']

# Slice the two feature columns once instead of repeating the selection for every call.
wine_features = df[['Alcohol', 'Malic acid']]

# Standardisation: fit the scaler on the features, then transform them.
std_scale = preprocessing.StandardScaler().fit(wine_features)
df_std = std_scale.transform(wine_features)

# Min-max scaling: same fit/transform pattern.
minmax_scale = preprocessing.MinMaxScaler().fit(wine_features)
df_minmax = minmax_scale.transform(wine_features)


In [12]:
# Show the min-max scaled features produced by MinMaxScaler (a NumPy array).
# NOTE(review): this displays the whole array; a slice such as df_minmax[:5] would keep the output short.
df_minmax

array([[ 0.84210526,  0.1916996 ],
       [ 0.57105263,  0.2055336 ],
       [ 0.56052632,  0.3201581 ],
       [ 0.87894737,  0.23913043],
       [ 0.58157895,  0.36561265],
       [ 0.83421053,  0.20158103],
       [ 0.88421053,  0.22332016],
       [ 0.79736842,  0.27865613],
       [ 1.        ,  0.17786561],
       [ 0.74473684,  0.12055336],
       [ 0.80789474,  0.28063241],
       [ 0.81315789,  0.14624506],
       [ 0.71578947,  0.19565217],
       [ 0.97894737,  0.19565217],
       [ 0.88157895,  0.22332016],
       [ 0.68421053,  0.21146245],
       [ 0.86052632,  0.23320158],
       [ 0.73684211,  0.16403162],
       [ 0.83157895,  0.16798419],
       [ 0.68684211,  0.46640316],
       [ 0.79736842,  0.17588933],
       [ 0.5       ,  0.60474308],
       [ 0.70526316,  0.22134387],
       [ 0.47894737,  0.16996047],
       [ 0.65      ,  0.21146245],
       [ 0.53157895,  0.25889328],
       [ 0.62105263,  0.20355731],
       [ 0.59736842,  0.19367589],
       [ 0.74736842,