In [3]:
import pandas as pd 
from sklearn import datasets 
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler,MinMaxScaler

# 1. Load Dataset

In [5]:
# Load scikit-learn's wine dataset and assemble one DataFrame:
# one column per feature plus a 'target' column holding the class labels.
wine = datasets.load_wine()
df = pd.DataFrame(data=wine.data, columns=wine.feature_names).assign(
    target=wine.target
)

# 2. Split into train/test

In [7]:
# Separate the class labels from the feature columns.
Y = df['target']
X = df.drop(columns='target')

# test_size=0.2 holds out 20% of the rows for testing; the fixed
# random_state keeps the split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=42,
)

print("Before Scaling:", X_train.head())

Before Scaling:      alcohol  malic_acid   ash  alcalinity_of_ash  magnesium  total_phenols  \
158    14.34        1.68  2.70               25.0       98.0           2.80   
137    12.53        5.51  2.64               25.0       96.0           1.79   
98     12.37        1.07  2.10               18.5       88.0           3.52   
159    13.48        1.67  2.64               22.5       89.0           2.60   
38     13.07        1.50  2.10               15.5       98.0           2.40   

     flavanoids  nonflavanoid_phenols  proanthocyanins  color_intensity   hue  \
158        1.31                  0.53             2.70            13.00  0.57   
137        0.60                  0.63             1.10             5.00  0.82   
98         3.75                  0.24             1.95             4.50  1.04   
159        1.10                  0.52             2.29            11.75  0.57   
38         2.64                  0.28             1.37             3.70  1.18   

     od280/od315_of_di

# 3. Standardization (Z-score scaling)

In [9]:
# Fit the scaler on the training split only, then apply that same fitted
# transform to both splits so no test-set statistics influence the scaling.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

print("After scaling (first 5 rows):\n", X_train_scaled[:5])

After scaling (first 5 rows):
 [[ 1.66529275 -0.60840587  1.21896194  1.60540017 -0.16738426  0.80400157
  -0.6916784   1.26722552  1.8775398   3.41947305 -1.65632857 -0.87940904
  -0.24860607]
 [-0.54952506  2.7515415   1.00331502  1.60540017 -0.30437887 -0.78538376
  -1.40123291  2.04959953 -0.87350523 -0.0248012  -0.58463272 -1.25462095
  -0.72992237]
 [-0.74531007 -1.14354109 -0.93750727 -0.28270426 -0.8523573   1.93702874
   1.7467906  -1.00165913  0.58798744 -0.24006834  0.35845962  0.2462267
  -0.24860607]
 [ 0.61294837 -0.61717858  1.00331502  0.87920616 -0.78385999  0.4892718
  -0.90154664  1.18898812  1.17258451  2.8813052  -1.65632857 -1.12955031
  -0.38138298]
 [ 0.11124931 -0.76631462 -0.93750727 -1.15413707 -0.16738426  0.17454204
   0.63748708 -0.68870952 -0.40926638 -0.58449577  0.95860929  0.1350528
   0.94638614]]


# 4. Min-Max Scaling example

In [10]:
# Fit the min-max scaler on the training split only and scale the training data.
minmax_scaler = MinMaxScaler()
X_train_minmax = minmax_scaler.fit_transform(X_train)
# Scale the held-out split with the scaler fitted on the training data,
# mirroring the StandardScaler cell. Never refit on X_test.
# NOTE: test values outside the training min/max will fall outside [0, 1].
X_test_minmax = minmax_scaler.transform(X_test)

print("MinMax scaled (first 5 rows):\n", X_train_minmax[:5])

MinMax scaled (first 5 rows):
 [[0.87105263 0.16089613 0.71657754 0.74226804 0.30434783 0.62758621
  0.20464135 0.75471698 0.72151899 1.         0.07317073 0.25274725
  0.30102443]
 [0.39473684 0.94093686 0.68449198 0.74226804 0.2826087  0.27931034
  0.05485232 0.94339623 0.21518987 0.28952043 0.27642276 0.15384615
  0.18676123]
 [0.35263158 0.03665988 0.39572193 0.40721649 0.19565217 0.87586207
  0.71940928 0.20754717 0.48417722 0.24511545 0.45528455 0.54945055
  0.30102443]
 [0.64473684 0.15885947 0.68449198 0.61340206 0.20652174 0.55862069
  0.16033755 0.73584906 0.59177215 0.88898757 0.07317073 0.18681319
  0.26950355]
 [0.53684211 0.12423625 0.39572193 0.25257732 0.30434783 0.48965517
  0.48523207 0.28301887 0.30063291 0.1740675  0.56910569 0.52014652
  0.58471237]]
