# EXPERIMENT 4

**Normalizing**
* Purpose: To rescale every feature to a common range (here [0, 1], using Min-Max scaling) so that all features are on a similar scale.

**Scaling**
* Purpose: To standardize features by removing the mean and scaling to unit
  variance.

**Balancing**
* Purpose: To handle imbalanced datasets where some classes are underrepresented.

**Z-Score Normalization**
* Purpose: To standardize features by removing the mean and scaling to unit variance, i.e. $z = (x - \mu) / \sigma$ for each feature.

1. **IMPORTING ALL THE NECESSARY LIBRARIES**

In [None]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.utils import resample
from imblearn.over_sampling import SMOTE
from imblearn.under_sampling import RandomUnderSampler

In [None]:
# Load the mtcars dataset into a DataFrame; the path is relative to the
# notebook's working directory.
# NOTE(review): the scalers below are fit on every column of df, so the CSV is
# presumably all-numeric (no car-name column) — confirm against the file.
df = pd.read_csv('mtcars.csv')

* **Normalizing Data**

In [None]:
# Min-Max normalisation: learn each column's min and max from df, then rescale
# every column to MinMaxScaler's default [0, 1] range.
scaler = MinMaxScaler()
scaler.fit(df)
normalized_data = scaler.transform(df)
normalized_data

array([[0.45106383, 0.5       , 0.22175106, 0.204947  , 0.52534562,
        0.28304781, 0.23333333, 0.        , 1.        , 0.5       ,
        0.42857143],
       [0.45106383, 0.5       , 0.22175106, 0.204947  , 0.52534562,
        0.34824853, 0.3       , 0.        , 1.        , 0.5       ,
        0.42857143],
       [0.52765957, 0.        , 0.0920429 , 0.14487633, 0.50230415,
        0.20634109, 0.48928571, 1.        , 1.        , 0.5       ,
        0.        ],
       [0.46808511, 0.5       , 0.46620105, 0.204947  , 0.14746544,
        0.43518282, 0.58809524, 1.        , 0.        , 0.        ,
        0.        ],
       [0.35319149, 1.        , 0.72062859, 0.43462898, 0.1797235 ,
        0.49271286, 0.3       , 0.        , 0.        , 0.        ,
        0.14285714],
       [0.32765957, 0.5       , 0.38388626, 0.18727915, 0.        ,
        0.49782664, 0.68095238, 1.        , 0.        , 0.        ,
        0.        ],
       [0.16595745, 1.        , 0.72062859, 0.6819788 , 0.

In [None]:
# Z-score normalisation: StandardScaler removes each column's mean and scales
# it to unit variance.
zscore_scaler = StandardScaler()
zscore_scaler.fit(df)
zscore_normalized_data = zscore_scaler.transform(df)
zscore_normalized_data

array([[ 1.53299135e-01, -1.06667720e-01, -5.79750316e-01,
        -5.43654869e-01,  5.76594481e-01, -6.20166581e-01,
        -7.89600577e-01, -8.81917104e-01,  1.20894105e+00,
         4.30331483e-01,  7.46967077e-01],
       [ 1.53299135e-01, -1.06667720e-01, -5.79750316e-01,
        -5.43654869e-01,  5.76594481e-01, -3.55382189e-01,
        -4.71201785e-01, -8.81917104e-01,  1.20894105e+00,
         4.30331483e-01,  7.46967077e-01],
       [ 4.56736599e-01, -1.24445674e+00, -1.00602601e+00,
        -7.95569902e-01,  4.81584062e-01, -9.31677630e-01,
         4.32823359e-01,  1.13389342e+00,  1.20894105e+00,
         4.30331483e-01, -1.14010764e+00],
       [ 2.20729683e-01, -1.06667720e-01,  2.23615417e-01,
        -5.43654869e-01, -9.81576392e-01, -2.33633287e-03,
         9.04735855e-01,  1.13389342e+00, -8.27170192e-01,
        -9.46729262e-01, -1.14010764e+00],
       [-2.34426513e-01,  1.03112130e+00,  1.05977159e+00,
         4.19549669e-01, -8.48561806e-01,  2.31296954e-01,
  

* **Scaling**


In [None]:
# Standard scaling: fit a StandardScaler on df (fit returns the fitted scaler),
# then apply it to the same data.
scaler = StandardScaler().fit(df)
scaled_data = scaler.transform(df)
scaled_data

array([[ 1.53299135e-01, -1.06667720e-01, -5.79750316e-01,
        -5.43654869e-01,  5.76594481e-01, -6.20166581e-01,
        -7.89600577e-01, -8.81917104e-01,  1.20894105e+00,
         4.30331483e-01,  7.46967077e-01],
       [ 1.53299135e-01, -1.06667720e-01, -5.79750316e-01,
        -5.43654869e-01,  5.76594481e-01, -3.55382189e-01,
        -4.71201785e-01, -8.81917104e-01,  1.20894105e+00,
         4.30331483e-01,  7.46967077e-01],
       [ 4.56736599e-01, -1.24445674e+00, -1.00602601e+00,
        -7.95569902e-01,  4.81584062e-01, -9.31677630e-01,
         4.32823359e-01,  1.13389342e+00,  1.20894105e+00,
         4.30331483e-01, -1.14010764e+00],
       [ 2.20729683e-01, -1.06667720e-01,  2.23615417e-01,
        -5.43654869e-01, -9.81576392e-01, -2.33633287e-03,
         9.04735855e-01,  1.13389342e+00, -8.27170192e-01,
        -9.46729262e-01, -1.14010764e+00],
       [-2.34426513e-01,  1.03112130e+00,  1.05977159e+00,
         4.19549669e-01, -8.48561806e-01,  2.31296954e-01,
  

* **Balancing (Oversampling: Increases the number of minority class samples.)**

In [None]:
# Separate the class label ('gear') from the predictor columns.
X = df.drop(columns=['gear'])
y = df['gear']

# Normalize the features using Z-Score
scaler = StandardScaler()
X_normalized = scaler.fit_transform(X)

# Balance the dataset using SMOTE.
# k_neighbors=2 is below SMOTE's default of 5 — presumably because the smallest
# 'gear' class has too few rows for the default neighbourhood size (confirm
# with y.value_counts()).
# random_state is fixed because SMOTE draws synthetic samples at random; without
# it the resampled data changes on every run and the notebook is not reproducible.
smote = SMOTE(k_neighbors=2, random_state=42)
X_resampled, y_resampled = smote.fit_resample(X_normalized, y)

In [None]:
# Display the feature array returned by the SMOTE fit_resample call above.
X_resampled

array([[ 1.53299135e-01, -1.06667720e-01, -5.79750316e-01,
        -5.43654869e-01,  5.76594481e-01, -6.20166581e-01,
        -7.89600577e-01, -8.81917104e-01,  1.20894105e+00,
         7.46967077e-01],
       [ 1.53299135e-01, -1.06667720e-01, -5.79750316e-01,
        -5.43654869e-01,  5.76594481e-01, -3.55382189e-01,
        -4.71201785e-01, -8.81917104e-01,  1.20894105e+00,
         7.46967077e-01],
       [ 4.56736599e-01, -1.24445674e+00, -1.00602601e+00,
        -7.95569902e-01,  4.81584062e-01, -9.31677630e-01,
         4.32823359e-01,  1.13389342e+00,  1.20894105e+00,
        -1.14010764e+00],
       [ 2.20729683e-01, -1.06667720e-01,  2.23615417e-01,
        -5.43654869e-01, -9.81576392e-01, -2.33633287e-03,
         9.04735855e-01,  1.13389342e+00, -8.27170192e-01,
        -1.14010764e+00],
       [-2.34426513e-01,  1.03112130e+00,  1.05977159e+00,
         4.19549669e-01, -8.48561806e-01,  2.31296954e-01,
        -4.71201785e-01, -8.81917104e-01, -8.27170192e-01,
        -5.

In [None]:
# Display the 'gear' labels returned alongside X_resampled by the SMOTE cell above.
y_resampled

0     4
1     4
2     4
3     3
4     3
5     3
6     3
7     4
8     4
9     4
10    4
11    3
12    3
13    3
14    3
15    3
16    3
17    4
18    4
19    4
20    3
21    3
22    3
23    3
24    3
25    4
26    5
27    5
28    5
29    5
30    5
31    4
32    4
33    4
34    4
35    5
36    5
37    5
38    5
39    5
40    5
41    5
42    5
43    5
44    5
Name: gear, dtype: int64

* **Balancing (Undersampling: Reduces the number of majority class samples.)**

In [None]:
# Balance the dataset by randomly dropping rows from the larger classes.
# random_state is fixed because the rows to keep are chosen at random; without
# it a different subset is retained on every run.
# NOTE: this reuses X_normalized and y from the oversampling cell and
# overwrites the X_resampled / y_resampled that SMOTE produced there.
rus = RandomUnderSampler(random_state=42)
X_resampled, y_resampled = rus.fit_resample(X_normalized, y)

In [None]:
# Display the feature array returned by the RandomUnderSampler fit_resample call above.
X_resampled

array([[-1.1447389 ,  1.0311213 ,  0.97779549,  1.45684686,  0.25355906,
         0.64664502, -1.38659831, -0.8819171 , -0.82717019,  0.74696708],
       [-0.47043343,  1.0311213 ,  0.36953287,  0.49364233, -1.00057848,
         0.5324243 , -0.14143161, -0.8819171 , -0.82717019,  0.11794217],
       [-0.90873199,  1.0311213 ,  1.71558035,  1.23456889, -0.69654514,
         2.20939212, -0.24377408, -0.8819171 , -0.82717019,  0.74696708],
       [-1.63361037,  1.0311213 ,  1.97790385,  0.86410561, -1.26660765,
         2.11074695,  0.07462472, -0.8819171 , -0.82717019,  0.74696708],
       [-0.8244438 ,  1.0311213 ,  0.60070545,  0.04908639, -0.84856181,
         0.2261051 , -0.31200239, -0.8819171 , -0.82717019, -0.51108274],
       [ 1.737917  , -1.24445674, -1.2708088 , -1.40312969,  2.53380911,
        -1.6637286 ,  0.38165212,  1.13389342,  1.20894105, -0.51108274],
       [ 0.15329914, -0.10666772, -0.57975032, -0.54365487,  0.57659448,
        -0.62016658, -0.78960058, -0.8819171 

In [None]:
# Display the 'gear' labels returned alongside X_resampled by the undersampling cell above.
y_resampled

0     3
1     3
2     3
3     3
4     3
5     4
6     4
7     4
8     4
9     4
10    5
11    5
12    5
13    5
14    5
Name: gear, dtype: int64