In [2]:
import pandas as pd
# Import StandardScaler from sklearn for standardizing features (mean = 0, std = 1)
from sklearn.preprocessing import StandardScaler 

# Import MinMaxScaler from sklearn for scaling features to a specific range (default: 0 to 1)
from sklearn.preprocessing import MinMaxScaler 

In [3]:
df = pd.read_csv('cleaned_data.csv')

In [6]:
df.head()

Unnamed: 0,CustomerID,Name,Email,Age,Country,SignupDate,LastLogin,TotalPurchase,Feedback,CartValue,DiscountApplied
0,e3e70682-c209-4cac-a29f-6fbed82c07cd,Edwin Mack,timothy14@ward.com,52.0,India,2023-10-18,2025-02-14 03:26:26,80.63,Excellent,196.06,50.0
1,f728b4fa-4248-4e3a-8a5d-2f346baa9455,Kimberly Moore,brendagonzalez@atkins.org,75.0,United States,2025-02-28,2025-02-17 15:38:32,27.38,,136.79,0.0
2,eb1167b3-67a9-4378-bc65-c1e582e2e662,Kaitlin Gregory,velazquezanita@yahoo.com,30.0,Germany,2023-09-10,2025-02-01 15:53:26,817.67,Bad,239.65,20.0
3,f7c1bd87-4da5-4709-9471-3d60c8a70639,Crystal Gill,allentyler@hill-smith.com,40.0,United States,2022-05-23,2025-05-07 21:17:28,29.28,,291.47,10.0
4,e443df78-9558-467f-9ba9-1faf7a024204,William Martinez,jonesashley@gmail.com,54.0,United States,2024-09-12,2025-05-07 13:32:36,829.67,Average,265.40743,15.0


In [7]:
df.describe().round(2)

Unnamed: 0,Age,TotalPurchase,CartValue,DiscountApplied
count,275.0,275.0,275.0,275.0
mean,50.56,496.9,265.41,9.82
std,17.84,288.72,128.53,6.93
min,19.0,0.61,22.87,0.0
25%,36.0,264.24,157.08,5.0
50%,48.0,496.9,265.41,10.0
75%,68.0,744.25,369.98,15.0
max,80.0,998.49,497.93,50.0


# `Normalization`
It is the process of scaling the data to a common range, usually between 0 and 1. This is done to prevent features with large ranges from dominating the model. 

In [8]:
new_df = pd.DataFrame(df, columns=['TotalPurchase','CartValue'])

In [9]:
scalar = MinMaxScaler() # Initialize the MinMaxScaler object 

In [11]:
scalar

0,1,2
,feature_range,"(0, ...)"
,copy,True
,clip,False


In [10]:
normalized_df = scalar.fit_transform(new_df) # Apply the transformation to the new data

In [13]:
print(normalized_df.round(3))

[[0.08  0.365]
 [0.027 0.24 ]
 [0.819 0.456]
 [0.029 0.565]
 [0.831 0.511]
 [0.196 0.745]
 [0.822 0.791]
 [0.03  0.511]
 [1.    0.778]
 [0.497 0.241]
 [0.186 0.384]
 [0.327 0.136]
 [0.812 0.374]
 [0.489 0.917]
 [0.24  0.123]
 [0.101 0.293]
 [0.519 0.511]
 [0.877 0.335]
 [0.267 0.092]
 [0.422 0.111]
 [0.921 0.865]
 [0.877 0.857]
 [0.59  0.623]
 [0.571 0.905]
 [0.004 0.511]
 [0.422 0.812]
 [0.132 0.937]
 [0.922 0.667]
 [0.901 0.6  ]
 [0.279 0.574]
 [0.358 0.277]
 [0.857 0.757]
 [0.497 0.387]
 [0.176 0.011]
 [0.416 0.233]
 [0.971 0.659]
 [0.861 0.218]
 [0.478 0.488]
 [0.558 0.679]
 [0.292 1.   ]
 [0.319 0.495]
 [0.072 0.626]
 [0.827 0.923]
 [0.198 0.11 ]
 [0.196 0.902]
 [0.989 0.052]
 [0.995 0.723]
 [0.127 0.728]
 [0.517 0.185]
 [0.238 0.125]
 [0.948 0.009]
 [0.706 0.929]
 [0.497 0.034]
 [0.115 0.745]
 [0.419 0.513]
 [0.09  0.829]
 [0.299 0.799]
 [0.985 0.511]
 [0.497 0.255]
 [0.594 0.859]
 [0.547 0.983]
 [0.015 0.975]
 [0.497 0.94 ]
 [0.246 0.852]
 [0.694 0.866]
 [0.555 0.666]
 [0.748 0.

# `Standardization`
It is a process that scales the data to have a mean of 0 and a standard deviation of 1. This is often done to prevent features with large ranges from dominating the model. 

In [14]:
scalar = StandardScaler()

In [15]:
standard_df = scalar.fit_transform(new_df)

In [18]:
print(standard_df.round(2))

[[-1.44 -0.54]
 [-1.63 -1.  ]
 [ 1.11 -0.2 ]
 [-1.62  0.2 ]
 [ 1.15  0.  ]
 [-1.04  0.87]
 [ 1.12  1.04]
 [-1.62  0.  ]
 [ 1.74  0.99]
 [ 0.   -1.  ]
 [-1.08 -0.47]
 [-0.59 -1.39]
 [ 1.09 -0.51]
 [-0.03  1.51]
 [-0.89 -1.44]
 [-1.37 -0.81]
 [ 0.07  0.  ]
 [ 1.32 -0.65]
 [-0.8  -1.55]
 [-0.26 -1.48]
 [ 1.47  1.31]
 [ 1.32  1.28]
 [ 0.32  0.42]
 [ 0.25  1.46]
 [-1.71  0.  ]
 [-0.26  1.11]
 [-1.26  1.58]
 [ 1.47  0.58]
 [ 1.4   0.33]
 [-0.75  0.24]
 [-0.48 -0.86]
 [ 1.25  0.91]
 [ 0.   -0.46]
 [-1.11 -1.85]
 [-0.28 -1.03]
 [ 1.64  0.55]
 [ 1.26 -1.08]
 [-0.07 -0.08]
 [ 0.21  0.62]
 [-0.71  1.81]
 [-0.62 -0.06]
 [-1.47  0.43]
 [ 1.14  1.53]
 [-1.04 -1.48]
 [-1.04  1.45]
 [ 1.7  -1.7 ]
 [ 1.72  0.79]
 [-1.28  0.8 ]
 [ 0.07 -1.2 ]
 [-0.9  -1.43]
 [ 1.56 -1.86]
 [ 0.72  1.55]
 [ 0.   -1.76]
 [-1.32  0.87]
 [-0.27  0.01]
 [-1.41  1.18]
 [-0.69  1.07]
 [ 1.69  0.  ]
 [ 0.   -0.94]
 [ 0.34  1.29]
 [ 0.17  1.75]
 [-1.67  1.72]
 [ 0.    1.59]
 [-0.87  1.26]
 [ 0.68  1.31]
 [ 0.2   0.57]
 [ 0.87 -0