In [1]:
import numpy as np
import pandas as pd

In [2]:
# Read the Starbucks customers CSV into a DataFrame, then display its
# (rows, columns) shape as a quick sanity check on the load.
customers_csv = '../data/starbucks_customers.csv'
coffee = pd.read_csv(customers_csv)
coffee.shape

(122, 10)

In [3]:
# Preview the first five rows to see the column names and typical values.
coffee.head(n=5)

Unnamed: 0,avg_spent,nearest_starbucks,age,rate_quality,rate_price,rate_promo,ambiance,wifi,service,meetings_hangout
0,13,8,52,4,4,4,4,4,4,4
1,25,8,35,4,5,5,4,4,3,3
2,10,8,29,2,1,5,2,2,2,2
3,7,8,28,4,1,4,5,3,3,4
4,10,8,28,4,2,4,4,3,4,3


In [5]:
# Pull out the avg_spent column as a Series and summarise its distribution
# (count, mean, std, min, quartiles, max) before any rescaling.
spent = coffee.loc[:, 'avg_spent']
spent.describe()

count    122.000000
mean       9.770492
std        6.618016
min        0.000000
25%        5.000000
50%       10.000000
75%       14.000000
max       28.000000
Name: avg_spent, dtype: float64

In [6]:
# Range = largest value minus smallest value of the raw column.
spent_lowest = spent.min()
spent_highest = spent.max()
print(f'Range: {spent_highest - spent_lowest}')

Range: 28


Min-max normalization rescales every value into the range [0, 1] using the formula:
$$x_{norm} = \frac{x - x_{min}}{x_{max} - x_{min}}$$

In [7]:
# Min-max normalization by hand: shift the column so its minimum becomes 0,
# then divide by the range so its maximum becomes 1.
spent_floor = spent.min()
spent_span = spent.max() - spent_floor
spent_normalized = spent.sub(spent_floor).div(spent_span)
spent_normalized.describe()

count    122.000000
mean       0.348946
std        0.236358
min        0.000000
25%        0.178571
50%        0.357143
75%        0.500000
max        1.000000
Name: avg_spent, dtype: float64

## Min-Max Normalization with Sklearn

In [8]:
from sklearn.preprocessing import MinMaxScaler

# Scaler that maps each feature onto [0, 1]; (0, 1) is scikit-learn's default
# feature_range, spelled out here so the target interval is visible.
mmscaler = MinMaxScaler(feature_range=(0, 1))

In [9]:
# scikit-learn transformers expect a 2-D (n_samples, n_features) array, so
# turn the 1-D column into a single-feature column vector.
spent_values = spent.to_numpy()
spent_reshaped = spent_values.reshape(-1, 1)
spent_reshaped.shape

(122, 1)

In [11]:
# fit() learns the column's minimum and maximum; transform() then applies the
# min-max rescaling to the same data.
mmscaler.fit(spent_reshaped)
reshaped_scaled = mmscaler.transform(spent_reshaped)

In [12]:
# Summary statistics of the scikit-learn result, for comparison with the
# describe() output of the hand-computed normalization.
# NOTE: ndarray.std() defaults to the population standard deviation (ddof=0),
# whereas pandas' describe() reports the sample standard deviation (ddof=1),
# so the Std printed here is slightly smaller than the pandas figure even
# though the scaled values themselves are the same.
scaled_min = reshaped_scaled.min()
scaled_max = reshaped_scaled.max()
print(f'Min: {scaled_min}')
print(f'Max: {scaled_max}')
print(f'Mean: {reshaped_scaled.mean()}')
print(f'Std: {reshaped_scaled.std()}')
print(f'Range: {scaled_max - scaled_min}')

Min: 0.0
Max: 1.0
Mean: 0.3489461358313818
Std: 0.23538702733077754
Range: 1.0
