**Dataframe Normalization Techniques in Python**

In [28]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler, StandardScaler

In [29]:
# Toy dataset used by the scikit-learn examples below: five rows, with one
# 'age' column and one 'salary' column on very different numeric scales.
data = dict(
    age=[25, 22, 34, 55, 39],
    salary=[28000, 32000, 18000, 29000, 26000],
)
df = pd.DataFrame(data)

**Min-Max Scaling with scikit-learn**

In [30]:
# Learn each column's min and max from the frame. With the default settings
# MinMaxScaler maps every column independently onto the range [0, 1].
scaler = MinMaxScaler().fit(df)

# Apply the learned scaling to the same data. By default the result is a
# NumPy array, so the column labels are lost at this point.
minmax_scaled_df = scaler.transform(df)

# Show the input and the rescaled values one after the other. The array is
# wrapped in a DataFrame only for printing, to restore the column headers.
print("Original DataFrame:", df, sep="\n")
print(
    "\nMinMax Scaled DataFrame:",
    pd.DataFrame(minmax_scaled_df, columns=df.columns),
    sep="\n",
)

Original DataFrame:
   age  salary
0   25   28000
1   22   32000
2   34   18000
3   55   29000
4   39   26000

MinMax Scaled DataFrame:
        age    salary
0  0.090909  0.714286
1  0.000000  1.000000
2  0.363636  0.000000
3  1.000000  0.785714
4  0.515152  0.571429


**Standard Scaling with scikit-learn**

In [31]:
# Learn each column's mean and standard deviation from the frame.
# StandardScaler uses the population standard deviation (ddof=0), so its
# results differ slightly from a manual z-score built on pandas' .std().
scaler = StandardScaler().fit(df)

# Centre and scale the same data. By default the result is a NumPy array,
# so the column labels are lost at this point.
standard_scaled_df = scaler.transform(df)

# Show the input and the standardized values one after the other. The array
# is wrapped in a DataFrame only for printing, to restore the column headers.
print("Original DataFrame:", df, sep="\n")
print(
    "\nStandard Scaled DataFrame:",
    pd.DataFrame(standard_scaled_df, columns=df.columns),
    sep="\n",
)

Original DataFrame:
   age  salary
0   25   28000
1   22   32000
2   34   18000
3   55   29000
4   39   26000

Standard Scaled DataFrame:
        age    salary
0 -0.853735  0.296866
1 -1.109855  1.145055
2 -0.085373 -1.823606
3  1.707469  0.508913
4  0.341494 -0.127228


**Notes: Manual Standard Scaling with pandas**

In [32]:
# Manual standard scaling (z-score) of a single column using pandas only.
# pandas is already imported as `pd` in the notebook's first cell, so it is
# not imported a second time here.

# Create a sample DataFrame
data = {'age': [25, 30, 42, 22, 55]}
df = pd.DataFrame(data)

# Calculate mean and standard deviation of 'age'.
# NOTE: Series.std() defaults to ddof=1 (sample standard deviation), whereas
# scikit-learn's StandardScaler divides by the population standard deviation
# (ddof=0). The values below therefore will not match StandardScaler exactly.
mean = df['age'].mean()
std = df['age'].std()

# Apply the standard scaling formula z = (x - mean) / std and build the
# one-column result frame directly from the resulting Series, instead of
# creating an empty DataFrame and assigning into it afterwards.
df_scaled = ((df['age'] - mean) / std).to_frame(name='age_scaled')

# Print the original and scaled DataFrames
print("Original DataFrame:")
print(df)

print("\nDataFrame with 'age' scaled:")
print(df_scaled)

Original DataFrame:
   age
0   25
1   30
2   42
3   22
4   55

DataFrame with 'age' scaled:
   age_scaled
0   -0.719151
1   -0.352237
2    0.528356
3   -0.939299
4    1.482332


**Important 1: StandardScaler on a single column (uses the population standard deviation, ddof=0)**

In [33]:
# Rebuild the two-column example frame so this cell does not depend on
# whatever `df` currently holds.
data = dict(
    age=[25, 22, 34, 55, 39],
    salary=[28000, 32000, 18000, 29000, 26000],
)
df = pd.DataFrame(data)

# Double brackets keep 'age' as a one-column DataFrame (2-D) rather than a
# Series (1-D); scikit-learn scalers expect 2-D input.
age_df = df[['age']]

# Learn the mean / standard deviation of the column, then standardize it.
scaler = StandardScaler().fit(age_df)
scaled_age = scaler.transform(age_df)

# The original column is printed as a pandas Series. The scaled result is a
# 2-D NumPy array with one column (not a DataFrame), hence the nested
# brackets in the output.
print("Original Age:", df['age'], sep="\n")
print("\nScaled Age:", scaled_age, sep="\n")

Original Age:
0    25
1    22
2    34
3    55
4    39
Name: age, dtype: int64

Scaled Age:
[[-0.85373472]
 [-1.10985514]
 [-0.08537347]
 [ 1.70746944]
 [ 0.34149389]]


**Important 2: Manual scaling with pandas (uses the sample standard deviation, ddof=1)**

In [37]:
# Single-column example frame holding the same ages as the StandardScaler demo.
data = dict(age=[25, 22, 34, 55, 39])
df = pd.DataFrame(data)

# Summary statistics for the z-score. pandas' .std() defaults to ddof=1
# (sample standard deviation), so the result differs from StandardScaler,
# which uses ddof=0.
mean, std = df['age'].mean(), df['age'].std()

# z = (x - mean) / std, written with the method forms of the operators.
# The result is a Series that keeps the name 'age'.
df_age_scaled = df['age'].sub(mean).div(std)

# Show the input frame followed by the standardized Series.
print("Original DataFrame:", df, sep="\n")
print("\nScaled Age", df_age_scaled, sep="\n")

Original DataFrame:
   age
0   25
1   22
2   34
3   55
4   39

Scaled Age
0   -0.763604
1   -0.992685
2   -0.076360
3    1.527207
4    0.305441
Name: age, dtype: float64
