# Normalization with MinMaxScaler

In [9]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

# Toy dataset: three numeric features measured on different scales.
data = {
    "age": [25, 20, 35, 40, 45],
    "height": [150, 160, 170, 180, 190],
    "weight": [50, 60, 70, 80, 90],
}
df = pd.DataFrame(data)
print("Original dataframe", df, sep="\n")

# Min-max scaling rescales every column independently using that column's
# own extremes: x' = (x - min) / (max - min), so with the default settings
# each feature ends up spanning 0 to 1.
scaler = MinMaxScaler().fit(df)
normalized_data = scaler.transform(df)

# The transformed values are wrapped in a new DataFrame so the original
# column labels are re-attached for display.
normalized_df = pd.DataFrame(normalized_data, columns=df.columns)
print("Normalized dataframe", normalized_df, sep="\n")

Original dataframe
   age  height  weight
0   25     150      50
1   20     160      60
2   35     170      70
3   40     180      80
4   45     190      90
Normalized dataframe
   age  height  weight
0  0.2    0.00    0.00
1  0.0    0.25    0.25
2  0.6    0.50    0.50
3  0.8    0.75    0.75
4  1.0    1.00    1.00


# Standardization with StandardScaler

In [7]:
import pandas as pd
from sklearn.preprocessing import StandardScaler

# Toy dataset: three numeric features measured on different scales.
data = {
    "age": [25, 20, 35, 40, 45],
    "height": [150, 160, 170, 180, 190],
    "weight": [50, 60, 70, 80, 90],
}
df = pd.DataFrame(data)
print("Original dataframe", df, sep="\n")

# Standardization (z-scoring) centres every column on its mean and divides
# by its standard deviation: z = (x - mean) / std. Each column is handled
# independently of the others.
scaler = StandardScaler().fit(df)
standardized_data = scaler.transform(df)

# The transformed values are wrapped in a new DataFrame so the original
# column labels are re-attached for display.
standardized_df = pd.DataFrame(standardized_data, columns=df.columns)
print("Standardized dataframe", standardized_df, sep="\n")

Original dataframe
   age  height  weight
0   25     150      50
1   20     160      60
2   35     170      70
3   40     180      80
4   45     190      90
Standardized dataframe
        age    height    weight
0 -0.862662 -1.414214 -1.414214
1 -1.401826 -0.707107 -0.707107
2  0.215666  0.000000  0.000000
3  0.754829  0.707107  0.707107
4  1.293993  1.414214  1.414214


# One-Hot Encoding

In [20]:
import pandas as pd
from sklearn.preprocessing import OneHotEncoder

# Sample data: one identifier column plus three categorical columns.
data = {
    "customer_id": [1, 2, 3, 4],
    "gender": ["Male", "Female", "Female", "Male"],
    "city": ["Hyderabad", "Pune", "Bangalore", "Mumbai"],
    "fruits": ["Apple", "Orange", "Kiwi", "Banana"]
}
df = pd.DataFrame(data)
print("Original DataFrame:")
print(df)

# Columns to expand into indicator columns; customer_id is left untouched.
columns_to_encode = ["gender", "city", "fruits"]

# sparse_output=False asks for a dense array instead of a sparse matrix,
# so the result can be wrapped in a DataFrame directly.
one_hot_encoder = OneHotEncoder(sparse_output=False)
encoded_data = one_hot_encoder.fit_transform(df[columns_to_encode])

# Generated column names follow the "<column>_<category>" pattern.
encoded_columns = one_hot_encoder.get_feature_names_out(columns_to_encode)

# Reuse df's index for the encoded frame. pd.concat(axis=1) aligns rows by
# index label, so a fresh 0..n-1 index would misalign (and NaN-pad) the
# result whenever df has been filtered, sorted, or otherwise re-indexed.
encoded_df = pd.DataFrame(encoded_data, columns=encoded_columns, index=df.index)

# Replace the raw categorical columns with their encoded counterparts.
final_df = pd.concat([df.drop(columns=columns_to_encode), encoded_df], axis=1)
print("\nOne-Hot Encoded DataFrame with sklearn:")
print(final_df)

Original DataFrame:
   customer_id  gender       city  fruits
0            1    Male  Hyderabad   Apple
1            2  Female       Pune  Orange
2            3  Female  Bangalore    Kiwi
3            4    Male     Mumbai  Banana

One-Hot Encoded DataFrame with sklearn:
   customer_id  gender_Female  gender_Male  city_Bangalore  city_Hyderabad  \
0            1            0.0          1.0             0.0             1.0   
1            2            1.0          0.0             0.0             0.0   
2            3            1.0          0.0             1.0             0.0   
3            4            0.0          1.0             0.0             0.0   

   city_Mumbai  city_Pune  fruits_Apple  fruits_Banana  fruits_Kiwi  \
0          0.0        0.0           1.0            0.0          0.0   
1          0.0        1.0           0.0            0.0          0.0   
2          0.0        0.0           0.0            0.0          1.0   
3          1.0        0.0           0.0            1.0          0.0   

   fruits_Orange  
0            0.0  
1            1.0  
2            0.0  
3            0.0  

# Label Encoding

In [22]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder

# Sample data: one identifier column plus three categorical columns.
data = {
    "customer_id": [1, 2, 3, 4],
    "gender": ["Male", "Female", "Female", "Male"],
    "city": ["Hyderabad", "Pune", "Bangalore", "Mumbai"],
    "fruits": ["Apple", "Orange", "Kiwi", "Banana"]
}
df = pd.DataFrame(data)
print("Original DataFrame:")
print(df)

# Fit a separate LabelEncoder per column and keep each one. Refitting a
# single shared encoder overwrites its learned classes on every call, so
# only the last column's mapping would remain available for
# inverse_transform().
# NOTE(review): scikit-learn documents LabelEncoder as a tool for target
# labels; OrdinalEncoder is the feature-oriented counterpart if these
# columns are meant to be model inputs.
label_encoders = {}
for column in ["gender", "city", "fruits"]:
    label_encoder = LabelEncoder()
    # Integer codes are written to a new "<column>_encoded" column.
    df[f"{column}_encoded"] = label_encoder.fit_transform(df[column])
    label_encoders[column] = label_encoder
# After the loop `label_encoder` still refers to the encoder fitted on
# 'fruits', the same state the name had when one encoder was reused.

print("\nDataFrame with Label Encoded columns:")
print(df)



Original DataFrame:
   customer_id  gender       city  fruits
0            1    Male  Hyderabad   Apple
1            2  Female       Pune  Orange
2            3  Female  Bangalore    Kiwi
3            4    Male     Mumbai  Banana

DataFrame with Label Encoded columns:
   customer_id  gender       city  fruits  gender_encoded  city_encoded  \
0            1    Male  Hyderabad   Apple               1             1   
1            2  Female       Pune  Orange               0             3   
2            3  Female  Bangalore    Kiwi               0             0   
3            4    Male     Mumbai  Banana               1             2   

   fruits_encoded  
0               0  
1               3  
2               2  
3               1  
