In [None]:
# Feature Scaling & Encoding

# Objective: Learn to scale numerical features and encode categorical features for better model performance.
# Instructions:
# For each example, perform the following steps:
#     1. Load the Dataset: Load the dataset into your environment.
#     2. Feature Scaling: Apply scaling methods (StandardScaler or MinMaxScaler) to specified numerical columns.
#     3. Feature Encoding: Apply encoding methods (One-Hot Encoding or Label Encoding) to specified categorical columns.
#     4. Verify Changes: Check the data to ensure proper scaling and encoding. 
# Task:
#     Dataset: customer_data.csv (obtain or create it yourself; it must include the columns Age, Annual_Income and Region)
#     Columns to scale: Age, Annual_Income
#     Column to encode: Region
#     Steps:
#         1. Load customer_data.csv.
#         2. Use MinMaxScaler on Age and Annual_Income.
#         3. Perform One-Hot Encoding on Region.
#         4. Verify by assessing the transformed dataset.
import pandas as pd
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder
import io
# Step 1: load the data. An inline CSV stands in for customer_data.csv so the
# cell runs without an external file.
sample_data = """
Age,Annual_Income,Region
25,50000,North
30,75000,South
45,120000,East
38,90000,West
52,150000,North
"""
customer_data = pd.read_csv(io.StringIO(sample_data))

# Step 2: rescale the numeric columns with MinMaxScaler. The scaled values
# overwrite the original columns in customer_data.
scaler = MinMaxScaler()
numerical_cols = ['Age', 'Annual_Income']
customer_data[numerical_cols] = scaler.fit_transform(customer_data[numerical_cols])

# Step 3: one-hot encode Region. drop='first' omits the indicator column for
# the first category, so rows of that category are encoded as all zeros.
encoder = OneHotEncoder(sparse_output=False, drop='first')
encoded_region = encoder.fit_transform(customer_data[['Region']])
encoded_region_names = encoder.get_feature_names_out(['Region'])
# Reuse the source frame's index: pd.concat(axis=1) aligns rows by index label,
# so a fresh 0..n-1 index would misalign rows (and introduce NaNs) whenever
# customer_data does not carry the default RangeIndex.
encoded_region_df = pd.DataFrame(
    encoded_region,
    columns=encoded_region_names,
    index=customer_data.index,
)
customer_data_encoded = pd.concat([customer_data.drop('Region', axis=1), encoded_region_df], axis=1)

# Step 4: verify by inspecting the transformed frame and its summary statistics.
print("Transformed Data:")
print(customer_data_encoded)
print("\nData Summary:")
print(customer_data_encoded.describe())

Transformed Data:
        Age  Annual_Income  Region_North  Region_South  Region_West
0  0.000000           0.00           1.0           0.0          0.0
1  0.185185           0.25           0.0           1.0          0.0
2  0.740741           0.70           0.0           0.0          0.0
3  0.481481           0.40           0.0           0.0          1.0
4  1.000000           1.00           1.0           0.0          0.0

Data Summary:
            Age  Annual_Income  Region_North  Region_South  Region_West
count  5.000000       5.000000      5.000000      5.000000     5.000000
mean   0.481481       0.470000      0.400000      0.200000     0.200000
std    0.404874       0.389872      0.547723      0.447214     0.447214
min    0.000000       0.000000      0.000000      0.000000     0.000000
25%    0.185185       0.250000      0.000000      0.000000     0.000000
50%    0.481481       0.400000      0.000000      0.000000     0.000000
75%    0.740741       0.700000      1.000000      0.000