In [3]:
# Feature Scaling & Encoding

# Objective: Learn to scale numerical features and encode categorical features for better model performance.
# Instructions:
# For each example, perform the following steps:
#     1. Load the Dataset: Load the dataset into your environment.
#     2. Feature Scaling: Apply scaling methods (StandardScaler or MinMaxScaler) to specified numerical columns.
#     3. Feature Encoding: Apply encoding methods (One-Hot Encoding or Label Encoding) to specified categorical columns.
#     4. Verify Changes: Check the data to ensure proper scaling and encoding. 


# Task:
#     Dataset: customer_data.csv (obtain or create it yourself; it must include the columns Age, Annual_Income, and Region)
#     Columns to scale: Age, Annual_Income
#     Column to encode: Region
#     Steps:
#         1. Load customer_data.csv.
#         2. Use MinMaxScaler on Age and Annual_Income.
#         3. Perform One-Hot Encoding on Region.
#         4. Verify by assessing the transformed dataset.



    
    
    

In [None]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder

# Sample data standing in for customer_data.csv: one row per customer, with
# the two numeric columns to scale and the categorical column to encode.
data = {
    'Customer_ID': [1, 2, 3, 4, 5],
    'Age': [25, 40, 35, 50, 23],
    'Annual_Income': [30000, 60000, 45000, 80000, 27000],
    'Region': ['North', 'South', 'East', 'West', 'South']
}

df = pd.DataFrame(data)
print("Original Data:\n", df)

# Scale Age and Annual_Income. MinMaxScaler rescales each column independently
# so that the column's minimum maps to 0 and its maximum maps to 1.
numeric_cols = ['Age', 'Annual_Income']
scaler = MinMaxScaler()
df[numeric_cols] = scaler.fit_transform(df[numeric_cols])

# One-Hot Encode Region. sparse_output=False makes the encoder return a dense
# NumPy array (parameter name used by scikit-learn 1.2 and later), which can be
# wrapped in a DataFrame directly.
encoder = OneHotEncoder(sparse_output=False)
region_encoded = encoder.fit_transform(df[['Region']])

# Give the encoded frame the same index as df. pd.concat(axis=1) aligns rows by
# index label, so a fresh 0..n-1 index would silently misalign rows (and inject
# NaNs) as soon as df carries a filtered or custom index.
region_df = pd.DataFrame(
    region_encoded,
    columns=encoder.get_feature_names_out(['Region']),
    index=df.index,
)
df = pd.concat([df.drop('Region', axis=1), region_df], axis=1)

# Verify the transformation instead of relying on eyeballing the printout.
# A small tolerance absorbs floating-point rounding in the scaler's arithmetic.
scaled = df[numeric_cols]
assert scaled.min().min() >= -1e-9 and scaled.max().max() <= 1 + 1e-9, \
    "scaled columns must lie in [0, 1]"
assert region_df.sum(axis=1).eq(1).all(), \
    "each row must have exactly one active Region column"
assert len(df) == len(region_df) and df[region_df.columns].notna().all().all(), \
    "encoded columns must line up row-for-row with the original data"

print("\nTransformed Data:\n", df)


Original Data:
    Customer_ID  Age  Annual_Income Region
0            1   25          30000  North
1            2   40          60000  South
2            3   35          45000   East
3            4   50          80000   West
4            5   23          27000  South

Transformed Data:
    Customer_ID       Age  Annual_Income  Region_East  Region_North  \
0            1  0.074074       0.056604          0.0           1.0   
1            2  0.629630       0.622642          0.0           0.0   
2            3  0.444444       0.339623          1.0           0.0   
3            4  1.000000       1.000000          0.0           0.0   
4            5  0.000000       0.000000          0.0           0.0   

   Region_South  Region_West  
0           0.0          0.0  
1           1.0          0.0  
2           0.0          0.0  
3           0.0          1.0  
4           1.0          0.0  
