In [1]:
# Feature Scaling & Encoding

# Objective: Learn to scale numerical features and encode categorical features for better model performance.
# Instructions:
# For each example, perform the following steps:
#     1. Load the Dataset: Load the dataset into your environment.
#     2. Feature Scaling: Apply scaling methods (StandardScaler or MinMaxScaler) to specified numerical columns.
#     3. Feature Encoding: Apply encoding methods (One-Hot Encoding or Label Encoding) to specified categorical columns.
#     4. Verify Changes: Check the data to ensure proper scaling and encoding. 


# Task:
#     Dataset: customer_data.csv (obtain or create it yourself; it must include the columns Age, Annual_Income, and Region)
#     Columns to scale: Age , Annual_Income
#     Column to encode: Region
#     Steps:
#         1. Load customer_data.csv.
#         2. Use MinMaxScaler on Age and Annual_Income.
#         3. Perform One-Hot Encoding on Region.
#         4. Verify by assessing the transformed dataset.


import pandas as pd
from sklearn.preprocessing import MinMaxScaler

# Names shared by the steps below, kept in one place so the file name and
# the column selections cannot drift apart.
CSV_PATH = "customer_data.csv"
NUMERIC_COLUMNS = ["Age", "Annual_Income"]
CATEGORICAL_COLUMNS = ["Region"]

# --- Build a small sample dataset that stands in for customer_data.csv ---
data = dict(
    Age=[25, 35, 45, 32, 23, 52],
    Annual_Income=[50000, 60000, 80000, 62000, 45000, 90000],
    Region=["North", "South", "East", "West", "East", "North"],
)

# Write the sample to disk so the load step reads a real CSV file.
pd.DataFrame(data).to_csv(CSV_PATH, index=False)

# --- Load the dataset and show it before any transformation ---
df = pd.read_csv(CSV_PATH)
print("Original Data:", df, sep="\n")

# --- Min-max scale the numeric columns ---
# The scaler is fitted on the same rows it then transforms; the scaled
# values replace the original numeric columns in df.
scaler = MinMaxScaler()
scaler.fit(df[NUMERIC_COLUMNS])
df[NUMERIC_COLUMNS] = scaler.transform(df[NUMERIC_COLUMNS])

# --- One-hot encode the categorical column ---
# get_dummies returns a new frame with one "Region_<value>" column per
# distinct value; df itself keeps the original Region column.
df_encoded = pd.get_dummies(df, columns=CATEGORICAL_COLUMNS)

# --- Verify by inspecting the transformed frame ---
print("\nTransformed Data (After Scaling and Encoding):", df_encoded, sep="\n")
    
    
    

Original Data:
   Age  Annual_Income Region
0   25          50000  North
1   35          60000  South
2   45          80000   East
3   32          62000   West
4   23          45000   East
5   52          90000  North

Transformed Data (After Scaling and Encoding):
        Age  Annual_Income  Region_East  Region_North  Region_South  \
0  0.068966       0.111111        False          True         False   
1  0.413793       0.333333        False         False          True   
2  0.758621       0.777778         True         False         False   
3  0.310345       0.377778        False         False         False   
4  0.000000       0.000000         True         False         False   
5  1.000000       1.000000        False          True         False   

   Region_West  
0        False  
1        False  
2        False  
3         True  
4        False  
5        False  
