In [None]:
# Feature Scaling & Encoding

# Objective: Learn to scale numerical features and encode categorical features for better model performance.
# Instructions:
# For each example, perform the following steps:
#     1. Load the Dataset: Load the dataset into your environment.
#     2. Feature Scaling: Apply scaling methods (StandardScaler or MinMaxScaler) to specified numerical columns.
#     3. Feature Encoding: Apply encoding methods (One-Hot Encoding or Label Encoding) to specified categorical columns.
#     4. Verify Changes: Check the data to ensure proper scaling and encoding. 


# Task:
#     Dataset: customer_data.csv (obtain or create it yourself; it must include the columns Age, Annual_Income, and Region)
#     Columns to scale: Age, Annual_Income
#     Column to encode: Region
#     Steps:
#         1. Load customer_data.csv.
#         2. Use MinMaxScaler on Age and Annual_Income.
#         3. Perform One-Hot Encoding on Region.
#         4. Verify by assessing the transformed dataset.



    
    
    

In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder

# Step 1: Create sample customer_data.csv
# A seeded generator makes the synthetic customers (and therefore every scaled
# value printed below) identical on each "Restart Kernel -> Run All".
SEED = 42
rng = np.random.default_rng(SEED)

data = {
    'Age': rng.integers(20, 65, size=10),                  # upper bound is exclusive
    'Annual_Income': rng.integers(30000, 120000, size=10),  # upper bound is exclusive
    'Region': ['North', 'South', 'East', 'West', 'North', 'East', 'South', 'West', 'North', 'East']
}

df = pd.DataFrame(data)
df.to_csv('customer_data.csv', index=False)
print("✅ Created and saved 'customer_data.csv':\n")
print(df)

# Step 2: Load the dataset
df = pd.read_csv('customer_data.csv')

# Fail early with a readable message if the CSV lacks a column used below
# (relevant when the generated file is swapped for a user-supplied one).
missing_columns = {'Age', 'Annual_Income', 'Region'} - set(df.columns)
if missing_columns:
    raise ValueError(f"customer_data.csv is missing required columns: {sorted(missing_columns)}")

# Step 3: Apply MinMaxScaler to 'Age' and 'Annual_Income'
# The scaled values go into new '*_scaled' columns so the raw values stay
# available for side-by-side comparison.
scaler = MinMaxScaler()
df[['Age_scaled', 'Annual_Income_scaled']] = scaler.fit_transform(df[['Age', 'Annual_Income']])

# Step 4: Perform One-Hot Encoding on 'Region'
# drop=None keeps one indicator column per region (no reference level removed).
encoder = OneHotEncoder(sparse_output=False, drop=None)
region_encoded = encoder.fit_transform(df[['Region']])
region_columns = encoder.get_feature_names_out(['Region'])
# Reuse df's index so the column-wise concat below lines rows up by label
# rather than relying on both frames happening to share a 0..n-1 index.
df_region = pd.DataFrame(region_encoded, columns=region_columns, index=df.index)

# Combine everything
df_final = pd.concat([df, df_region], axis=1)

# Optional: to keep only the model-ready features, drop the original
# 'Age', 'Annual_Income' and 'Region' columns from df_final.

# Step 5: Verify
# Check the transformation programmatically instead of relying on eyeballing
# the printout. A small tolerance absorbs floating-point rounding in the scaler.
TOLERANCE = 1e-9
scaled_values = df_final[['Age_scaled', 'Annual_Income_scaled']].to_numpy()
assert len(df_final) == len(df), "row count changed while combining features"
assert scaled_values.min() >= -TOLERANCE and scaled_values.max() <= 1 + TOLERANCE, \
    "scaled features fall outside the [0, 1] range"
assert (df_final[list(region_columns)].sum(axis=1) == 1).all(), \
    "each row must have exactly one active Region indicator"

print("\n✅ Transformed dataset with scaled and encoded features:\n")
print(df_final)


✅ Created and saved 'customer_data.csv':

   Age  Annual_Income Region
0   32          47248  North
1   29          94128  South
2   28          33476   East
3   28          60269   West
4   62          70321  North
5   63         114755   East
6   56         104091  South
7   39         109556   West
8   49          90507  North
9   60          79032   East

✅ Transformed dataset with scaled and encoded features:

   Age  Annual_Income Region  Age_scaled  Annual_Income_scaled  Region_East  \
0   32          47248  North    0.114286              0.169441          0.0   
1   29          94128  South    0.028571              0.746220          0.0   
2   28          33476   East    0.000000              0.000000          1.0   
3   28          60269   West    0.000000              0.329642          0.0   
4   62          70321  North    0.971429              0.453315          0.0   
5   63         114755   East    1.000000              1.000000          1.0   
6   56         104091  South