In [4]:
import pandas as pd
from sklearn.preprocessing import StandardScaler, OneHotEncoder

# Sample DataFrame: two numerical and two categorical columns, four rows
data = dict(
    Age=[25, 32, 47, 51],
    Salary=[50000, 60000, 80000, 90000],
    Gender=['Male', 'Female', 'Female', 'Male'],
    City=['New York', 'Paris', 'London', 'New York'],
)

# Build the frame column-wise from the mapping and show the raw input.
df = pd.DataFrame(data)
print("🚀 Original Data:\n", df)

# Function to standardize numerical data
def standardize_data(df, columns):
    """Return a copy of *df* with the given columns standardized.

    The listed columns are rescaled with a freshly fitted scikit-learn
    ``StandardScaler``; every other column is carried over unchanged.
    The input DataFrame is never modified.

    Args:
        df: Source DataFrame.
        columns: List of column labels to standardize. May be empty, in
            which case an unmodified copy of *df* is returned.

    Returns:
        A new DataFrame with the same index and column order as *df*.
    """
    df_copy = df.copy()
    # scikit-learn estimators raise ValueError on input with zero feature
    # columns, so treat "nothing to scale" as a no-op instead of crashing.
    # len() is used rather than truthiness so array-like label
    # collections (e.g. a pandas Index) are also accepted.
    if len(columns) == 0:
        return df_copy
    scaler = StandardScaler()
    df_copy[columns] = scaler.fit_transform(df_copy[columns])
    return df_copy

# Function to one-hot encode categorical data
def encode_categorical(df, columns):
    """Return *df* with the given categorical columns one-hot encoded.

    The listed columns are removed and replaced by indicator columns
    produced by scikit-learn's ``OneHotEncoder`` (named via
    ``get_feature_names_out``). The encoder is created with
    ``drop='first'``, so the first category of each column gets no
    indicator column of its own. The input DataFrame is never modified.

    Note: the result always carries a fresh 0..n-1 index; the original
    row labels of *df* are discarded so that the encoded columns line up
    positionally with the remaining ones.

    Args:
        df: Source DataFrame.
        columns: List of categorical column labels to encode. May be
            empty, in which case *df* is returned with only its index
            reset.

    Returns:
        A new DataFrame: the non-encoded columns followed by the
        indicator columns.
    """
    # scikit-learn estimators raise ValueError on input with zero feature
    # columns; with nothing to encode, just honour the reset-index
    # contract of the normal path.
    if len(columns) == 0:
        return df.reset_index(drop=True)

    encoder = OneHotEncoder(sparse_output=False, drop='first')  # Use sparse_output for sklearn ≥ 1.2
    encoded = encoder.fit_transform(df[columns])
    encoded_df = pd.DataFrame(encoded, columns=encoder.get_feature_names_out(columns))

    # encoded_df has a default RangeIndex, so the remaining columns must be
    # re-indexed the same way before a column-wise concat.
    remaining = df.drop(columns=columns).reset_index(drop=True)
    return pd.concat([remaining, encoded_df], axis=1)

# Column groups consumed by the two preprocessing steps below
numerical_columns = ['Age', 'Salary']
categorical_columns = ['Gender', 'City']

# Step 1: Standardize numerical columns
df_standardized = standardize_data(df, numerical_columns)
print("\n✅ After Standardizing Numerical Columns:\n", df_standardized)

# Step 2: Encode categorical columns (applied to the already-scaled frame)
df_encoded = encode_categorical(df_standardized, categorical_columns)
print("\n🎯 Final DataFrame After Encoding Categorical Columns:\n", df_encoded)


🚀 Original Data:
    Age  Salary  Gender      City
0   25   50000    Male  New York
1   32   60000  Female     Paris
2   47   80000  Female    London
3   51   90000    Male  New York

✅ After Standardizing Numerical Columns:
         Age    Salary  Gender      City
0 -1.292419 -1.264911    Male  New York
1 -0.634460 -0.632456  Female     Paris
2  0.775452  0.632456  Female    London
3  1.151428  1.264911    Male  New York

🎯 Final DataFrame After Encoding Categorical Columns:
         Age    Salary  Gender_Male  City_New York  City_Paris
0 -1.292419 -1.264911          1.0            1.0         0.0
1 -0.634460 -0.632456          0.0            0.0         1.0
2  0.775452  0.632456          0.0            0.0         0.0
3  1.151428  1.264911          1.0            1.0         0.0
