In [10]:
import pandas as pd
from sklearn.preprocessing import StandardScaler, MinMaxScaler, OneHotEncoder

### Step 1: Create a sample dataset
- 'feature1' and 'feature2' are numerical features
- 'category' is a categorical feature

In [14]:
# Toy dataset in column-oriented form: two numerical columns and one
# categorical column, four rows each.
data = dict(
    feature1=[10, 20, 30, 40],                 # numerical (integers)
    feature2=[1.5, 2.5, 3.5, 4.5],             # numerical (floats)
    category=['Red', 'Green', 'Blue', 'Red'],  # categorical (string labels)
)

In [16]:
# Build the working DataFrame: each key of `data` becomes a column.
df = pd.DataFrame(data=data)
df  # bare last expression so the notebook renders the table

Unnamed: 0,feature1,feature2,category
0,10,1.5,Red
1,20,2.5,Green
2,30,3.5,Blue
3,40,4.5,Red


### Step 2: Feature scaling using Min-Max Scaler
### Min-Max scaling linearly rescales each numerical feature into a fixed range (0 to 1 by default)

In [17]:
# Create the scaler; (0, 1) is MinMaxScaler's default target range, spelled out for clarity.
min_max_scaler = MinMaxScaler(feature_range=(0, 1))

### Fit the scaler to numerical columns 'feature1' and 'feature2' and transform them

### Step 3: One-Hot Encoding for categorical data
### OneHotEncoder converts categorical values (like 'Red', 'Green') into binary columns

In [26]:
# Fit the scaler on the two numerical columns and overwrite them in `df`
# with their rescaled values; the 'category' column is left untouched.
numeric_columns = ['feature1', 'feature2']
df[numeric_columns] = min_max_scaler.fit_transform(df[numeric_columns])

### Fit and transform the 'category' column to one-hot encoding

In [22]:
# Create the encoder here: no other cell in the notebook defines `one_hot_encoder`,
# so without this line a fresh-kernel run fails with NameError.
one_hot_encoder = OneHotEncoder()

# Learn the distinct values of 'category' and encode each row as a 0/1 indicator vector.
# By default the encoder returns a SciPy sparse matrix; `.toarray()` turns it into a
# dense NumPy array so it can be wrapped directly in a pandas DataFrame afterwards.
category_encoded = one_hot_encoder.fit_transform(df[['category']]).toarray()


### Convert the encoded categories into a DataFrame with appropriate column names

In [23]:
# Ask the fitted encoder for the output column labels (e.g. 'category_Blue'),
# then wrap the encoded array in a DataFrame that carries those labels.
encoded_column_names = one_hot_encoder.get_feature_names_out(['category'])
category_encoded_df = pd.DataFrame(data=category_encoded, columns=encoded_column_names)

### Step 4: Combine scaled numerical features and one-hot encoded categorical features
- Drop the original 'category' column from df
- Concatenate the scaled numerical columns and the new one-hot encoded categorical columns

In [24]:
# Remove the raw text column; its one-hot encoded columns take its place.
scaled_numeric_part = df.drop(columns='category')

# Put the one-hot columns to the right of the scaled numerical columns.
# Column-wise concat matches rows by index label.
df_final = pd.concat([scaled_numeric_part, category_encoded_df], axis='columns')


### Display the final processed DataFrame

In [25]:
# Bare expression as the cell's last line so the notebook renders the combined DataFrame.
df_final

Unnamed: 0,feature1,feature2,category_Blue,category_Green,category_Red
0,0.0,0.0,0.0,0.0,1.0
1,0.333333,0.333333,0.0,1.0,0.0
2,0.666667,0.666667,1.0,0.0,0.0
3,1.0,1.0,0.0,0.0,1.0
