In [5]:
# Feature Scaling & Encoding

# Objective: Learn to scale numerical features and encode categorical features for better model performance.
# Instructions:
# For each example, perform the following steps:
#     1. Load the Dataset: Load the dataset into your environment.
#     2. Feature Scaling: Apply scaling methods (StandardScaler or MinMaxScaler) to specified numerical columns.
#     3. Feature Encoding: Apply encoding methods (One-Hot Encoding or Label Encoding) to specified categorical columns.
#     4. Verify Changes: Check the data to ensure proper scaling and encoding. 


# Task:
#   Dataset: car_features.csv (obtain or create it yourself; it must include the columns Mileage, Horsepower, and Fuel_Type)
    # Columns to scale: Mileage, Horsepower
    # Column to encode: Fuel_Type
    # Steps:
    #     1. Load car_features.csv.
    #     2. Scale Mileage and Horsepower using StandardScaler.
    #     3. Encode Fuel_Type using Label Encoding.
    #     4. Confirm scaling and encoding by checking these columns.



    
    
    

In [6]:
import pandas as pd
from sklearn.preprocessing import StandardScaler, LabelEncoder

# Simulated sample data standing in for car_features.csv.
# NOTE(review): once the real file is available, this literal can presumably be
# swapped for pd.read_csv("car_features.csv"); the steps below only rely on the
# Mileage, Horsepower and Fuel_Type columns.
data = pd.DataFrame({
    'Mileage': [15000, 30000, 22000, 40000, 18000],
    'Horsepower': [130, 150, 120, 170, 140],
    'Fuel_Type': ['Petrol', 'Diesel', 'Petrol', 'Diesel', 'Petrol']
})

# Keep an untouched copy: `data` is overwritten column-by-column below, and the
# Step 4 checks need the original values to compare against.
raw_data = data.copy()

print("Original Data:")
print(data)

# Step 2: Scale Mileage and Horsepower using StandardScaler
numeric_cols = ['Mileage', 'Horsepower']
scaler = StandardScaler()
data[numeric_cols] = scaler.fit_transform(data[numeric_cols])

print("\nData after Standard Scaling:")
print(data[numeric_cols])

# Step 3: Encode Fuel_Type using Label Encoding
# LabelEncoder assigns integer codes following the sorted order of the labels,
# so here 'Diesel' -> 0 and 'Petrol' -> 1 (see label_encoder.classes_).
label_encoder = LabelEncoder()
data['Fuel_Type'] = label_encoder.fit_transform(data['Fuel_Type'])

print("\nData after Label Encoding Fuel_Type:")
print(data[['Fuel_Type']])

# Step 4: Confirm by checking the columns
print("\nSummary statistics after scaling:")
print(data[numeric_cols].describe())

# describe() reports the *sample* standard deviation (ddof=1), which for n=5 is
# sqrt(5/4), about 1.118, rather than 1. StandardScaler normalises with the
# *population* standard deviation (ddof=0), so that is the quantity to verify.
scaled = data[numeric_cols]
assert (scaled.mean().abs() < 1e-9).all(), \
    "scaled columns should have (numerically) zero mean"
assert ((scaled.std(ddof=0) - 1).abs() < 1e-9).all(), \
    "scaled columns should have unit population standard deviation"

print("\nUnique values in Fuel_Type after encoding:")
print(data['Fuel_Type'].unique())

# The encoding must be lossless: decoding the integer codes has to reproduce
# the original Fuel_Type labels exactly.
decoded_fuel = list(label_encoder.inverse_transform(data['Fuel_Type']))
assert decoded_fuel == raw_data['Fuel_Type'].tolist(), \
    "label encoding should round-trip to the original Fuel_Type values"


Original Data:
   Mileage  Horsepower Fuel_Type
0    15000         130    Petrol
1    30000         150    Diesel
2    22000         120    Petrol
3    40000         170    Diesel
4    18000         140    Petrol

Data after Standard Scaling:
    Mileage  Horsepower
0 -1.107019   -0.697486
1  0.553509    0.464991
2 -0.332106   -1.278724
3  1.660528    1.627467
4 -0.774913   -0.116248

Data after Label Encoding Fuel_Type:
   Fuel_Type
0          1
1          0
2          1
3          0
4          1

Summary statistics after scaling:
            Mileage    Horsepower
count  5.000000e+00  5.000000e+00
mean  -2.220446e-17  1.942890e-17
std    1.118034e+00  1.118034e+00
min   -1.107019e+00 -1.278724e+00
25%   -7.749130e-01 -6.974858e-01
50%   -3.321056e-01 -1.162476e-01
75%    5.535093e-01  4.649906e-01
max    1.660528e+00  1.627467e+00

Unique values in Fuel_Type after encoding:
[1 0]
