In [1]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler, StandardScaler, LabelEncoder

# Preprocessing pipeline for the car-listing spreadsheet: clean, derive
# features, encode categoricals, scale numerics, and bin mpg into levels.

# Load the Excel file
df = pd.read_excel("sampledv.xlsx")

# 1. Handling Missing Values (if any): drop every row containing a NaN.
df = df.dropna()

# 2. Changing Data Types (if needed)
df['year'] = df['year'].astype(int)
df['price'] = df['price'].astype(float)

# 3. Feature Engineering: Create "price_per_mile"
# Computed from the raw (unscaled) price and mileage on purpose.
# NOTE(review): rows with mileage == 0 produce inf here -- confirm whether
# such rows can occur and how they should be treated.
df['price_per_mile'] = df['price'] / df['mileage']

# 4. Label Encoding for categorical features
# One encoder per column so each fitted mapping is kept and can be
# inverse-transformed later; refitting a single encoder would discard the
# earlier columns' classes. `le` ends up bound to the last (fuelType) encoder.
encoders = {}
for column in ('model', 'transmission', 'fuelType'):
    le = LabelEncoder()
    df[column + '_encoded'] = le.fit_transform(df[column])
    encoders[column] = le

# Bin mpg while it is still in its original units. The bin edges
# (0, 40, 60, 80) are raw mpg values; applying them after min-max scaling
# (which maps mpg into [0, 1]) would label every row 'Low'.
# Values outside (0, 80] fall in no bin and become NaN.
mpg_level = pd.cut(
    df['mpg'], bins=[0, 40, 60, 80], labels=['Low', 'Medium', 'High']
)

# 5. Normalization using MinMaxScaler for numeric features
numeric_columns = ['price', 'mileage', 'tax', 'mpg', 'engineSize']
scaler = MinMaxScaler()
df[numeric_columns] = scaler.fit_transform(df[numeric_columns])

# 6. Standardization (optional example)
std_scaler = StandardScaler()
df[['year']] = std_scaler.fit_transform(df[['year']])

# 7. Binning Example: mpg into categories
# Attach the levels computed from raw mpg above; assigning here keeps the
# column at the end of the frame.
df['mpg_level'] = mpg_level

# 8. Rename Column (optional)
df = df.rename(columns={'mpg': 'MilesPerGallon'})

# Display the transformed DataFrame
print(df.head())


  model      year     price transmission   mileage fuelType       tax  \
0    A1 -0.357473  0.114773       Manual  0.202087   Petrol  0.566038   
1    A6 -0.972871  0.245942    Automatic  0.465128   Diesel  0.075472   
2    A1 -0.972871  0.065585       Manual  0.384717   Petrol  0.113208   
3    A4 -0.357473  0.255780    Automatic  0.333389   Diesel  0.547170   
4    A3  0.873321  0.272176       Manual  0.025548   Petrol  0.547170   

   MilesPerGallon  engineSize  Make  price_per_mile  model_encoded  \
0        0.197074         0.2  audi        0.794407              7   
1        0.272806         0.5  audi        0.455763             10   
2        0.197074         0.2  audi        0.367328              7   
3        0.299484         0.5  audi        0.647349              9   
4        0.147160         0.0  audi        8.658659              8   

   transmission_encoded  fuelType_encoded mpg_level  
0                     1                 2       Low  
1                     0         