# Task 1

In [47]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler

def data_preprocessing_pipeline(data, iqr_multiplier=1.5):
    """Impute, de-outlier and standardise a DataFrame, returning a new frame.

    The input frame is never modified; all work happens on a copy, so the
    function can be re-run on the same raw data and give the same result.

    Steps:

    1. Numeric columns (``float`` / ``int`` dtypes) have missing values
       replaced by the column mean.
    2. Numeric values outside ``[Q1 - k * IQR, Q3 + k * IQR]`` (with
       ``k = iqr_multiplier``) are replaced by the column mean. That mean is
       computed over the imputed column *including* the outliers, so the
       replacement value can itself lie outside the bounds.
    3. Every selected numeric column is standardised with ``StandardScaler``.
       This includes identifier-like and target columns if they are numeric;
       drop them before calling if they must keep their original scale.
    4. ``object`` columns have missing values replaced by the column's first
       mode.

    Parameters
    ----------
    data : pandas.DataFrame
        Raw input frame.
    iqr_multiplier : float, default 1.5
        Width of the outlier fence, in multiples of the inter-quartile range.

    Returns
    -------
    pandas.DataFrame
        A processed copy of ``data`` with the same columns and index.
    """
    # Work on a copy so the caller's frame stays intact.
    cleaned = data.copy()

    #Identify numeric and categorical features
    numeric_features = cleaned.select_dtypes(include=['float', 'int']).columns
    categorical_features = cleaned.select_dtypes(include=['object']).columns

    # Skip the numeric stage when there is nothing to fit: StandardScaler
    # rejects input with zero rows or zero columns.
    if len(cleaned) > 0 and len(numeric_features) > 0:
        #Handle missing values in numeric features
        numeric = cleaned[numeric_features]
        cleaned[numeric_features] = numeric.fillna(numeric.mean())

        #Detect and handle outliers in numeric features using IQR
        for feature in numeric_features:
            column = cleaned[feature]
            q1 = column.quantile(0.25)
            q3 = column.quantile(0.75)
            iqr = q3 - q1
            lower_bound = q1 - (iqr_multiplier * iqr)
            upper_bound = q3 + (iqr_multiplier * iqr)
            is_outlier = (column < lower_bound) | (column > upper_bound)
            cleaned[feature] = np.where(is_outlier, column.mean(), column)

        #Normalize numeric features (single fit + transform pass)
        cleaned[numeric_features] = StandardScaler().fit_transform(cleaned[numeric_features])

    #Handle missing values in categorical features
    if len(categorical_features) > 0:
        modes = cleaned[categorical_features].mode()
        # mode() has no rows when every value is missing; nothing to fill with.
        if not modes.empty:
            cleaned[categorical_features] = cleaned[categorical_features].fillna(modes.iloc[0])

    return cleaned

In [48]:
# Read the training set from the local CSV file, then show the loaded frame.
data = pd.read_csv(
    filepath_or_buffer="C:\\Users\\dhara\\Documents\\Python Besant\\Training_Data_Set.csv"
)
data

Unnamed: 0,Id,Maker,model,Location,Distance,Owner Type,manufacture_year,Age of car,engine_displacement,engine_power,body_type,Vroom Audit Rating,transmission,door_count,seat_count,fuel_type,Price
0,25001,skoda,octavia,Ahmedabad,,Second,1964,55,1964,147.0,compact,8,man,,,petrol,543764.25
1,25002,fiat,panda,Ahmedabad,27750.0,Third,2012,7,1242,51.0,,6,man,4.0,4.0,petrol,401819.25
2,25003,bmw,x1,Hyderabad,46000.0,Third,2014,5,1995,105.0,,7,auto,4.0,5.0,diesel,2392855.50
3,25004,nissan,juke,Mumbai,43949.0,Third,2011,8,1618,140.0,,7,man,4.0,5.0,petrol,958606.50
4,25005,bmw,x5,Jaipur,59524.0,Fourth & Above,2012,7,2993,180.0,,7,auto,4.0,5.0,diesel,3085561.50
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
53510,78511,skoda,octavia,Delhi,29334.0,Fourth & Above,2014,5,1598,77.0,,4,man,4.0,5.0,diesel,1342996.50
53511,78512,skoda,octavia,Bangalore,223631.0,Fourth & Above,2009,10,1900,77.0,,8,man,5.0,5.0,diesel,510732.75
53512,78513,bmw,x1,Pune,25500.0,Third,2015,4,1995,105.0,,4,auto,4.0,5.0,diesel,2008123.50
53513,78514,toyota,avensis,Jaipur,1195500.0,Third,2011,8,11950,93.0,compact,5,man,,,diesel,874352.25


In [49]:
# Run the preprocessing pipeline on the loaded frame and show its result.
cleaned_data = data.pipe(data_preprocessing_pipeline)
cleaned_data

Unnamed: 0,Id,Maker,model,Location,Distance,Owner Type,manufacture_year,Age of car,engine_displacement,engine_power,body_type,Vroom Audit Rating,transmission,door_count,seat_count,fuel_type,Price
0,-1.732018,skoda,octavia,Ahmedabad,0.189598,Second,-0.059750,0.059750,0.818675,1.396893,compact,1.411263,man,-1.309783,-1.625640,petrol,-0.718618
1,-1.731954,fiat,panda,Ahmedabad,-0.735467,Third,0.331815,-0.331815,-1.254184,-1.258447,compact,0.001146,man,0.763485,-1.625640,petrol,-0.941891
2,-1.731889,bmw,x1,Hyderabad,-0.482722,Third,0.823765,-0.823765,0.907676,0.235182,compact,0.706204,auto,0.763485,0.615142,diesel,2.189914
3,-1.731824,nissan,juke,Mumbai,-0.511126,Third,0.085840,-0.085840,-0.174690,1.203275,compact,0.706204,man,0.763485,0.615142,petrol,-0.066091
4,-1.731760,bmw,x5,Jaipur,-0.295427,Fourth & Above,0.331815,-0.331815,0.646556,2.309667,compact,0.706204,auto,0.763485,0.615142,diesel,0.153300
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
53510,1.731760,skoda,octavia,Delhi,-0.713530,Fourth & Above,0.823765,-0.823765,-0.232110,-0.539292,compact,-1.408970,man,0.763485,0.615142,diesel,0.538536
53511,1.731824,skoda,octavia,Bangalore,1.977297,Fourth & Above,-0.406110,0.406110,0.634931,-0.539292,compact,1.411263,man,-1.309783,0.615142,diesel,-0.770575
53512,1.731889,bmw,x1,Pune,-0.766627,Third,1.069739,-1.069739,0.907676,0.235182,compact,-1.408970,auto,0.763485,0.615142,diesel,1.584749
53513,1.731954,toyota,avensis,Jaipur,0.189598,Third,0.085840,-0.085840,0.646556,-0.096736,compact,-0.703912,man,-1.309783,-1.625640,diesel,-0.198619
