# Data Analysis

**Part C:** **One-Hot Encoding For Categorical Variables**

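Convert the categorical variables `Fuel_Type` and `Transmission` to numerical form using one-hot encoding. The first category of each variable is dropped, so it is represented by all of that variable's indicator columns being `False`.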

In [30]:
import pandas as pd

# Location of the cleaned CSV; change this one constant if the file lives elsewhere.
CLEAN_DATA_PATH = "/content/clean_data.csv"

# Use the first CSV column as the DataFrame index.
data = pd.read_csv(CLEAN_DATA_PATH, index_col=0)

# One-hot encode 'Fuel_Type' and 'Transmission'. drop_first=True omits the first
# category of each variable, so k categories become k-1 indicator columns.
data = pd.get_dummies(data, columns=['Fuel_Type', 'Transmission'], drop_first=True)
print(data.head(20))

                                                Name    Location  Year  \
1                   Hyundai Creta 1.6 CRDi SX Option        Pune  2015   
2                                       Honda Jazz V     Chennai  2011   
3                                  Maruti Ertiga VDI     Chennai  2012   
4                    Audi A4 New 2.0 TDI Multitronic  Coimbatore  2013   
6                             Nissan Micra Diesel XV      Jaipur  2013   
7                  Toyota Innova Crysta 2.8 GX AT 8S      Mumbai  2016   
8                Volkswagen Vento Diesel Comfortline        Pune  2013   
9                     Tata Indica Vista Quadrajet LS     Chennai  2012   
10                                  Maruti Ciaz Zeta       Kochi  2018   
11                       Honda City 1.5 V AT Sunroof     Kolkata  2012   
12                             Maruti Swift VDI BSIV      Jaipur  2015   
13                  Land Rover Range Rover 2.2L Pure       Delhi  2014   
14                    Land Rover Freel

**Part D:** Adding a new feature Car_Age

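Calculate the current age of each car by subtracting its `Year` from the current year. Because the current year is read from the system clock, the values depend on when the notebook is run.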

In [31]:
from datetime import datetime

# Car_Age = (calendar year at execution time) - (model 'Year').
# NOTE: the result changes with the year in which this cell is run.
current_year = datetime.now().year
data['Car_Age'] = data['Year'].rsub(current_year)
data.loc[:, ['Year', 'Car_Age']].head()

Unnamed: 0,Year,Car_Age
1,2015,9
2,2011,13
3,2012,12
4,2013,11
6,2013,11


**Part E:** Data Manipulation and Operations (Select, Filter, Rename, Mutate, Arrange, Summarize)

In [32]:
# Select: keep only the listed columns, for every row
selected_data = data.loc[:, ['Name', 'Location', 'Car_Age', 'Price']]
selected_data.head()


Unnamed: 0,Name,Location,Car_Age,Price
1,Hyundai Creta 1.6 CRDi SX Option,Pune,9,12.5
2,Honda Jazz V,Chennai,13,4.5
3,Maruti Ertiga VDI,Chennai,12,6.0
4,Audi A4 New 2.0 TDI Multitronic,Coimbatore,11,17.74
6,Nissan Micra Diesel XV,Jaipur,11,3.5


In [33]:
# Filter: keep the rows whose 'Price' exceeds 10 (in lakhs)
filtered_data = data.loc[data['Price'] > 10]
filtered_data.head()

Unnamed: 0,Name,Location,Year,Kilometers_Driven,Owner_Type,Mileage,Engine,Power,Seats,Price,Fuel_Type_Electric,Fuel_Type_Petrol,Transmission_Manual,Car_Age
1,Hyundai Creta 1.6 CRDi SX Option,Pune,2015,41000,First,19.67,1582.0,126.2,5.0,12.5,False,False,True,9
4,Audi A4 New 2.0 TDI Multitronic,Coimbatore,2013,40670,Second,15.2,1968.0,140.8,5.0,17.74,False,False,False,11
7,Toyota Innova Crysta 2.8 GX AT 8S,Mumbai,2016,36000,First,11.36,2755.0,171.5,8.0,17.5,False,False,False,8
13,Land Rover Range Rover 2.2L Pure,Delhi,2014,72000,First,12.7,2179.0,187.7,5.0,27.0,False,False,False,10
14,Land Rover Freelander 2 TD4 SE,Pune,2012,85000,Second,0.0,2179.0,115.0,5.0,17.5,False,False,False,12


In [34]:
# Rename column 'Power' to 'Horse_Power'.
# The renamed frame is rebound to `data` rather than mutated with inplace=True,
# so the step is an ordinary expression; re-running the cell is harmless because
# rename() ignores labels that are no longer present.
data = data.rename(columns={'Power': 'Horse_Power'})
data.head()

Unnamed: 0,Name,Location,Year,Kilometers_Driven,Owner_Type,Mileage,Engine,Horse_Power,Seats,Price,Fuel_Type_Electric,Fuel_Type_Petrol,Transmission_Manual,Car_Age
1,Hyundai Creta 1.6 CRDi SX Option,Pune,2015,41000,First,19.67,1582.0,126.2,5.0,12.5,False,False,True,9
2,Honda Jazz V,Chennai,2011,46000,First,13.0,1199.0,88.7,5.0,4.5,False,True,True,13
3,Maruti Ertiga VDI,Chennai,2012,87000,First,20.77,1248.0,88.76,7.0,6.0,False,False,True,12
4,Audi A4 New 2.0 TDI Multitronic,Coimbatore,2013,40670,Second,15.2,1968.0,140.8,5.0,17.74,False,False,False,11
6,Nissan Micra Diesel XV,Jaipur,2013,86999,First,23.08,1461.0,63.1,5.0,3.5,False,False,True,11


In [36]:
# Arrange: order the rows from the highest 'Price' to the lowest
arranged_data = data.sort_values(
    by='Price',
    ascending=False,
)
arranged_data.head()

Unnamed: 0,Name,Location,Year,Kilometers_Driven,Owner_Type,Mileage,Engine,Horse_Power,Seats,Price,Fuel_Type_Electric,Fuel_Type_Petrol,Transmission_Manual,Car_Age
4079,Land Rover Range Rover 3.0 Diesel LWB Vogue,Hyderabad,2017,25000,First,13.33,2993.0,255.0,5.0,160.0,False,False,False,7
5781,Lamborghini Gallardo Coupe,Delhi,2011,6500,Third,6.4,5204.0,560.0,2.0,120.0,False,True,False,13
5919,Jaguar F Type 5.0 V8 S,Hyderabad,2015,8000,First,12.5,5000.0,488.1,2.0,100.0,False,True,False,9
1505,Land Rover Range Rover Sport SE,Kochi,2019,26013,First,12.65,2993.0,255.0,5.0,97.07,False,False,False,5
1974,BMW 7 Series 740Li,Coimbatore,2018,28060,First,12.05,2979.0,320.0,5.0,93.67,False,True,False,6


In [37]:
# Summarize: mean 'Price' per 'Location' and per 'Name' via groupby.
# reset_index() turns the group key back into an ordinary column.
average_price_by_location = data.groupby('Location')['Price'].agg('mean').reset_index()
average_price_by_name = data.groupby('Name')['Price'].agg('mean').reset_index()
print(average_price_by_name.head(20))

                                             Name      Price
0                  Ambassador Classic Nova Diesel   1.350000
1                       Audi A3 35 TDI Attraction  16.500000
2                          Audi A3 35 TDI Premium  19.250000
3                     Audi A3 35 TDI Premium Plus  18.900000
4                       Audi A3 35 TDI Technology  22.500000
5                                Audi A4 1.8 TFSI   9.000000
6             Audi A4 1.8 TFSI Technology Edition  17.500000
7                                 Audi A4 2.0 TDI  15.175882
8            Audi A4 2.0 TDI 177 Bhp Premium Plus  20.030000
9      Audi A4 2.0 TDI 177 Bhp Technology Edition  25.500000
10            Audi A4 2.0 TDI Celebration Edition   9.750000
11                    Audi A4 2.0 TDI Multitronic  14.773000
12  Audi A4 2.0 TDI Premium Sport Limited Edition  24.630000
13                               Audi A4 2.0 TFSI  12.000000
14                        Audi A4 3.0 TDI Quattro  24.760000
15                Audi A

In [28]:
# Show up to the first 20 per-location averages.
# NOTE(review): the saved output of this cell carries an earlier execution count
# and a 'Car_Price' column, which the current groupby on 'Price' would not
# produce. Re-run the notebook top to bottom to refresh it.
average_price_by_location.head(n=20)

Unnamed: 0,Location,Car_Price
0,Ahmedabad,8.567248
1,Bangalore,13.48267
2,Chennai,7.95834
3,Coimbatore,15.160206
4,Delhi,9.881944
5,Hyderabad,9.997423
6,Jaipur,5.916725
7,Kochi,11.309109
8,Kolkata,5.733924
9,Mumbai,9.592546
