### Importing Libraries

In [1]:
import pandas as pd
import matplotlib.pyplot as plt 
import seaborn as sns
import numpy as np

In [2]:
# Show every column and every row when a DataFrame is displayed, instead of
# letting pandas elide the middle of wide or long frames.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)


### Import data 

In [3]:
# Read car_data_cleaned.csv (path relative to the notebook's working directory)
# into `df`, the frame every later cell operates on.
df = pd.read_csv(filepath_or_buffer='car_data_cleaned.csv')

In [4]:
# Preview up to the first 20 rows to sanity-check the load.
df.head(n=20)

Unnamed: 0,Year,Model,Mileage,Location,Price,Site Price Type,Color Scheme,History,335i_yn,xdrive_yn,is_model_yn,body_style,car_color,interior_color,accident,num_owners,State
0,2011,335is Convertible,62515,"Somerset, NJ","$25,999",High Price,"Silver exterior, Black interior","No accidents, 3 Owners, Personal use",yes,no,yes,E92,Silver,Black,No,3,NJ
1,2011,335i xDrive Coupe AWD,37739,"Easton, PA","$25,998",Excellent Price,"White exterior, Brown interior","No accidents, 2 Owners, Personal use",yes,yes,no,E92,White,Brown,No,2,PA
2,2011,335i Sedan,67840,"Stroudsburg, PA","$18,990",Great Price,"Blue exterior, Brown interior","No accidents, 5 Owners, Fleet use",yes,no,no,E90,Blue,Brown,No,5,PA
3,2010,335i xDrive Sedan AWD,128507,"Hasbrouck Heights, NJ","$9,995",Excellent Price,"Black exterior, Beige interior","No accidents, 3 Owners, Personal use",yes,yes,no,E90,Black,Beige,No,3,NJ
4,2011,335i Convertible,55578,"Easton, PA","$23,998",High Price,"Black exterior, Unknown interior","No accidents, 2 Owners, Personal use",yes,no,no,E92,Black,Unknown,No,2,PA
5,2011,335i Convertible,71447,"Stroudsburg, PA","$21,990",High Price,"Black exterior, Brown interior","No accidents, 6 Owners, Personal use",yes,no,no,E92,Black,Brown,No,6,PA
6,2010,335i Convertible,74799,"Stroudsburg, PA","$18,990",High Price,"Black exterior, Black interior","No accidents, 3 Owners, Personal use",yes,no,no,E92,Black,Black,No,3,PA
7,2011,335is Convertible,40262,"Stroudsburg, PA","$28,990",High Price,"Blue exterior, Black interior","No accidents, 3 Owners, Personal use",yes,no,yes,E92,Blue,Black,No,3,PA
8,2011,335i xDrive Coupe AWD,34908,"Jersey City, NJ","$19,998",Excellent Price,"Blue exterior, Unknown interior","No accidents, 2 Owners, Personal use",yes,yes,no,E92,Blue,Unknown,No,2,NJ
9,2008,335i Convertible,137855,"Huntingdon Valley, PA","$11,295",High Price,"Black exterior, Beige interior","No accidents, 5 Owners, Personal use",yes,no,no,E92,Black,Beige,No,5,PA


In [5]:
# Questions I want to answer:

# How much do 335is cost on average?
# How do prices compare between the E90 and the E92?
# Do interior and exterior colors influence price?

### Removing '$' and ',' from Price and ',' from Mileage for EDA

In [6]:
# Strip the currency formatting ('$' and the thousands separator) from Price so
# the column can be cast to int in a later cell.
# The vectorised .str accessor replaces the per-row lambda; casting to str first
# means the cell no longer raises AttributeError on non-string values, e.g. when
# it is re-run after Price has already been converted to int.
# regex=False matters: '$' is a regex anchor, so it must be matched literally.
df['Price'] = (
    df['Price']
    .astype(str)
    .str.replace('$', '', regex=False)
    .str.replace(',', '', regex=False)
)

In [7]:
# Remove the thousands separator from Mileage so the column can be cast to int
# in a later cell.
# The vectorised .str accessor replaces the per-row lambda; casting to str first
# means the cell no longer raises AttributeError on non-string values, e.g. when
# it is re-run after Mileage has already been converted to int.
df['Mileage'] = (
    df['Mileage']
    .astype(str)
    .str.replace(',', '', regex=False)
)

### Convert 'Mileage' and 'Price' from object to int

In [8]:
# Cast the Mileage column to integers so it can be compared numerically.
df['Mileage'] = df.Mileage.astype(dtype=int)

In [9]:
# Cast the Price column to integers so it can be averaged.
df['Price'] = df.Price.astype(dtype=int)

### Preparing variables for EDA

In [10]:
# Flag cars with more than 120,000 miles as high mileage ('yes'/'no').
df['high_mileage'] = np.where(df['Mileage'] > 120000, 'yes', 'no')

In [11]:
# Flag cars with fewer than 90,000 miles as low mileage ('yes'/'no').
# Cars between 90,000 and 120,000 miles are neither low nor high mileage.
df['low_mileage'] = np.where(df['Mileage'] < 90000, 'yes', 'no')

In [12]:
# Numeric (1/0) version of the high_mileage flag, used as a pivot index.
df['highmiles_int'] = df['high_mileage'].map({'yes': 1, 'no': 0})

In [13]:
# Numeric (1/0) version of the low_mileage flag, used as a pivot index.
df['lowmiles_int'] = df['low_mileage'].map({'yes': 1, 'no': 0})

In [14]:
# Numeric (1/0) version of xdrive_yn; any value other than 'yes'/'no' becomes NaN.
df['xdrive'] = df['xdrive_yn'].map({'yes': 1, 'no': 0})

In [15]:
# Numeric (1/0) version of is_model_yn; any value other than 'yes'/'no' becomes NaN.
df['is_model'] = df['is_model_yn'].map({'yes': 1, 'no': 0})

In [16]:
# Numeric (1/0) version of the accident column. Note the capitalised keys:
# this column uses 'Yes'/'No', unlike the lowercase *_yn columns.
df['accident_int'] = df['accident'].map({'Yes': 1, 'No': 0})

In [17]:
# Count how many listings there are for each interior colour.
# NOTE(review): the labels in the rendered output appear to carry a leading
# space -- confirm whether interior_color needs a .str.strip() upstream.
df['interior_color'].value_counts()

 Brown       120
 Black        90
 Beige        90
 Unknown      90
Name: interior_color, dtype: int64

In [18]:
# Drop three columns that are not used in the analysis below.
df = df.drop(columns=['335i_yn', 'History', 'Color Scheme'])

In [19]:
# Preview up to the first 50 rows to check the derived columns.
df.head(n=50)

Unnamed: 0,Year,Model,Mileage,Location,Price,Site Price Type,xdrive_yn,is_model_yn,body_style,car_color,interior_color,accident,num_owners,State,high_mileage,low_mileage,highmiles_int,lowmiles_int,xdrive,is_model,accident_int
0,2011,335is Convertible,62515,"Somerset, NJ",25999,High Price,no,yes,E92,Silver,Black,No,3,NJ,no,yes,0,1,0,1,0
1,2011,335i xDrive Coupe AWD,37739,"Easton, PA",25998,Excellent Price,yes,no,E92,White,Brown,No,2,PA,no,yes,0,1,1,0,0
2,2011,335i Sedan,67840,"Stroudsburg, PA",18990,Great Price,no,no,E90,Blue,Brown,No,5,PA,no,yes,0,1,0,0,0
3,2010,335i xDrive Sedan AWD,128507,"Hasbrouck Heights, NJ",9995,Excellent Price,yes,no,E90,Black,Beige,No,3,NJ,yes,no,1,0,1,0,0
4,2011,335i Convertible,55578,"Easton, PA",23998,High Price,no,no,E92,Black,Unknown,No,2,PA,no,yes,0,1,0,0,0
5,2011,335i Convertible,71447,"Stroudsburg, PA",21990,High Price,no,no,E92,Black,Brown,No,6,PA,no,yes,0,1,0,0,0
6,2010,335i Convertible,74799,"Stroudsburg, PA",18990,High Price,no,no,E92,Black,Black,No,3,PA,no,yes,0,1,0,0,0
7,2011,335is Convertible,40262,"Stroudsburg, PA",28990,High Price,no,yes,E92,Blue,Black,No,3,PA,no,yes,0,1,0,1,0
8,2011,335i xDrive Coupe AWD,34908,"Jersey City, NJ",19998,Excellent Price,yes,no,E92,Blue,Unknown,No,2,NJ,no,yes,0,1,1,0,0
9,2008,335i Convertible,137855,"Huntingdon Valley, PA",11295,High Price,no,no,E92,Black,Beige,No,5,PA,yes,no,1,0,0,0,0


### Average cost of N54 335i's
#### E90 - about \$14,200
#### E92 - about \$21,600

In [24]:
# Mean Price per body style ('mean' is pivot_table's default aggregation).
df.pivot_table(values=['Price'], index=['body_style'], aggfunc='mean')

Unnamed: 0_level_0,Price
body_style,Unnamed: 1_level_1
E90,14161.333333
E92,21623.3


### Average cost of high mileage cars

#### High mileage E90's: \$9,995

#### High mileage E92's: \$11,295

In [27]:
# Mean Price per body style, split by the high-mileage flag (1 = over 120,000 miles).
df.pivot_table(
    values=['Price'],
    index=['body_style', 'highmiles_int'],
    aggfunc='mean',
)

Unnamed: 0_level_0,Unnamed: 1_level_0,Price
body_style,highmiles_int,Unnamed: 2_level_1
E90,0,16244.5
E90,1,9995.0
E92,0,22770.888889
E92,1,11295.0


### Average cost of low mileage cars

#### Low mileage E90's: \$18,990

#### Low mileage E92's: \$22,770

In [28]:
# Mean Price per body style, split by the low-mileage flag (1 = under 90,000 miles).
df.pivot_table(
    values=['Price'],
    index=['body_style', 'lowmiles_int'],
    aggfunc='mean',
)

Unnamed: 0_level_0,Unnamed: 1_level_0,Price
body_style,lowmiles_int,Unnamed: 2_level_1
E90,0,11747.0
E90,1,18990.0
E92,0,11295.0
E92,1,22770.888889


### All-wheel-drive (xDrive) cars vs. non-all-wheel-drive cars

In [29]:
# Mean Price per body style, split by the xdrive flag (1 = xdrive_yn was 'yes').
df.pivot_table(
    values=['Price'],
    index=['body_style', 'xdrive'],
    aggfunc='mean',
)

Unnamed: 0_level_0,Unnamed: 1_level_0,Price
body_style,xdrive,Unnamed: 2_level_1
E90,0,16244.5
E90,1,9995.0
E92,0,21279.625
E92,1,22998.0
