In [1]:
import pandas as pd

import plotly.graph_objects as go

In [2]:
# Single place to configure where the per-brand CSV files live; change this
# one constant instead of editing every read call below.
DATA_DIR = 'D:/Data/Car Price'

# One CSV per manufacturer, each loaded into its own DataFrame.
data_audi = pd.read_csv(f'{DATA_DIR}/audi.csv')
data_bmw = pd.read_csv(f'{DATA_DIR}/bmw.csv')
data_ford = pd.read_csv(f'{DATA_DIR}/ford.csv')
# The hyundi file names its tax column 'tax(£)'; rename it to 'tax'
# (presumably the name used by the other files -- the merged frame shown
# further down has a single 'tax' column).
data_hyundi = pd.read_csv(f'{DATA_DIR}/hyundi.csv').rename(columns={'tax(£)': 'tax'})
data_merc = pd.read_csv(f'{DATA_DIR}/merc.csv')
data_skoda = pd.read_csv(f'{DATA_DIR}/skoda.csv')
data_toyota = pd.read_csv(f'{DATA_DIR}/toyota.csv')

In [3]:
# Brand label -> loaded DataFrame. Insertion order (audi ... toyota) is the
# order in which the frames are later iterated.
all_data = dict(
    audi=data_audi,
    bmw=data_bmw,
    ford=data_ford,
    hyundi=data_hyundi,
    merc=data_merc,
    skoda=data_skoda,
    toyota=data_toyota,
)

In [4]:
# Add new feature "brand": tag every row with the key of the frame it came
# from. This writes the column into the per-brand frames themselves.
for key, dataset in all_data.items():
    dataset['brand'] = key

# Merge all dataframes into one.
# DataFrame.append was deprecated in pandas 1.4 and removed in pandas 2.0;
# a single pd.concat call is the supported replacement and avoids re-copying
# the accumulated frame on every loop iteration.
data_temp = pd.concat(list(all_data.values()), ignore_index=True)

# Shuffle final dataframe. The fixed seed keeps the row order (and therefore
# everything derived from it) identical across kernel restarts.
data = data_temp.sample(frac=1, random_state=42).reset_index(drop=True)

In [5]:
# Display the merged, shuffled frame as the cell output (the rendered table
# below elides the middle rows).
data

Unnamed: 0,model,year,price,transmission,mileage,fuelType,tax,mpg,engineSize,brand
0,C Class,2019,32899,Semi-Auto,4378,Diesel,145,55.4,2.0,merc
1,Fiesta,2013,6298,Manual,40803,Diesel,0,76.4,1.5,ford
2,Aygo,2016,7298,Manual,20605,Petrol,0,69.0,1.0,toyota
3,Kuga,2016,13100,Manual,36972,Diesel,125,60.1,2.0,ford
4,A4,2017,20250,Manual,11368,Petrol,145,51.4,1.4,audi
...,...,...,...,...,...,...,...,...,...,...
70393,5 Series,2019,32600,Semi-Auto,8260,Hybrid,140,156.9,2.0,bmw
70394,Mondeo,2016,15991,Manual,33515,Diesel,30,62.8,2.0,ford
70395,Fiesta,2017,8000,Manual,19841,Petrol,145,65.7,1.0,ford
70396,4 Series,2015,13500,Manual,44748,Diesel,145,53.3,2.0,bmw


# Single variable analysis

In [6]:
# Frequency of each distinct value in the 'year' column.
to_plot = data['year'].value_counts()

# One bar per year value, bar height = number of rows with that year.
fig = go.Figure(data=[
    go.Bar(x=to_plot.index, y=to_plot.values),
])

# Last expression returns the figure so the notebook renders it.
fig.update_layout(
    template='plotly_dark',
    title_text='Number of cars from every year',
    xaxis_title='Year',
    yaxis_title='Number of cars',
)

In [7]:
# Histogram of the raw 'mileage' column; nbinsx caps the number of bins
# plotly may choose along the x axis.
fig = go.Figure(data=[
    go.Histogram(x=data['mileage'], nbinsx=170),
])

# Last expression returns the figure so the notebook renders it.
fig.update_layout(
    template='plotly_dark',
    title_text='Distribution of mileage feature',
    xaxis_title='Mileage',
)

In [8]:
# Histogram of the raw 'price' column; nbinsx caps the number of bins
# plotly may choose along the x axis.
fig = go.Figure(data=[
    go.Histogram(x=data['price'], nbinsx=170),
])

# Last expression returns the figure so the notebook renders it.
fig.update_layout(
    template='plotly_dark',
    title_text='Distribution of price feature',
    xaxis_title='Price',
)

In [9]:
# Dark palette used for the pie slices.
night_colors = [
    'rgb(56, 75, 126)',
    'rgb(18, 36, 37)',
    'rgb(34, 53, 101)',
    'rgb(36, 55, 57)',
    'rgb(6, 4, 4)',
]

# Number of rows per transmission category.
to_plot = data['transmission'].value_counts()

# Each slice is labelled with its category name and share of the total.
fig = go.Figure(data=[
    go.Pie(
        labels=to_plot.index,
        values=to_plot.values,
        textinfo='label+percent',
        marker_colors=night_colors,
    ),
])

# Last expression returns the figure so the notebook renders it.
fig.update_layout(
    template='plotly_dark',
    title_text='Transmission Pie Plot',
)

In [10]:
# Number of rows per brand.
to_plot = data['brand'].value_counts()

# Each slice is labelled with its brand name and share of the total.
fig = go.Figure(data=[
    go.Pie(
        labels=to_plot.index,
        values=to_plot.values,
        textinfo='label+percent',
    ),
])

# Last expression returns the figure so the notebook renders it.
fig.update_layout(
    template='plotly_dark',
    title_text='Brand Pie Plot',
)

In [11]:
# Number of rows per fuel type.
to_plot = data['fuelType'].value_counts()

# Horizontal bars: categories on the y axis, counts on the x axis, with the
# count also printed just outside the end of each bar.
fig = go.Figure(data=[
    go.Bar(
        orientation='h',
        x=to_plot.values,
        y=to_plot.index,
        text=to_plot.values,
        textposition='outside',
    ),
])

# Fixed width (autosize disabled); last expression returns the figure so the
# notebook renders it.
fig.update_layout(
    title_text='Fuel Type Bar Plot',
    template='plotly_dark',
    autosize=False,
    width=850,
)

# Multiple variables analysis

In [35]:
# Plot a seeded 10% random sample of the rows rather than every point.
to_plot = data.sample(frac=0.10, random_state=42)

# Price on the x axis against mileage on the y axis, markers only.
fig = go.Figure(data=[
    go.Scatter(
        x=to_plot['price'],
        y=to_plot['mileage'],
        mode='markers',
    ),
])

# Last expression returns the figure so the notebook renders it.
fig.update_layout(
    template='plotly_dark',
    title_text='Mileage x Price Scatter Plot',
    xaxis_title='Price',
    yaxis_title='Mileage',
)

In [37]:
fig = go.Figure()

# Plot a seeded 10% random sample of the rows rather than every point.
to_plot = data.sample(frac=0.10, random_state=42)

# Price on the x axis against miles per gallon on the y axis, markers only.
fig.add_trace(go.Scatter(
    x = to_plot['price'],
    y = to_plot['mpg'],
    mode = 'markers'
))

# The title names both plotted variables, in the same '<Y> x Price Scatter Plot'
# form as the mileage scatter, instead of only repeating the y-axis label.
fig.update_layout(
    title_text = 'Miles per gallon x Price Scatter Plot',
    template = 'plotly_dark',
    xaxis_title = 'Price',
    yaxis_title = 'Miles per gallon'
)