### Car Sales Advertisements 
This project discusses the prices of different manufacturers and vehicle types to give advice to car-selling companies, including how many advertisements are used. We use a bar plot to see each manufacturer's price range for different vehicle types, a histogram of car condition by model year, and a histogram that compares the price distribution between two manufacturers.


In [22]:
import pandas as pd
import numpy as np
import streamlit as st
import plotly.express as px
import seaborn as sns
import matplotlib.pyplot as plt

### Data Correction
1. Data types
2. Missing Values
3. Duplicates


In [23]:
# Load the raw listings (the CSV is expected in the working directory).
df = pd.read_csv('vehicles_us.csv')

# --- Data types ---
# Parse the posting date so it is a real datetime instead of a string.
df['date_posted'] = pd.to_datetime(df['date_posted'], format='%Y-%m-%d')

# --- Missing values ---
# A missing is_4wd flag is filled with 0; a missing colour gets an explicit label.
df['is_4wd'] = df['is_4wd'].fillna(0)
df['paint_color'] = df['paint_color'].fillna('Unknown')

# Numeric gaps are filled with the median of the same model.
for numeric_col in ('model_year', 'odometer', 'cylinders'):
    per_model_median = df.groupby('model')[numeric_col].transform('median')
    df[numeric_col] = df[numeric_col].fillna(per_model_median)

# A model whose years are all missing has no per-model median; that would leave
# NaN behind and make the integer cast below raise. Fall back to the overall
# median year for such rows (a no-op when nothing is left to fill).
df['model_year'] = df['model_year'].fillna(df['model_year'].median())

# --- Duplicates ---
df = df.drop_duplicates()

# Model years are whole numbers, so store them as integers.
df['model_year'] = df['model_year'].astype(int)
df.head()

Unnamed: 0,price,model_year,model,condition,cylinders,fuel,odometer,transmission,type,paint_color,is_4wd,date_posted,days_listed
0,9400,2011,bmw x5,good,6.0,gas,145000.0,automatic,SUV,Unknown,1.0,2018-06-23,19
1,25500,2011,ford f-150,good,6.0,gas,88705.0,automatic,pickup,white,1.0,2018-10-19,50
2,5500,2013,hyundai sonata,like new,4.0,gas,110000.0,automatic,sedan,red,0.0,2019-02-07,79
3,1500,2003,ford f-150,fair,8.0,gas,121928.0,automatic,pickup,Unknown,0.0,2019-03-22,9
4,14900,2017,chrysler 200,excellent,4.0,gas,80903.0,automatic,sedan,black,0.0,2019-04-02,28


### Rearrange the order of the columns
Rearranging the order of the columns makes the table easier for others to read.


In [24]:
# Data enrichment: split 'model' into the manufacturer (its first
# whitespace-delimited token) and the rest of the model name.
# \S+ is used instead of \w+ so a hyphenated manufacturer stays in one piece:
# \w+ cannot span a hyphen, so for a value shaped like 'aaa-bbb ccc' the match
# would start after the hyphen and the brand would come out as 'bbb'.
# The second group is optional so that a one-word 'model' still yields its
# brand (model_type is then missing).
df[['brand', 'model_type']] = df['model'].str.extract(r'(\S+)(?:\s+(.+))?')

# Rearrange the columns: the two derived columns go first, the rest keep their order.
# The columns are named explicitly rather than taken as "the last two", so
# re-running this cell cannot move two unrelated columns to the front.
leading_cols = ['brand', 'model_type']
new_order = leading_cols + [col for col in df.columns if col not in leading_cols]
df = df[new_order]
df.head()

Unnamed: 0,brand,model_type,price,model_year,model,condition,cylinders,fuel,odometer,transmission,type,paint_color,is_4wd,date_posted,days_listed
0,bmw,x5,9400,2011,bmw x5,good,6.0,gas,145000.0,automatic,SUV,Unknown,1.0,2018-06-23,19
1,ford,f-150,25500,2011,ford f-150,good,6.0,gas,88705.0,automatic,pickup,white,1.0,2018-10-19,50
2,hyundai,sonata,5500,2013,hyundai sonata,like new,4.0,gas,110000.0,automatic,sedan,red,0.0,2019-02-07,79
3,ford,f-150,1500,2003,ford f-150,fair,8.0,gas,121928.0,automatic,pickup,Unknown,0.0,2019-03-22,9
4,chrysler,200,14900,2017,chrysler 200,excellent,4.0,gas,80903.0,automatic,sedan,black,0.0,2019-04-02,28


In [25]:
# Push the combined 'model' column to the far right; every other column keeps
# its current relative position.
col_to_move = 'model'
cols = [name for name in df.columns if name != col_to_move] + [col_to_move]
df = df.loc[:, cols]
df.head()

Unnamed: 0,brand,model_type,price,model_year,condition,cylinders,fuel,odometer,transmission,type,paint_color,is_4wd,date_posted,days_listed,model
0,bmw,x5,9400,2011,good,6.0,gas,145000.0,automatic,SUV,Unknown,1.0,2018-06-23,19,bmw x5
1,ford,f-150,25500,2011,good,6.0,gas,88705.0,automatic,pickup,white,1.0,2018-10-19,50,ford f-150
2,hyundai,sonata,5500,2013,like new,4.0,gas,110000.0,automatic,sedan,red,0.0,2019-02-07,79,hyundai sonata
3,ford,f-150,1500,2003,fair,8.0,gas,121928.0,automatic,pickup,Unknown,0.0,2019-03-22,9,ford f-150
4,chrysler,200,14900,2017,excellent,4.0,gas,80903.0,automatic,sedan,black,0.0,2019-04-02,28,chrysler 200


In [26]:
# Data viewing: the distinct manufacturer names found in the 'brand' column.
brand_choice = df.loc[:, 'brand'].unique()

In [27]:
# The vehicle types present in the data drive the sidebar filter;
# every type starts out selected.
vehicle_types = df['type'].unique()
selected_types = st.sidebar.multiselect(
    'Select Vehicle Types to Display',
    vehicle_types,
    default=vehicle_types,
)

# Keep only the listings whose type is currently selected.
type_mask = df['type'].isin(selected_types)
df_filtered = df[type_mask]

# Stacked bars: manufacturer on x, price on y, one colour per vehicle type.
fig = px.bar(
    df_filtered,
    x='brand',
    y='price',
    color='type',
    barmode='stack',
    title="Vehicle Types by Manufacturer",
    labels={'price': 'Price', 'brand': 'Manufacturer'},
)

# Human-readable axis and legend titles.
layout_titles = {
    'xaxis_title': 'Manufacturer',
    'yaxis_title': 'Price',
    'legend_title_text': 'Vehicle Type',
}
fig.update_layout(**layout_titles)

# Render the chart at the full width of its container.
st.plotly_chart(fig, use_container_width=True)



DeltaGenerator()

In [28]:
# Sidebar control: which conditions to include (all selected by default).
# The variable name is kept as-is for compatibility with any code that reads it.
condition_choice = df['condition'].unique()
selecte_condition = st.sidebar.multiselect('Select Condition', condition_choice, default=condition_choice)

# Discard rows without a positive model year.
df = df[df['model_year'] > 0]

# Apply the sidebar selection so the widget actually controls what is plotted.
df_condition = df[df['condition'].isin(selecte_condition)]

# Overlaid histograms of model year, one per condition.
fig1 = px.histogram(df_condition, x='model_year', color='condition', barmode='overlay', nbins=50,
                    title='Histogram of Condition vs Model Year')

# Customize the layout for better visualization
fig1.update_layout(
    xaxis_title='Model Year',
    yaxis_title='Count',
    legend_title_text='Condition',
    hovermode='x unified'
)

# Display the Plotly chart in Streamlit
st.plotly_chart(fig1, use_container_width=True)



DeltaGenerator()

In [29]:
# Manufacturers available for comparison (computed once, shared by both pickers).
brand_options = df['brand'].unique()

# The second picker starts on the next option so the default view compares two
# different manufacturers instead of one manufacturer with itself.
manufacturer_1 = st.selectbox("Select manufacturer 1", brand_options)
manufacturer_2 = st.selectbox("Select manufacturer 2", brand_options,
                              index=1 if len(brand_options) > 1 else 0)

# Checkbox to normalize the histogram
normalize = st.checkbox("Normalize histogram")

# Filter the data based on the selected manufacturers
df_filtered1 = df[df['brand'].isin([manufacturer_1, manufacturer_2])]

# Plot histogram for price distribution between manufacturers
fig2 = px.histogram(
    df_filtered1,
    x='price',
    color='brand',
    barmode='overlay',
    histnorm='percent' if normalize else None,  # Normalize if checkbox is checked
    title="Price Distribution by Manufacturer"
)

# Customize layout; the y-axis title follows the normalization choice.
fig2.update_layout(
    xaxis_title='Price',
    yaxis_title='Percent' if normalize else 'Count',
    legend_title_text='Manufacturer',
    hovermode='x unified'
)

# Display the plot
st.plotly_chart(fig2, use_container_width=True)






DeltaGenerator()