In [11]:
import plotly.express as px
import pandas as pd

# Load the dataset
vehicles_us = pd.read_csv('vehicles_us.csv')

# Check for missing values and data types.
# DataFrame.info() prints its report itself and returns None, so it is called
# directly; wrapping it in print() emits an extra "None" line.
vehicles_us.info()

# Check for duplicates
print(f"Number of duplicate rows: {vehicles_us.duplicated().sum()}")
# Check for missing values
print(f"Number of missing values:\n{vehicles_us.isnull().sum()}")

# Handle missing values.
# A blanket dropna() would throw away every row without an is_4wd or
# paint_color entry; is_4wd alone is missing in roughly half of the rows, so
# the plots below would only describe a small, biased slice of the data.
# Fill those two columns with explicit placeholders first.
# NOTE(review): assumes a missing is_4wd means "not 4WD" -- confirm against the
# dataset's data dictionary.
vehicles_us = vehicles_us.fillna({'is_4wd': 0, 'paint_color': 'unknown'})
# Rows still incomplete (missing model_year, cylinders or odometer) are dropped
# so every remaining column is fully populated for plotting.
vehicles_us = vehicles_us.dropna()
print(f"Rows kept after cleaning: {len(vehicles_us)}")

# Print column names
print(f"Column names: {vehicles_us.columns.tolist()}")

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 51525 entries, 0 to 51524
Data columns (total 13 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   price         51525 non-null  int64  
 1   model_year    47906 non-null  float64
 2   model         51525 non-null  object 
 3   condition     51525 non-null  object 
 4   cylinders     46265 non-null  float64
 5   fuel          51525 non-null  object 
 6   odometer      43633 non-null  float64
 7   transmission  51525 non-null  object 
 8   type          51525 non-null  object 
 9   paint_color   42258 non-null  object 
 10  is_4wd        25572 non-null  float64
 11  date_posted   51525 non-null  object 
 12  days_listed   51525 non-null  int64  
dtypes: float64(4), int64(2), object(7)
memory usage: 5.1+ MB
None
Number of duplicate rows: 0
Number of missing values:
price               0
model_year       3619
model               0
condition           0
cylinders        5260
fuel           

In [12]:
# Histogram of listing prices, grouped into 50 bins.
fig1 = px.histogram(
    data_frame=vehicles_us,
    x='price',
    title='Distribution of Vehicle Prices',
    nbins=50,
)
fig1.show()



In [13]:
# Histogram of odometer readings, grouped into 50 bins.
fig2 = px.histogram(
    data_frame=vehicles_us,
    x='odometer',
    title='Distribution of Odometer Readings',
    nbins=50,
)
fig2.show()

In [16]:
# Number of listings per model, as a two-column frame (Model, Count).
model_counts = (
    vehicles_us['model']
    .value_counts()
    .rename_axis('Model')
    .reset_index(name='Count')
)

# Bar chart of the per-model counts; `labels` overrides the axis captions.
fig3 = px.bar(
    data_frame=model_counts,
    x='Model',
    y='Count',
    title='Número de Vehículos por Modelo',
    labels={'Model': 'Modelo', 'Count': 'Cantidad de Vehículos'},
)
fig3.show()

In [20]:
# Scatter plot: price by condition, with points colored by odometer reading.
# Model year and model are added to the hover tooltip.
fig4 = px.scatter(
    data_frame=vehicles_us,
    x='condition',
    y='price',
    color='odometer',
    hover_data=['model_year', 'model'],
    title='Price vs. Condition Colored by Odometer',
)
fig4.show()

In [21]:
# Box plot comparing the price distribution across fuel types.
fig5 = px.box(
    data_frame=vehicles_us,
    x='fuel',
    y='price',
    title='Price Distribution by Fuel Type',
)
fig5.show()