In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Load the cleaned car data from the processed-data CSV
df = pd.read_csv('data/processed/cleaned_cars.csv')

# Basic info
# DataFrame.info() writes its report straight to stdout and returns None,
# so it is called directly; wrapping it in print() would add a stray "None"
# line after the summary.
df.info()
# describe() returns a DataFrame of summary statistics, so it does need print().
print(df.describe())

# Histogram of listed prices (50 bins) with a KDE curve overlaid
price_fig, price_ax = plt.subplots(figsize=(12, 6))
sns.histplot(df['listed_price'], bins=50, kde=True, ax=price_ax)
price_ax.set_title('Used Car Price Distribution')
price_ax.set_xlabel('Price ($)')
# Save first; the figure may no longer be available once show() returns.
price_fig.savefig('visualizations/price_distribution.png')
plt.show()

# Top 10 makes by count
# value_counts() sorts by descending frequency, so head(10) keeps the ten
# most common makes.
plt.figure(figsize=(12,6))
df['make'].value_counts().head(10).plot(kind='bar')
plt.title('Top 10 Car Makes')
plt.ylabel('Count')
# pandas draws bar-chart x tick labels vertically by default; with the default
# figure margins longer make names can run past the bottom edge and be cut off
# in the saved PNG. tight_layout() resizes the axes so the labels fit.
plt.tight_layout()
plt.savefig('visualizations/top_makes.png')
plt.show()

# Scatter of listed price against mileage, with points colored by fuel type
scatter_fig, scatter_ax = plt.subplots(figsize=(12, 8))
sns.scatterplot(
    data=df,
    x='mileage',
    y='listed_price',
    hue='fuel_type',
    alpha=0.6,  # partial transparency so overlapping points remain visible
    ax=scatter_ax,
)
scatter_ax.set_title('Price vs Mileage')
# Save first; the figure may no longer be available once show() returns.
scatter_fig.savefig('visualizations/price_vs_mileage.png')
plt.show()

# Pairwise correlations between listed price and the selected vehicle columns
corr_columns = ['listed_price', 'year', 'mileage', 'vehicle_age', 'mileage_rate']
corr = df[corr_columns].corr()

# Annotated heatmap of the correlation matrix
heat_fig, heat_ax = plt.subplots(figsize=(10, 8))
sns.heatmap(corr, annot=True, cmap='coolwarm', ax=heat_ax)
heat_ax.set_title('Feature Correlation Matrix')
# Save first; the figure may no longer be available once show() returns.
heat_fig.savefig('visualizations/correlation_matrix.png')
plt.show()