In [25]:
import os

import matplotlib.pyplot as plt
import pandas as pd


In [None]:
# Location of the dataset, relative to the notebook's working directory.
file_path = "Car Price Dataset.csv"

# Pick the pandas reader from the file extension: .xlsx workbooks are read
# through openpyxl, anything else is parsed as CSV.
if file_path.endswith('.xlsx'):
    df = pd.read_excel(file_path, engine="openpyxl")
else:
    df = pd.read_csv(file_path)

In [None]:
# Sanity check on the loaded frame: confirmation message, first five rows,
# then the column names as a plain list.
print("Dataset Loaded Successfully!")

preview = df.head()
print(preview)

column_names = df.columns.tolist()
print("\nColumns:", column_names)

Dataset Loaded Successfully!
   Car ID  Brand  Year  Engine Size Fuel Type Transmission  Mileage Condition  \
0       1  Tesla  2016          2.3    Petrol       Manual   114832       New   
1       2    BMW  2018          4.4  Electric       Manual   143190      Used   
2       3   Audi  2013          4.5  Electric       Manual   181601       New   
3       4  Tesla  2011          4.1    Diesel    Automatic    68682       New   
4       5   Ford  2009          2.6    Diesel       Manual   223009  Like New   

      Price     Model  
0  26613.92   Model X  
1  14679.61  5 Series  
2  44402.61        A4  
3  86374.33   Model Y  
4  73577.10   Mustang  

Columns: ['Car ID', 'Brand', 'Year', 'Engine Size', 'Fuel Type', 'Transmission', 'Mileage', 'Condition', 'Price', 'Model']


In [28]:
# Structural overview of the frame: column dtypes, non-null counts, memory use.
print("\nDataset Info:")
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly; wrapping it in print() appended a stray "None" line.
df.info()

# Per-column count of missing values. The Series is printed on its own so that
# print()'s argument separator does not indent the first row.
print("\nMissing Values:")
print(df.isnull().sum())


Dataset Info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2500 entries, 0 to 2499
Data columns (total 10 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   Car ID        2500 non-null   int64  
 1   Brand         2500 non-null   object 
 2   Year          2500 non-null   int64  
 3   Engine Size   2500 non-null   float64
 4   Fuel Type     2500 non-null   object 
 5   Transmission  2500 non-null   object 
 6   Mileage       2500 non-null   int64  
 7   Condition     2500 non-null   object 
 8   Price         2500 non-null   float64
 9   Model         2500 non-null   object 
dtypes: float64(2), int64(3), object(5)
memory usage: 195.4+ KB
None

Missing Values:
 Car ID          0
Brand           0
Year            0
Engine Size     0
Fuel Type       0
Transmission    0
Mileage         0
Condition       0
Price           0
Model           0
dtype: int64


In [29]:
# Create the output folder that the chart cells save their PNG files into.
# exist_ok=True makes this cell safe to re-run when the folder already exists.
# (`os` is imported with the other dependencies in the notebook's first cell.)
os.makedirs("charts", exist_ok=True)


In [30]:
# Mean price per brand, highest first; head(10) caps the chart at ten bars.
avg_price = df.groupby('Brand')['Price'].mean().sort_values(ascending=False).head(10)

# Explicit figure/axes handles so every call targets this chart, not whatever
# pyplot currently considers the "current" figure.
fig, ax = plt.subplots(figsize=(10, 5))
avg_price.plot(kind='bar', color='#0b74de', rot=45, ax=ax)

# The data is grouped by the 'Brand' column, so the title says "Brand".
ax.set_title('Average Car Price by Brand (Top 10)', fontsize=14)
ax.set_xlabel('Brand Name')
# The source column is simply 'Price'; the dataset does not state a currency
# or scale, so the axis label does not claim one.
ax.set_ylabel('Average Price')
ax.grid(axis='y', linestyle='--', alpha=0.6)

fig.tight_layout()
fig.savefig("charts/bar_chart_price_by_brand.png")
# Close the figure so it is not also rendered inline and its memory is freed.
plt.close(fig)
print("Bar Chart Saved")


Bar Chart Saved


In [31]:
# Square canvas for the fuel-type pie chart.
fig, ax = plt.subplots(figsize=(6, 6))

# Number of cars per fuel type; value_counts() orders the result by frequency.
fuel_counts = df['Fuel Type'].value_counts()

# One wedge per fuel type, labelled with its name and its share to one decimal.
wedge_colors = ['#0b74de', '#34c759', '#ff9500', '#af52de']
ax.pie(
    fuel_counts,
    labels=fuel_counts.index,
    autopct='%1.1f%%',
    colors=wedge_colors,
    startangle=140,
)
ax.set_title('Fuel Type Distribution', fontsize=14)

fig.tight_layout()
fig.savefig("charts/pie_chart_fuel_type.png")
# The chart is only written to disk; close it so it is not rendered inline.
plt.close(fig)
print("Pie Chart Saved")



Pie Chart Saved


In [32]:
# Mean price for each model year, ordered chronologically for the line plot.
yearly_avg = df.groupby('Year')['Price'].mean().sort_index()

# Explicit figure/axes handles so every call targets this chart.
fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(yearly_avg.index, yearly_avg.values, marker='o', color='#0b74de', linewidth=2)

ax.set_title('Average Car Price Trend Over Years', fontsize=14)
ax.set_xlabel('Year')
# The source column is simply 'Price'; the dataset does not state a currency
# or scale, so the axis label does not claim one.
ax.set_ylabel('Average Price')
ax.grid(True, linestyle='--', alpha=0.5)

fig.tight_layout()
fig.savefig("charts/line_chart_price_trend.png")
# Close the figure so it is not also rendered inline and its memory is freed.
plt.close(fig)
print("Line Chart Saved")

Line Chart Saved


In [33]:
# Closing confirmation once all chart cells above have run.
completion_message = "\nAll visualizations generated and saved successfully in 'charts/' folder."
print(completion_message)



All visualizations generated and saved successfully in 'charts/' folder.
