# Vehicle Data Analysis
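This notebook loads the vehicle listings in `car.csv` and answers a series of numbered questions about them: price and year ranges, fuel, seller and transmission counts, cost depreciation, and separate looks at two-wheelers and cars.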

In [None]:
import pandas as pd

# Read the vehicle listings into the DataFrame every later cell works from.
df = pd.read_csv(filepath_or_buffer="car.csv")

## Question 1: Manufacturing year range

In [None]:
# Earliest and latest values found in the Year column.
year_column = df["Year"]
manufacturing_year_min = year_column.min()
manufacturing_year_max = year_column.max()
print(f"Vehicles are present from manufacturing year {manufacturing_year_min} to {manufacturing_year_max}.")

## Question 2: Lowest price of a vehicle

In [None]:
# Smallest value in the Selling_Price column.
selling_prices = df.loc[:, "Selling_Price"]
lowest_price = selling_prices.min()
print(f"The lowest price to which a vehicle is sold is {lowest_price}.")

## Question 3: Highest price of a vehicle

In [None]:
# Largest value in the Selling_Price column.
selling_prices = df.loc[:, "Selling_Price"]
highest_price = selling_prices.max()
print(f"The highest price to which a vehicle is sold is {highest_price}.")

## Question 4: Total number of records

In [None]:
# One row per record, so the row count is the record count.
num_records = len(df.index)
print(f"There are {num_records} records in this data.")

## Question 5: Check for missing values

In [None]:
# Count missing cells per column, then total them across the whole table.
missing_per_column = df.isna().sum()
missing_values = missing_per_column.sum()
print(f"There are {missing_values} missing records in this data.")

## Question 6: Number of different vehicles

In [None]:
# Number of distinct (non-missing) names in the Car_Name column.
distinct_names = df["Car_Name"].dropna().unique()
num_different_vehicles = len(distinct_names)
print(f"There are {num_different_vehicles} different vehicles in this data.")

## Question 7: Most sold vehicle

In [None]:
# Tally how often each name appears and pick the most frequent one.
listings_per_name = df["Car_Name"].value_counts()
most_sold_vehicle = listings_per_name.idxmax()
print(f"The most sold vehicle in this data is {most_sold_vehicle}.")

## Question 8: Number of CNG vehicles

In [None]:
# Count rows whose Fuel_Type is exactly "CNG" by summing the boolean mask.
is_cng = df["Fuel_Type"].eq("CNG")
num_cng_vehicles = int(is_cng.sum())
print(f"There are {num_cng_vehicles} CNG vehicles in this data.")

## Question 9: Vehicles sold by individuals directly

In [None]:
# Rows whose Seller_Type is exactly "Individual".
individual_listings = df.loc[df["Seller_Type"] == "Individual"]
num_individual_sales = len(individual_listings)
print(f"There are {num_individual_sales} vehicles for sale from individuals directly.")

## Question 10: Auto transmission vehicles

In [None]:
# Count rows whose Transmission is exactly "Automatic".
is_automatic = df["Transmission"].eq("Automatic")
num_auto_transmission = int(is_automatic.sum())
print(f"There are {num_auto_transmission} auto transmission vehicles in this data.")

## Question 11: Single-person owned vehicles

In [None]:
# Count rows where the Owner column equals 1.
# NOTE(review): if Owner records the number of *previous* owners, single-person
# owned vehicles would be Owner == 0 instead - confirm against the data dictionary.
owner_is_one = df["Owner"] == 1
single_owner_vehicles = int(owner_is_one.sum())
print(f"There are {single_owner_vehicles} single-person owned vehicles in this database.")

## Question 12: Most and least cost depreciated vehicles

In [None]:
# Depreciation is defined here as Present_Price minus Selling_Price (absolute,
# not relative). The column is stored on df because later cells reuse it.
df["Depreciation"] = df["Present_Price"].sub(df["Selling_Price"])

# Row labels of the largest and smallest depreciation, then their names.
largest_drop_label = df["Depreciation"].idxmax()
smallest_drop_label = df["Depreciation"].idxmin()
most_depreciated_vehicle = df.loc[largest_drop_label, "Car_Name"]
least_depreciated_vehicle = df.loc[smallest_drop_label, "Car_Name"]

print(f"Most cost depreciated vehicle: {most_depreciated_vehicle}")
print(f"Least cost depreciated vehicle: {least_depreciated_vehicle}")

## Question 13: Brands less affected by cost depreciation

In [None]:
# Use the first whitespace-separated token of Car_Name as the brand.
# The vectorised .str accessor yields NaN for missing or blank names instead of
# raising (as a per-row x.split()[0] would), and groupby then leaves those rows
# out of the per-brand averages.
df["Brand"] = df["Car_Name"].str.split().str[0]

# Mean depreciation per brand, smallest first; the top five are the brands
# least affected. Relies on the Depreciation column added in Question 12.
brand_depreciation = df.groupby("Brand")["Depreciation"].mean().sort_values()
least_affected_brands = brand_depreciation.head(5)
print("Brands less affected by cost depreciation:")
print(least_affected_brands)

## Question 14: Factors affecting cost depreciation

In [None]:
# Correlate depreciation with the candidate factors.
# Fuel_Type, Seller_Type and Transmission are matched against string labels
# elsewhere in this notebook, so they are one-hot encoded first: DataFrame.corr()
# needs numeric input and, in pandas 2.x, raises on string columns rather than
# silently dropping them. Each category level becomes its own 0/1 column
# (e.g. "Fuel_Type_CNG"), so every level gets a correlation value.
factor_columns = ["Depreciation", "Year", "Kms_Driven", "Fuel_Type", "Seller_Type", "Transmission"]
categorical_columns = ["Fuel_Type", "Seller_Type", "Transmission"]
factors_encoded = pd.get_dummies(df[factor_columns], columns=categorical_columns, dtype=float)
correlation = factors_encoded.corr()
print("Correlation between depreciation and other factors:")
print(correlation["Depreciation"].sort_values(ascending=False))

## Question 15: Selling price affected by age and distance driven

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

# Year and Kms_Driven are different quantities, so plotting both against one
# shared x-axis (as a single overlaid scatter) makes the figure unreadable.
# Each factor gets its own panel; the shared y-axis keeps prices comparable.
fig, (ax_year, ax_kms) = plt.subplots(1, 2, figsize=(12, 5), sharey=True)

sns.scatterplot(data=df, x="Year", y="Selling_Price", ax=ax_year)
ax_year.set(title="Year vs Selling Price", xlabel="Year", ylabel="Selling_Price")

sns.scatterplot(data=df, x="Kms_Driven", y="Selling_Price", color="red", ax=ax_kms)
ax_kms.set(title="Kms Driven vs Selling Price", xlabel="Kms_Driven")

fig.tight_layout()
plt.show()

## Question 16: Vehicles manufactured after 2014

In [None]:
# Keep rows whose Year is strictly greater than 2014 (2014 itself is excluded).
made_after_2014 = df["Year"].gt(2014)
new_vehicles = df.loc[made_after_2014]
print(f"There are {new_vehicles.shape[0]} vehicles manufactured after 2014.")

## Question 17: Filtering two-wheelers

In [None]:
# A row counts as a two-wheeler when its Car_Name contains any keyword
# (case-insensitive); missing names are treated as non-matches.
# NOTE(review): this only works if names literally contain these words -
# confirm against the actual Car_Name values, otherwise df_bikes is empty.
bike_keywords = ["Bike", "Scooter", "Motorcycle"]
bike_pattern = "|".join(bike_keywords)
is_two_wheeler = df["Car_Name"].str.contains(bike_pattern, case=False, na=False)
df_bikes = df[is_two_wheeler]
print(f"There are {df_bikes.shape[0]} two-wheelers in this data.")

## Question 18: Oldest bike sold

In [None]:
# Names of the two-wheelers from the earliest Year. When df_bikes is empty (or
# has no usable Year) this selection is empty, so guard before taking the first
# entry instead of letting the cell fail with an IndexError.
oldest_bike_names = df_bikes.loc[df_bikes["Year"] == df_bikes["Year"].min(), "Car_Name"]
if oldest_bike_names.empty:
    oldest_bike = None
    print("No two-wheelers found, so there is no oldest bike to report.")
else:
    oldest_bike = oldest_bike_names.iloc[0]
    print(f"The oldest bike sold here is {oldest_bike}.")

## Question 19: Newest bike sold

In [None]:
# Names of the two-wheelers from the latest Year. When df_bikes is empty (or
# has no usable Year) this selection is empty, so guard before taking the first
# entry instead of letting the cell fail with an IndexError.
newest_bike_names = df_bikes.loc[df_bikes["Year"] == df_bikes["Year"].max(), "Car_Name"]
if newest_bike_names.empty:
    newest_bike = None
    print("No two-wheelers found, so there is no newest bike to report.")
else:
    newest_bike = newest_bike_names.iloc[0]
    print(f"The newest bike sold here is {newest_bike}.")

## Question 20: Most sold bike

In [None]:
# Most frequent name among the two-wheelers. idxmax() raises on an empty
# Series, so an empty two-wheeler subset is reported instead of crashing.
bike_name_counts = df_bikes["Car_Name"].value_counts()
if bike_name_counts.empty:
    most_sold_bike = None
    print("No two-wheelers found, so there is no most sold bike to report.")
else:
    most_sold_bike = bike_name_counts.idxmax()
    print(f"The most sold bike here is {most_sold_bike}.")

## Question 21: Two-wheeler deals exceeding general expectation

In [None]:
# "Exceeding expectation" here means the selling price is above 80% of the
# present price for that row.
bike_price_threshold = df_bikes["Present_Price"] * 0.8
outlier_bikes = df_bikes[df_bikes["Selling_Price"] > bike_price_threshold]
print(f"There are {outlier_bikes.shape[0]} two-wheeler deals that exceeded general expectations.")
# Only render the table when there is something to show.
if len(outlier_bikes) > 0:
    display(outlier_bikes)

## Question 22: Reason for unexpected two-wheeler deals

In [None]:
# List candidate explanations only when the previous cell found outliers.
if outlier_bikes.empty:
    print("No unexpected two-wheeler deals found.")
else:
    print("Possible reasons:")
    for reason_line in (
        "- Low depreciation due to brand popularity",
        "- Limited availability or high demand",
        "- Special editions or custom modifications",
    ):
        print(reason_line)

## Question 23: Filtering only cars from the data

In [None]:
# Cars are everything the two-wheeler keyword match does NOT select
# (reuses bike_keywords from the two-wheeler filter cell).
two_wheeler_pattern = "|".join(bike_keywords)
matches_two_wheeler = df["Car_Name"].str.contains(two_wheeler_pattern, case=False, na=False)
df_cars = df[~matches_two_wheeler]
print(f"There are {df_cars.shape[0]} cars in this data.")

## Question 24: Oldest car sold

In [None]:
# First listed car among those with the earliest Year.
earliest_car_year = df_cars["Year"].min()
oldest_car = df_cars.loc[df_cars["Year"] == earliest_car_year, "Car_Name"].iloc[0]
print(f"The oldest car sold here is {oldest_car}.")

## Question 25: Newest car sold

In [None]:
# First listed car among those with the latest Year.
latest_car_year = df_cars["Year"].max()
newest_car = df_cars.loc[df_cars["Year"] == latest_car_year, "Car_Name"].iloc[0]
print(f"The newest car sold here is {newest_car}.")

## Question 26: Car deals exceeding general expectation

In [None]:
# "Exceeding expectation" here means the selling price is above 80% of the
# present price for that row.
car_price_threshold = df_cars["Present_Price"] * 0.8
outlier_cars = df_cars[df_cars["Selling_Price"] > car_price_threshold]
print(f"There are {outlier_cars.shape[0]} car deals that exceeded general expectations.")
# Only render the table when there is something to show.
if len(outlier_cars) > 0:
    display(outlier_cars)

## Question 27: Reason for unexpected car deals

In [None]:
# List candidate explanations only when the previous cell found outliers.
if outlier_cars.empty:
    print("No unexpected car deals found.")
else:
    print("Possible reasons:")
    for reason_line in (
        "- Brand reputation and resale value",
        "- Low depreciation due to premium models",
        "- Unique features or modifications",
    ):
        print(reason_line)