In [None]:
import pandas as pd
from datetime import datetime

### Load the Dataset

In [None]:
# Read the raw dataset from the working directory.
# If the file is missing, print a hint and re-raise: every later cell needs
# `df`, so continuing would only fail further down with an unrelated
# NameError that hides the real cause.
try:
    df = pd.read_csv('cardekho_dataset.csv')
except FileNotFoundError:
    print("Error: Make sure 'cardekho_dataset.csv' is in the same directory.")
    raise
else:
    print("'cardekho_dataset.csv' loaded successfully!")
    print(f"Original dataset contains {df.shape[0]} rows.")

# Drop the 'Unnamed: 0' column when the CSV contains one.
# NOTE(review): presumably a saved row index from an earlier export — confirm.
if 'Unnamed: 0' in df.columns:
    df = df.drop(columns='Unnamed: 0')

### Perform Universal Cleaning and Preparation

In [None]:
print("\nPerforming initial data cleaning and preparation")

# Map the raw column names onto the names used from here on.
# Reassigning (instead of inplace=True) keeps the step explicit and chainable.
df = df.rename(columns={
    'brand': 'company',
    'car_name': 'name',
    'selling_price': 'price',
    'km_driven': 'kms_driven',
    'transmission_type': 'transmission',
    'mileage': 'mileage_kmpl',
    'engine': 'engine_cc',
    'max_power': 'max_power_bhp',
})

# Convert the vehicle's age into a 'year' column, then drop the age column.
# NOTE(review): the result is anchored to the date the notebook is run, so
# re-running in a later calendar year shifts every 'year' value by one.
# Consider pinning the dataset's reference year in a constant — confirm.
df['year'] = datetime.now().year - df['vehicle_age']
df = df.drop(columns='vehicle_age')

# Coerce the spec columns to numbers. Values are stringified first so both
# already-numeric and text cells go through the same path; anything that
# still cannot be parsed becomes NaN (errors='coerce').
# regex=False makes the suffix a literal match on every pandas version.
df['engine_cc'] = pd.to_numeric(
    df['engine_cc'].astype(str).str.replace(' CC', '', regex=False),
    errors='coerce',
)
df['max_power_bhp'] = pd.to_numeric(
    df['max_power_bhp'].astype(str).str.replace(' bhp', '', regex=False),
    errors='coerce',
)
# For mileage, keep only the first space-separated token of each value.
df['mileage_kmpl'] = pd.to_numeric(
    df['mileage_kmpl'].astype(str).str.split(' ').str[0],
    errors='coerce',
)

# Fill remaining gaps with each column's median.
# Assign the result back to the frame: `df[col].fillna(..., inplace=True)`
# operates on a temporary Series, which raises a FutureWarning on pandas 2.x
# and no longer updates `df` once Copy-on-Write is enabled.
for col in ['engine_cc', 'max_power_bhp', 'mileage_kmpl', 'seats']:
    df[col] = df[col].fillna(df[col].median())

print("Initial cleaning complete")

### Split the Dataset (Based on `seller_type`)

In [None]:
print("\nSplitting data into Dealer and Private Seller markets")

# Dealer market: rows whose seller_type is 'Dealer' or 'Trustmark Dealer'.
# The seller_type column is removed once it has been used to select the rows.
dealer_df = (
    df.loc[df['seller_type'].isin(['Dealer', 'Trustmark Dealer'])]
    .drop(columns='seller_type')
)

# Private market: rows whose seller_type is exactly 'Individual'.
# Rows matching neither filter end up in neither output frame.
private_df = (
    df.loc[df['seller_type'].eq('Individual')]
    .drop(columns='seller_type')
)

print(f"Created DEALER dataset with {len(dealer_df)} rows.")
print(f"Created PRIVATE SELLER dataset with {len(private_df)} rows.")

### Save the Two Final CSV Files

In [None]:
# Write each market subset to its own CSV in the working directory,
# without the DataFrame index.
dealer_df.to_csv(path_or_buf='dealer_market_data.csv', index=False)
private_df.to_csv(path_or_buf='private_market_data.csv', index=False)

# Report the two output files, one per line.
print(
    "\nTwo new files have been created:",
    "1. dealer_market_data.csv",
    "2. private_market_data.csv",
    sep="\n",
)