<a href="https://colab.research.google.com/github/Jan-HeinKok/machinelearning2/blob/main/printpricecategory.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd

def preprocess_data(file_path):
    """Load a car-listing CSV and label every row with a price category.

    The CSV must provide the columns ``name``, ``company``, ``year``,
    ``kms_driven``, ``fuel_type`` and ``Price`` (renamed to ``price``).
    Rows with a missing value in any of those columns, and rows whose
    price is the placeholder text ``'Ask For Price'``, are discarded.

    Args:
        file_path: Anything ``pandas.read_csv`` accepts (path or
            file-like object).

    Returns:
        A DataFrame with the six columns above plus ``label``. ``price``
        is a float and ``label`` is one of ``'cheap'`` (price below
        300000), ``'affordable'`` (300000 up to, but excluding, 600000)
        or ``'expensive'`` (600000 and above).

    Raises:
        KeyError: If one of the required columns is absent.
        ValueError: If a remaining price cannot be parsed as a number.
    """
    df = pd.read_csv(file_path)

    # Only the price column needs renaming; every other column already
    # carries the expected lower-case name.
    df = df.rename(columns={'Price': 'price'})

    wanted = ['name', 'company', 'year', 'kms_driven', 'fuel_type', 'price']
    # Copy so the assignments below act on an independent frame instead
    # of a slice of the raw data (avoids SettingWithCopyWarning).
    df = df[wanted].dropna().copy()

    # read_csv infers a numeric dtype when no price contains commas or the
    # placeholder text, and the .str accessor raises on numeric columns,
    # so normalise to text before filtering and parsing.
    df['price'] = df['price'].astype(str).str.strip()
    df = df[df['price'] != 'Ask For Price'].copy()
    df['price'] = df['price'].str.replace(',', '', regex=False).astype(float)

    # Left-closed bins: [0, 300000), [300000, 600000), [600000, inf).
    bins = [0, 300000, 600000, float('inf')]
    labels = ['cheap', 'affordable', 'expensive']
    df['label'] = pd.cut(df['price'], bins=bins, labels=labels, right=False)

    # Prices outside every bin (i.e. negative values) get no label.
    df = df.dropna(subset=['label'])

    return df

def categorize_cars(df):
    """Split *df* into three frames according to its ``label`` column.

    Returns a 3-tuple ``(cheap, affordable, expensive)``; each element
    holds the rows of *df* whose ``label`` equals that category name.
    """
    tier = df['label']
    # Order of the names fixes the order of the returned tuple.
    return tuple(
        df[tier == category]
        for category in ('cheap', 'affordable', 'expensive')
    )

if __name__ == "__main__":
    # Load the listings, split them by price category, then show the
    # name and price of every car in each category.
    df = preprocess_data('dataset/quikr_car.csv')
    cheap_cars, affordable_cars, expensive_cars = categorize_cars(df)

    sections = (
        ("Cheap Cars:", cheap_cars),
        ("\nAffordable Cars:", affordable_cars),
        ("\nExpensive Cars:", expensive_cars),
    )
    for heading, cars in sections:
        print(heading)
        print(cars[['name', 'price']])
