In [8]:
import polars as pl
import altair

In [18]:
# Read the raw car-sales export; the bare name on the last line renders the
# frame as this cell's output.
car_sales = pl.read_csv(source='car-sales.csv')
car_sales

Make,Colour,Odometer (KM),Doors,Price
str,str,i64,i64,str
"""Toyota""","""White""",150043,4,"""$4,000.00"""
"""Honda""","""Red""",87899,4,"""$5,000.00"""
"""Toyota""","""Blue""",32549,3,"""$7,000.00"""
"""BMW""","""Black""",11179,5,"""$22,000.00"""
"""Nissan""","""White""",213095,4,"""$3,500.00"""
"""Toyota""","""Green""",99213,4,"""$4,500.00"""
"""Honda""","""Blue""",45698,4,"""$7,500.00"""
"""Honda""","""Blue""",54738,4,"""$7,000.00"""
"""Toyota""","""White""",60000,4,"""$6,250.00"""
"""Nissan""","""White""",31600,4,"""$9,700.00"""


In [19]:
# Adds the 'id' column (a row index) as the first column.
# Guarded so the cell can be re-run safely: calling `with_row_index('id')`
# on a frame that already has an 'id' column raises a duplicate-column error.
if 'id' not in car_sales.columns:
    car_sales = car_sales.with_row_index('id')

In [20]:
# Normalize the frame: lowercase the text columns and all column names, give
# the odometer column a snake_case name, and parse price strings such as
# "$4,000.00" into whole-number integers.
car_sales = car_sales.select(
    pl.col('id'),
    # Lowercase for values and column names
    pl.col('Make', 'Colour').str.to_lowercase().name.to_lowercase(),
    pl.col('Doors').name.to_lowercase(),
    # Renamed here so no separate rename step is needed afterwards.
    pl.col('Odometer (KM)').alias('odometer_km'),
    pl.col('Price')
        # Strip "$" and thousands separators, plus a trailing two-digit cents
        # suffix. The dot is escaped on purpose: an unescaped `...$` removes
        # ANY last three characters, which would corrupt a price written
        # without cents (e.g. "$4,000" -> "4"). Cents are dropped, not rounded.
        .str.replace_all(r"[$,]|\.\d{2}$", '')
        .str.to_integer(base=10)
        .name.to_lowercase()
)
car_sales

id,make,colour,doors,odometer_km,price
u32,str,str,i64,i64,i64
0,"""toyota""","""white""",4,150043,4000
1,"""honda""","""red""",4,87899,5000
2,"""toyota""","""blue""",3,32549,7000
3,"""bmw""","""black""",5,11179,22000
4,"""nissan""","""white""",4,213095,3500
5,"""toyota""","""green""",4,99213,4500
6,"""honda""","""blue""",4,45698,7500
7,"""honda""","""blue""",4,54738,7000
8,"""toyota""","""white""",4,60000,6250
9,"""nissan""","""white""",4,31600,9700


In [22]:
# Scatter plot: one point per row, price against row id, coloured by the
# car's colour.
price_points = car_sales.plot.point(x="id", y="price", color="colour")

# Sizing/title plus global config: scales are not forced to include zero,
# and x-axis ticks are at least 1 apart.
chart = (
    price_points
    .properties(width=500, title="Car Sales")
    .configure_scale(zero=False)
    .configure_axisX(tickMinStep=1)
)

# Replace the default axis titles (the raw column names) with readable labels.
chart.encoding.x.title = "ID"
chart.encoding.y.title = "Price"

chart

In [25]:
# Persist the current `car_sales` frame as CSV next to the input file.
car_sales.write_csv(file='car-sales-sanitized.csv')