Step 1: SETUP & IMPORT

In [5]:
# Step 1: Import libraries
import pandas as pd
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)  # Clean up warning output

# Display settings
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)


Step 2: LOAD & CLEAN DATA

In [6]:
# Step 2: Load dataset from UCI
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/autos/imports-85.data"

col_names = ['symboling', 'normalized-losses', 'fuel-type', 'aspiration', 'num-of-doors',
             'body-style', 'drive-wheels', 'engine-location', 'wheel-base', 'length', 'width',
             'height', 'curb-weight', 'engine-type', 'num-of-cylinders', 'engine-size',
             'fuel-system', 'bore', 'stroke', 'compression-ratio', 'horsepower', 'peak-rpm',
             'city-mpg', 'highway-mpg', 'price']

df = pd.read_csv(url, names=col_names, na_values="?", header=None)


Step 3: SELECT 4 COLUMNS FOR ENCODING

In [7]:
# Step 3: Keep only 4 columns initially
df_car = df[['aspiration', 'num-of-doors', 'drive-wheels', 'num-of-cylinders']]

# 🔍 Screenshot #1: Show selected columns
df_car.head()


Unnamed: 0,aspiration,num-of-doors,drive-wheels,num-of-cylinders
3,std,two,rwd,four
3,std,two,rwd,four
1,std,two,rwd,six
2,std,four,fwd,four
2,std,four,4wd,five


Step 4: ENCODE ORDINAL FEATURES


In [8]:
# Step 4: Encode ordinal values

# Encode 'num-of-doors'
door_mapper = {"two": 2, "four": 4}
df_car['doors'] = df_car["num-of-doors"].replace(door_mapper).astype(float)

# Encode 'num-of-cylinders'
cylinder_mapper = {
    "two": 2, "three": 3, "four": 4, "five": 5,
    "six": 6, "eight": 8, "twelve": 12
}
df_car['cylinders'] = df_car["num-of-cylinders"].replace(cylinder_mapper).astype(float)


df_car[['num-of-doors', 'doors', 'num-of-cylinders', 'cylinders']].head()


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_car['doors'] = df_car["num-of-doors"].replace(door_mapper).astype(float)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_car['cylinders'] = df_car["num-of-cylinders"].replace(cylinder_mapper).astype(float)


Unnamed: 0,num-of-doors,doors,num-of-cylinders,cylinders
3,two,2.0,four,4.0
3,two,2.0,four,4.0
1,two,2.0,six,6.0
2,four,4.0,four,4.0
2,four,4.0,five,5.0


Step 5: ENCODE NON-ORDINAL FEATURES

In [9]:
# Step 5: One-hot encode non-ordinal features
df_car = pd.get_dummies(df_car, columns=['aspiration'], drop_first=True)
df_car = pd.get_dummies(df_car, columns=['drive-wheels'])


df_car[['aspiration_turbo', 'drive-wheels_4wd', 'drive-wheels_fwd', 'drive-wheels_rwd']].head()


Unnamed: 0,aspiration_turbo,drive-wheels_4wd,drive-wheels_fwd,drive-wheels_rwd
3,False,False,False,True
3,False,False,False,True
1,False,False,False,True
2,False,False,True,False
2,False,True,False,False


Step 6: CHALLENGE TASK — Add & Encode 2 More Columns

In [10]:
# Step 6: Reload full dataset and extract extra columns
df_full = pd.read_csv(url, names=col_names, na_values="?", header=None)

# Add two new features
df_car['fuel-type'] = df_full['fuel-type']
df_car['engine-location'] = df_full['engine-location']

# Encode them
df_car = pd.get_dummies(df_car, columns=['fuel-type'], drop_first=True)
df_car = pd.get_dummies(df_car, columns=['engine-location'], drop_first=True)

df_car[['fuel-type_gas', 'engine-location_rear']].head()


Unnamed: 0,fuel-type_gas,engine-location_rear
3,True,False
3,True,False
1,True,False
2,True,False
2,True,False


Setp 7 : FINAL COLUMN CHECK

In [None]:
# Optional final check
df_car.columns
