# 🧠 Car Price Category Classification (Low / Mid / High)
This notebook classifies cars into price categories using AutoGluon.
✅ Evaluation is now fixed using scikit-learn's classification_report.

In [2]:
import pandas as pd
import matplotlib.pyplot as plt
from autogluon.tabular import TabularPredictor

In [3]:
df = pd.read_csv('car_price_dataset.csv')
def categorize_price(price):
    if price < 15000:
        return 'Low'
    elif price < 30000:
        return 'Mid'
    else:
        return 'High'
df['Price_Category'] = df['Price'].apply(categorize_price)
df.head()

Unnamed: 0,Brand,Model,Year,Engine_Size,Fuel_Type,Transmission,Mileage,Doors,Owner_Count,Price,Price_Category
0,Kia,Rio,2020,4.2,Diesel,Manual,289944,3,5,8501,Low
1,Chevrolet,Malibu,2012,2.0,Hybrid,Automatic,5356,2,3,12092,Low
2,Mercedes,GLA,2020,4.2,Diesel,Automatic,231440,4,2,11171,Low
3,Audi,Q5,2023,2.0,Electric,Manual,160971,2,1,11780,Low
4,Volkswagen,Golf,2003,2.6,Hybrid,Semi-Automatic,286618,3,3,2867,Low


In [4]:
print("\nMissing Values:")
print(df.isnull().sum())
print("\nClass Distribution:")
print(df['Price_Category'].value_counts())


Missing Values:
Brand             0
Model             0
Year              0
Engine_Size       0
Fuel_Type         0
Transmission      0
Mileage           0
Doors             0
Owner_Count       0
Price             0
Price_Category    0
dtype: int64

Class Distribution:
Price_Category
Low    9790
Mid     210
Name: count, dtype: int64


In [5]:
from sklearn.model_selection import train_test_split
train_data, test_data = train_test_split(df, test_size=0.2, random_state=42)
print(f'Train: {train_data.shape}, Test: {test_data.shape}')

Train: (8000, 11), Test: (2000, 11)


In [6]:
predictor = TabularPredictor(label='Price_Category', problem_type='multiclass', eval_metric='accuracy')
predictor.fit(train_data, time_limit=600)

No path specified. Models will be saved in: "AutogluonModels\ag-20250416_004346"
Verbosity: 2 (Standard Logging)
AutoGluon Version:  1.2
Python Version:     3.11.11
Operating System:   Windows
Platform Machine:   AMD64
Platform Version:   10.0.26100
CPU Count:          22
Memory Avail:       14.39 GB / 31.43 GB (45.8%)
Disk Space Avail:   229.46 GB / 401.65 GB (57.1%)
No presets specified! To achieve strong results with AutoGluon, it is recommended to use the available presets. Defaulting to `'medium'`...
	Recommended Presets (For more details refer to https://auto.gluon.ai/stable/tutorials/tabular/tabular-essentials.html#presets):
	presets='experimental' : New in v1.2: Pre-trained foundation model + parallel fits. The absolute best accuracy without consideration for inference speed. Does not support GPU.
	presets='best'         : Maximize accuracy. Recommended for most users. Use in competitions and benchmarks.
	presets='high'         : Strong accuracy with fast inference speed.
	pres

<autogluon.tabular.predictor.predictor.TabularPredictor at 0x207a828f110>

In [7]:
# 🔁 Evaluate using sklearn instead of predictor.evaluate()
from sklearn.metrics import classification_report
y_true = test_data['Price_Category']
y_pred = predictor.predict(test_data.drop(columns=['Price_Category']))
print(classification_report(y_true, y_pred))

              precision    recall  f1-score   support

         Low       1.00      1.00      1.00      1964
         Mid       1.00      1.00      1.00        36

    accuracy                           1.00      2000
   macro avg       1.00      1.00      1.00      2000
weighted avg       1.00      1.00      1.00      2000



In [8]:
predictor.leaderboard(test_data, silent=True)

If you only need to load model weights and optimizer state, use the safe `Learner.load` instead.
  warn("load_learner` uses Python's insecure pickle module, which can execute malicious arbitrary code when loading. Only load files you trust.\nIf you only need to load model weights and optimizer state, use the safe `Learner.load` instead.")


Unnamed: 0,model,score_test,score_val,eval_metric,pred_time_test,pred_time_val,fit_time,pred_time_test_marginal,pred_time_val_marginal,fit_time_marginal,stack_level,can_infer,fit_order
0,LightGBM,1.0,0.99875,accuracy,0.031894,0.019105,1.355742,0.031894,0.019105,1.355742,1,True,4
1,RandomForestEntr,1.0,1.0,accuracy,0.187815,0.134712,1.604811,0.187815,0.134712,1.604811,1,True,6
2,WeightedEnsemble_L2,1.0,1.0,accuracy,0.191358,0.135712,1.786676,0.003543,0.001,0.181865,2,True,13
3,RandomForestGini,1.0,0.99875,accuracy,0.230909,0.1549,1.659474,0.230909,0.1549,1.659474,1,True,5
4,LightGBMLarge,0.9995,0.99875,accuracy,0.037306,0.017947,3.562958,0.037306,0.017947,3.562958,1,True,12
5,XGBoost,0.997,0.9975,accuracy,0.081158,0.011407,1.246967,0.081158,0.011407,1.246967,1,True,11
6,ExtraTreesEntr,0.997,0.995,accuracy,0.21033,0.141475,1.594858,0.21033,0.141475,1.594858,1,True,9
7,CatBoost,0.9965,0.9975,accuracy,0.027397,0.008007,46.636385,0.027397,0.008007,46.636385,1,True,7
8,ExtraTreesGini,0.9965,0.995,accuracy,0.213686,0.135867,1.569688,0.213686,0.135867,1.569688,1,True,8
9,LightGBMXT,0.996,0.99375,accuracy,0.03686,0.017293,2.679216,0.03686,0.017293,2.679216,1,True,3


In [9]:
comparison = pd.DataFrame({
    'Actual': y_true,
    'Predicted': y_pred
})
comparison.head()

Unnamed: 0,Actual,Predicted
6252,Low,Low
4684,Low,Low
1731,Low,Low
4742,Low,Low
4521,Low,Low
