In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [3]:
df=pd.read_csv(r'D:\demo\smart_price\synthetic_slpna_dataset.csv')
df.head()
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 300 entries, 0 to 299
Data columns (total 12 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   Product             300 non-null    object 
 1   Brand               300 non-null    object 
 2   Model               300 non-null    object 
 3   Age                 300 non-null    float64
 4   Condition           300 non-null    object 
 5   Seller_Urgency      300 non-null    int64  
 6   Region              300 non-null    object 
 7   Original_Price      300 non-null    int64  
 8   Listings_in_Region  300 non-null    int64  
 9   Local_Demand_Score  300 non-null    float64
 10  Fair_Price          300 non-null    int64  
 11  Negotiable          300 non-null    object 
dtypes: float64(2), int64(4), object(6)
memory usage: 28.3+ KB


In [4]:
df.isnull().sum()
for col in df.columns:
  print(col,df[col].unique())

Product ['Laptop' 'Mobile' 'Bike' 'Furniture']
Brand ['Dell' 'Samsung' 'Apple' 'HP' 'Honda' 'OnePlus' 'Pepperfry' 'IKEA'
 'Yamaha' 'Bajaj' 'UrbanLadder']
Model ['Inspiron' 'Galaxy S10' 'iPhone XR' 'Pavilion' 'CB Shine' '7T' 'Sofa'
 'Chair' 'FZ' 'Pulsar' 'Table' 'MacBook Air']
Age [3.3 1.2 0.7 0.5 0.6 7.8 5.6 7.7 2.2 6.7 5.5 4.8 7.5 2.  6.1 5.1 6.3 7.4
 1.  7.2 5.9 3.8 4.7 3.6 2.7 5.3 7.3 2.5 5.8 4.2 4.6 2.3 4.5 6.9 4.3 7.6
 6.  2.4 1.8 6.5 0.8 7.1 3.7 2.8 2.9 6.6 1.1 3.9 1.7 2.6 3.1 0.9 2.1 5.4
 6.4 7.  6.2 5.2 4.9 3.4 4.4 3.  1.5 1.9 1.4 4.1 6.8 3.2 7.9 1.6 5.  8.
 3.5 4.  5.7 1.3]
Condition ['Fair' 'Very Good' 'Good' 'Poor' 'Like New']
Seller_Urgency [5 3 2 4 1]
Region ['Chennai' 'Delhi' 'Mumbai' 'Hyderabad' 'Bangalore']
Original_Price [79820 47131 62735 86104 28658 70435 83038 46001 26483 83077 69557 48758
 35606 92789 89807 41044 65592 42504  6561 33535 99354 11110  3206 53015
 21141 36827 46585 98259 21047 69842 97856 96557 54005 70172  3854 90092
 55662 55083 96384 59250 14411 14

In [5]:
df.describe()

Unnamed: 0,Age,Seller_Urgency,Original_Price,Listings_in_Region,Local_Demand_Score,Fair_Price
count,300.0,300.0,300.0,300.0,300.0,300.0
mean,4.234,3.01,50671.26,27.203333,5.579333,27950.976667
std,2.228092,1.408253,28985.694942,12.904926,2.666221,21080.911833
min,0.5,1.0,3077.0,5.0,1.0,1000.0
25%,2.2,2.0,26431.0,16.0,3.275,10754.75
50%,4.4,3.0,49927.0,28.0,5.85,23185.5
75%,6.1,4.0,75395.25,39.0,7.8,41050.25
max,8.0,5.0,99826.0,49.0,10.0,90760.0


In [6]:
x=df.drop(['Fair_Price', 'Negotiable'], axis=1)
y_reg=df['Fair_Price']
y_cls=df['Negotiable']

In [7]:
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder, StandardScaler

categorical_col=['Product', 'Brand', 'Model', 'Condition', 'Region']
numerical_col=['Age', 'Seller_Urgency', 'Original_Price', 'Listings_in_Region', 'Local_Demand_Score']

preprocessor=ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), numerical_col),
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_col)
    ]
)

In [8]:
from sklearn.model_selection import train_test_split
x_train_reg, x_test_reg, y_train_reg, y_test_reg=train_test_split(x, y_reg, test_size=0.2, random_state=42)
x_train_cls, x_test_cls, y_train_cls, y_test_cls=train_test_split(x, y_cls, test_size=0.2, random_state=42)

In [21]:
#pipeline
#%pip install xgboost
from sklearn.pipeline import Pipeline
from xgboost import XGBRegressor, XGBClassifier

regressor = Pipeline([
    ('preprocessor', preprocessor),
    ('regressor', XGBRegressor(n_estimators=300, max_depth=5, learning_rate=0.1, random_state=42))
])
classifier = Pipeline([
    ('preprocessor', preprocessor),
    ('classifier', XGBClassifier(n_estimators=100, random_state=42))
])

In [23]:
from sklearn.preprocessing import LabelEncoder

# Encode target labels for classification
le = LabelEncoder()
y_train_cls_enc = le.fit_transform(y_train_cls)
y_test_cls_enc = le.transform(y_test_cls)

regressor.fit(x_train_reg, y_train_reg)
y_pred = regressor.predict(x_test_reg)

classifier.fit(x_train_cls, y_train_cls_enc)
y_pred_cls_enc = classifier.predict(x_test_cls)
y_pred_cls = le.inverse_transform(y_pred_cls_enc)

In [25]:
from sklearn.metrics import mean_squared_error,mean_absolute_error,r2_score
mse=mean_squared_error(y_test_reg, y_pred)
mae=mean_absolute_error(y_test_reg, y_pred)
r2=r2_score(y_test_reg, y_pred)
print(f'Mean Squared Error: {mse}')
print(f'Mean Absolute Error: {mae}')
print(f'R-squared: {r2}')

Mean Squared Error: 6294272.5
Mean Absolute Error: 1831.1314697265625
R-squared: 0.9806357026100159


In [27]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
accuracy=accuracy_score(y_test_cls, y_pred_cls)
precision=precision_score(y_test_cls, y_pred_cls,pos_label='Yes')
recall=recall_score(y_test_cls, y_pred_cls,pos_label='Yes')
f1=f1_score(y_test_cls, y_pred_cls,pos_label='Yes')
print(f'Accuracy: {accuracy}')
print(f'Precision: {precision}')
print(f'Recall: {recall}')
print(f'F1 Score: {f1}')

Accuracy: 1.0
Precision: 1.0
Recall: 1.0
F1 Score: 1.0


In [28]:
import joblib
joblib.dump(regressor, 'smart_price_regressor.pkl')
joblib.dump(classifier, 'smart_price_classifier.pkl')

['smart_price_classifier.pkl']

In [29]:
# Select a sample row from x for prediction
sample_row = x.iloc[[0]]  # You can change index to test other entries

# Reload models
reg_model = joblib.load('smart_price_regressor.pkl')
cls_model = joblib.load('smart_price_classifier.pkl')

print("\n🔍 Sample Test Case (Row 0):")
print("Regressor prediction (Fair Price):", reg_model.predict(sample_row)[0])
print("Classifier prediction (Negotiable):", cls_model.predict(sample_row)[0])



🔍 Sample Test Case (Row 0):
Regressor prediction (Fair Price): 53875.977
Classifier prediction (Negotiable): 1
