In [1]:
# 1. Import Libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import joblib

In [None]:
# 2. Load Dataset
# Read the raw training table; the path is relative to the notebook's
# current working directory.
train_df = pd.read_csv(
    filepath_or_buffer='Train.csv',
)

In [None]:
# 3. Data Preprocessing
# Impute missing numeric *feature* values with the column mean.
# The target column ('Selling_Price') is deliberately excluded: rows without a
# label are dropped instead of being assigned a fabricated average price that
# the model would then be trained and evaluated on.
# NOTE(review): the means are computed on the full table, i.e. before the
# train/test split done later in the notebook, so held-out rows still
# influence the imputed values. Consider imputing after the split.
numeric_cols = train_df.select_dtypes(include=[np.number]).columns

# .copy() gives an independent frame so the column assignment below cannot
# trigger a chained-assignment warning.
train_df = train_df.dropna(subset=['Selling_Price']).copy()

# errors='ignore' keeps this safe if the target is not stored as a numeric dtype.
feature_numeric_cols = numeric_cols.drop('Selling_Price', errors='ignore')
train_df[feature_numeric_cols] = train_df[feature_numeric_cols].fillna(
    train_df[feature_numeric_cols].mean()
)

In [None]:
# One-hot encode the two categorical columns. drop_first=True omits the first
# level of each column so the indicator columns are not perfectly collinear.
# NOTE(review): the result overwrites train_df and no longer contains the
# original columns, so re-running this cell without reloading the data is
# expected to fail — confirm before relying on out-of-order execution.
train_df = pd.get_dummies(
    data=train_df,
    columns=['Product_Brand', 'Item_Category'],
    drop_first=True,
)

In [None]:
# 4. Split Data
# Target is the 'Selling_Price' column; every other column is a feature.
y = train_df['Selling_Price']
X = train_df.drop(columns='Selling_Price')

# Hold out 20% of the rows for evaluation; the fixed seed makes the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# 5. Feature Scaling
# Learn the scaling statistics from the training features only, then apply
# the same fitted transform to both the training and the test features.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [None]:
# 6. Train Model (Random Forest)
# 100 trees with a fixed seed so repeated runs build the same forest.
model = RandomForestRegressor(
    random_state=42,
    n_estimators=100,
)
# Fit on the scaled training features and the training targets.
model.fit(X=X_train_scaled, y=y_train)

In [None]:
# 7. Evaluate Model
# Predict on the scaled hold-out features and compare against y_test.
y_pred = model.predict(X_test_scaled)
print("MAE:", mean_absolute_error(y_test, y_pred))
# RMSE is the square root of the mean squared error.
print("RMSE:", np.sqrt(mean_squared_error(y_test, y_pred)))
# The label previously printed a mis-encoded superscript-two character.
print("R² Score:", r2_score(y_test, y_pred))

In [None]:
# 8. Save Model and Scaler
# The estimator and the scaler are written to two separate files in the
# current working directory; both are needed to score new data the same way.
# NOTE(review): despite the "pipeline" file name, only `model` is stored in
# that file — the scaler lives in scaler.pkl.
joblib.dump(value=model, filename='dynamic_pricing_pipeline.pkl')
joblib.dump(value=scaler, filename='scaler.pkl')