# Model Development

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings

warnings.filterwarnings('ignore')

In [2]:
# Load the preprocessed dataset from the working directory.
dataset_path = 'Processed_Dataset.csv'
df = pd.read_csv(dataset_path)

In [3]:
# Work on a copy so that `df` keeps the frame exactly as it was loaded.
data = df.copy()

In [4]:
# Display the working frame (the notebook renders the cell's last expression).
data

Unnamed: 0,brand_name,os,screen_size,4g,5g,main_camera_mp,selfie_camera_mp,int_memory,ram,battery,weight,release_year,days_used,new_price,Used_Price
0,10,0,1.978301,1,0,0.781651,-0.119840,0.584311,0.0,0.033857,-0.421187,1.930266,-2.405987,-0.817247,86.96000
1,10,0,1.978301,1,1,0.781651,2.054621,2.298155,0.0,1.057278,1.303062,1.930266,-1.567541,0.255023,161.49000
2,10,0,1.978301,1,1,0.781651,0.473195,2.298155,0.0,0.977324,1.303062,1.930266,-2.257777,1.111781,233.22875
3,10,0,1.978301,1,1,0.781651,0.473195,0.584311,0.0,2.956205,1.997909,1.930266,-1.482850,0.484934,180.23000
4,10,0,0.321578,1,0,0.781651,0.473195,0.584311,0.0,1.616962,0.582481,1.930266,-1.703048,-0.589593,103.80000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3363,2,3,0.248048,1,0,-0.300204,0.275517,0.584311,0.0,0.105816,0.814096,1.473277,-2.062986,2.388522,233.22875
3364,3,0,0.211283,1,0,0.781651,0.473195,2.298155,0.0,0.817414,0.968507,1.016287,-0.652874,2.351967,233.22875
3365,1,0,1.013220,1,0,0.781651,-0.119840,-0.272611,0.0,0.817414,0.067780,1.930266,-2.092628,-0.964892,69.81000
3366,1,0,1.013220,1,0,0.781651,-0.119840,-0.272611,0.0,0.817414,-0.060896,1.930266,-2.312826,-0.892782,76.07000


## Train-Test-Split

In [5]:
# Separate the predictors from the target by column name rather than by
# position, so the split stays correct even if the column order of the
# CSV changes. `Used_Price` is the value the model learns to predict.
TARGET_COLUMN = 'Used_Price'
X = data.drop(columns=TARGET_COLUMN)
y = data[TARGET_COLUMN]

In [6]:
from sklearn.model_selection import train_test_split

In [7]:
# Hold out 25% of the rows for evaluation; the fixed seed makes the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=42,
)

In [8]:
# Report (rows, columns) of the training and test feature matrices.
train_shape, test_shape = X_train.shape, X_test.shape
print(f"{train_shape}, {test_shape}")

(2526, 14), (842, 14)


In [9]:
from sklearn.linear_model import LinearRegression

In [10]:
# Linear regression estimator created with scikit-learn's default settings.
regressor = LinearRegression()

In [11]:
# Show the (still unfitted) estimator and its parameters.
regressor

In [12]:
# Fit on the training split only; the test split is kept aside for evaluation.
regressor.fit(X_train, y_train)

In [14]:
# Predict the target for the held-out test features.
y_pred = regressor.predict(X_test)

In [15]:
from sklearn.metrics import mean_squared_error, r2_score

In [16]:
# Score the test-set predictions: mean squared error (lower is better)
# and the R^2 coefficient of determination (closer to 1 is better).
mse, r2 = (
    mean_squared_error(y_test, y_pred),
    r2_score(y_test, y_pred),
)

for report_line in (f"Mean Square Error = {mse}", f"R2 Score = {r2}"):
    print(report_line)

Mean Square Error = 176.7420871801805
R2 Score = 0.9527833988797798


In [17]:
import joblib

# Persist the fitted model. The path is defined once so the confirmation
# message can never drift from the file that is actually written.
MODEL_PATH = "regressor_model.pkl"
joblib.dump(regressor, MODEL_PATH)
print(f"Model saved as -> {MODEL_PATH}")

Model saved as -> regressor_model.pkl


In [25]:
# Sanity-check the persisted model on a single row.
# NOTE: the row is taken from the full dataset (position 2), not from X_test,
# so it may have been part of the training split.
sample_df = data.drop('Used_Price', axis=1)
sample = sample_df.iloc[2]
print("Sample features:\n", sample)

# Keep the row as a one-row DataFrame (double brackets) so the column names
# the model was fitted with are passed along; a bare NumPy array would drop
# them and trigger scikit-learn's feature-name warning.
sample_reshaped = sample_df.iloc[[2]]

# Reload the model from disk so the prediction really comes from the saved
# file rather than from the in-memory estimator.
saved_model = joblib.load("regressor_model.pkl")
y_pred_sample = saved_model.predict(sample_reshaped)
print(f"Predicted price: {y_pred_sample[0]:.2f}")


Sample features:
 brand_name          10.000000
os                   0.000000
screen_size          1.978301
4g                   1.000000
5g                   1.000000
main_camera_mp       0.781651
selfie_camera_mp     0.473195
int_memory           2.298155
ram                  0.000000
battery              0.977324
weight               1.303062
release_year         1.930266
days_used           -2.257777
new_price            1.111781
Name: 2, dtype: float64
Predicted price: 193.98
