# Phone Price Prediction using Machine Learning

This project predicts the price of mobile phones based on their specifications using Machine Learning regression models.


In [1]:
import pandas as pd

# Path of the raw CSV file with the phone listings.
DATA_PATH = '/content/mobile_phone_price.csv'

# Read the dataset into a DataFrame and preview the first five rows.
df = pd.read_csv(DATA_PATH)
df.head()


Unnamed: 0,Brand,Model,Storage,RAM,Screen Size (inches),Camera (MP),Battery Capacity (mAh),Price ($)
0,Apple,iPhone 13 Pro,128 GB,6 GB,6.1,12 + 12 + 12,3095,999
1,Samsung,Galaxy S21 Ultra,256 GB,12 GB,6.8,108 + 10 + 10 + 12,5000,1199
2,OnePlus,9 Pro,128 GB,8 GB,6.7,48 + 50 + 8 + 2,4500,899
3,Xiaomi,Redmi Note 10 Pro,128 GB,6 GB,6.67,64 + 8 + 5 + 2,5020,279
4,Google,Pixel 6,128 GB,8 GB,6.4,50 + 12.2,4614,799


In [2]:
# Print column names, non-null counts and dtypes to see which columns
# are still stored as text and need cleaning.
df.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 407 entries, 0 to 406
Data columns (total 8 columns):
 #   Column                  Non-Null Count  Dtype 
---  ------                  --------------  ----- 
 0   Brand                   407 non-null    object
 1   Model                   407 non-null    object
 2   Storage                 407 non-null    object
 3   RAM                     407 non-null    object
 4   Screen Size (inches)    407 non-null    object
 5   Camera (MP)             407 non-null    object
 6   Battery Capacity (mAh)  407 non-null    int64 
 7   Price ($)               407 non-null    object
dtypes: int64(1), object(7)
memory usage: 25.6+ KB


In [3]:
# Prices are stored as text. Remove the currency symbol and the thousands
# separator, trim surrounding whitespace, then store the column as integers.
price_text = df['Price ($)'].astype(str)
for symbol in ('$', ','):
    price_text = price_text.str.replace(symbol, '', regex=False)

df['Price ($)'] = price_text.str.strip().astype(int)

# Display the resulting dtype as a sanity check.
df['Price ($)'].dtype


dtype('int64')

In [4]:
# Remove extra spaces from column names
df.columns = df.columns.str.strip()

# Storage and RAM are stored as text such as "128 GB": drop the unit, trim
# whitespace and keep the number as an integer.
# Casting to str first (as the other cleaning cells do) keeps this cell
# re-runnable: without it, a second run would call `.str` on a column that
# is already integer-typed and raise AttributeError.
for size_column in ['Storage', 'RAM']:
    df[size_column] = (
        df[size_column]
        .astype(str)
        .str.replace('GB', '', regex=False)
        .str.strip()
        .astype(int)
    )

df[['Storage', 'RAM']].head()


Unnamed: 0,Storage,RAM
0,128,6
1,256,12
2,128,8
3,128,6
4,128,8


In [5]:
# Screen sizes are stored as text. Keep the first run of digits/dots found
# in each entry and convert it to a float.
screen_text = df['Screen Size (inches)'].astype(str)

# expand=False returns the single capture group as a Series.
first_number = screen_text.str.extract(r'([\d\.]+)', expand=False)

df['Screen Size (inches)'] = first_number.astype(float)

# Display the resulting dtype as a sanity check.
df['Screen Size (inches)'].dtype


dtype('float64')

In [6]:
def _largest_number(tokens):
    """Return the largest value among the numeric strings in ``tokens`` as a float."""
    return max(float(token) for token in tokens)


# Camera specs list one number per sensor (e.g. "12 + 12 + 12").
# Collect every number in the text and keep only the largest one.
camera_tokens = df['Camera (MP)'].astype(str).str.findall(r'[\d\.]+')
df['Camera (MP)'] = camera_tokens.apply(_largest_number)

df['Camera (MP)'].head()


Unnamed: 0,Camera (MP)
0,12.0
1,108.0
2,50.0
3,64.0
4,50.0


In [7]:
# Target: the cleaned price column.
y = df['Price ($)']

# Features: every remaining column except the model name and the target.
# Brand is one-hot encoded; drop_first=True omits the first brand's column,
# so that brand is represented by all brand indicators being False.
X = pd.get_dummies(
    df.drop(columns=['Model', 'Price ($)']),
    columns=['Brand'],
    drop_first=True,
)

X.head()


Unnamed: 0,Storage,RAM,Screen Size (inches),Camera (MP),Battery Capacity (mAh),Brand_Asus,Brand_Blackberry,Brand_CAT,Brand_Google,Brand_Huawei,Brand_LG,Brand_Motorola,Brand_Nokia,Brand_OnePlus,Brand_Oppo,Brand_Realme,Brand_Samsung,Brand_Sony,Brand_Vivo,Brand_Xiaomi
0,128,6,6.1,12.0,3095,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
1,256,12,6.8,108.0,5000,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False
2,128,8,6.7,50.0,4500,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False
3,128,6,6.67,64.0,5020,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True
4,128,8,6.4,50.0,4614,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False


In [8]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# split repeatable.
split_parts = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split_parts

# Show the shapes of the training and test feature matrices.
(X_train.shape, X_test.shape)


((325, 20), (82, 20))

In [9]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, r2_score

# Fit a linear regression on the training split
# (fit() returns the estimator itself, so it can be chained).
lr = LinearRegression().fit(X_train, y_train)

# Predict prices for the held-out rows and score them.
y_pred_lr = lr.predict(X_test)
mae_lr = mean_absolute_error(y_test, y_pred_lr)
r2_lr = r2_score(y_test, y_pred_lr)

(mae_lr, r2_lr)


(88.16186073662126, 0.7909960521763157)

In [10]:
# Report the linear regression metrics rounded to two decimals.
print("Linear Regression Results")
for label, value in (
    ("Mean Absolute Error (MAE):", mae_lr),
    ("R2 Score:", r2_lr),
):
    print(label, round(value, 2))


Linear Regression Results
Mean Absolute Error (MAE): 88.16
R2 Score: 0.79


In [11]:
from sklearn.ensemble import RandomForestRegressor

# Fit a random forest on the training split; the fixed random_state makes
# the fitted forest repeatable.
rf = RandomForestRegressor(random_state=42).fit(X_train, y_train)

# Predict prices for the held-out rows and score them.
y_pred_rf = rf.predict(X_test)
mae_rf = mean_absolute_error(y_test, y_pred_rf)
r2_rf = r2_score(y_test, y_pred_rf)

(mae_rf, r2_rf)


(54.76293556793918, 0.8865422966144687)

In [12]:
# Side-by-side report of both models' test metrics, rounded to two decimals.
print("MODEL COMPARISON")
print("-" * 30)

# The leading "\n" on the second heading prints a blank line between models.
for heading, model_mae, model_r2 in (
    ("Linear Regression", mae_lr, r2_lr),
    ("\nRandom Forest Regressor", mae_rf, r2_rf),
):
    print(heading)
    print("MAE:", round(model_mae, 2))
    print("R2 Score:", round(model_r2, 2))


MODEL COMPARISON
------------------------------
Linear Regression
MAE: 88.16
R2 Score: 0.79

Random Forest Regressor
MAE: 54.76
R2 Score: 0.89


In [13]:
# Example input (you can change values later).
# The brand is given by name instead of hand-written one-hot flags:
# get_dummies(..., drop_first=True) dropped the first brand's column, so a
# key such as 'Brand_Apple' is not a model feature and reindex() would
# silently discard it (as it would any misspelled key).
sample_phone = {
    'Brand': 'Apple',
    'Storage': 128,
    'RAM': 8,
    'Screen Size (inches)': 6.5,
    'Camera (MP)': 50,
    'Battery Capacity (mAh)': 4500,
}

sample_brand = sample_phone['Brand']
sample_specs = {name: value for name, value in sample_phone.items() if name != 'Brand'}

# Fail loudly on misspelled or missing specification names instead of
# letting reindex() drop them or fill them with 0.
spec_columns = [col for col in X.columns if not col.startswith('Brand_')]
if set(sample_specs) != set(spec_columns):
    raise ValueError(
        f"Sample specs {sorted(sample_specs)} do not match "
        f"the model's numeric features {sorted(spec_columns)}"
    )

# Convert input to a one-row DataFrame with exactly the training columns,
# in training order; every brand indicator starts at 0.
input_df = pd.DataFrame([sample_specs]).reindex(columns=X.columns, fill_value=0)

# Switch on the indicator for the requested brand, if it has one.
brand_column = f'Brand_{sample_brand}'
if brand_column in input_df.columns:
    input_df.loc[0, brand_column] = 1
elif sample_brand not in set(df['Brand']):
    raise ValueError(f"Unknown brand: {sample_brand!r}")
# Otherwise the brand is the dropped baseline category, which is encoded
# as all brand indicators being 0.

# Predict price
predicted_price = rf.predict(input_df)

# int() truncates the prediction to whole dollars.
print("Predicted Phone Price ($):", int(predicted_price[0]))


Predicted Phone Price ($): 579


In [14]:
import joblib

# File name for the serialized model.
MODEL_PATH = 'random_forest_phone_price_model.pkl'

# Persist the fitted random forest to disk.
joblib.dump(rf, MODEL_PATH)

print("Model saved successfully as 'random_forest_phone_price_model.pkl'")


Model saved successfully as 'random_forest_phone_price_model.pkl'
