Loading the Dataset

In [120]:
import pandas as pd

In [121]:
df = pd.read_csv("phone_data1.csv", encoding='latin-1')
df.head()

Unnamed: 0,Phone Name,Model,Color,Storage,Price
0,Samsung,Galaxy A15 5G,Light Blue,128GB,17999
1,Samsung,Galaxy A15 5G,Blue,128GB,17999
2,Samsung,Galaxy A15 5G,Blue Black,128GB,17999
3,Oneplus,12R 5G,Iron Gray,128GB,39999
4,Oneplus,12R 5G,Cool Blue,128GB,39999


In [122]:
df.info()
print("\n data size:",df.size)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1432 entries, 0 to 1431
Data columns (total 5 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   Phone Name  1432 non-null   object
 1   Model       1432 non-null   object
 2   Color       1432 non-null   object
 3   Storage     1432 non-null   object
 4   Price       1432 non-null   int64 
dtypes: int64(1), object(4)
memory usage: 56.1+ KB

 data size: 7160


In [123]:
print(df.isna().sum())

Phone Name    0
Model         0
Color         0
Storage       0
Price         0
dtype: int64


Label Encoding

In [124]:
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

In [125]:
df['Model'] = df['Model'].astype(str)
df['Storage'] = df['Storage'].astype(str)


le = LabelEncoder()
df['Phone Name'] = le.fit_transform(df['Phone Name'])
df['Model'] = le.fit_transform(df['Model'])
df['Color'] = le.fit_transform(df['Color'])
df['Storage'] = le.fit_transform(df['Storage'])


In [126]:
df.info()
df.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1432 entries, 0 to 1431
Data columns (total 5 columns):
 #   Column      Non-Null Count  Dtype
---  ------      --------------  -----
 0   Phone Name  1432 non-null   int32
 1   Model       1432 non-null   int32
 2   Color       1432 non-null   int32
 3   Storage     1432 non-null   int32
 4   Price       1432 non-null   int64
dtypes: int32(4), int64(1)
memory usage: 33.7 KB


Unnamed: 0,Phone Name,Model,Color,Storage,Price
0,18,71,94,8,17999
1,18,71,20,8,17999
2,18,71,21,8,17999
3,11,21,87,8,39999
4,11,21,36,8,39999


Training the Model

In [127]:
X = df.drop(['Price'], axis=1)
y = df[['Price']]

In [128]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1)

In [129]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import AdaBoostRegressor
import xgboost as xgb
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import mean_squared_error, r2_score


Random Forest Model

In [130]:
# Train the Random Forest Regressor model
rf_model = RandomForestRegressor(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)

# Make predictions
rf_predictions = rf_model.predict(X_test)

# Evaluate the model
rf_mse = mean_squared_error(y_test, rf_predictions)
rf_r2 = r2_score(y_test, rf_predictions)
print(f'Random Forest Regressor Model Performance:')
print(f'Mean Squared Error: {rf_mse}')
print(f'R^2 Score: {rf_r2}')


Random Forest Regressor Model Performance:
Mean Squared Error: 28520336.451682866
R^2 Score: 0.9703658834093642


  return fit_method(estimator, *args, **kwargs)


Naive Bayes Model

In [131]:
# Train the Naive Bayes Classifier model
nb_model = GaussianNB()
nb_model.fit(X_train, y_train)

# Make predictions
nb_predictions = nb_model.predict(X_test)

# Evaluate the model
nb_accuracy = nb_model.score(X_test, y_test)
print(f'Naive Bayes Classifier Model Performance:')
print(f'Accuracy: {nb_accuracy}')

Naive Bayes Classifier Model Performance:
Accuracy: 0.4634146341463415


  y = column_or_1d(y, warn=True)


AdaBoost Model

In [132]:
# Train the AdaBoost Regressor model
ab_model = AdaBoostRegressor(random_state=42)
ab_model.fit(X_train, y_train)

# Make predictions
ab_predictions = ab_model.predict(X_test)

# Evaluate the model
ab_mse = mean_squared_error(y_test, ab_predictions)
ab_r2 = r2_score(y_test, ab_predictions)
print(f'AdaBoost Regressor Model Performance:')
print(f'Mean Squared Error: {ab_mse}')
print(f'R^2 Score: {ab_r2}')

AdaBoost Regressor Model Performance:
Mean Squared Error: 121649936.03832954
R^2 Score: 0.8735993737692909


  y = column_or_1d(y, warn=True)


XGBoost Model

In [133]:
# Train the XGBoost model
xgb_model = xgb.XGBRegressor(objective='reg:squarederror', max_depth=6, learning_rate=0.1, n_estimators=1000, n_jobs=-1)
xgb_model.fit(X_train, y_train)

# Make predictions
xgb_predictions = xgb_model.predict(X_test)

# Evaluate the model
xgb_mse = mean_squared_error(y_test, xgb_predictions)
xgb_r2 = r2_score(y_test, xgb_predictions)
print(f'XGBoost Model Performance:')
print(f'Mean Squared Error: {xgb_mse}')
print(f'R^2 Score: {xgb_r2}')

XGBoost Model Performance:
Mean Squared Error: 15267166.61901627
R^2 Score: 0.9841366178704426


In [134]:
import pickle
with open('predict_model.pkl', 'wb') as f:
    pickle.dump(xgb_model, f)

In [135]:
with open('predict_model.pkl', 'rb') as f:
    model = pickle.load(f)


In [136]:
df1 = pd.DataFrame({'Phone Name': ['samsung'], 'Model': ['galaxy M31'], 'Color': ['blue'], 'Storage': ['128gb']})
df1

Unnamed: 0,Phone Name,Model,Color,Storage
0,samsung,galaxy M31,blue,128gb


In [137]:
df1['Model'] = df1['Model'].astype(str)
df1['Storage'] = df1['Storage'].astype(str)


le = LabelEncoder()
df1['Phone Name'] = le.fit_transform(df1['Phone Name'])
df1['Model'] = le.fit_transform(df1['Model'])
df1['Color'] = le.fit_transform(df1['Color'])
df1['Storage'] = le.fit_transform(df1['Storage'])


In [138]:
predicted_price = model.predict(df1)
print('Predicted Price:', predicted_price[0])

Predicted Price: 52960.316


In [139]:
import pickle

# Load the saved model
with open('predict_model.pkl', 'rb') as file:
    model = pickle.load(file)

# Prepare the input data for prediction
new_data = pd.DataFrame({
    'Phone Name': [18],
    'Model': [71],
    'Color': [94],
    'Storage': [8]
})

# Make predictions using the loaded model
predictions = model.predict(new_data)

# Print the predictions
print(predictions)


[17999.154]
