In [1]:

# importing the required libraries

import pandas as pd
import numpy as np

import warnings
warnings.filterwarnings('ignore')

# For transforming the Columns (with arguments)
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

# Using the OneHotEncoder as only specific columns need to be encoded
from sklearn.preprocessing import OneHotEncoder, LabelEncoder

# For Scaling the Data
from sklearn.preprocessing import StandardScaler

# Evaluation metrics for Linear Regression Algorithms
from sklearn.metrics import r2_score,mean_squared_error

#Importing the models required
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Ridge
from sklearn.linear_model import Lasso
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.svm import SVR
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import VotingRegressor
from sklearn.ensemble import StackingRegressor
from sklearn.ensemble import AdaBoostRegressor, GradientBoostingRegressor
from sklearn.compose import TransformedTargetRegressor
import xgboost as xgb

In [2]:
# Reading in the data

mobile_df = pd.read_csv(r"mobile_df.csv")
mobile_df

Unnamed: 0,Company,model_name,color_group,Stars,Num_Ratings,Num_Reviews,Ram,Storage,Expandable,processor_category,display_inches,Num_camera,battery_mah,Price
0,SAMSUNG,Galaxy F13,Blue,4.4,126250,7344,4 GB RAM,64 GB ROM,Expandable Upto 1 TB,Exynos processor,16.76,3,6000,9699
1,POCO,C50,Green,4.4,3204,151,2 GB RAM,32 GB ROM,Expandable Upto 512 GB,Mediatek processor,16.56,1,5000,5749
2,POCO,C50,Blue,4.4,3204,151,2 GB RAM,32 GB ROM,Expandable Upto 512 GB,Mediatek processor,16.56,1,5000,5749
3,REDMI,10,Green,4.3,193536,12428,4 GB RAM,64 GB ROM,Expandable Upto 1 TB,Qualcomm processor,17.02,2,6000,9299
4,REDMI,10,Blue,4.3,193536,12428,4 GB RAM,64 GB ROM,Expandable Upto 1 TB,Qualcomm processor,17.02,2,6000,9299
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
612,SAMSUNG,Galaxy S22 Plus 5G,Black,4.5,2427,270,8 GB RAM,128 GB ROM,Expandable Upto 1 TB,Qualcomm processor,16.76,3,4500,59999
613,Infinix,Kall Z5,Blue,3.0,1760,132,3 GB RAM,16 GB ROM,Expandable Upto 1 TB,Qualcomm processor,13.84,1,3000,4699
614,SAMSUNG,Galaxy A04e,Blue,4.3,19,2,4 GB RAM,128 GB ROM,Expandable Upto 1 TB,Mediatek processor,16.51,2,5000,11499
615,Infinix,Hot 11 2022,Green,4.2,17231,1402,4 GB RAM,64 GB ROM,Expandable Upto 1 TB,Unisoc processor,17.02,2,5000,9999


In [3]:
# Checking the Datatypes of the columns
mobile_df.dtypes

Company                object
model_name             object
color_group            object
Stars                 float64
Num_Ratings             int64
Num_Reviews             int64
Ram                    object
Storage                object
Expandable             object
processor_category     object
display_inches        float64
Num_camera              int64
battery_mah             int64
Price                   int64
dtype: object

In [4]:
# Replacing the repeated typo errors
mobile_df.Company.replace({"realme":"Realme","Redmi":"REDMI","Motorola":"MOTOROLA","oppo":"OPPO"},inplace=True)

# Dropping the unwanted columns
mobile_df.drop(["model_name",'Stars','Num_Ratings','Num_Reviews'],axis=1,inplace=True)

In [5]:
pd.set_option('display.max_rows',50)

In [6]:
mobile_df

Unnamed: 0,Company,color_group,Ram,Storage,Expandable,processor_category,display_inches,Num_camera,battery_mah,Price
0,SAMSUNG,Blue,4 GB RAM,64 GB ROM,Expandable Upto 1 TB,Exynos processor,16.76,3,6000,9699
1,POCO,Green,2 GB RAM,32 GB ROM,Expandable Upto 512 GB,Mediatek processor,16.56,1,5000,5749
2,POCO,Blue,2 GB RAM,32 GB ROM,Expandable Upto 512 GB,Mediatek processor,16.56,1,5000,5749
3,REDMI,Green,4 GB RAM,64 GB ROM,Expandable Upto 1 TB,Qualcomm processor,17.02,2,6000,9299
4,REDMI,Blue,4 GB RAM,64 GB ROM,Expandable Upto 1 TB,Qualcomm processor,17.02,2,6000,9299
...,...,...,...,...,...,...,...,...,...,...
612,SAMSUNG,Black,8 GB RAM,128 GB ROM,Expandable Upto 1 TB,Qualcomm processor,16.76,3,4500,59999
613,Infinix,Blue,3 GB RAM,16 GB ROM,Expandable Upto 1 TB,Qualcomm processor,13.84,1,3000,4699
614,SAMSUNG,Blue,4 GB RAM,128 GB ROM,Expandable Upto 1 TB,Mediatek processor,16.51,2,5000,11499
615,Infinix,Green,4 GB RAM,64 GB ROM,Expandable Upto 1 TB,Unisoc processor,17.02,2,5000,9999


In [7]:
# Define a regular expression pattern to extract only the digits from the string
pattern = r'(\d+)'

# Loop through the columns that need to be extracted and converted
for col in ['Ram', 'Storage', 'Expandable']:
    # Extract the digits from the string using regex
    mobile_df[col] = mobile_df[col].str.extract(pattern, expand=False)
    
    # Convert the extracted digits to integer values
    mobile_df[col] = mobile_df[col].astype(int)

In [8]:
mobile_df

Unnamed: 0,Company,color_group,Ram,Storage,Expandable,processor_category,display_inches,Num_camera,battery_mah,Price
0,SAMSUNG,Blue,4,64,1,Exynos processor,16.76,3,6000,9699
1,POCO,Green,2,32,512,Mediatek processor,16.56,1,5000,5749
2,POCO,Blue,2,32,512,Mediatek processor,16.56,1,5000,5749
3,REDMI,Green,4,64,1,Qualcomm processor,17.02,2,6000,9299
4,REDMI,Blue,4,64,1,Qualcomm processor,17.02,2,6000,9299
...,...,...,...,...,...,...,...,...,...,...
612,SAMSUNG,Black,8,128,1,Qualcomm processor,16.76,3,4500,59999
613,Infinix,Blue,3,16,1,Qualcomm processor,13.84,1,3000,4699
614,SAMSUNG,Blue,4,128,1,Mediatek processor,16.51,2,5000,11499
615,Infinix,Green,4,64,1,Unisoc processor,17.02,2,5000,9999


**Encoding**

In [9]:
mobile_df.Company.replace({'ASUS': 0,
                        'Google': 1, 
                        'Infinix': 2, 
                        'IQOO': 3, 
                        'LAVA': 4, 
                        'Micromax': 5, 
                        'Mi': 6, 
                        'MOTOROLA': 7, 
                        'Nokia': 8, 
                        'Nothing': 9, 
                        'OPPO': 10, 
                        'OnePlus': 11, 
                        'POCO': 12, 
                        'Realme': 13, 
                        'REDMI': 14, 
                        'SAMSUNG': 15, 
                        'Tecno': 16, 
                        'itel': 17, 
                        'vivo': 18, 
                        'Xiaomi': 19}, inplace=True)

In [10]:
mobile_df.color_group.replace({'Black': 0,
'Blue': 1,
'Brown': 2,
'Green': 3,
'Grey': 4,
'Orange': 5,
'Other': 6,
'Red': 7,
'Silver': 8,
'White': 9,
'Yellow': 10},inplace=True)

In [11]:
mobile_df.processor_category.replace({'Exynos processor': 0,
'Google Tensor processor': 1,
'Helio processor': 2,
'Mediatek processor': 3,
'Other processor': 4,
'Qualcomm processor': 5,
'Unisoc processor': 6},inplace=True)

In [12]:
mobile_df

Unnamed: 0,Company,color_group,Ram,Storage,Expandable,processor_category,display_inches,Num_camera,battery_mah,Price
0,15,1,4,64,1,0,16.76,3,6000,9699
1,12,3,2,32,512,3,16.56,1,5000,5749
2,12,1,2,32,512,3,16.56,1,5000,5749
3,14,3,4,64,1,5,17.02,2,6000,9299
4,14,1,4,64,1,5,17.02,2,6000,9299
...,...,...,...,...,...,...,...,...,...,...
612,15,0,8,128,1,5,16.76,3,4500,59999
613,2,1,3,16,1,5,13.84,1,3000,4699
614,15,1,4,128,1,3,16.51,2,5000,11499
615,2,3,4,64,1,6,17.02,2,5000,9999


In [13]:
mobile_df.dtypes

Company                 int64
color_group             int64
Ram                     int32
Storage                 int32
Expandable              int32
processor_category      int64
display_inches        float64
Num_camera              int64
battery_mah             int64
Price                   int64
dtype: object

In [14]:
X = mobile_df.drop(columns=['Price'])
Y = mobile_df['Price']

In [15]:
print(X.shape)
print(Y.shape)

(617, 9)
(617,)


In [16]:
X

Unnamed: 0,Company,color_group,Ram,Storage,Expandable,processor_category,display_inches,Num_camera,battery_mah
0,15,1,4,64,1,0,16.76,3,6000
1,12,3,2,32,512,3,16.56,1,5000
2,12,1,2,32,512,3,16.56,1,5000
3,14,3,4,64,1,5,17.02,2,6000
4,14,1,4,64,1,5,17.02,2,6000
...,...,...,...,...,...,...,...,...,...
612,15,0,8,128,1,5,16.76,3,4500
613,2,1,3,16,1,5,13.84,1,3000
614,15,1,4,128,1,3,16.51,2,5000
615,2,3,4,64,1,6,17.02,2,5000


In [17]:
X.dtypes

Company                 int64
color_group             int64
Ram                     int32
Storage                 int32
Expandable              int32
processor_category      int64
display_inches        float64
Num_camera              int64
battery_mah             int64
dtype: object

In [18]:
Y = Y.astype(int)

In [19]:
# Splitting the data using train-test split

from sklearn.model_selection import train_test_split

X_train,X_test,Y_train,Y_test = train_test_split(X,Y,test_size=0.10,random_state=10)

In [20]:
print(X_train.shape)
print(X_test.shape)
print(Y_train.shape)
print(Y_test.shape)

(555, 9)
(62, 9)
(555,)
(62,)


### **Training and evaluating different Machine Learning models**

------------

### **Basic Linear regression model**

In [21]:
# Define the pipeline with linear regression step
pipe = Pipeline([
    ('linear_regression', LinearRegression())
])

# Fit the pipeline on the training data
pipe.fit(X_train, Y_train)

# Predict on the test data
y_pred = pipe.predict(X_test)

# Evaluate the model
r2 = r2_score(Y_test, y_pred)
print('R-squared: {:.2f}%'.format(r2*100))

rmse = np.sqrt(mean_squared_error(Y_test, y_pred))
print('RMSE: {:.2f}'.format(rmse))

adjusted_r_squared = 1 - (1 - r2) * (len(Y_test) - 1) / (len(Y_test) - X_test.shape[1] - 1)
print('Adjusted R-squared: {:.2f}%'.format(adjusted_r_squared*100))


R-squared: 54.49%
RMSE: 7786.36
Adjusted R-squared: 46.61%


### **Ridge Regression Model**

In [22]:
# Define the pipeline with linear regression step
pipe = Pipeline([
    ('linear_regression', Ridge())
])

# Fit the pipeline on the training data
pipe.fit(X_train, Y_train)

# Predict on the test data
y_pred = pipe.predict(X_test)

# Evaluate the model
r2 = r2_score(Y_test, y_pred)
print('R-squared: {:.2f}%'.format(r2*100))

rmse = np.sqrt(mean_squared_error(Y_test, y_pred))
print('RMSE: {:.2f}'.format(rmse))

adjusted_r_squared = 1 - (1 - r2) * (len(Y_test) - 1) / (len(Y_test) - X_test.shape[1] - 1)
print('Adjusted R-squared: {:.2f}%'.format(adjusted_r_squared*100))


R-squared: 54.48%
RMSE: 7786.78
Adjusted R-squared: 46.61%


### **Lasso Regression Model**

In [23]:
# Define the pipeline with linear regression step
pipe = Pipeline([
    ('linear_regression', Lasso())
])

# Fit the pipeline on the training data
pipe.fit(X_train, Y_train)

# Predict on the test data
y_pred = pipe.predict(X_test)

# Evaluate the model
r2 = r2_score(Y_test, y_pred)
print('R-squared: {:.2f}%'.format(r2*100))

rmse = np.sqrt(mean_squared_error(Y_test, y_pred))
print('RMSE: {:.2f}'.format(rmse))

adjusted_r_squared = 1 - (1 - r2) * (len(Y_test) - 1) / (len(Y_test) - X_test.shape[1] - 1)
print('Adjusted R-squared: {:.2f}%'.format(adjusted_r_squared*100))


R-squared: 54.49%
RMSE: 7786.28
Adjusted R-squared: 46.61%


### **Ada-Boost Regressor**

In [24]:
# Define the pipeline with AdaBoost regressor
pipe = Pipeline([
    ('linear_regression', AdaBoostRegressor(n_estimators=50))
])

# Fit the pipeline on the training data
pipe.fit(X_train, Y_train)

# Predict on the test data
y_pred = pipe.predict(X_test)

# Evaluate the model
r2 = r2_score(Y_test, y_pred)
print('R-squared: {:.2f}%'.format(r2*100))

rmse = np.sqrt(mean_squared_error(Y_test, y_pred))
print('RMSE: {:.2f}'.format(rmse))

adjusted_r_squared = 1 - (1 - r2) * (len(Y_test) - 1) / (len(Y_test) - X_test.shape[1] - 1)
print('Adjusted R-squared: {:.2f}%'.format(adjusted_r_squared*100))


R-squared: 81.12%
RMSE: 5015.09
Adjusted R-squared: 77.85%


### **K - Nearest Neioghbours Regressor model**

In [25]:
# Define the pipeline with linear regression step
pipe = Pipeline([
    ('KNN_regression', KNeighborsRegressor(n_neighbors=3,metric="euclidean"))
])

# Fit the pipeline on the training data
pipe.fit(X_train, Y_train)

# Predict on the test data
y_pred = pipe.predict(X_test)

# Evaluate the model
r2 = r2_score(Y_test, y_pred)
print('R-squared: {:.2f}%'.format(r2*100))

rmse = np.sqrt(mean_squared_error(Y_test, y_pred))
print('RMSE: {:.2f}'.format(rmse))

adjusted_r_squared = 1 - (1 - r2) * (len(Y_test) - 1) / (len(Y_test) - X_test.shape[1] - 1)
print('Adjusted R-squared: {:.2f}%'.format(adjusted_r_squared*100))


R-squared: 78.47%
RMSE: 5355.09
Adjusted R-squared: 74.75%


### **Decision Tree Regressor model**

In [26]:
# Define the pipeline with linear regression step
pipe = Pipeline([
    ('Decision Tree Regressor', DecisionTreeRegressor(max_depth=8))
])

# Fit the pipeline on the training data
pipe.fit(X_train, Y_train)

# Predict on the test data
y_pred = pipe.predict(X_test)

# Evaluate the model
r2 = r2_score(Y_test, y_pred)
print('R-squared: {:.2f}%'.format(r2*100))

rmse = np.sqrt(mean_squared_error(Y_test, y_pred))
print('RMSE: {:.2f}'.format(rmse))

adjusted_r_squared = 1 - (1 - r2) * (len(Y_test) - 1) / (len(Y_test) - X_test.shape[1] - 1)
print('Adjusted R-squared: {:.2f}%'.format(adjusted_r_squared*100))


R-squared: 85.39%
RMSE: 4412.10
Adjusted R-squared: 82.86%


### **Gradient Boosting Regressor**

In [27]:
# Define the pipeline with linear regression step
pipe = Pipeline([
    ('Gradient Boosting Regressor', GradientBoostingRegressor(n_estimators=100, learning_rate=0.1, max_depth=3, random_state=0, loss='squared_error'))
])

# Fit the pipeline on the training data
pipe.fit(X_train, Y_train)

# Predict on the test data
y_pred = pipe.predict(X_test)

# Evaluate the model
r2 = r2_score(Y_test, y_pred)
print('R-squared: {:.2f}%'.format(r2*100))

rmse = np.sqrt(mean_squared_error(Y_test, y_pred))
print('RMSE: {:.2f}'.format(rmse))

adjusted_r_squared = 1 - (1 - r2) * (len(Y_test) - 1) / (len(Y_test) - X_test.shape[1] - 1)
print('Adjusted R-squared: {:.2f}%'.format(adjusted_r_squared*100))

R-squared: 91.86%
RMSE: 3293.90
Adjusted R-squared: 90.45%


### **XG-Boost Regressor**

In [28]:
# Define the pipeline with linear regression step
pipe = Pipeline([
    ('XG-Boost Regressor', xgb.XGBRegressor(n_estimators=100, learning_rate=0.1, max_depth=3, random_state=0, objective='reg:squarederror')
)
])

# Fit the pipeline on the training data
pipe.fit(X_train, Y_train)

# Predict on the test data
y_pred = pipe.predict(X_test)

# Evaluate the model
r2 = r2_score(Y_test, y_pred)
print('R-squared: {:.2f}%'.format(r2*100))

rmse = np.sqrt(mean_squared_error(Y_test, y_pred))
print('RMSE: {:.2f}'.format(rmse))

adjusted_r_squared = 1 - (1 - r2) * (len(Y_test) - 1) / (len(Y_test) - X_test.shape[1] - 1)
print('Adjusted R-squared: {:.2f}%'.format(adjusted_r_squared*100))

R-squared: 90.74%
RMSE: 3511.83
Adjusted R-squared: 89.14%


### **SVM Regressor model**

In [29]:
SVR(kernel='linear',C=5,gamma=0.001)


# Define the pipeline with linear regression step
pipe = Pipeline([
    ('SVM Regressor', SVR(kernel='linear',C=5,gamma=0.001)
)
])

# Fit the pipeline on the training data
pipe.fit(X_train, Y_train)

# Predict on the test data
y_pred = pipe.predict(X_test)

# Evaluate the model
r2 = r2_score(Y_test, y_pred)
print('R-squared: {:.2f}%'.format(r2*100))

rmse = np.sqrt(mean_squared_error(Y_test, y_pred))
print('RMSE: {:.2f}'.format(rmse))

adjusted_r_squared = 1 - (1 - r2) * (len(Y_test) - 1) / (len(Y_test) - X_test.shape[1] - 1)
print('Adjusted R-squared: {:.2f}%'.format(adjusted_r_squared*100))

R-squared: 32.91%
RMSE: 9453.79
Adjusted R-squared: 21.30%


### **Voting Regressor**

In [30]:

# define the models to use in the voting regressor
models = [
    ('knn', KNeighborsRegressor(n_neighbors=5)),
    ('dt', DecisionTreeRegressor(max_depth=5)),
    ('rf', RandomForestRegressor(n_estimators=100, random_state=10,
                                  max_samples=0.7, max_features=0.6, max_depth=14))]

# create the voting regressor
vr = VotingRegressor(models)

# create the pipeline
pipe = Pipeline([
    ('vr', vr)
])

# fit the pipeline to the training data
pipe.fit(X_train, Y_train)

# predict on the test data
y_pred = pipe.predict(X_test)

# evaluate the model
r2 = r2_score(Y_test, y_pred)
print('R-squared: {:.2f}%'.format(r2*100))

rmse = np.sqrt(mean_squared_error(Y_test, y_pred))
print('RMSE: {:.2f}'.format(rmse))

adjusted_r_squared = 1 - (1 - r2) * (len(Y_test) - 1) / (len(Y_test) - X_test.shape[1] - 1)
print('Adjusted R-squared: {:.2f}%'.format(adjusted_r_squared*100))

R-squared: 75.78%
RMSE: 5679.71
Adjusted R-squared: 71.59%


### **Stacking Regressor**

In [31]:
# define the steps for the pipeline

# define the base regressors
estimators = [
    ('svr', SVR(kernel='rbf', C=5, gamma=0.001)),
    ('lr', LinearRegression()),
    ('rf', RandomForestRegressor(n_estimators=100, random_state=10,
                               max_samples=0.7, max_features=0.6, max_depth=14)),
    ('ridge', Ridge(alpha=0.5)),
    ('dt', DecisionTreeRegressor(max_depth=5))
]

# define the meta regressor
meta_regressor = LinearRegression()

# create the stacking regressor
sr = StackingRegressor(
    estimators=estimators,
    final_estimator=meta_regressor
)

# create the pipeline
pipe = Pipeline([
    ('sr', sr)
])

# fit the pipeline to the training data
pipe.fit(X_train, Y_train)

# predict on the test data
y_pred = pipe.predict(X_test)

# evaluate the model
r2 = r2_score(Y_test, y_pred)
print('R-squared: {:.2f}%'.format(r2*100))

rmse = np.sqrt(mean_squared_error(Y_test, y_pred))
print('RMSE: {:.2f}'.format(rmse))

adjusted_r_squared = 1 - (1 - r2) * (len(Y_test) - 1) / (len(Y_test) - X_test.shape[1] - 1)
print('Adjusted R-squared: {:.2f}%'.format(adjusted_r_squared*100))

R-squared: 88.98%
RMSE: 3830.94
Adjusted R-squared: 87.08%


### **Random Forest Regressor**

In [32]:
# define the steps for the pipeline

step1 = RandomForestRegressor(n_estimators=200, random_state=10,
                               max_samples=0.9, max_features=0.6, max_depth=14)

# create the pipeline
pipe = Pipeline([
    ('step1', step1)
])

# fit the pipeline to the training data
pipe.fit(X_train, Y_train)

# predict on the test data
y_pred = pipe.predict(X_test)

# evaluate the model
r2 = r2_score(Y_test, y_pred)
print('R-squared: {:.2f}%'.format(r2*100))

rmse = np.sqrt(mean_squared_error(Y_test, y_pred))
print('RMSE: {:.2f}'.format(rmse))

adjusted_r_squared = 1 - (1 - r2) * (len(Y_test) - 1) / (len(Y_test) - X_test.shape[1] - 1)
print('Adjusted R-squared: {:.2f}%'.format(adjusted_r_squared*100))

R-squared: 92.35%
RMSE: 3192.31
Adjusted R-squared: 91.03%


**Tabulating the Results**

In [33]:
import pandas as pd

data = {'Model': ['Basic Linear Regression', 'Ridge Regression', 'Lasso Regression', 'Ada-Boost Regressor', 
                  'K-Nearest Neighbors Regressor', 'Decision Tree Regressor', 'Gradient Boosting Regressor', 
                  'XG-Boost Regressor', 'SVM Regressor', 'Voting Regressor', 'Stacking Regressor', 'Random Forest Regressor'],
        'R-squared': ['54.49%', '54.48%', '54.49%', '81.55%', '78.47%', '90.04%', '91.86%', '90.74%', 
                      '32.91%', '75.78%', '88.72%', '92.35%'],
        'RMSE': ['7786.36', '7786.78', '7786.28', '4957.09', '5355.09', '3641.98', '3293.90', '3511.83', 
                 '9453.79', '5679.71', '3876.12', '3192.31'],
        'Adjusted R-squared': ['46.61%', '46.61%', '46.61%', '78.36%', '74.75%', '88.32%', '90.45%', 
                               '89.14%', '21.30%', '71.59%', '86.77%', '91.03%']
       }

results_df = pd.DataFrame(data)

In [34]:
results_df

Unnamed: 0,Model,R-squared,RMSE,Adjusted R-squared
0,Basic Linear Regression,54.49%,7786.36,46.61%
1,Ridge Regression,54.48%,7786.78,46.61%
2,Lasso Regression,54.49%,7786.28,46.61%
3,Ada-Boost Regressor,81.55%,4957.09,78.36%
4,K-Nearest Neighbors Regressor,78.47%,5355.09,74.75%
5,Decision Tree Regressor,90.04%,3641.98,88.32%
6,Gradient Boosting Regressor,91.86%,3293.9,90.45%
7,XG-Boost Regressor,90.74%,3511.83,89.14%
8,SVM Regressor,32.91%,9453.79,21.30%
9,Voting Regressor,75.78%,5679.71,71.59%


**From the `Results dataframe` we can see that the best model that we got is the *Random Forest* with the accuracy of `92.35%`. So we conclude with it as the best model and export the model object and the dtaframe**

In [35]:
mobile_df

Unnamed: 0,Company,color_group,Ram,Storage,Expandable,processor_category,display_inches,Num_camera,battery_mah,Price
0,15,1,4,64,1,0,16.76,3,6000,9699
1,12,3,2,32,512,3,16.56,1,5000,5749
2,12,1,2,32,512,3,16.56,1,5000,5749
3,14,3,4,64,1,5,17.02,2,6000,9299
4,14,1,4,64,1,5,17.02,2,6000,9299
...,...,...,...,...,...,...,...,...,...,...
612,15,0,8,128,1,5,16.76,3,4500,59999
613,2,1,3,16,1,5,13.84,1,3000,4699
614,15,1,4,128,1,3,16.51,2,5000,11499
615,2,3,4,64,1,6,17.02,2,5000,9999


In [36]:
mobile_df.dtypes

Company                 int64
color_group             int64
Ram                     int32
Storage                 int32
Expandable              int32
processor_category      int64
display_inches        float64
Num_camera              int64
battery_mah             int64
Price                   int64
dtype: object

In [37]:
import pickle

pickle.dump(mobile_df,open('mobile_df.pkl','wb'))
pickle.dump(pipe,open('pipe.pkl','wb'))

**Kindly ignore this part --- this was just Debugging**

In [38]:
data = [[15, 1, 4, 64, 1, 0, 16.76, 3, 6000]]

data_array = np.array(data)


In [39]:
print(data)

[[15, 1, 4, 64, 1, 0, 16.76, 3, 6000]]


In [40]:
pipe.predict(data)

array([10029.52305556])

In [41]:
"""

query = np.array([15, 1, 4, 64, 1, 0, 16.76, 3, 6000])

query = query.reshape(1,9)

print("The predicted price of this configuration is " + str(int(np.exp(pipe.predict(query)[0]))),"Rs.")

"""

'\n\nquery = np.array([15, 1, 4, 64, 1, 0, 16.76, 3, 6000])\n\nquery = query.reshape(1,9)\n\nprint("The predicted price of this configuration is " + str(int(np.exp(pipe.predict(query)[0]))),"Rs.")\n\n'

In [42]:
query = np.array([15, 1, 4, 64, 1, 0, 16.76, 3, 6000])
query = query.reshape(1,9)

predicted_price = int(pipe.predict(query)[0])
print("The predicted price of this configuration is " + str(predicted_price) + " Rs.")

The predicted price of this configuration is 10029 Rs.


In [43]:
import numpy as np
print(np.__version__)

1.21.5


In [44]:
import pandas as pd
print(pd.__version__)

1.3.4


In [45]:
import streamlit as st
print(st.__version__)

1.23.1


In [46]:
import sklearn as sk
print(sk.__version__)

1.0.2
