In [1]:
# Report the version of the interpreter that backs this kernel.
# A shell escape (`!python --version`) asks whichever `python` comes first on
# PATH, which can be a different installation than the one running the notebook.
import platform

python_version = platform.python_version()
print(f'Python {python_version}')

Python 3.10.9


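### Problem statement

Predict the price of a cellphone from its hardware specifications (weight, screen size, pixel density, CPU, memory, cameras, battery and thickness) using a linear regression model.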

In [2]:
import warnings
warnings.filterwarnings('ignore')

In [3]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsRegressor
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import MinMaxScaler

import pickle
import json

### Data Gathering

In [4]:
# Read the raw cellphone dataset; the relative path means the CSV must sit in
# the notebook's working directory.
df = pd.read_csv(
    filepath_or_buffer='Cellphone.csv',
)

# Bare last expression so the notebook renders a preview of the frame.
df

Unnamed: 0,Product_id,Price,Sale,weight,resoloution,ppi,cpu_core,cpu_freq,internal_mem,ram,RearCam,Front_Cam,battery,thickness
0,203,2357,10,135.0,5.20,424,8,1.350,16.0,3.000,13.00,8.0,2610,7.4
1,880,1749,10,125.0,4.00,233,2,1.300,4.0,1.000,3.15,0.0,1700,9.9
2,40,1916,10,110.0,4.70,312,4,1.200,8.0,1.500,13.00,5.0,2000,7.6
3,99,1315,11,118.5,4.00,233,2,1.300,4.0,0.512,3.15,0.0,1400,11.0
4,880,1749,11,125.0,4.00,233,2,1.300,4.0,1.000,3.15,0.0,1700,9.9
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
156,1206,3551,4638,178.0,5.46,538,4,1.875,128.0,6.000,12.00,16.0,4080,8.4
157,1296,3211,8016,170.0,5.50,534,4,1.975,128.0,6.000,20.00,8.0,3400,7.9
158,856,3260,8809,150.0,5.50,401,8,2.200,64.0,4.000,20.00,20.0,3000,6.8
159,1296,3211,8946,170.0,5.50,534,4,1.975,128.0,6.000,20.00,8.0,3400,7.9


### EDA

In [5]:
# Print the frame summary: row count, per-column non-null counts and dtypes,
# and memory footprint.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 161 entries, 0 to 160
Data columns (total 14 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   Product_id    161 non-null    int64  
 1   Price         161 non-null    int64  
 2   Sale          161 non-null    int64  
 3   weight        161 non-null    float64
 4   resoloution   161 non-null    float64
 5   ppi           161 non-null    int64  
 6   cpu_core      161 non-null    int64  
 7   cpu_freq      161 non-null    float64
 8   internal_mem  161 non-null    float64
 9   ram           161 non-null    float64
 10  RearCam       161 non-null    float64
 11  Front_Cam     161 non-null    float64
 12  battery       161 non-null    int64  
 13  thickness     161 non-null    float64
dtypes: float64(8), int64(6)
memory usage: 17.7 KB


In [6]:
# Show the dtype of every column (the same dtypes appear in the df.info()
# summary; this view is a Series that can be inspected on its own).
df.dtypes

Product_id        int64
Price             int64
Sale              int64
weight          float64
resoloution     float64
ppi               int64
cpu_core          int64
cpu_freq        float64
internal_mem    float64
ram             float64
RearCam         float64
Front_Cam       float64
battery           int64
thickness       float64
dtype: object

In [7]:
# Count missing values per column: flag each null cell, then sum the flags
# column-wise.
df.isnull().sum()

Product_id      0
Price           0
Sale            0
weight          0
resoloution     0
ppi             0
cpu_core        0
cpu_freq        0
internal_mem    0
ram             0
RearCam         0
Front_Cam       0
battery         0
thickness       0
dtype: int64

### Feature Engineering

In [8]:
# Feature matrix: everything except the target (Price) and the Product_id and
# Sale columns, which are not used as predictors.
x = df.drop(columns=['Price', 'Product_id', 'Sale'])

# Target vector: the price to be predicted.
y = df.loc[:, 'Price']

In [9]:
# Learn each feature's min/max from x, then rescale x with those statistics.
scaler = MinMaxScaler().fit(x)
arr = scaler.transform(x)

# Wrap the scaled array back into a frame that carries the original column names.
x_scale = pd.DataFrame(data=arr, columns=x.columns)
x_scale

Unnamed: 0,weight,resoloution,ppi,cpu_core,cpu_freq,internal_mem,ram,RearCam,Front_Cam,battery,thickness
0,0.100437,0.351852,0.442336,1.00,0.500000,0.12500,0.500000,0.565217,0.40,0.208046,0.171642
1,0.085881,0.240741,0.163504,0.25,0.481481,0.03125,0.166667,0.136957,0.00,0.103448,0.358209
2,0.064047,0.305556,0.278832,0.50,0.444444,0.06250,0.250000,0.565217,0.25,0.137931,0.186567
3,0.076419,0.240741,0.163504,0.25,0.481481,0.03125,0.085333,0.136957,0.00,0.068966,0.440299
4,0.085881,0.240741,0.163504,0.25,0.481481,0.03125,0.166667,0.136957,0.00,0.103448,0.358209
...,...,...,...,...,...,...,...,...,...,...,...
156,0.163028,0.375926,0.608759,0.50,0.694444,1.00000,1.000000,0.521739,0.80,0.377011,0.246269
157,0.151383,0.379630,0.602920,0.50,0.731481,1.00000,1.000000,0.869565,0.40,0.298851,0.208955
158,0.122271,0.379630,0.408759,1.00,0.814815,0.50000,0.666667,0.869565,1.00,0.252874,0.126866
159,0.151383,0.379630,0.602920,0.50,0.731481,1.00000,1.000000,0.869565,0.40,0.298851,0.208955


### Model Training

In [10]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible across runs.
# NOTE(review): x_scale comes from a scaler that was fitted on every row, so the
# held-out rows contributed to the scaling statistics. Consider fitting the
# scaler on the training rows only.
x_scale_train, x_scale_test, y_train, y_test = train_test_split(
    x_scale,
    y,
    test_size=0.2,
    random_state=10,
)

In [11]:
# Ordinary least-squares model with default settings, trained on the scaled
# training features.
lin_reg = LinearRegression()
lin_reg.fit(X=x_scale_train, y=y_train)

### Model Evaluation

In [12]:
# Score the fitted model on both partitions first, then report them together so
# the train/test gap is easy to compare. score() is R^2 for scikit-learn
# regressors.
train_score = lin_reg.score(x_scale_train, y_train)
test_score = lin_reg.score(x_scale_test, y_test)

print(f'train_score : {train_score}')
print(f'test_score : {test_score}')

train_score : 0.9601409108985639
test_score : 0.9081632081978334


### Save the files

In [13]:
# Persist the trained regression model so it can be loaded outside this notebook.
with open('model.pkl', mode='wb') as model_file:
    pickle.dump(lin_reg, model_file)

In [14]:
# Persist the fitted scaler alongside the model: new inputs must be scaled with
# the same statistics before they are passed to the model.
with open('scale.pkl', mode='wb') as scaler_file:
    pickle.dump(scaler, scaler_file)

In [15]:
# Feature names in training order, as a plain Python list.
list(x.columns)

['weight',
 'resoloution',
 'ppi',
 'cpu_core',
 'cpu_freq',
 'internal_mem',
 'ram',
 'RearCam',
 'Front_Cam',
 'battery',
 'thickness']

In [16]:
# Metadata saved with the model: the feature names in the order the model
# expects them.
project_data = dict(column_names=list(x.columns))
project_data

{'column_names': ['weight',
  'resoloution',
  'ppi',
  'cpu_core',
  'cpu_freq',
  'internal_mem',
  'ram',
  'RearCam',
  'Front_Cam',
  'battery',
  'thickness']}

In [17]:
# Write the feature-name metadata to disk as JSON.
with open('project_data.json', mode='w') as json_file:
    json.dump(obj=project_data, fp=json_file)

### Prediction on User Input

In [18]:
def predict_price(user_features, feature_names, fitted_scaler, model):
    """Scale one phone's specifications and predict its price.

    Values are matched to features by name rather than by position, so a change
    in column order cannot silently feed a value into the wrong feature. Both
    the scaler and the model receive a DataFrame carrying the feature names.

    Args:
        user_features: mapping of feature name -> numeric value for one phone.
        feature_names: feature names in the column order used at training time.
        fitted_scaler: fitted transformer exposing ``transform``.
        model: fitted estimator exposing ``predict``.

    Returns:
        Tuple ``(scaled_row, prediction)``: the scaled one-row DataFrame and
        whatever ``model.predict`` returns for it.

    Raises:
        ValueError: if ``user_features`` lacks a required feature or contains a
            name that is not in ``feature_names``.
    """
    feature_names = list(feature_names)
    missing = [name for name in feature_names if name not in user_features]
    unknown = [name for name in user_features if name not in feature_names]
    if missing or unknown:
        raise ValueError(
            f'missing features: {missing}; unknown features: {unknown}'
        )

    # One-row frame laid out in training column order.
    row = pd.DataFrame([user_features], columns=feature_names, dtype=float)
    scaled_row = pd.DataFrame(fitted_scaler.transform(row), columns=feature_names)
    return scaled_row, model.predict(scaled_row)


# Example phone specification (same values as the first row of the dataset).
weight = 135
resoloution = 5.20
ppi = 424
cpu_core = 8
cpu_freq = 1.35
internal_mem = 16
ram = 3
RearCam = 13
Front_Cam = 8
battery = 2610
thickness = 7.4

user_features = {
    'weight': weight,
    'resoloution': resoloution,
    'ppi': ppi,
    'cpu_core': cpu_core,
    'cpu_freq': cpu_freq,
    'internal_mem': internal_mem,
    'ram': ram,
    'RearCam': RearCam,
    'Front_Cam': Front_Cam,
    'battery': battery,
    'thickness': thickness,
}

# Flat float vector in training column order, kept under its original name.
user_data = np.array([user_features[name] for name in x.columns], dtype=float)

### Scaling the user data
user_data_scaled_frame, result = predict_price(user_features, x.columns, scaler, lin_reg)
user_data_scale = user_data_scaled_frame.to_numpy()
print(user_data_scale)

result

[[0.10043668 0.35185185 0.44233577 1.         0.5        0.125
  0.5        0.56521739 0.4        0.20804598 0.17164179]]


array([2666.85818505])