In [42]:
import numpy as np
import pandas as pd 
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression , ElasticNet 
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, mean_absolute_percentage_error
from sklearn.preprocessing import LabelEncoder

In [43]:
# STEPS FOR PREDICTIVE ANALYSIS
# 1. UNDERSTAND THE OBJECT AND GOAL OF THE PROJECT
# 2. GET THE RIGHT DATA NEEDED FOR THE PROJECT
# 3. CLEAN AND PROCESS THE DATA IF NEED ARISE 
# 4. DESCRIPTIVE ANALYSIS IF STATED IN THE DESCRIPTION OF THE PROJECT
# 5. VISUAL ANALYSIS IF STATED IN THE DESCRIPTION OF THE PROJECT
# 6. CONVERSION OF STRING OR CATEGORICAL COLUMN TO NUMERIC
# 7. FOR SUPERVISED MACHINE LEARNING SEPARATE THE INDEPENDENTS VARIABLE
# FROM THE DEPENDENT VARIABLE WHILE UNSUPERVISED KNOW THW TARGET COLUMN
# 8. FOR THE SUPERVISED MACHINE LEARNING RANDOMLY SPLIT YOUR DATA INTO TRAIN AND TEST SAMPLE
# THE TRAIN DATA TO TRAIN AND BUILD THE MODEL WHILE THE TEST DATA TO EVALUATE THE MODEL 
# 9. THE EVALUATION OF THE MODEL

In [45]:
df = pd.read_csv('used_device_data.csv')
df.head()

Unnamed: 0,device_brand,os,screen_size,4g,5g,rear_camera_mp,front_camera_mp,internal_memory,ram,battery,weight,release_year,days_used,normalized_used_price,normalized_new_price
0,Honor,Android,14.5,yes,no,13.0,5.0,64.0,3.0,3020.0,146.0,2020,127,4.307572,4.7151
1,Honor,Android,17.3,yes,yes,13.0,16.0,128.0,8.0,4300.0,213.0,2020,325,5.162097,5.519018
2,Honor,Android,16.69,yes,yes,13.0,8.0,128.0,8.0,4200.0,213.0,2020,162,5.111084,5.884631
3,Honor,Android,25.5,yes,yes,13.0,8.0,64.0,6.0,7250.0,480.0,2020,345,5.135387,5.630961
4,Honor,Android,15.32,yes,no,13.0,8.0,64.0,3.0,5000.0,185.0,2020,293,4.389995,4.947837


In [46]:
df.isnull().sum()

device_brand               0
os                         0
screen_size                0
4g                         0
5g                         0
rear_camera_mp           179
front_camera_mp            2
internal_memory            4
ram                        4
battery                    6
weight                     7
release_year               0
days_used                  0
normalized_used_price      0
normalized_new_price       0
dtype: int64

In [47]:
df.shape

(3454, 15)

In [50]:
df['rear_camera_mp'].fillna(round(np.mean(df['rear_camera_mp']),1),inplace=True)
df['front_camera_mp'].fillna(method='ffill',inplace=True)
df['internal_memory'].fillna(method='ffill',inplace=True)
df['ram'].fillna(method='ffill',inplace=True)
df['battery'].fillna(method='ffill',inplace=True)
df['weight'].fillna(method='ffill',inplace=True)


In [51]:
df.isnull().sum()

device_brand             0
os                       0
screen_size              0
4g                       0
5g                       0
rear_camera_mp           0
front_camera_mp          0
internal_memory          0
ram                      0
battery                  0
weight                   0
release_year             0
days_used                0
normalized_used_price    0
normalized_new_price     0
dtype: int64

In [52]:
pd.pivot_table(df,index='device_brand', columns = 'os',values='battery')

os,Android,Others,Windows,iOS
device_brand,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Acer,3006.458333,4500.0,2085.0,
Alcatel,2561.100917,1470.833333,,
Apple,,8105.833333,,4339.166667
Asus,3499.836066,,,
BlackBerry,3402.0,2339.583333,,
Celkon,1427.272727,1572.727273,,
Coolpad,2945.0,,,
Gionee,3117.946429,,,
Google,3628.333333,,,
HTC,2622.336449,,2333.333333,


In [54]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3454 entries, 0 to 3453
Data columns (total 15 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   device_brand           3454 non-null   object 
 1   os                     3454 non-null   object 
 2   screen_size            3454 non-null   float64
 3   4g                     3454 non-null   object 
 4   5g                     3454 non-null   object 
 5   rear_camera_mp         3454 non-null   float64
 6   front_camera_mp        3454 non-null   float64
 7   internal_memory        3454 non-null   float64
 8   ram                    3454 non-null   float64
 9   battery                3454 non-null   float64
 10  weight                 3454 non-null   float64
 11  release_year           3454 non-null   int64  
 12  days_used              3454 non-null   int64  
 13  normalized_used_price  3454 non-null   float64
 14  normalized_new_price   3454 non-null   float64
dtypes: f

In [55]:
df['4g']=pd.get_dummies(df['4g'],drop_first=True)
df.head()

Unnamed: 0,device_brand,os,screen_size,4g,5g,rear_camera_mp,front_camera_mp,internal_memory,ram,battery,weight,release_year,days_used,normalized_used_price,normalized_new_price
0,Honor,Android,14.5,1,no,13.0,5.0,64.0,3.0,3020.0,146.0,2020,127,4.307572,4.7151
1,Honor,Android,17.3,1,yes,13.0,16.0,128.0,8.0,4300.0,213.0,2020,325,5.162097,5.519018
2,Honor,Android,16.69,1,yes,13.0,8.0,128.0,8.0,4200.0,213.0,2020,162,5.111084,5.884631
3,Honor,Android,25.5,1,yes,13.0,8.0,64.0,6.0,7250.0,480.0,2020,345,5.135387,5.630961
4,Honor,Android,15.32,1,no,13.0,8.0,64.0,3.0,5000.0,185.0,2020,293,4.389995,4.947837


In [56]:
categorical=['device_brand','os','5g']
enc = LabelEncoder()
for x in categorical:
    df[x] = enc.fit_transform(df[x])
    
df.head()

Unnamed: 0,device_brand,os,screen_size,4g,5g,rear_camera_mp,front_camera_mp,internal_memory,ram,battery,weight,release_year,days_used,normalized_used_price,normalized_new_price
0,10,0,14.5,1,0,13.0,5.0,64.0,3.0,3020.0,146.0,2020,127,4.307572,4.7151
1,10,0,17.3,1,1,13.0,16.0,128.0,8.0,4300.0,213.0,2020,325,5.162097,5.519018
2,10,0,16.69,1,1,13.0,8.0,128.0,8.0,4200.0,213.0,2020,162,5.111084,5.884631
3,10,0,25.5,1,1,13.0,8.0,64.0,6.0,7250.0,480.0,2020,345,5.135387,5.630961
4,10,0,15.32,1,0,13.0,8.0,64.0,3.0,5000.0,185.0,2020,293,4.389995,4.947837


In [57]:
df.columns

Index(['device_brand', 'os', 'screen_size', '4g', '5g', 'rear_camera_mp',
       'front_camera_mp', 'internal_memory', 'ram', 'battery', 'weight',
       'release_year', 'days_used', 'normalized_used_price',
       'normalized_new_price'],
      dtype='object')

In [58]:
X = df[['device_brand', 'os', 'screen_size', '4g', '5g', 'rear_camera_mp',
       'front_camera_mp', 'internal_memory', 'ram', 'battery', 'weight',
       'release_year', 'days_used', 'normalized_used_price',
       'normalized_new_price']]
y = df['normalized_new_price']

In [60]:
xtrain,xtest,ytrain,ytest = train_test_split(X,y, test_size=0.2,random_state=1)

In [61]:
model1 = LinearRegression()
model1.fit(xtrain,ytrain)

LinearRegression()

In [62]:
pred1 = model1.predict(xtest)
pred1

array([5.19078783, 3.88465181, 4.48728714, 5.63942172, 5.30041517,
       5.63432521, 4.24434378, 5.79981966, 6.04065939, 4.87900685,
       4.61788896, 5.99173951, 5.24539116, 5.48197137, 5.29896715,
       3.67832394, 5.55563043, 5.63782072, 5.99256394, 5.85944629,
       5.94253687, 5.85901828, 5.70458215, 5.29916701, 4.70238764,
       5.18660338, 4.86730365, 5.73466726, 5.80163487, 5.29370675,
       5.13573961, 4.86838025, 5.18872569, 5.19162263, 5.38993893,
       5.00996841, 5.99386167, 6.78086465, 5.01309892, 4.50313746,
       5.63653807, 4.6903382 , 4.86290838, 5.70684445, 5.70381581,
       5.01946288, 4.90126676, 5.02137739, 5.38980206, 5.63814112,
       4.37134479, 5.80000133, 4.87718009, 6.59486479, 4.2297492 ,
       6.52287191, 5.99376191, 5.29981624, 5.4405545 , 5.75215985,
       5.19517661, 4.78189276, 5.13326582, 4.40549899, 5.44155154,
       4.7061913 , 5.43686118, 5.795328  , 5.63514668, 6.2146681 ,
       5.76713279, 5.63096085, 5.51278338, 5.98941244, 5.94174

In [67]:
print(mean_absolute_error(ytest,pred1))
print(mean_absolute_percentage_error(ytest,pred1))
accuracy1 = 100 - np.mean(mean_absolute_percentage_error(ytest,pred1))
print(accuracy1)

9.13885465132835e-16
1.808186844408996e-16
100.0


In [69]:
model2 = ElasticNet (alpha= 1.02 , l1_ratio= 0.5)
model2.fit(xtrain,ytrain)

ElasticNet(alpha=1.02)

In [71]:
pred2 = model2.predict(xtest)
pred2

array([5.07006275, 5.16176619, 5.50668766, 5.46697366, 5.31595611,
       5.36539271, 4.67366884, 5.51059117, 5.5202987 , 4.91444877,
       4.80254605, 5.52546016, 5.30727889, 5.35326314, 5.66090383,
       5.35644254, 5.30748869, 5.1375363 , 5.9488347 , 5.34918607,
       5.40947883, 5.35884582, 5.61185846, 5.11773771, 5.29077715,
       5.24215138, 5.00064745, 5.14545441, 5.48279112, 5.38292009,
       5.69383144, 5.20930063, 5.36565489, 5.32381315, 5.43277761,
       4.93688745, 5.56463836, 5.91843444, 5.31966138, 5.13328243,
       6.06034135, 4.90170034, 5.17414071, 5.30530874, 5.55857023,
       4.8994258 , 5.6023119 , 4.65513686, 5.46143404, 5.53687359,
       5.000872  , 5.74134143, 5.43776704, 5.91033156, 4.90888128,
       5.21476914, 5.44970138, 5.18918326, 5.28612717, 5.87054283,
       5.32298185, 5.64636319, 4.97686672, 5.24286816, 4.96105923,
       4.74299352, 5.28971012, 5.68836768, 5.61248025, 5.51757247,
       5.61262223, 6.19955807, 5.39625967, 5.84287494, 5.28078

In [73]:
print(mean_absolute_error(ytest,pred2))
print(mean_absolute_percentage_error(ytest,pred2))
accuracy2 = 100 - np.mean(mean_absolute_percentage_error(ytest,pred2))
print(accuracy2)

0.4273306018482224
0.08544048780678684
99.91455951219321


In [74]:
model3 = DecisionTreeRegressor()
model3.fit(xtrain,ytrain)

DecisionTreeRegressor()

In [75]:
pred3 = model3.predict(xtest)
pred3

array([5.19067648, 3.8918203 , 4.48751214, 5.63917284, 5.30051495,
       5.63414654, 4.24420032, 5.79994078, 6.03996896, 4.87832221,
       4.61709876, 5.99173951, 5.24512754, 5.48147191, 5.29901712,
       3.67478053, 5.5560942 , 5.63913728, 5.9927637 , 5.85921804,
       5.94190659, 5.85921804, 5.70478198, 5.29916701, 4.70175228,
       5.18587629, 4.86738059, 5.73482884, 5.80229963, 5.29385744,
       5.13573961, 4.86899492, 5.18917192, 5.19173388, 5.39030385,
       5.01003511, 5.9934376 , 6.77984236, 5.01309892, 4.50302671,
       5.63742897, 4.6903382 , 4.86290838, 5.70674475, 5.7033157 ,
       5.0196611 , 4.90126676, 5.02117951, 5.39030385, 5.63778511,
       4.37159744, 5.80066717, 4.87557893, 6.5940714 , 4.22814675,
       6.53497999, 5.99333779, 5.29981624, 5.44003392, 5.75215986,
       5.19539831, 4.78197656, 5.13320685, 4.40659729, 5.44129154,
       4.7067335 , 5.43690471, 5.79648319, 5.63546794, 6.2146081 ,
       5.76703892, 5.63085329, 5.51330777, 5.98856033, 5.94190

In [76]:
print(mean_absolute_error(ytest,pred3))
print(mean_absolute_percentage_error(ytest,pred3))
accuracy3 = 100 - np.mean(mean_absolute_percentage_error(ytest,pred3))
print(accuracy3)

0.002631277764109976
0.0004940139533428345
99.99950598604666
