# Import Libraries

In [1]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import OneHotEncoder

# In this project we predicting the laptop prices by their different key points 

# Accesing Dataset

In [2]:
df=pd.read_csv("Laptop price prediction dataset.csv")
df.head()

Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,brand,name,price,spec_rating,processor,CPU,Ram,Ram_type,ROM,ROM_type,GPU,display_size,resolution_width,resolution_height,OS,warranty
0,0,0,HP,Victus 15-fb0157AX Gaming Laptop,49900,73.0,5th Gen AMD Ryzen 5 5600H,"Hexa Core, 12 Threads",8GB,DDR4,512GB,SSD,4GB AMD Radeon RX 6500M,15.6,1920,1080,Windows 11 OS,1
1,1,1,HP,15s-fq5007TU Laptop,39900,60.0,12th Gen Intel Core i3 1215U,"Hexa Core (2P + 4E), 8 Threads",8GB,DDR4,512GB,SSD,Intel UHD Graphics,15.6,1920,1080,Windows 11 OS,1
2,2,2,Acer,One 14 Z8-415 Laptop,26990,69.323529,11th Gen Intel Core i3 1115G4,"Dual Core, 4 Threads",8GB,DDR4,512GB,SSD,Intel Iris Xe Graphics,14.0,1920,1080,Windows 11 OS,1
3,3,3,Lenovo,Yoga Slim 6 14IAP8 82WU0095IN Laptop,59729,66.0,12th Gen Intel Core i5 1240P,"12 Cores (4P + 8E), 16 Threads",16GB,LPDDR5,512GB,SSD,Intel Integrated Iris Xe,14.0,2240,1400,Windows 11 OS,1
4,4,4,Apple,MacBook Air 2020 MGND3HN Laptop,69990,69.323529,Apple M1,Octa Core (4P + 4E),8GB,DDR4,256GB,SSD,Apple M1 Integrated Graphics,13.3,2560,1600,Mac OS,1


In [3]:
df.shape

(893, 18)

# Here we Drop unnecessary columns 

In [4]:
df.drop(['Unnamed: 0.1','Unnamed: 0','name','processor','CPU','Ram_type','ROM_type','GPU','display_size','resolution_width','resolution_height','warranty'],axis=1,inplace=True)

In [5]:
df

Unnamed: 0,brand,price,spec_rating,Ram,ROM,OS
0,HP,49900,73.000000,8GB,512GB,Windows 11 OS
1,HP,39900,60.000000,8GB,512GB,Windows 11 OS
2,Acer,26990,69.323529,8GB,512GB,Windows 11 OS
3,Lenovo,59729,66.000000,16GB,512GB,Windows 11 OS
4,Apple,69990,69.323529,8GB,256GB,Mac OS
...,...,...,...,...,...,...
888,Asus,44990,69.323529,8GB,512GB,Windows 11 OS
889,Asus,110000,71.000000,16GB,1TB,Windows 11 OS
890,Asus,189990,89.000000,32GB,1TB,Windows 11 OS
891,Asus,129990,73.000000,16GB,512GB,Windows 11 OS


#  let's check  there is any null value ?

In [6]:
df.isnull().sum()

brand          0
price          0
spec_rating    0
Ram            0
ROM            0
OS             0
dtype: int64

In [7]:
df.describe()

Unnamed: 0,price,spec_rating
count,893.0,893.0
mean,79907.409854,69.379026
std,60880.043823,5.541555
min,9999.0,60.0
25%,44500.0,66.0
50%,61990.0,69.323529
75%,90990.0,71.0
max,450039.0,89.0


In [8]:
df.shape

(893, 6)

In [9]:
df['brand'].value_counts()

HP           186
Lenovo       169
Asus         157
Dell         107
Acer          84
MSI           65
Samsung       28
Apple         16
Infinix       15
LG             9
Xiaomi         8
Gigabyte       8
Fujitsu        6
Zebronics      4
Ultimus        4
Tecno          3
Wings          3
Chuwi          3
Realme         3
Microsoft      2
AXL            2
Huawei         2
Honor          2
Walker         1
Vaio           1
Avita          1
Primebook      1
Ninkear        1
Razer          1
iBall          1
Name: brand, dtype: int64

In [10]:
df['Ram'].value_counts()

16GB    456
8GB     369
32GB     40
4GB      22
64GB      3
12GB      2
2GB       1
Name: Ram, dtype: int64

In [11]:
df['ROM'].value_counts()

512GB    634
1TB      188
256GB     42
128GB     12
2TB       10
64GB       5
32GB       2
Name: ROM, dtype: int64

In [12]:
df['OS'].value_counts()

Windows 11 OS         798
Windows 10 OS          38
DOS OS                 18
Mac OS                 12
Chrome OS               9
Windows OS              9
Ubuntu OS               2
DOS 3.0 OS              2
Mac 10.15.3\t OS        2
Android 11 OS           1
Mac Catalina OS         1
Mac High Sierra OS      1
Name: OS, dtype: int64

# Data Cleaning

#  using onehot encoding WE convert all categorical columns into a numerical

# here we coverting 'os' column

In [13]:
df1=df[['OS']]
df1.head()

Unnamed: 0,OS
0,Windows 11 OS
1,Windows 11 OS
2,Windows 11 OS
3,Windows 11 OS
4,Mac OS


In [14]:
oh_enc = OneHotEncoder(sparse=False,)
oh_enc_arr = oh_enc.fit_transform(df1[['OS']])

oh_enc_arr



array([[0., 0., 0., ..., 0., 1., 0.],
       [0., 0., 0., ..., 0., 1., 0.],
       [0., 0., 0., ..., 0., 1., 0.],
       ...,
       [0., 0., 0., ..., 0., 1., 0.],
       [0., 0., 0., ..., 0., 1., 0.],
       [0., 0., 0., ..., 0., 1., 0.]])

In [15]:
dummy_df=pd.get_dummies(df1[['OS']])
dummy_df.head(4)

Unnamed: 0,OS_Android 11 OS,OS_Chrome OS,OS_DOS 3.0 OS,OS_DOS OS,OS_Mac 10.15.3\t OS,OS_Mac Catalina OS,OS_Mac High Sierra OS,OS_Mac OS,OS_Ubuntu OS,OS_Windows 10 OS,OS_Windows 11 OS,OS_Windows OS
0,0,0,0,0,0,0,0,0,0,0,1,0
1,0,0,0,0,0,0,0,0,0,0,1,0
2,0,0,0,0,0,0,0,0,0,0,1,0
3,0,0,0,0,0,0,0,0,0,0,1,0


In [16]:
oh_enc = OneHotEncoder(sparse=False,drop='first')
oh_enc_arr = oh_enc.fit_transform(df1[['OS']])
oh_enc_arr



array([[0., 0., 0., ..., 0., 1., 0.],
       [0., 0., 0., ..., 0., 1., 0.],
       [0., 0., 0., ..., 0., 1., 0.],
       ...,
       [0., 0., 0., ..., 0., 1., 0.],
       [0., 0., 0., ..., 0., 1., 0.],
       [0., 0., 0., ..., 0., 1., 0.]])

In [17]:
oh_enc_df = pd.DataFrame(oh_enc_arr, columns=['OS_Chrome OS','OS_DOS 3.0 OS','OS_DOS OS','OS_Mac 10.15.3\t OS','OS_Mac Catalina OS','OS_Mac High Sierra OS','OS_Mac OS','OS_Ubuntu OS','OS_Windows 10 OS','OS_Windows 11 OS','OS_Windows OS'] ,)

oh_enc_df

Unnamed: 0,OS_Chrome OS,OS_DOS 3.0 OS,OS_DOS OS,OS_Mac 10.15.3\t OS,OS_Mac Catalina OS,OS_Mac High Sierra OS,OS_Mac OS,OS_Ubuntu OS,OS_Windows 10 OS,OS_Windows 11 OS,OS_Windows OS
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...
888,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
889,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
890,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
891,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0


In [18]:
df1['OS_Chrome OS'] = oh_enc_df[['OS_Windows OS']].copy()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df1['OS_Chrome OS'] = oh_enc_df[['OS_Windows OS']].copy()


In [19]:
df1['OS_Chrome OS'] = oh_enc_df[['OS_Windows 11 OS']].copy()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df1['OS_Chrome OS'] = oh_enc_df[['OS_Windows 11 OS']].copy()


In [20]:
df1['OS_Chrome OS'] = oh_enc_df[['OS_Windows 10 OS']].copy()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df1['OS_Chrome OS'] = oh_enc_df[['OS_Windows 10 OS']].copy()


In [21]:
df1['OS_Chrome OS'] = oh_enc_df[['OS_Ubuntu OS']].copy()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df1['OS_Chrome OS'] = oh_enc_df[['OS_Ubuntu OS']].copy()


In [22]:
df1['OS_Chrome OS'] = oh_enc_df[['OS_Mac OS']].copy()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df1['OS_Chrome OS'] = oh_enc_df[['OS_Mac OS']].copy()


In [23]:
df1['OS_Chrome OS'] = oh_enc_df[['OS_Mac High Sierra OS']].copy()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df1['OS_Chrome OS'] = oh_enc_df[['OS_Mac High Sierra OS']].copy()


In [24]:
df1['OS_Chrome OS'] = oh_enc_df[['OS_Mac Catalina OS']].copy()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df1['OS_Chrome OS'] = oh_enc_df[['OS_Mac Catalina OS']].copy()


In [25]:
df1['OS_Chrome OS'] = oh_enc_df[['OS_Mac 10.15.3\t OS']].copy()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df1['OS_Chrome OS'] = oh_enc_df[['OS_Mac 10.15.3\t OS']].copy()


In [26]:
df1['OS_Chrome OS'] = oh_enc_df[['OS_DOS OS']].copy()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df1['OS_Chrome OS'] = oh_enc_df[['OS_DOS OS']].copy()


In [27]:
df1['OS_Chrome OS'] = oh_enc_df[['OS_DOS 3.0 OS']].copy()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df1['OS_Chrome OS'] = oh_enc_df[['OS_DOS 3.0 OS']].copy()


In [28]:
df1['OS'] = oh_enc_df[['OS_Chrome OS']].copy()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df1['OS'] = oh_enc_df[['OS_Chrome OS']].copy()


In [29]:
df.update(df1)
df.head()

Unnamed: 0,brand,price,spec_rating,Ram,ROM,OS
0,HP,49900,73.0,8GB,512GB,0.0
1,HP,39900,60.0,8GB,512GB,0.0
2,Acer,26990,69.323529,8GB,512GB,0.0
3,Lenovo,59729,66.0,16GB,512GB,0.0
4,Apple,69990,69.323529,8GB,256GB,0.0


# here we coverting 'Rom' column

In [30]:
df2=df[['ROM']]
df2.head()

Unnamed: 0,ROM
0,512GB
1,512GB
2,512GB
3,512GB
4,256GB


In [31]:
oh_enc = OneHotEncoder(sparse=False,)
oh_enc_arr = oh_enc.fit_transform(df2[['ROM']])

oh_enc_arr



array([[0., 0., 0., ..., 0., 1., 0.],
       [0., 0., 0., ..., 0., 1., 0.],
       [0., 0., 0., ..., 0., 1., 0.],
       ...,
       [0., 1., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 1., 0.],
       [0., 1., 0., ..., 0., 0., 0.]])

In [32]:
dummy_df=pd.get_dummies(df2[['ROM']])
dummy_df.head(4)

Unnamed: 0,ROM_128GB,ROM_1TB,ROM_256GB,ROM_2TB,ROM_32GB,ROM_512GB,ROM_64GB
0,0,0,0,0,0,1,0
1,0,0,0,0,0,1,0
2,0,0,0,0,0,1,0
3,0,0,0,0,0,1,0


In [33]:
oh_enc = OneHotEncoder(sparse=False,drop='first')
oh_enc_arr = oh_enc.fit_transform(df2[['ROM']])
oh_enc_arr



array([[0., 0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 1., 0.],
       ...,
       [1., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 1., 0.],
       [1., 0., 0., 0., 0., 0.]])

In [34]:
oh_enc_df = pd.DataFrame(oh_enc_arr, columns=['ROM_1TB','ROM_256GB','ROM_2TB','ROM_32GB','ROM_512GB','ROM_64GB'] ,)

oh_enc_df

Unnamed: 0,ROM_1TB,ROM_256GB,ROM_2TB,ROM_32GB,ROM_512GB,ROM_64GB
0,0.0,0.0,0.0,0.0,1.0,0.0
1,0.0,0.0,0.0,0.0,1.0,0.0
2,0.0,0.0,0.0,0.0,1.0,0.0
3,0.0,0.0,0.0,0.0,1.0,0.0
4,0.0,1.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...
888,0.0,0.0,0.0,0.0,1.0,0.0
889,1.0,0.0,0.0,0.0,0.0,0.0
890,1.0,0.0,0.0,0.0,0.0,0.0
891,0.0,0.0,0.0,0.0,1.0,0.0


In [35]:
df2['ROM_1TB'] = oh_enc_df[['ROM_64GB']].copy()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['ROM_1TB'] = oh_enc_df[['ROM_64GB']].copy()


In [36]:
df2['ROM_1TB'] = oh_enc_df[['ROM_512GB']].copy()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['ROM_1TB'] = oh_enc_df[['ROM_512GB']].copy()


In [37]:
df2['ROM_1TB'] = oh_enc_df[['ROM_32GB']].copy()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['ROM_1TB'] = oh_enc_df[['ROM_32GB']].copy()


In [38]:
df2['ROM_1TB'] = oh_enc_df[['ROM_2TB']].copy()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['ROM_1TB'] = oh_enc_df[['ROM_2TB']].copy()


In [39]:
df2['ROM_1TB'] = oh_enc_df[['ROM_256GB']].copy()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['ROM_1TB'] = oh_enc_df[['ROM_256GB']].copy()


In [40]:
df2['ROM'] = oh_enc_df[['ROM_1TB']].copy()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['ROM'] = oh_enc_df[['ROM_1TB']].copy()


In [41]:
df.update(df2)
df

Unnamed: 0,brand,price,spec_rating,Ram,ROM,OS
0,HP,49900,73.000000,8GB,0.0,0.0
1,HP,39900,60.000000,8GB,0.0,0.0
2,Acer,26990,69.323529,8GB,0.0,0.0
3,Lenovo,59729,66.000000,16GB,0.0,0.0
4,Apple,69990,69.323529,8GB,0.0,0.0
...,...,...,...,...,...,...
888,Asus,44990,69.323529,8GB,0.0,0.0
889,Asus,110000,71.000000,16GB,1.0,0.0
890,Asus,189990,89.000000,32GB,1.0,0.0
891,Asus,129990,73.000000,16GB,0.0,0.0


# here we coverting 'Ram' column

In [42]:
df3=df[['Ram']]
df3.head()

Unnamed: 0,Ram
0,8GB
1,8GB
2,8GB
3,16GB
4,8GB


In [43]:
oh_enc = OneHotEncoder(sparse=False,)
oh_enc_arr = oh_enc.fit_transform(df3[['Ram']])

oh_enc_arr



array([[0., 0., 0., ..., 0., 0., 1.],
       [0., 0., 0., ..., 0., 0., 1.],
       [0., 0., 0., ..., 0., 0., 1.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 1., 0., ..., 0., 0., 0.],
       [0., 1., 0., ..., 0., 0., 0.]])

In [44]:
dummy_df=pd.get_dummies(df3[['Ram']])
dummy_df.head(4)

Unnamed: 0,Ram_12GB,Ram_16GB,Ram_2GB,Ram_32GB,Ram_4GB,Ram_64GB,Ram_8GB
0,0,0,0,0,0,0,1
1,0,0,0,0,0,0,1
2,0,0,0,0,0,0,1
3,0,1,0,0,0,0,0


In [45]:
oh_enc = OneHotEncoder(sparse=False,drop='first')
oh_enc_arr = oh_enc.fit_transform(df3[['Ram']])
oh_enc_arr



array([[0., 0., 0., 0., 0., 1.],
       [0., 0., 0., 0., 0., 1.],
       [0., 0., 0., 0., 0., 1.],
       ...,
       [0., 0., 1., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0.]])

In [46]:
oh_enc_df = pd.DataFrame(oh_enc_arr, columns=['Ram_16GB','Ram_2GB','Ram_32GB','Ram_4GB','Ram_64GB','Ram_8GB'],)

oh_enc_df

Unnamed: 0,Ram_16GB,Ram_2GB,Ram_32GB,Ram_4GB,Ram_64GB,Ram_8GB
0,0.0,0.0,0.0,0.0,0.0,1.0
1,0.0,0.0,0.0,0.0,0.0,1.0
2,0.0,0.0,0.0,0.0,0.0,1.0
3,1.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,1.0
...,...,...,...,...,...,...
888,0.0,0.0,0.0,0.0,0.0,1.0
889,1.0,0.0,0.0,0.0,0.0,0.0
890,0.0,0.0,1.0,0.0,0.0,0.0
891,1.0,0.0,0.0,0.0,0.0,0.0


In [47]:
df3['Ram_16GB'] = oh_enc_df[['Ram_8GB']].copy()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df3['Ram_16GB'] = oh_enc_df[['Ram_8GB']].copy()


In [48]:
df3['Ram_16GB'] = oh_enc_df[['Ram_64GB']].copy()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df3['Ram_16GB'] = oh_enc_df[['Ram_64GB']].copy()


In [49]:
df3['Ram_16GB'] = oh_enc_df[['Ram_4GB']].copy()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df3['Ram_16GB'] = oh_enc_df[['Ram_4GB']].copy()


In [50]:
df3['Ram_16GB'] = oh_enc_df[['Ram_32GB']].copy()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df3['Ram_16GB'] = oh_enc_df[['Ram_32GB']].copy()


In [51]:
df3['Ram_16GB'] = oh_enc_df[['Ram_2GB']].copy()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df3['Ram_16GB'] = oh_enc_df[['Ram_2GB']].copy()


In [52]:
df3['Ram'] = oh_enc_df[['Ram_16GB']].copy()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df3['Ram'] = oh_enc_df[['Ram_16GB']].copy()


In [53]:
df.update(df3)
df

Unnamed: 0,brand,price,spec_rating,Ram,ROM,OS
0,HP,49900,73.000000,0.0,0.0,0.0
1,HP,39900,60.000000,0.0,0.0,0.0
2,Acer,26990,69.323529,0.0,0.0,0.0
3,Lenovo,59729,66.000000,1.0,0.0,0.0
4,Apple,69990,69.323529,0.0,0.0,0.0
...,...,...,...,...,...,...
888,Asus,44990,69.323529,0.0,0.0,0.0
889,Asus,110000,71.000000,1.0,1.0,0.0
890,Asus,189990,89.000000,0.0,1.0,0.0
891,Asus,129990,73.000000,1.0,0.0,0.0


# here we coverting 'brand' column

In [54]:
df4=df[['brand']]
df4.head()

Unnamed: 0,brand
0,HP
1,HP
2,Acer
3,Lenovo
4,Apple


In [55]:
oh_enc = OneHotEncoder(sparse=False,)
oh_enc_arr = oh_enc.fit_transform(df4[['brand']])

oh_enc_arr



array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 1., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

In [56]:
dummy_df=pd.get_dummies(df4[['brand']])
dummy_df.head()

Unnamed: 0,brand_AXL,brand_Acer,brand_Apple,brand_Asus,brand_Avita,brand_Chuwi,brand_Dell,brand_Fujitsu,brand_Gigabyte,brand_HP,...,brand_Realme,brand_Samsung,brand_Tecno,brand_Ultimus,brand_Vaio,brand_Walker,brand_Wings,brand_Xiaomi,brand_Zebronics,brand_iBall
0,0,0,0,0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
2,0,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [57]:
oh_enc = OneHotEncoder(sparse=False,drop='first')
oh_enc_arr = oh_enc.fit_transform(df4[['brand']])
oh_enc_arr



array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [1., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 1., ..., 0., 0., 0.],
       [0., 0., 1., ..., 0., 0., 0.],
       [0., 0., 1., ..., 0., 0., 0.]])

In [58]:
oh_enc_df = pd.DataFrame(oh_enc_arr, columns=['brand_Acer', 'brand_Apple', 'brand_Asus', 'brand_Avita',
       'brand_Chuwi', 'brand_Dell', 'brand_Fujitsu', 'brand_Gigabyte',
       'brand_HP', 'brand_Honor', 'brand_Huawei', 'brand_Infinix', 'brand_LG',
       'brand_Lenovo', 'brand_MSI', 'brand_Microsoft', 'brand_Ninkear',
       'brand_Primebook', 'brand_Razer', 'brand_Realme', 'brand_Samsung',
       'brand_Tecno', 'brand_Ultimus', 'brand_Vaio', 'brand_Walker',
       'brand_Wings', 'brand_Xiaomi', 'brand_Zebronics', 'brand_iBall'],)

oh_enc_df

Unnamed: 0,brand_Acer,brand_Apple,brand_Asus,brand_Avita,brand_Chuwi,brand_Dell,brand_Fujitsu,brand_Gigabyte,brand_HP,brand_Honor,...,brand_Realme,brand_Samsung,brand_Tecno,brand_Ultimus,brand_Vaio,brand_Walker,brand_Wings,brand_Xiaomi,brand_Zebronics,brand_iBall
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
888,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
889,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
890,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
891,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [59]:
df4['brand_Acer'] = oh_enc_df[['brand_Apple']].copy()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df4['brand_Acer'] = oh_enc_df[['brand_Apple']].copy()


In [60]:
df4['brand_Acer'] = oh_enc_df[['brand_Asus']].copy()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df4['brand_Acer'] = oh_enc_df[['brand_Asus']].copy()


In [61]:
df4['brand_Acer'] = oh_enc_df[['brand_Avita']].copy()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df4['brand_Acer'] = oh_enc_df[['brand_Avita']].copy()


In [62]:
df4['brand_Acer'] = oh_enc_df[['brand_Chuwi']].copy()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df4['brand_Acer'] = oh_enc_df[['brand_Chuwi']].copy()


In [63]:
df4['brand_Acer'] = oh_enc_df[['brand_Dell']].copy()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df4['brand_Acer'] = oh_enc_df[['brand_Dell']].copy()


In [64]:
df4['brand_Acer'] = oh_enc_df[['brand_Fujitsu']].copy()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df4['brand_Acer'] = oh_enc_df[['brand_Fujitsu']].copy()


In [65]:
df4['brand_Acer'] = oh_enc_df[['brand_Gigabyte']].copy()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df4['brand_Acer'] = oh_enc_df[['brand_Gigabyte']].copy()


In [66]:
df4['brand_Acer'] = oh_enc_df[['brand_HP']].copy()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df4['brand_Acer'] = oh_enc_df[['brand_HP']].copy()


In [67]:
df4['brand_Acer'] = oh_enc_df[['brand_Honor']].copy()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df4['brand_Acer'] = oh_enc_df[['brand_Honor']].copy()


In [68]:
df4['brand_Acer'] = oh_enc_df[['brand_Huawei']].copy()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df4['brand_Acer'] = oh_enc_df[['brand_Huawei']].copy()


In [69]:
df4['brand_Apple'] = oh_enc_df[['brand_Infinix']].copy()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df4['brand_Apple'] = oh_enc_df[['brand_Infinix']].copy()


In [70]:
df4['brand_Acer'] = oh_enc_df[['brand_LG']].copy()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df4['brand_Acer'] = oh_enc_df[['brand_LG']].copy()


In [71]:
df4['brand_Acer'] = oh_enc_df[['brand_Lenovo']].copy()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df4['brand_Acer'] = oh_enc_df[['brand_Lenovo']].copy()


In [72]:
df4['brand_Acer'] = oh_enc_df[['brand_MSI']].copy()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df4['brand_Acer'] = oh_enc_df[['brand_MSI']].copy()


In [73]:
df4['brand_Acer'] = oh_enc_df[['brand_Microsoft']].copy()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df4['brand_Acer'] = oh_enc_df[['brand_Microsoft']].copy()


In [74]:
df4['brand_Acer'] = oh_enc_df[['brand_Ninkear']].copy()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df4['brand_Acer'] = oh_enc_df[['brand_Ninkear']].copy()


In [75]:
df4['brand_Acer'] = oh_enc_df[['brand_Primebook']].copy()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df4['brand_Acer'] = oh_enc_df[['brand_Primebook']].copy()


In [76]:
df4['brand_Acer'] = oh_enc_df[['brand_Razer']].copy()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df4['brand_Acer'] = oh_enc_df[['brand_Razer']].copy()


In [77]:
df4['brand_Acer'] = oh_enc_df[['brand_Realme']].copy()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df4['brand_Acer'] = oh_enc_df[['brand_Realme']].copy()


In [78]:
df4['brand_Acer'] = oh_enc_df[['brand_Samsung']].copy()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df4['brand_Acer'] = oh_enc_df[['brand_Samsung']].copy()


In [79]:
df4['brand_Acer'] = oh_enc_df[['brand_Tecno']].copy()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df4['brand_Acer'] = oh_enc_df[['brand_Tecno']].copy()


In [80]:
df4['brand_Acer'] = oh_enc_df[['brand_Ultimus']].copy()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df4['brand_Acer'] = oh_enc_df[['brand_Ultimus']].copy()


In [81]:
df4['brand_Acer'] = oh_enc_df[['brand_Vaio']].copy()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df4['brand_Acer'] = oh_enc_df[['brand_Vaio']].copy()


In [82]:
df4['brand_Acer'] = oh_enc_df[['brand_Walker']].copy()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df4['brand_Acer'] = oh_enc_df[['brand_Walker']].copy()


In [83]:
df4['brand_Acer'] = oh_enc_df[['brand_Wings']].copy()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df4['brand_Acer'] = oh_enc_df[['brand_Wings']].copy()


In [84]:
df4['brand_Acer'] = oh_enc_df[['brand_Xiaomi']].copy()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df4['brand_Acer'] = oh_enc_df[['brand_Xiaomi']].copy()


In [85]:
df4['brand_Acer'] = oh_enc_df[['brand_Zebronics']].copy()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df4['brand_Acer'] = oh_enc_df[['brand_Zebronics']].copy()


In [86]:
df4['brand_Acer'] = oh_enc_df[['brand_iBall']].copy()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df4['brand_Acer'] = oh_enc_df[['brand_iBall']].copy()


In [87]:
df4['brand'] = oh_enc_df[['brand_Acer']].copy()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df4['brand'] = oh_enc_df[['brand_Acer']].copy()


In [88]:
df.update(df4)
df

Unnamed: 0,brand,price,spec_rating,Ram,ROM,OS
0,0.0,49900,73.000000,0.0,0.0,0.0
1,0.0,39900,60.000000,0.0,0.0,0.0
2,1.0,26990,69.323529,0.0,0.0,0.0
3,0.0,59729,66.000000,1.0,0.0,0.0
4,0.0,69990,69.323529,0.0,0.0,0.0
...,...,...,...,...,...,...
888,0.0,44990,69.323529,0.0,0.0,0.0
889,0.0,110000,71.000000,1.0,1.0,0.0
890,0.0,189990,89.000000,0.0,1.0,0.0
891,0.0,129990,73.000000,1.0,0.0,0.0


In [89]:
df.head()# checking changes in original df

Unnamed: 0,brand,price,spec_rating,Ram,ROM,OS
0,0.0,49900,73.0,0.0,0.0,0.0
1,0.0,39900,60.0,0.0,0.0,0.0
2,1.0,26990,69.323529,0.0,0.0,0.0
3,0.0,59729,66.0,1.0,0.0,0.0
4,0.0,69990,69.323529,0.0,0.0,0.0


In [90]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 893 entries, 0 to 892
Data columns (total 6 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   brand        893 non-null    object 
 1   price        893 non-null    int64  
 2   spec_rating  893 non-null    float64
 3   Ram          893 non-null    object 
 4   ROM          893 non-null    object 
 5   OS           893 non-null    object 
dtypes: float64(1), int64(1), object(4)
memory usage: 42.0+ KB


# So Our Final converted Dataset is ready to work

# Spilting Data

In [91]:
x=df.drop("price",axis=1)
y=df["price"]
print("Shape of x",x.shape)
print("Shape of y",y.shape)


Shape of x (893, 5)
Shape of y (893,)


In [92]:
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=.20, random_state=51)
print('Shape of X_train = ', x_train.shape)
print('Shape of y_train = ', y_train.shape)
print('Shape of X_test = ', x_test.shape)
print('Shape of y_test = ', y_test.shape)

Shape of X_train =  (714, 5)
Shape of y_train =  (714,)
Shape of X_test =  (179, 5)
Shape of y_test =  (179,)


# feature Scalling

In [93]:
from sklearn.preprocessing import StandardScaler
sc=StandardScaler()
sc.fit(x_train)
x_train=sc.transform(x_train)
x_test=sc.transform(x_test)

# using SVR

In [94]:
from sklearn.svm import SVR

In [95]:
from sklearn.linear_model import LinearRegression
lr= LinearRegression()
lr.fit(x_train,y_train)

In [96]:
svr_rbf=SVR(kernel='rbf')
svr_rbf.fit(x_train, y_train)
svr_rbf.score(x_test, y_test)

-0.10989734036689636

In [97]:
svr_linear=SVR(kernel='linear')
svr_linear.fit(x_train, y_train)
svr_linear.score(x_test, y_test)

-0.08714250549074976

In [98]:
svr_linear=SVR(kernel='poly')
svr_linear.fit(x_train, y_train)
svr_linear.score(x_test, y_test)

-0.0960626655084642

# using DecisionTreeRegressor

In [99]:
from sklearn.tree import DecisionTreeRegressor

In [100]:
regressor = DecisionTreeRegressor(criterion='squared_error')
regressor.fit(x_train,y_train)

In [101]:
y_test

30      56889
844     57990
535     56990
780     29990
653     33990
        ...  
865     74990
551    267999
379     60990
691     81990
137     27990
Name: price, Length: 179, dtype: int64

In [102]:
regressor.score(x_test,y_test)

0.6279062114314625

# using RandomForestRegressor

In [103]:
from sklearn.ensemble import RandomForestRegressor

In [104]:
Regressor=RandomForestRegressor(criterion="absolute_error",n_estimators=19)
Regressor.fit(x_train,y_train)


In [105]:
y_test

30      56889
844     57990
535     56990
780     29990
653     33990
        ...  
865     74990
551    267999
379     60990
691     81990
137     27990
Name: price, Length: 179, dtype: int64

In [106]:
Regressor.score(x_test,y_test)

0.7117155871903378

# using KNeighborsRegressor

In [107]:
from sklearn.neighbors import KNeighborsRegressor

In [108]:
regressor = KNeighborsRegressor(n_neighbors=18)
regressor.fit(x_train,y_train)

In [109]:
from sklearn.preprocessing import StandardScaler
sc=StandardScaler()

In [110]:
sc.fit(x_train)

In [111]:
y_pred=lr.predict(x_test)

In [112]:
regressor.score(x_test,y_test)

0.6700318658337145

In [113]:
from sklearn.metrics import mean_absolute_error,mean_squared_error,r2_score

In [114]:
print("MAE",mean_absolute_error(y_test,y_pred))

MAE 24838.4764951524


In [115]:
print("MSE",mean_squared_error(y_test,y_pred))

MSE 998426162.7246134


In [116]:
print("RMSE",np.sqrt(mean_squared_error(y_test,y_pred)))

RMSE 31597.882250628973


In [117]:
print("R_Squared",r2_score(y_test,y_pred))



R_Squared 0.6043970706232793


# using k fold

In [118]:
from sklearn.model_selection import cross_val_score

In [119]:
score_lr=cross_val_score(KNeighborsRegressor(n_neighbors=5), x,y,cv=3)
print(score_lr)
print("Avg :",np.average(score_lr))

[0.32850301 0.40639917 0.33677093]
Avg : 0.3572243724521


In [120]:
score_lr1=cross_val_score(KNeighborsRegressor(n_neighbors=9), x,y,cv=3)
print(score_lr1)
print("Avg :",np.average(score_lr1))

[0.36330553 0.45561482 0.39871007]
Avg : 0.4058768040727901


# k fold on RandomForestRegressor

In [121]:
score_rf=cross_val_score(RandomForestRegressor(n_estimators=5),x_train,y_train,cv=5)
print(score_rf)
print("Avg:",np.average(score_rf))

[0.41341785 0.36635786 0.53517574 0.08468994 0.50504308]
Avg: 0.38093689383973994


In [122]:
score_rf2=cross_val_score(RandomForestRegressor(n_estimators=35),x_train,y_train,cv=4)
print(score_rf2)
print("Avg:",np.average(score_rf2))

[0.51219195 0.49385121 0.3160485  0.36006377]
Avg: 0.420538860393584


# In this our price prediction Our diffrent algorithms gives a Different score which is as followes ....

1.DecisionTreeRegressor:-  0.6279062114314625

2.KNeighborsRegressor  :-  0.6700318658337145

3.RandomForestRegressor:-  0.7117155871903378

# conclusion:-

as we see above all algorithms scores , but the RandomForestRegressor algoritham has highest score so this algorithm is suitable for our dataset.