# Importing Libraries

In [1]:
import pandas as pd
from sklearn.metrics import mean_absolute_error,mean_squared_error,r2_score
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import ElasticNet
from sklearn.model_selection import train_test_split
from sklearn.linear_model import Ridge
from sklearn.linear_model import Lasso
import warnings
warnings.filterwarnings('ignore')

# Defining class and functions

In [2]:
class Laptop:
    def __init__(self, x, y,ml):
        self.x = x
        self.y = y
        self.x_train = None 
        self.x_test = None 
        self.y_train = None 
        self.y_test = None
        self.ml=ml

    def split(self):
        x_train, x_test, y_train, y_test = train_test_split(self.x, self.y, train_size=0.8,random_state=80)
        self.x_train = x_train
        self.x_test = x_test 
        self.y_train = y_train 
        self.y_test = y_test 
        self.ml.fit(x_train, y_train)

    def mscore(self):
        print('Training Score:', self.ml.score(self.x_train, self.y_train))
        print('Testing Score:', self.ml.score(self.x_test, self.y_test))

    def met(self):
        y_pred = self.ml.predict(self.x_test)
        mse = mean_squared_error(y_pred, self.y_test)
        print('Mean Squared Error:', mse)
        rmse = np.sqrt(mse)
        print('Root Mean Squared Error:', rmse)
        mae = mean_absolute_error(y_pred, self.y_test)
        print('Mean Absolute Error:', mae)
        r2 = r2_score(y_pred, self.y_test)
        print('R-squared:', r2)

# Reading the data

In [3]:
df=pd.read_csv(r"C:\Users\aarza\Desktop\Dataset\imarticus\basant ml\laptopPrice.csv")

In [4]:
df #DATA

Unnamed: 0,brand,processor_brand,processor_name,processor_gnrtn,ram_gb,ram_type,ssd,hdd,os,os_bit,graphic_card_gb,weight,warranty,Touchscreen,msoffice,Price,rating,Number of Ratings,Number of Reviews
0,ASUS,Intel,Core i3,10th,4 GB,DDR4,0 GB,1024 GB,Windows,64-bit,0 GB,Casual,No warranty,No,No,34649,2 stars,3,0
1,Lenovo,Intel,Core i3,10th,4 GB,DDR4,0 GB,1024 GB,Windows,64-bit,0 GB,Casual,No warranty,No,No,38999,3 stars,65,5
2,Lenovo,Intel,Core i3,10th,4 GB,DDR4,0 GB,1024 GB,Windows,64-bit,0 GB,Casual,No warranty,No,No,39999,3 stars,8,1
3,ASUS,Intel,Core i5,10th,8 GB,DDR4,512 GB,0 GB,Windows,32-bit,2 GB,Casual,No warranty,No,No,69990,3 stars,0,0
4,ASUS,Intel,Celeron Dual,Not Available,4 GB,DDR4,0 GB,512 GB,Windows,64-bit,0 GB,Casual,No warranty,No,No,26990,3 stars,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
818,ASUS,AMD,Ryzen 9,Not Available,4 GB,DDR4,1024 GB,0 GB,Windows,64-bit,0 GB,Casual,1 year,No,No,135990,3 stars,0,0
819,ASUS,AMD,Ryzen 9,Not Available,4 GB,DDR4,1024 GB,0 GB,Windows,64-bit,0 GB,Casual,1 year,No,No,144990,3 stars,0,0
820,ASUS,AMD,Ryzen 9,Not Available,4 GB,DDR4,1024 GB,0 GB,Windows,64-bit,4 GB,Casual,1 year,No,No,149990,3 stars,0,0
821,ASUS,AMD,Ryzen 9,Not Available,4 GB,DDR4,1024 GB,0 GB,Windows,64-bit,4 GB,Casual,1 year,No,No,142990,3 stars,0,0


In [5]:
df.isnull().sum() # checking null values

brand                0
processor_brand      0
processor_name       0
processor_gnrtn      0
ram_gb               0
ram_type             0
ssd                  0
hdd                  0
os                   0
os_bit               0
graphic_card_gb      0
weight               0
warranty             0
Touchscreen          0
msoffice             0
Price                0
rating               0
Number of Ratings    0
Number of Reviews    0
dtype: int64

In [6]:
for i in df.columns:
    print(f'{i}: {df[i].unique()}\n') #checking uniques of all columns

brand: ['ASUS' 'Lenovo' 'acer' 'Avita' 'HP' 'DELL' 'MSI' 'APPLE']

processor_brand: ['Intel' 'AMD' 'M1']

processor_name: ['Core i3' 'Core i5' 'Celeron Dual' 'Ryzen 5' 'Core i7' 'Core i9' 'M1'
 'Pentium Quad' 'Ryzen 3' 'Ryzen 7' 'Ryzen 9']

processor_gnrtn: ['10th' 'Not Available' '11th' '7th' '8th' '9th' '4th' '12th']

ram_gb: ['4 GB' '8 GB' '16 GB' '32 GB']

ram_type: ['DDR4' 'LPDDR4' 'LPDDR4X' 'DDR5' 'DDR3' 'LPDDR3']

ssd: ['0 GB' '512 GB' '256 GB' '128 GB' '1024 GB' '2048 GB' '3072 GB']

hdd: ['1024 GB' '0 GB' '512 GB' '2048 GB']

os: ['Windows' 'DOS' 'Mac']

os_bit: ['64-bit' '32-bit']

graphic_card_gb: ['0 GB' '2 GB' '4 GB' '6 GB' '8 GB']

weight: ['Casual' 'ThinNlight' 'Gaming']

warranty: ['No warranty' '1 year' '2 years' '3 years']

Touchscreen: ['No' 'Yes']

msoffice: ['No' 'Yes']

Price: [ 34649  38999  39999  69990  26990  22990  21990  58799  49999  59990
  93700  72990  17490  35990  56490  65390  31999  32490  31799  29890
  23990  32955  46200  34429  37990  33980  3999

In [7]:
df.info() #trying to get information of data

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 823 entries, 0 to 822
Data columns (total 19 columns):
 #   Column             Non-Null Count  Dtype 
---  ------             --------------  ----- 
 0   brand              823 non-null    object
 1   processor_brand    823 non-null    object
 2   processor_name     823 non-null    object
 3   processor_gnrtn    823 non-null    object
 4   ram_gb             823 non-null    object
 5   ram_type           823 non-null    object
 6   ssd                823 non-null    object
 7   hdd                823 non-null    object
 8   os                 823 non-null    object
 9   os_bit             823 non-null    object
 10  graphic_card_gb    823 non-null    object
 11  weight             823 non-null    object
 12  warranty           823 non-null    object
 13  Touchscreen        823 non-null    object
 14  msoffice           823 non-null    object
 15  Price              823 non-null    int64 
 16  rating             823 non-null    object
 1

# Splitting independent and target variable

In [8]:
x=df.drop('Price',axis=1)

In [9]:
y=df['Price'] #price is our target variable

In [10]:
x=pd.get_dummies(x,drop_first=True)#converting categorical value into dummies

In [11]:
x.shape

(823, 63)

# Linear Regression

In [12]:
# Create an instance of the Laptop class
linear = Laptop(x,y,LinearRegression())  # Replace x and y with your actual data and model

In [13]:
# Call the split method
linear.split() 

In [14]:
# Call the mscore method
linear.mscore()  


Training Score: 0.824427438210721
Testing Score: 0.8007994018273087


In [15]:
# Call the met method
linear.met() 

Mean Squared Error: 416681355.48577476
Root Mean Squared Error: 20412.774321139565
Mean Absolute Error: 13499.828371722386
R-squared: 0.763693830892578


As the report indicate a high error rate , so standardization will be used to reduce it

# Scaling Price column Using Min Max scaler

In [16]:
y_minmax= (y - y.min()) / (y.max() - y.min())
y_minmax

0      0.041551
1      0.051786
2      0.054139
3      0.124706
4      0.023529
         ...   
818    0.280000
819    0.301176
820    0.312941
821    0.296471
822    0.095294
Name: Price, Length: 823, dtype: float64

# Linear Regression 

In [17]:
# Create an instance of the Laptop class
linear_minmax= Laptop(x,y_minmax,LinearRegression())  

# Call the split method
linear_minmax.split()  

# Call the mscore method
linear_minmax.mscore()

# Call the met method
linear_minmax.met() 


Training Score: 0.824427438210721
Testing Score: 0.8007994018273089
Mean Squared Error: 0.002306886397153076
Root Mean Squared Error: 0.04803005722621071
Mean Absolute Error: 0.031764302051111484
R-squared: 0.7636938308925783


As the report indicate a lower error rate but accuracy score is lower in testing data set as compare to training data set which indicate *OVERFITTING*

# Ridge Regression

In [18]:
# Applying alpha = 1 and running the algorithms for maximum of 500 iterations
ridge_minmax= Laptop(x,y_minmax,Ridge(alpha = 1, max_iter = 500)) 

# Call the split method
ridge_minmax.split() 

# Call the mscore method
ridge_minmax.mscore()  

# Call the met method
ridge_minmax.met() 

Training Score: 0.8009620361387227
Testing Score: 0.7994156492080756
Mean Squared Error: 0.002322911249104408
Root Mean Squared Error: 0.048196589600348366
Mean Absolute Error: 0.03200401494687756
R-squared: 0.7512527134689834


# Lasso Regression

In [19]:
# Applying alpha = 1 and running the algorithms for maximum of 500 iterations
lasso_minmax= Laptop(x,y_minmax,Lasso(alpha = 0.01, max_iter = 500))  

# Call the split method
lasso_minmax.split()  

# Call the mscore method
lasso_minmax.mscore()  

# Call the met method
lasso_minmax.met()

Training Score: 0.2295925086821582
Testing Score: 0.3151455670951674
Mean Squared Error: 0.007931107585974768
Root Mean Squared Error: 0.0890567660875622
Mean Absolute Error: 0.06188732267636368
R-squared: -3.9894147872995855


# Elastic Net

In [20]:
# apply both lasso and ridge regression
enet_minmax= Laptop(x,y_minmax,ElasticNet(alpha = 1.01, l1_ratio = 0.0099, max_iter = 500))  
# Call the split method
enet_minmax.split()  

# Call the mscore method
enet_minmax.mscore() 

# Call the met method
enet_minmax.met()  

Training Score: 0.06646580542260139
Testing Score: 0.07467480415730643
Mean Squared Error: 0.010715932215133055
Root Mean Squared Error: 0.10351778695051907
Mean Absolute Error: 0.07352420531616694
R-squared: -10.178282587570997


# Scaling Price column Using Robust Scaler

In [21]:
y_robust = (y - y.quantile(0.25)) / (y.quantile(0.75) - y.quantile(0.25))
y_robust

0     -0.262879
1     -0.162973
2     -0.140006
3      0.548793
4     -0.438782
         ...   
818    2.064606
819    2.271308
820    2.386142
821    2.225374
822    0.261707
Name: Price, Length: 823, dtype: float64

# Linear Regession

In [22]:
# Create an instance of the Laptop class
linear_robust= Laptop(x,y_robust,LinearRegression())  

# Call the split method
linear_robust.split() 

# Call the mscore method
linear_robust.mscore()  

# Call the met method
linear_robust.met() 


Training Score: 0.824427438210721
Testing Score: 0.8007994018273088
Mean Squared Error: 0.21978966641787961
Root Mean Squared Error: 0.46881730601363214
Mean Absolute Error: 0.310048652344282
R-squared: 0.763693830892578


# Ridge Regession

In [23]:
# Applying alpha = 1 and running the algorithms for maximum of 500 iterations
ridge_robust= Laptop(x,y_robust,Ridge(alpha = 1, max_iter = 500)) 

# Call the split method
ridge_robust.split() 

# Call the mscore method
ridge_robust.mscore() 

# Call the met method
ridge_robust.met() 

Training Score: 0.8009620361387229
Testing Score: 0.7994156492080756
Mean Squared Error: 0.2213164415851031
Root Mean Squared Error: 0.47044281436228047
Mean Absolute Error: 0.3123884695441758
R-squared: 0.7512527134689833


# Lasso Regession

In [24]:
# Applying alpha = 1 and running the algorithms for maximum of 500 iterations
lasso_robust= Laptop(x,y_robust,Lasso(alpha = 0.01, max_iter = 500)) 

# Call the split method
lasso_robust.split()  

# Call the mscore method
lasso_robust.mscore() 

# Call the met method
lasso_robust.met()

Training Score: 0.7002059590349374
Testing Score: 0.728932960585603
Mean Squared Error: 0.2990841127802406
Root Mean Squared Error: 0.5468858315775246
Mean Absolute Error: 0.356314451772546
R-squared: 0.5432334823165509


# Elastic Net

In [25]:
# apply both lasso and ridge regression
enet_robust= Laptop(x,y_robust,ElasticNet(alpha = 1.01, l1_ratio = 0.0099, max_iter = 500))  

# Call the split method
enet_robust.split() 

# Call the mscore method
enet_robust.mscore()  

# Call the met method
enet_robust.met()  

Training Score: 0.2349560464792153
Testing Score: 0.2449418424216896
Mean Squared Error: 0.8330998104552209
Root Mean Squared Error: 0.9127430144653099
Mean Absolute Error: 0.6252133123650774
R-squared: -5.894892282587283


`Hence, Linear Regression  with  RobustScaler  help  to  get  better  accuracy  than  other  Algorithms 

# Decision tree

In [26]:
from sklearn.tree import DecisionTreeRegressor

In [27]:
dt=Laptop(x,y_robust,DecisionTreeRegressor())

In [28]:
# Call the split method
dt.split() 

# Call the mscore method
dt.mscore()  

# Call the met method
dt.met()  

Training Score: 0.9974406059800198
Testing Score: 0.6990595040975631
Mean Squared Error: 0.3320452439037688
Root Mean Squared Error: 0.5762336712686693
Mean Absolute Error: 0.3303186431272603
R-squared: 0.6447185359304509


# Random forest

In [29]:
from sklearn.ensemble import RandomForestRegressor

In [30]:
rf=Laptop(x,y_robust,RandomForestRegressor())

In [31]:
# Call the split method
rf.split() 

# Call the mscore method
rf.mscore()  

# Call the met method
rf.met()  

Training Score: 0.9417980687566152
Testing Score: 0.8195306145575977
Mean Squared Error: 0.19912242427424168
Root Mean Squared Error: 0.4462313573408324
Mean Absolute Error: 0.271048088883931
R-squared: 0.7531355631891127


# Boosting

### AdaBoost

In [32]:
from sklearn.ensemble import AdaBoostRegressor

In [33]:
ab=Laptop(x,y_robust,AdaBoostRegressor())

In [34]:
# Call the split method
ab.split() 

# Call the mscore method
ab.mscore()  

# Call the met method
ab.met()  

Training Score: 0.5214552949689193
Testing Score: 0.5000489767033454
Mean Squared Error: 0.5516251941190883
Root Mean Squared Error: 0.7427147461300928
Mean Absolute Error: 0.6244158786007906
R-squared: -0.12059780538877596


### Gradient Boosting

In [35]:
from sklearn.ensemble import GradientBoostingRegressor

In [36]:
gb=Laptop(x,y_robust,GradientBoostingRegressor())

In [37]:
# Call the split method
gb.split() 

# Call the mscore method
gb.mscore()  

# Call the met method
gb.met()  

Training Score: 0.9141911702411986
Testing Score: 0.8233608035606991
Mean Squared Error: 0.1948963528114478
Root Mean Squared Error: 0.4414706703864344
Mean Absolute Error: 0.28386969191311723
R-squared: 0.7686683484919588


### XG Boosting

In [38]:
from xgboost import XGBRegressor

In [39]:
xgb=Laptop(x,y_robust,XGBRegressor())

In [40]:
# Call the split method
xgb.split() 

# Call the mscore method
xgb.mscore()  

# Call the met method
xgb.met()  

Training Score: 0.9948562920251238
Testing Score: 0.8033907345073303
Mean Squared Error: 0.21693049756726238
Root Mean Squared Error: 0.46575798175368116
Mean Absolute Error: 0.27420639044880735
R-squared: 0.7728921694564826
