## **Importing the Libraries**

In [13]:
import pandas as pd
import numpy as np

In [42]:
# Load the computer price dataset from the Excel workbook in the working directory
data_path = "./Computer_Data.xlsx"
df = pd.read_excel(data_path)
# Preview the first five rows
df.head()

Unnamed: 0.1,Unnamed: 0,price,speed,hd,ram,screen,cd,multi,premium,ads,trend
0,1.0,1499.0,25.0,80.0,4.0,14.0,no,no,yes,94.0,1.0
1,2.0,1795.0,33.0,85.0,2.0,14.0,no,no,yes,94.0,1.0
2,3.0,1595.0,25.0,170.0,4.0,15.0,no,no,yes,94.0,1.0
3,4.0,1849.0,25.0,170.0,8.0,14.0,no,no,no,94.0,1.0
4,5.0,3295.0,33.0,340.0,16.0,14.0,no,no,yes,94.0,1.0


In [43]:
# Remove the unnecessary 'Unnamed' column(s) from the dataset.
# Selecting by name instead of by position keeps this cell safe to re-run:
# a second execution finds nothing left to drop instead of removing 'price'.
unnamed_cols = [col for col in df.columns if str(col).startswith("Unnamed")]
df = df.drop(columns=unnamed_cols)
df.head()

Unnamed: 0,price,speed,hd,ram,screen,cd,multi,premium,ads,trend
0,1499.0,25.0,80.0,4.0,14.0,no,no,yes,94.0,1.0
1,1795.0,33.0,85.0,2.0,14.0,no,no,yes,94.0,1.0
2,1595.0,25.0,170.0,4.0,15.0,no,no,yes,94.0,1.0
3,1849.0,25.0,170.0,8.0,14.0,no,no,no,94.0,1.0
4,3295.0,33.0,340.0,16.0,14.0,no,no,yes,94.0,1.0


## **Visualizing the patterns**

In [31]:
import plotly.express as px

In [36]:
# Scatter plot of 'price' against 'ram', with points coloured by the 'speed' column
px.scatter(
    data_frame=df,
    x='ram',
    y='price',
    color='speed',
)

In [37]:
# Bar chart of 'price' grouped by the 'cd' flag, with bars coloured by the 'premium' flag
px.bar(
    data_frame=df,
    x='cd',
    y='price',
    color='premium',
)

In [38]:
# 2-D density heatmap showing where the ('ram', 'price') combinations concentrate
px.density_heatmap(
    data_frame=df,
    x='ram',
    y='price',
)

In [39]:
# Pie chart with one sector per distinct 'ram' value, sized by the 'trend' values in that group
# NOTE(review): 'trend' looks like a time index rather than a quantity, so sizing sectors
# by it may not be meaningful — confirm the intent (e.g. share of rows per RAM size)
px.pie(
    data_frame=df,
    values='trend',
    names='ram',
)

In [41]:
# Animated scatter of 'price' against 'ram', one animation frame per 'trend' value.
# The axis ranges are pinned so the view does not rescale between frames.
# NOTE(review): confirm that [1, 5000] covers the full range of 'price' in the data
px.scatter(
    data_frame=df,
    x='ram',
    y='price',
    animation_frame='trend',
    animation_group='price',
    range_x=[1,34],
    range_y=[1,5000],
)

## **Building the Regression models**

In [102]:
# Independent variables: every column except the target 'price'.
# Selecting by name does not depend on column order, and drop() returns a new
# DataFrame, so the encoding cell below can assign into X without touching df
# or triggering pandas' SettingWithCopyWarning.
X = df.drop(columns="price")
# Dependent variable: 'price' reshaped into a column vector of shape (n_samples, 1)
Y = df["price"].values.reshape(-1, 1)

In [103]:
# LabelEncoder maps each distinct category to an integer code
# e.g. [A, B, C, D, A, C, A] ==> [0, 1, 2, 3, 0, 2, 0]
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
# Encode the categorical columns one at a time; the encoder is re-fitted for each column
for col in ('premium', 'multi', 'cd'):
    X[col] = le.fit_transform(X[col])

In [104]:
# Convert the feature table to a plain NumPy array.
# np.asarray returns an ndarray unchanged, so re-running this cell does not fail
# the way calling .values on an ndarray would.
X = np.asarray(X)

In [105]:
# Split the data so the models are scored on rows they were not trained on:
# the training split is used to fit the models, the testing split to evaluate them
from sklearn.model_selection import train_test_split
xtrain, xtest, ytrain, ytest = train_test_split(
    X,
    Y,
    test_size=0.2,     # 20% of the rows go to the test split
    random_state=0,    # fixed seed so the split is the same on every run
)

In [106]:
# Standardise the features so that columns with very different magnitudes
# are brought onto a comparable scale
from sklearn.preprocessing import StandardScaler
# The scaler is fitted on the training rows only; xtest stays unscaled here and is
# passed through sc_x.transform() at prediction time.
# Because xtrain is overwritten, re-run the split cell before re-running this one.
sc_x = StandardScaler().fit(xtrain)
xtrain = sc_x.transform(xtrain)


### **Multiple Linear Regression model**

In [107]:
# Multiple linear regression on the scaled training features
from sklearn.linear_model import LinearRegression
mlr = LinearRegression()
# Fit on the training split
mlr.fit(X=xtrain, y=ytrain)

In [108]:
# Scale the test features with the scaler fitted on the training split, then predict
xtest_scaled = sc_x.transform(xtest)
mlrpred = mlr.predict(xtest_scaled)

In [110]:
# Score the test-set predictions with R2 (higher is better) and mean squared error (lower is better)
from sklearn.metrics import r2_score, mean_squared_error as mse
mlr_r2 = r2_score(ytest, mlrpred)
mlr_mse = mse(ytest, mlrpred)
print(f"R2 score: {mlr_r2}")
print(f"MSE: {mlr_mse}")

R2 score: 0.7842770001633781
MSE: 75557.18562442459


### **Support Vector Regression model**

In [111]:
# Support vector regression with the RBF kernel; other hyperparameters keep their defaults
# NOTE(review): only the features are scaled, the price target is not — with default
# settings this may explain the low R2 recorded below; confirm before comparing models
from sklearn.svm import SVR
svr = SVR(kernel='rbf')
# Fit on the training split, flattening the (n, 1) target into a 1-D array
svr.fit(X=xtrain, y=ytrain.ravel())

In [112]:
# Scale the test features with the scaler fitted on the training split, then predict
xtest_scaled = sc_x.transform(xtest)
svrpred = svr.predict(xtest_scaled)

In [113]:
# Score the test-set predictions with R2 (higher is better) and mean squared error (lower is better)
from sklearn.metrics import r2_score, mean_squared_error as mse
svr_r2 = r2_score(ytest, svrpred)
svr_mse = mse(ytest, svrpred)
print(f"R2 score: {svr_r2}")
print(f"MSE: {svr_mse}")

R2 score: 0.3750107154651787
MSE: 218903.09063306998


### **Decision Tree Regression model**

In [114]:
# Building DTR
# random_state is fixed (same seed as the train/test split) because the tree's
# split selection is randomised; without a seed the score changes from run to run.
from sklearn.tree import DecisionTreeRegressor
dtr = DecisionTreeRegressor(random_state=0)
# Training the model using training data
dtr.fit(xtrain, ytrain)

In [116]:
# Scale the test features with the scaler fitted on the training split, then predict
xtest_scaled = sc_x.transform(xtest)
dtrpred = dtr.predict(xtest_scaled)

In [117]:
# Score the test-set predictions with R2 (higher is better) and mean squared error (lower is better)
from sklearn.metrics import r2_score, mean_squared_error as mse
dtr_r2 = r2_score(ytest, dtrpred)
dtr_mse = mse(ytest, dtrpred)
print(f"R2 score: {dtr_r2}")
print(f"MSE: {dtr_mse}")

R2 score: 0.9094708364822558
MSE: 31707.92552261217


### **Random Forest Regression model**

In [118]:
# Building RFR
# 20 trees; random_state is fixed (same seed as the train/test split) because the
# forest's bootstrap sampling is random, so unseeded runs give different scores.
from sklearn.ensemble import RandomForestRegressor
rfr = RandomForestRegressor(n_estimators=20, random_state=0)
# Training the model using training data (target flattened to a 1-D array)
rfr.fit(xtrain, ytrain.ravel())

In [119]:
# Scale the test features with the scaler fitted on the training split, then predict
xtest_scaled = sc_x.transform(xtest)
rfrpred = rfr.predict(xtest_scaled)

In [120]:
# Score the test-set predictions with R2 (higher is better) and mean squared error (lower is better)
from sklearn.metrics import r2_score, mean_squared_error as mse
rfr_r2 = r2_score(ytest, rfrpred)
rfr_mse = mse(ytest, rfrpred)
print(f"R2 score: {rfr_r2}")
print(f"MSE: {rfr_mse}")

R2 score: 0.9267796193988391
MSE: 25645.507863152478


## **Comparing the models - R2 score of each model on the test set**
### **Multiple Linear Regression - 0.7842770001633781**
### **Support Vector Regression - 0.3750107154651787**
### **Decision Tree Regression - 0.9094708364822558**
### **Random Forest Regression - 0.9267796193988391**

## Based on the R2 scores above, the Random Forest Regression model fits the test data best among the four models: it has the highest R2 score and the lowest MSE.