# Importing Libraries

In [1]:
import pandas as pd
import numpy as np

In [2]:
import sklearn
print(sklearn.__version__)

1.1.1


## Importing Datasets

In [3]:
dataset= pd.read_excel('last_time_data.xlsx')

In [4]:
dataset.shape

(254, 16)

## Select the important columns necessary for training/building our model

In [5]:
dataset = dataset[['BRAND', 'CPU BRAND', 'CPU CORE', 'CPU GENERATION', 'CPU FAMILY', 'RAM SIZE', 'RAM(DDR) TYPE', 'DISK TYPE', 'SSD SIZE',
                   'HDD SIZE', 'GPU BRAND', 'GPU TYPE', 'SCREEN SIZE', 'SCREEN RESOLUTION', 'STATE', 'PRICE']]

## Convert the DataType of categorical datas

In [6]:
## CATEGORICALS
dataset["BRAND"] = dataset["BRAND"].astype("str")
dataset["CPU BRAND"] = dataset["CPU BRAND"].astype("str")
dataset["CPU CORE"] = dataset["CPU CORE"].astype("str")
dataset["CPU FAMILY"] = dataset["CPU FAMILY"].astype("str")
dataset["DISK TYPE"] = dataset["DISK TYPE"].astype("str")
dataset["GPU BRAND"] = dataset["GPU BRAND"].astype("str")
dataset["GPU TYPE"] = dataset["GPU TYPE"].astype("str")
dataset["SCREEN RESOLUTION"] = dataset["SCREEN RESOLUTION"].astype("str")
dataset["STATE"] = dataset["STATE"].astype("str")

In [7]:
type(dataset)

pandas.core.frame.DataFrame

In [8]:
dataset.values

array([['ACER', 'Intel', 'Core i3', ..., 'HD', 'USED', 79000],
       ['ACER', 'Intel', 'Core i3', ..., 'HD', 'USED', 79000],
       ['ACER', 'Intel', 'Core i3', ..., 'FHD', 'USED', 88000],
       ...,
       ['LENOVO', 'AMD', 'Ryzen 7', ..., 'FHD', 'USED', 112000],
       ['LENOVO', 'AMD', 'Ryzen 7', ..., 'FHD', 'USED', 135000],
       ['LENOVO', 'AMD', 'Ryzen 7', ..., 'FHD', 'USED', 135000]],
      dtype=object)

## Selecting the X and y columns

In [9]:
X = dataset.drop('PRICE', axis = 'columns')

In [10]:
X.shape

(254, 15)

In [11]:
y = dataset.PRICE

In [12]:
y = y.values.reshape((254,1))
y.shape

(254, 1)

## Splitting data into Train and Test sets

In [13]:
from sklearn.model_selection import train_test_split

In [14]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)

In [15]:
categorical_data = dataset[['BRAND', 'CPU BRAND', 'CPU CORE', 'CPU FAMILY', 'DISK TYPE',
               'GPU BRAND', 'GPU TYPE', 'SCREEN RESOLUTION', 'STATE']]

In [16]:
categorical_columns = ['BRAND', 'CPU BRAND', 'CPU CORE', 'CPU FAMILY', 'DISK TYPE',
               'GPU BRAND', 'GPU TYPE', 'SCREEN RESOLUTION', 'STATE']

In [17]:
numerical_data = dataset[['CPU GENERATION', 'RAM SIZE', 'RAM(DDR) TYPE', 'SSD SIZE', 'HDD SIZE', 'SCREEN SIZE']]

In [18]:
numerical_columns = ['CPU GENERATION', 'RAM SIZE', 'RAM(DDR) TYPE', 'SSD SIZE', 'HDD SIZE', 'SCREEN SIZE']

## Importing Libraries

In [19]:
from sklearn.preprocessing import OneHotEncoder

from sklearn.preprocessing import StandardScaler

from sklearn.preprocessing import MinMaxScaler

from sklearn.compose import ColumnTransformer

from sklearn.pipeline import Pipeline

from sklearn.model_selection import GridSearchCV

## Normalization and Standardization

In [20]:
num_pipeline = Pipeline([
    ("MinMax_Scaler", MinMaxScaler()),
    ('Standered_Scaler', StandardScaler())
])

## ColumnTransformer and OneHotEncoding

In [21]:
column_trans = ColumnTransformer(transformers = [
    ("numerical", num_pipeline, numerical_columns),
    ("categorical", OneHotEncoder(sparse = False, handle_unknown='ignore'), categorical_columns),
    ], remainder = 'passthrough')

## Model Building

In [22]:
from sklearn.metrics import explained_variance_score, mean_absolute_error, r2_score, mean_squared_error

In [23]:
from time import time

from sklearn.linear_model import Ridge
from sklearn.linear_model import RidgeCV
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import SGDRegressor
from sklearn.linear_model import ElasticNet
from sklearn.linear_model import Lars
from sklearn.linear_model import Lasso
from sklearn.linear_model import LassoLars
from sklearn.linear_model import LassoLarsCV
from sklearn.linear_model import LassoLarsIC
from sklearn.linear_model import OrthogonalMatchingPursuit
from sklearn.linear_model import ARDRegression
from sklearn.linear_model import BayesianRidge
from sklearn.linear_model import MultiTaskElasticNet
from sklearn.linear_model import MultiTaskLasso
from sklearn.linear_model import HuberRegressor
from sklearn.linear_model import RANSACRegressor
from sklearn.linear_model import PassiveAggressiveRegressor
from sklearn.linear_model import TheilSenRegressor

In [24]:
regressors = [
    Ridge(),
    RidgeCV(),
    LinearRegression(),
    SGDRegressor(),
    ElasticNet(),
    Lars(),
    Lasso(),
    LassoLars(),
    LassoLarsCV(),
    LassoLarsIC(),
    OrthogonalMatchingPursuit(),
    ARDRegression(),
    BayesianRidge(),
    MultiTaskElasticNet(),
    MultiTaskLasso(),
    HuberRegressor(),
    RANSACRegressor(),
    PassiveAggressiveRegressor(),
    TheilSenRegressor()
]

In [25]:
# head = 10
for model in regressors:    # [:head]:
    start = time()
    
    
    pipe = Pipeline([
    ('step1', column_trans),
    ('step2', model)
    ])
    
    
    pipe.fit(X_train, y_train)


    train_time = time() - start
    start = time()
    predict_time = time()-start
    
    y_pred = pipe.predict(X_test)
    
    print(model)
    print("\t Training Time: %0.3fs" % train_time)
    print("\t Prediction Time: %0.3fs" % predict_time)
    print("\t Explained Variance Score:", explained_variance_score(y_test, y_pred))
    print("\t Mean Absolute Error:", mean_absolute_error(y_test, y_pred))
    print("\t Mean Squared Error:", mean_squared_error(y_test, y_pred))
    print("\t Root Mean Squared Error:", mean_squared_error(y_test, y_pred, squared = False))
    print("\t R2 Score:", r2_score(y_test, y_pred))
    print()

Ridge()
	 Training Time: 0.037s
	 Prediction Time: 0.000s
	 Explained Variance Score: 0.8693716553165736
	 Mean Absolute Error: 11293.331735078144
	 Mean Squared Error: 228015503.55729485
	 Root Mean Squared Error: 15100.182235896851
	 R2 Score: 0.8677919202632969

RidgeCV()
	 Training Time: 0.069s
	 Prediction Time: 0.000s
	 Explained Variance Score: 0.8693716553165933
	 Mean Absolute Error: 11293.331735078702
	 Mean Squared Error: 228015503.5572494
	 Root Mean Squared Error: 15100.182235895347
	 R2 Score: 0.8677919202633233

LinearRegression()
	 Training Time: 0.027s
	 Prediction Time: 0.000s
	 Explained Variance Score: -9.14323916113448e+19
	 Mean Absolute Error: 122575877364934.52
	 Mean Squared Error: 1.6042894550690312e+29
	 Root Mean Squared Error: 400535823000768.7
	 R2 Score: -9.302000297678124e+19

SGDRegressor()
	 Training Time: 0.040s
	 Prediction Time: 0.000s
	 Explained Variance Score: 0.8680198849943278
	 Mean Absolute Error: 11717.500642472405
	 Mean Squared Error: 2301

  y = column_or_1d(y, warn=True)
If you wish to scale the data, use Pipeline with a StandardScaler in a preprocessing stage. To reproduce the previous behavior:

from sklearn.pipeline import make_pipeline

model = make_pipeline(StandardScaler(with_mean=False), Lars())

If you wish to pass a sample_weight parameter, you need to pass it as a fit parameter to each step of the pipeline as follows:

kwargs = {s[0] + '__sample_weight': sample_weight for s in model.steps}
model.fit(X, y, **kwargs)


  model = cd_fast.enet_coordinate_descent(


ElasticNet()
	 Training Time: 0.023s
	 Prediction Time: 0.000s
	 Explained Variance Score: 0.6767089242251345
	 Mean Absolute Error: 18904.746076850555
	 Mean Squared Error: 563743605.5792098
	 Root Mean Squared Error: 23743.285484094442
	 R2 Score: 0.6731298600546929

Lars()
	 Training Time: 0.053s
	 Prediction Time: 0.000s
	 Explained Variance Score: -5.915406282351776e+31
	 Mean Absolute Error: 9.318082381246702e+19
	 Mean Squared Error: 1.1070399056118678e+41
	 Root Mean Squared Error: 3.327220920846507e+20
	 R2 Score: -6.418845114892346e+31

Lasso()
	 Training Time: 0.039s
	 Prediction Time: 0.000s
	 Explained Variance Score: 0.8402751141946687
	 Mean Absolute Error: 12094.382561310356
	 Mean Squared Error: 284761594.52502984
	 Root Mean Squared Error: 16874.880578096836
	 R2 Score: 0.8348893693298541

LassoLars()
	 Training Time: 0.035s
	 Prediction Time: 0.000s
	 Explained Variance Score: 0.795161511256328
	 Mean Absolute Error: 14285.67205888025
	 Mean Squared Error: 362734279.

If you wish to scale the data, use Pipeline with a StandardScaler in a preprocessing stage. To reproduce the previous behavior:

from sklearn.pipeline import make_pipeline

model = make_pipeline(StandardScaler(with_mean=False), LassoLars())

If you wish to pass a sample_weight parameter, you need to pass it as a fit parameter to each step of the pipeline as follows:

kwargs = {s[0] + '__sample_weight': sample_weight for s in model.steps}
model.fit(X, y, **kwargs)

Set parameter alpha to: original_alpha * np.sqrt(n_samples). 
If you wish to scale the data, use Pipeline with a StandardScaler in a preprocessing stage. To reproduce the previous behavior:

from sklearn.pipeline import make_pipeline

model = make_pipeline(StandardScaler(with_mean=False), LassoLarsCV())

If you wish to pass a sample_weight parameter, you need to pass it as a fit parameter to each step of the pipeline as follows:

kwargs = {s[0] + '__sample_weight': sample_weight for s in model.steps}
model.fit(X, y, **kwargs)

LassoLarsCV()
	 Training Time: 0.116s
	 Prediction Time: 0.000s
	 Explained Variance Score: 0.7892395542156587
	 Mean Absolute Error: 14602.443176533146
	 Mean Squared Error: 372879737.46052617
	 Root Mean Squared Error: 19310.09418569796
	 R2 Score: 0.7837966572742506

LassoLarsIC()
	 Training Time: 0.034s
	 Prediction Time: 0.000s
	 Explained Variance Score: 0.7933816348335134
	 Mean Absolute Error: 14376.03459008997
	 Mean Squared Error: 365841087.50946075
	 Root Mean Squared Error: 19126.97277431692
	 R2 Score: 0.7878778113161966

OrthogonalMatchingPursuit()
	 Training Time: 0.019s
	 Prediction Time: 0.000s
	 Explained Variance Score: 0.6742594687895598
	 Mean Absolute Error: 19766.43244010974
	 Mean Squared Error: 577790507.0654249
	 Root Mean Squared Error: 24037.273286823216
	 R2 Score: 0.6649851775977103



If you wish to scale the data, use Pipeline with a StandardScaler in a preprocessing stage. To reproduce the previous behavior:

from sklearn.pipeline import make_pipeline

model = make_pipeline(StandardScaler(with_mean=False), OrthogonalMatchingPursuit())

If you wish to pass a sample_weight parameter, you need to pass it as a fit parameter to each step of the pipeline as follows:

kwargs = {s[0] + '__sample_weight': sample_weight for s in model.steps}
model.fit(X, y, **kwargs)


  y = column_or_1d(y, warn=True)


ARDRegression()
	 Training Time: 0.349s
	 Prediction Time: 0.000s
	 Explained Variance Score: 0.804772947070596
	 Mean Absolute Error: 13589.894299600754
	 Mean Squared Error: 337246936.9950775
	 Root Mean Squared Error: 18364.284276689836
	 R2 Score: 0.8044572880282218

BayesianRidge()
	 Training Time: 0.026s
	 Prediction Time: 0.000s
	 Explained Variance Score: 0.8691038274513296
	 Mean Absolute Error: 11326.193462641426
	 Mean Squared Error: 228352236.82170227
	 Root Mean Squared Error: 15111.328095892242
	 R2 Score: 0.8675966753892588

MultiTaskElasticNet()
	 Training Time: 0.019s
	 Prediction Time: 0.000s
	 Explained Variance Score: 0.6767089242251343
	 Mean Absolute Error: 18904.746076850563
	 Mean Squared Error: 563743605.57921
	 Root Mean Squared Error: 23743.285484094446
	 R2 Score: 0.6731298600546927

MultiTaskLasso()
	 Training Time: 0.049s
	 Prediction Time: 0.000s
	 Explained Variance Score: 0.8402751141946889
	 Mean Absolute Error: 12094.38256131032
	 Mean Squared Error: 

  y = column_or_1d(y, warn=True)
  ) = cd_fast.enet_coordinate_descent_multi_task(
  y = column_or_1d(y, warn=True)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


HuberRegressor()
	 Training Time: 0.070s
	 Prediction Time: 0.000s
	 Explained Variance Score: 0.8526262922629904
	 Mean Absolute Error: 11820.125038130236
	 Mean Squared Error: 264711742.16941774
	 Root Mean Squared Error: 16269.964418197655
	 R2 Score: 0.8465146861946515

RANSACRegressor()
	 Training Time: 0.229s
	 Prediction Time: 0.000s
	 Explained Variance Score: -3.090379387188738e+22
	 Mean Absolute Error: 1357349660006885.2
	 Mean Squared Error: 5.514128978269563e+31
	 Root Mean Squared Error: 7425718132456660.0
	 R2 Score: -3.197205419211084e+22

PassiveAggressiveRegressor()
	 Training Time: 0.077s
	 Prediction Time: 0.000s
	 Explained Variance Score: 0.7388275046452302
	 Mean Absolute Error: 15724.184878388465
	 Mean Squared Error: 471794110.5255156
	 Root Mean Squared Error: 21720.822049948194
	 R2 Score: 0.7264440688876627



  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


TheilSenRegressor()
	 Training Time: 11.776s
	 Prediction Time: 0.000s
	 Explained Variance Score: 0.87197949676299
	 Mean Absolute Error: 11251.757318465236
	 Mean Squared Error: 225174983.018087
	 Root Mean Squared Error: 15005.83163367119
	 R2 Score: 0.8694389125076071

