# Importing Libraries

In [27]:
import pandas as pd
import numpy as np

In [28]:
import sklearn
# Record the scikit-learn version this notebook was executed with.
print(sklearn.__version__)

1.1.1


## Importing Datasets

In [29]:
# Read the raw spreadsheet into a DataFrame.
dataset = pd.read_excel('last_time_data.xlsx')

In [30]:
# Sanity check: (rows, columns) of the loaded sheet.
dataset.shape

(254, 16)

## Select the important columns necessary for training/building our model

In [31]:
# Keep only the feature columns plus the PRICE target.
# The explicit .copy() makes the result an independent frame, so the column
# assignments in the following cells do not write into a slice of the raw
# data (which is what triggers pandas' SettingWithCopyWarning).
dataset = dataset[['BRAND', 'CPU BRAND', 'CPU CORE', 'CPU GENERATION', 'CPU FAMILY', 'RAM SIZE', 'RAM(DDR) TYPE', 'DISK TYPE', 'SSD SIZE',
                   'HDD SIZE', 'GPU BRAND', 'GPU TYPE', 'SCREEN SIZE', 'SCREEN RESOLUTION', 'STATE', 'PRICE']].copy()

## Convert the dtype of the categorical columns

In [32]:
## CATEGORICALS
# Cast every categorical column to str with a single astype call.
# DataFrame.astype returns a new frame, so nothing is assigned column-by-column
# into a subset of the raw data (no SettingWithCopyWarning).
dataset = dataset.astype({
    "BRAND": "str",
    "CPU BRAND": "str",
    "CPU CORE": "str",
    "CPU FAMILY": "str",
    "DISK TYPE": "str",
    "GPU BRAND": "str",
    "GPU TYPE": "str",
    "SCREEN RESOLUTION": "str",
    "STATE": "str",
})

In [33]:
# Confirm `dataset` is still a DataFrame after the conversions.
type(dataset)

pandas.core.frame.DataFrame

In [34]:
# Peek at the underlying values as a NumPy object array.
dataset.values

array([['ACER', 'Intel', 'Core i3', ..., 'HD', 'USED', 79000],
       ['ACER', 'Intel', 'Core i3', ..., 'HD', 'USED', 79000],
       ['ACER', 'Intel', 'Core i3', ..., 'FHD', 'USED', 88000],
       ...,
       ['LENOVO', 'AMD', 'Ryzen 7', ..., 'FHD', 'USED', 112000],
       ['LENOVO', 'AMD', 'Ryzen 7', ..., 'FHD', 'USED', 135000],
       ['LENOVO', 'AMD', 'Ryzen 7', ..., 'FHD', 'USED', 135000]],
      dtype=object)

## Selecting the X and y columns

In [35]:
# Feature matrix: every column except the PRICE target.
X = dataset.drop(columns='PRICE')

In [36]:
# Sanity check: (rows, feature columns) after dropping the target.
X.shape

(254, 15)

In [37]:
# Target vector: the PRICE column.
y = dataset['PRICE']

In [38]:
# Turn the target into a single-column 2-D array.
# -1 lets NumPy infer the row count, so the cell keeps working when the dataset
# grows or shrinks. Building from `dataset` rather than from `y` itself means
# re-running the cell gives the same result.
y = dataset['PRICE'].values.reshape(-1, 1)
y.shape

(254, 1)

## Splitting data into Train and Test sets

In [39]:
from sklearn.model_selection import train_test_split

In [40]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

In [41]:
# Sub-frame holding only the categorical feature columns.
categorical_data = dataset[[
    'BRAND',
    'CPU BRAND',
    'CPU CORE',
    'CPU FAMILY',
    'DISK TYPE',
    'GPU BRAND',
    'GPU TYPE',
    'SCREEN RESOLUTION',
    'STATE',
]]

In [42]:
# Names of the categorical feature columns (one-hot encoded downstream).
categorical_columns = [
    'BRAND',
    'CPU BRAND',
    'CPU CORE',
    'CPU FAMILY',
    'DISK TYPE',
    'GPU BRAND',
    'GPU TYPE',
    'SCREEN RESOLUTION',
    'STATE',
]

In [43]:
# Sub-frame holding only the numeric feature columns.
numerical_data = dataset[[
    'CPU GENERATION',
    'RAM SIZE',
    'RAM(DDR) TYPE',
    'SSD SIZE',
    'HDD SIZE',
    'SCREEN SIZE',
]]

In [44]:
# Names of the numeric feature columns (scaled downstream).
numerical_columns = [
    'CPU GENERATION',
    'RAM SIZE',
    'RAM(DDR) TYPE',
    'SSD SIZE',
    'HDD SIZE',
    'SCREEN SIZE',
]

## Importing preprocessing, pipeline and model-selection utilities

In [45]:
from sklearn.preprocessing import OneHotEncoder

from sklearn.preprocessing import StandardScaler

from sklearn.preprocessing import MinMaxScaler

from sklearn.compose import ColumnTransformer

from sklearn.pipeline import Pipeline

from sklearn.model_selection import GridSearchCV

## Normalization and Standardization

In [46]:
# Numeric preprocessing: min-max scaling followed by standardisation.
# NOTE(review): standardising values that were already min-max scaled looks
# redundant — confirm whether both steps are needed.
num_pipeline = Pipeline(steps=[
    ('MinMax_Scaler', MinMaxScaler()),
    ('Standered_Scaler', StandardScaler()),
])

## ColumnTransformer and OneHotEncoding

In [47]:
# Route each column group through its own preprocessing:
#   - numeric columns go through `num_pipeline`
#   - categorical columns are one-hot encoded into a dense array, with
#     categories unseen during fit ignored instead of raising
# Any column not listed in either group is passed through unchanged.
column_trans = ColumnTransformer(
    transformers=[
        ("numerical", num_pipeline, numerical_columns),
        (
            "categorical",
            OneHotEncoder(sparse=False, handle_unknown='ignore'),
            categorical_columns,
        ),
    ],
    remainder='passthrough',
)

## Model Building

In [48]:
from sklearn.metrics import explained_variance_score, mean_absolute_error, r2_score, mean_squared_error

In [49]:
from time import time

from sklearn.linear_model import Ridge

In [50]:
# Candidate regressors to benchmark; currently only Ridge with default settings.
regressors = [Ridge()]

In [51]:
# Fit and score every candidate regressor on the held-out test split.
# head = 10
for model in regressors:    # [:head]:
    start = time()

    # Chain preprocessing and the regressor so both are fitted on the
    # training split only.
    pipe = Pipeline([
        ('step1', column_trans),
        ('step2', model)
    ])

    pipe.fit(X_train, y_train)
    train_time = time() - start

    # Time the prediction itself: the stop timestamp has to be taken after
    # predict() returns, otherwise the reported duration is always ~0.
    start = time()
    y_pred = pipe.predict(X_test)
    predict_time = time() - start

    # Compute MSE once; RMSE is its square root (single target), which avoids
    # depending on the `squared` keyword of mean_squared_error.
    mse = mean_squared_error(y_test, y_pred)

    print(model)
    print("\t Training Time: %0.3fs" % train_time)
    print("\t Prediction Time: %0.3fs" % predict_time)
    print("\t Explained Variance Score:", explained_variance_score(y_test, y_pred))
    print("\t Mean Absolute Error:", mean_absolute_error(y_test, y_pred))
    print("\t Mean Squared Error:", mse)
    print("\t Root Mean Squared Error:", np.sqrt(mse))
    print("\t R2 Score:", r2_score(y_test, y_pred))
    print()

Ridge()
	 Training Time: 0.018s
	 Prediction Time: 0.000s
	 Explained Variance Score: 0.8693716553165736
	 Mean Absolute Error: 11293.331735078144
	 Mean Squared Error: 228015503.55729485
	 R2 Score: 0.8677919202632969



## Model Tuning with HyperParameters

In [52]:
# Candidate regularisation strengths: from 3 towards 4 in steps of 0.001.
params = {'alpha': np.arange(3, 4, 0.001)}

ridge = Ridge()

# 5-fold grid search scored by (negated) mean absolute error.
# NOTE(review): the search runs as the last pipeline step, so `column_trans`
# is fitted on the whole training split before the folds are made — confirm
# this is acceptable, or search over the full pipeline instead.
step2 = GridSearchCV(
    estimator=ridge,
    param_grid=params,
    cv=5,
    scoring='neg_mean_absolute_error',
)

pipe = Pipeline(steps=[
    ('step1', column_trans),
    ('step2', step2),
])

pipe.fit(X_train, y_train)

# Evaluate the tuned model on the held-out test split.
y_pred = pipe.predict(X_test)

print('R2 score: ', r2_score(y_test, y_pred))
print('MAE: ', mean_absolute_error(y_test, y_pred))
print('RMSE: ', mean_squared_error(y_test, y_pred, squared=False))

R2 score:  0.8628348586267759
MAE:  11603.640645346744
RMSE:  15380.663391066044


In [53]:
# options = {
#            'BRAND': ['HP'], 
#            'CPU BRAND': ['Intel'], 
#            'CPU CORE': ['Core i7'], 
#            'CPU GENERATION': [7], 
#            'CPU FAMILY': ['U'], 
#            'RAM SIZE': [16], 
#            'RAM(DDR) TYPE': [3],
#            'DISK TYPE': ['BOTH'], 
#            'SSD SIZE': [512], 
#            'HDD SIZE': [500],
#            'GPU BRAND': ['NVIDIA'], 
#            'GPU TYPE': ['GeForce GTX 1050'], 
#            'SCREEN SIZE': [14.0],
#            'SCREEN RESOLUTION': ['HD'], 
#            'STATE': ['NEW']
#     }
# xteristics = pd.DataFrame(data = options)
# xteristics

In [54]:
# sunny = pipe.predict(xteristics)
# sunny = sunny.round()
# print("Predicted Price: ", sunny, "DA")

## Exporting the model with Pickle

In [55]:
import pickle

# Persist the fitted pipeline to disk. The context manager guarantees the
# file handle is flushed and closed even if pickling raises.
with open('model.pkl', 'wb') as model_file:
    pickle.dump(pipe, model_file)