# Importing Libraries

In [None]:
import pandas as pd

In [None]:
import sklearn
# Print the installed scikit-learn version for reference.
print(sklearn.__version__)

## Importing Datasets

In [None]:
import os

# Location of the Excel workbook, relative to the working directory.
# NOTE: this is the already-cleaned dataset; realistically, outliers should be
# removed from the raw data, before it has been cleaned up.
relative_path = 'data/laptop_data.xlsx'

# Anchor the relative path at the current working directory so the
# workbook is addressed by an absolute path.
current_directory = os.getcwd()
file_path = os.path.join(current_directory, relative_path)

# Load the workbook into a DataFrame.
dataset = pd.read_excel(file_path)

In [None]:
# Inspect the (rows, columns) dimensions of the loaded data.
dataset.shape

## Select the important columns necessary for training/building our model

In [None]:
# Keep only the feature columns used for modelling plus the PRICE target.
# An explicit copy is taken because the columns of this selection are
# reassigned afterwards; mutating a bare selection makes pandas emit
# SettingWithCopyWarning.
dataset = dataset[['BRAND', 'CPU BRAND', 'CPU CORE', 'CPU GENERATION', 'CPU FAMILY', 'RAM SIZE', 'RAM(DDR) TYPE', 'DISK TYPE', 'SSD SIZE',
                   'HDD SIZE', 'GPU BRAND', 'GPU TYPE', 'SCREEN SIZE', 'SCREEN RESOLUTION', 'STATE', 'PRICE']].copy()

## Convert the data type of the categorical columns

In [None]:
## CATEGORICAL
# Cast every categorical column to str, one column at a time.
for column_name in (
    "BRAND",
    "CPU BRAND",
    "CPU CORE",
    "CPU FAMILY",
    "DISK TYPE",
    "GPU BRAND",
    "GPU TYPE",
    "SCREEN RESOLUTION",
    "STATE",
):
    dataset[column_name] = dataset[column_name].astype("str")

In [None]:
# Check which type of object `dataset` is after the selection and casts.
type(dataset)

In [None]:
# Display the table's contents as a raw array of values.
dataset.values

## Selecting the X and y columns

- **X**: represents the features that our model will learn from

- **y**: represents the label (target) column that our model is trained to predict

In [None]:
# Feature matrix: every column except the PRICE target.
X = dataset.drop(columns='PRICE')

In [None]:
# Inspect the dimensions of the feature table.
X.shape

In [None]:
# Target: the PRICE column.
y = dataset['PRICE']

In [None]:
# Turn the target into a single-column 2-D array. `-1` lets NumPy infer the
# row count, so this works for any dataset size instead of exactly 254 rows.
# Reading straight from `dataset` keeps the cell safe to re-run after `y` has
# already been converted to an array.
y = dataset['PRICE'].values.reshape(-1, 1)
y.shape

## Splitting data into Train and Test sets

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

In [None]:
# Sub-table holding only the categorical feature columns.
categorical_data = dataset[
    [
        'BRAND',
        'CPU BRAND',
        'CPU CORE',
        'CPU FAMILY',
        'DISK TYPE',
        'GPU BRAND',
        'GPU TYPE',
        'SCREEN RESOLUTION',
        'STATE',
    ]
]

In [None]:
# Names of the categorical feature columns (one-hot encoded later on).
categorical_columns = [
    'BRAND',
    'CPU BRAND',
    'CPU CORE',
    'CPU FAMILY',
    'DISK TYPE',
    'GPU BRAND',
    'GPU TYPE',
    'SCREEN RESOLUTION',
    'STATE',
]

In [None]:
# Sub-table holding only the numerical feature columns.
numerical_data = dataset[
    [
        'CPU GENERATION',
        'RAM SIZE',
        'RAM(DDR) TYPE',
        'SSD SIZE',
        'HDD SIZE',
        'SCREEN SIZE',
    ]
]

In [None]:
# Names of the numerical feature columns (scaled later on).
numerical_columns = [
    'CPU GENERATION',
    'RAM SIZE',
    'RAM(DDR) TYPE',
    'SSD SIZE',
    'HDD SIZE',
    'SCREEN SIZE',
]

## Importing Libraries

In [None]:
from sklearn.preprocessing import OneHotEncoder

from sklearn.preprocessing import StandardScaler

from sklearn.preprocessing import MinMaxScaler

from sklearn.compose import ColumnTransformer

from sklearn.pipeline import Pipeline

## Normalization and Standardization

In [None]:
# Numeric preprocessing: min-max scaling first, then standardization.
num_pipeline = Pipeline(
    steps=[
        ("MinMax_Scaler", MinMaxScaler()),
        ("Standard_Scaler", StandardScaler()),
    ],
)

## ColumnTransformer and OneHotEncoding

In [None]:
# Build a dense one-hot encoder that works across scikit-learn releases:
# the `sparse` keyword was renamed to `sparse_output` in 1.2 and removed in
# 1.4, so try the new name first and fall back to the old one.
try:
    _one_hot_encoder = OneHotEncoder(sparse_output=False, handle_unknown='ignore')
except TypeError:
    # Older scikit-learn releases only accept the original `sparse` keyword.
    _one_hot_encoder = OneHotEncoder(sparse=False, handle_unknown='ignore')

# Route numerical columns through the scaling pipeline and categorical
# columns through the one-hot encoder; any other column passes through as is.
# Categories unseen during fit are encoded as all zeros (handle_unknown='ignore').
column_trans = ColumnTransformer(transformers = [
    ("numerical", num_pipeline, numerical_columns),
    ("categorical", _one_hot_encoder, categorical_columns),
], remainder = 'passthrough')

## Model Building

In [None]:
from sklearn.metrics import explained_variance_score, mean_absolute_error, r2_score, mean_squared_error

In [None]:
from time import time

from sklearn.linear_model import Ridge
from sklearn.linear_model import RidgeCV
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import SGDRegressor
from sklearn.linear_model import ElasticNet
from sklearn.linear_model import Lars
from sklearn.linear_model import Lasso
from sklearn.linear_model import LassoLars
from sklearn.linear_model import LassoLarsCV
from sklearn.linear_model import LassoLarsIC
from sklearn.linear_model import OrthogonalMatchingPursuit
from sklearn.linear_model import ARDRegression
from sklearn.linear_model import BayesianRidge
from sklearn.linear_model import MultiTaskElasticNet
from sklearn.linear_model import MultiTaskLasso
from sklearn.linear_model import HuberRegressor
from sklearn.linear_model import RANSACRegressor
from sklearn.linear_model import PassiveAggressiveRegressor
from sklearn.linear_model import TheilSenRegressor

In [None]:
# Candidate linear models, each instantiated with its default settings.
regressors = [
    estimator_cls()
    for estimator_cls in (
        Ridge,
        RidgeCV,
        LinearRegression,
        SGDRegressor,
        ElasticNet,
        Lars,
        Lasso,
        LassoLars,
        LassoLarsCV,
        LassoLarsIC,
        OrthogonalMatchingPursuit,
        ARDRegression,
        BayesianRidge,
        MultiTaskElasticNet,
        MultiTaskLasso,
        HuberRegressor,
        RANSACRegressor,
        PassiveAggressiveRegressor,
        TheilSenRegressor,
    )
]

In [None]:
# Fit every candidate regressor behind the shared preprocessing step and
# report its timings and error metrics on the held-out test split.
# head = 10
for model in regressors:    # [:head]:
    # Chain preprocessing and estimator so both are fitted together on the
    # training split.
    pipe = Pipeline([
        ('step1', column_trans),
        ('step2', model)
    ])

    # Time the fit.
    start = time()
    pipe.fit(X_train, y_train)
    train_time = time() - start

    # Time the prediction itself: the timer must wrap the predict() call,
    # otherwise the reported duration is always ~0.
    start = time()
    y_prediction = pipe.predict(X_test)
    predict_time = time() - start

    # RMSE is derived from the MSE directly rather than via the `squared`
    # keyword, which newer scikit-learn releases no longer accept.
    mse = mean_squared_error(y_test, y_prediction)
    rmse = mse ** 0.5

    print(model)
    print("\t Training Time: %0.3fs" % train_time)
    print("\t Prediction Time: %0.3fs" % predict_time)
    print("\t Explained Variance Score:", explained_variance_score(y_test, y_prediction))
    print("\t Mean Absolute Error:", mean_absolute_error(y_test, y_prediction))
    print("\t Mean Squared Error:", mse)
    print("\t Root Mean Squared Error:", rmse)
    print("\t R2 Score:", r2_score(y_test, y_prediction))
    print()