In [None]:
!pip install numpy
!pip install pandas

In [None]:
import numpy as np
import pandas as pd

In [None]:
# Load the laptop dataset; the CSV is decoded as latin-1 instead of pandas' default UTF-8.
data = pd.read_csv('laptop_price.csv', encoding='latin-1')

# Data Analysis

In [None]:
data.head(2)

In [None]:
data.shape

In [None]:
data.isnull().sum()

In [None]:
data.info()

In [None]:
# Strip the unit suffixes and store Ram / Weight as numbers.
# astype(str) first keeps the cell re-runnable: on a second run the columns are
# already numeric and the .str accessor would otherwise raise AttributeError.
# regex=False makes the replacement a plain substring removal on every pandas version.
data['Ram'] = data['Ram'].astype(str).str.replace('GB', '', regex=False).astype('int32')
data['Weight'] = data['Weight'].astype(str).str.replace('kg', '', regex=False).astype('float32')

In [None]:
data.head(2)

In [None]:
# Correlation of each numeric column with the price. The text columns
# (Company, Product, ...) are excluded first: since pandas 2.0
# DataFrame.corr() raises on non-numeric data instead of silently skipping it.
data.select_dtypes(include='number').corr()['Price_euros']

### Company Column

In [None]:
data['Company'].value_counts()

In [None]:
def add_company(inpt):
    """Group a fixed list of laptop brands under the label 'Other'.

    Returns 'Other' when ``inpt`` equals one of the listed brand names;
    any other value is returned unchanged.
    """
    grouped_brands = (
        'Samsung', 'Razer', 'Mediacom', 'Microsoft', 'Xiaomi', 'Vero',
        'Chuwi', 'Google', 'Fujitsu', 'LG', 'Huawei',
    )
    return 'Other' if inpt in grouped_brands else inpt

In [None]:
# Replace the brands listed in add_company with 'Other'; every other brand is kept as-is.
data['Company'] = data['Company'].apply(add_company)

In [None]:
data['Company'].value_counts()

### Product Column 

In [None]:
len(data['Product'].value_counts())

### TypeName Column 

In [None]:
data['TypeName'].value_counts()

### ScreenResolution Column 

In [None]:
data['ScreenResolution'].value_counts()

In [None]:
# Binary flags: 1 when the resolution text contains the keyword, otherwise 0.
data['Touchscreen'] = data['ScreenResolution'].apply(lambda res: int('Touchscreen' in res))
data['Ips'] = data['ScreenResolution'].apply(lambda res: int('IPS' in res))

In [None]:
data.head(2)

### Cpu Column 

In [None]:
data['Cpu'].value_counts()

In [None]:
# Keep only the first three whitespace-separated words of the CPU description.
data['cpu_name'] = data['Cpu'].apply(lambda cpu: " ".join(cpu.split()[:3]))

In [None]:
data['cpu_name'].value_counts()

In [None]:
def set_processor(name):
    """Bucket a CPU label into one of five processor categories.

    Parameters
    ----------
    name : str
        CPU label, e.g. ``'Intel Core i5'`` or ``'AMD A9-Series 9420'``.

    Returns
    -------
    str
        ``name`` itself for the three Intel Core tiers, ``'AMD'`` when the
        first whitespace-separated word is ``'AMD'``, otherwise ``'Other'``.
    """
    if name in ('Intel Core i7', 'Intel Core i5', 'Intel Core i3'):
        return name
    # Compare a slice rather than indexing [0]: an empty or whitespace-only
    # label has no words and indexing would raise IndexError.
    if name.split()[:1] == ['AMD']:
        return 'AMD'
    return 'Other'

In [None]:
# Reduce cpu_name to the three Intel Core tiers, 'AMD', or 'Other' (see set_processor).
data['cpu_name'] = data['cpu_name'].apply(set_processor)

In [None]:
data['cpu_name'].value_counts()

### Ram Column 

In [None]:
data['Ram'].value_counts()

### Gpu Column 

In [None]:
data['Gpu'].value_counts()

In [None]:
# Keep only the first word of the GPU description (empty string if there is none).
data['gpu_name'] = data['Gpu'].apply(lambda gpu: " ".join(gpu.split()[:1]))

In [None]:
data['gpu_name'].value_counts()

In [None]:
data.shape

In [None]:
# Drop the rows whose gpu_name is 'ARM'. The explicit .copy() makes the result an
# independent frame, so later column assignments such as data['OpSys'] = ...
# do not raise SettingWithCopyWarning.
data = data.loc[data['gpu_name'] != 'ARM'].copy()

In [None]:
data.shape

In [None]:
data.head(2)

### OpSys Column 

In [None]:
data['OpSys'].value_counts()

In [None]:
def set_os(inpt):
    """Collapse an operating-system label into a coarse family.

    The three listed Windows variants map to 'Windows', the two macOS
    spellings map to 'Mac', 'Linux' is returned unchanged, and anything
    else becomes 'Other'.
    """
    if inpt in ('Windows 10', 'Windows 7', 'Windows 10 S'):
        return 'Windows'
    if inpt in ('macOS', 'Mac OS X'):
        return 'Mac'
    return inpt if inpt == 'Linux' else 'Other'

In [None]:
# Collapse OpSys into 'Windows', 'Mac', 'Linux' or 'Other' (see set_os).
data['OpSys'] = data['OpSys'].apply(set_os)

In [None]:
data['OpSys'].value_counts()

In [None]:
data.head(2)

### Drop unnecessary columns 

In [None]:
# Drop laptop_ID, Inches and Product, plus the raw ScreenResolution / Cpu / Gpu
# strings from which Touchscreen, Ips, cpu_name and gpu_name were derived.
data = data.drop(columns=['laptop_ID', 'Inches', 'Product', 'ScreenResolution', 'Cpu', 'Gpu'])

In [None]:
data.head()

### One-Hot Encoding 

In [None]:
# One-hot encode the remaining text columns; numeric columns pass through unchanged.
data = pd.get_dummies(data)

In [None]:
data.head(2)

In [None]:
data.shape

# Model Building and Selection

In [None]:
# Features: every column except Price_euros. Target: the Price_euros column.
X = data.drop('Price_euros', axis=1)
y = data['Price_euros']

In [None]:
!pip install sklearn

### Divide dataset into training and testing set 

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 25% of the rows for testing. The fixed random_state makes the split,
# and every score computed from it, identical on each Restart & Run All.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

In [None]:
X_train.shape, X_test.shape

### Model training

In [None]:
def model_acc(model):
    """Fit ``model`` on the training split and print its test-split score.

    Reads ``X_train``, ``y_train``, ``X_test`` and ``y_test`` from the
    notebook's global namespace. Prints ``'<model> --> <score>'`` and
    returns nothing.
    """
    model.fit(X_train, y_train)
    test_score = model.score(X_test, y_test)
    print(f"{model!s} --> {test_score!s}")

In [None]:
# Fit four baseline regressors and print each one's test-split score via model_acc.
# `rf` is reused as the estimator for the grid search further down.
from sklearn.linear_model import LinearRegression, Lasso
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor

lr = LinearRegression()
model_acc(lr)

lasso = Lasso()
model_acc(lasso)

# The tree-based models are randomized; pin the seed so their scores are reproducible.
dt = DecisionTreeRegressor(random_state=42)
model_acc(dt)

rf = RandomForestRegressor(random_state=42)
model_acc(rf)

### Hyperparameter tuning

In [None]:
from sklearn.model_selection import GridSearchCV

# Candidate settings for the random forest: 3 forest sizes x 3 split criteria.
parameters = {'n_estimators':[10, 50, 100],
              'criterion':['squared_error','absolute_error','poisson']}

# Search every combination using GridSearchCV's default cross-validation and scoring.
grid_obj = GridSearchCV(estimator=rf, param_grid=parameters)

grid_fit = grid_obj.fit(X_train, y_train)

# Estimator with the best-scoring parameter combination found by the search.
best_model = grid_fit.best_estimator_

# Score of the selected model on the held-out test split (shown as the cell output).
best_model.score(X_test, y_test)

In [None]:
X_test.columns

### Save model 

In [None]:
import pickle
# Serialize the model selected by the grid search to 'predictor.pickle' in the working directory.
with open('predictor.pickle', 'wb') as file:
    pickle.dump(best_model, file)

In [None]:
# Predict the price for one hand-built example. The values are positional and
# must follow the column order of X_train. Wrapping them in a DataFrame with
# those column names avoids scikit-learn's "X does not have valid feature
# names" warning (the model was fitted on a DataFrame) and fails with a clear
# error if the number of values does not match the number of features.
sample = pd.DataFrame(
    [[8, 1.3, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0]],
    columns=X_train.columns,
)
pred_value = best_model.predict(sample)
pred_value