# Import Libraries

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import statsmodels.api as sm
from sklearn.linear_model import LinearRegression

# Read Data

In [2]:
# Load the processed laptop dataset from the project-relative data folder.
data = pd.read_csv(filepath_or_buffer="data/Process_dataset.csv")

In [3]:
# Preview the first five rows (head() defaults to n=5).
data.head()

Unnamed: 0,Company,TypeName,Inches,panel_type_IPS,DisplayType,IsTouchscreen,Resolution_Width,Resolution_Height,CPU_brand,CPU_series_model_gen_type,CPU_speed,Ram_GB,Total_Memory_Capacity_GB,Type,GPU_Brand,GPU_series,GPU_model_no,OpSys,Weight_kg,Price
0,Apple,Ultrabook,13.3,1.0,Retina Display,0.0,2560.0,1600.0,Intel,Core i5,2.3,8.0,128.0,SSD,Intel,Iris Plus Graphics,640,macOS,1.37,71378.6832
1,Apple,Ultrabook,13.3,0.0,,0.0,1440.0,900.0,Intel,Core i5,1.8,8.0,128.0,Flash Storage,Intel,Hd Graphics,6000,macOS,1.34,47895.5232
2,HP,Notebook,15.6,0.0,Full HD,0.0,1920.0,1080.0,Intel,Core i5 7200U,2.5,8.0,256.0,SSD,Intel,Hd Graphics,620,No OS,1.86,30636.0
3,Apple,Ultrabook,15.4,1.0,Retina Display,0.0,2880.0,1800.0,Intel,Core i7,2.7,16.0,512.0,SSD,Amd,Radeon Pro,455,macOS,1.83,135195.336
4,Apple,Ultrabook,13.3,1.0,Retina Display,0.0,2560.0,1600.0,Intel,Core i5,3.1,8.0,256.0,SSD,Intel,Iris Plus Graphics,650,macOS,1.37,96095.808


# Check Null Values

In [4]:
# Number of missing values in each column.
data.isna().sum()

Company                        0
TypeName                       0
Inches                         0
panel_type_IPS                 0
DisplayType                  361
IsTouchscreen                  0
Resolution_Width               0
Resolution_Height              0
CPU_brand                      0
CPU_series_model_gen_type      0
CPU_speed                      0
Ram_GB                         0
Total_Memory_Capacity_GB       1
Type                           1
GPU_Brand                      0
GPU_series                     1
GPU_model_no                  70
OpSys                          0
Weight_kg                      1
Price                          0
dtype: int64

In [5]:
# Keep every row that has at least one missing value in any column.
null_rows = data.loc[data.isna().any(axis="columns")]
null_rows

Unnamed: 0,Company,TypeName,Inches,panel_type_IPS,DisplayType,IsTouchscreen,Resolution_Width,Resolution_Height,CPU_brand,CPU_series_model_gen_type,CPU_speed,Ram_GB,Total_Memory_Capacity_GB,Type,GPU_Brand,GPU_series,GPU_model_no,OpSys,Weight_kg,Price
1,Apple,Ultrabook,13.3,0.0,,0.0,1440.0,900.0,Intel,Core i5,1.8,8.0,128.0,Flash Storage,Intel,Hd Graphics,6000,macOS,1.34,47895.5232
5,Acer,Notebook,15.6,0.0,,0.0,1366.0,768.0,Amd,A9-Series 9420,3.0,4.0,500.0,HDD,Amd,Radeon,R5,Windows 10,2.10,21312.0000
6,Apple,Ultrabook,15.4,1.0,Retina Display,0.0,2880.0,1800.0,Intel,Core i7,2.2,16.0,256.0,Flash Storage,Intel,Iris Pro Graphics,,Mac OS X,2.04,114017.6016
7,Apple,Ultrabook,13.3,0.0,,0.0,1440.0,900.0,Intel,Core i5,1.8,8.0,256.0,Flash Storage,Intel,Hd Graphics,6000,macOS,1.34,61735.5360
10,HP,Notebook,15.6,0.0,,0.0,1366.0,768.0,Intel,Core i5 7200U,2.5,4.0,500.0,HDD,Intel,Hd Graphics,620,No OS,1.86,20986.9920
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1266,HP,Netbook,11.6,0.0,,0.0,1366.0,768.0,Intel,Celeron Dual Core N3060,1.6,2.0,32.0,Flash Storage,Intel,Hd Graphics,400,Windows 10,1.17,11135.5200
1267,Asus,Notebook,15.6,0.0,,0.0,1366.0,768.0,Intel,Core i7 6500U,2.5,4.0,500.0,HDD,Nvidia,Geforce,920M,Windows 10,2.20,38378.6496
1270,Lenovo,Notebook,14.0,0.0,,0.0,1366.0,768.0,Intel,Celeron Dual Core N3050,1.6,2.0,64.0,Flash Storage,Intel,Hd Graphics,,Windows 10,1.50,12201.1200
1271,HP,Notebook,15.6,0.0,,0.0,1366.0,768.0,Intel,Core i7 6500U,2.5,6.0,1024.0,HDD,Amd,Radeon R5,M330,Windows 10,2.19,40705.9200


In [6]:
# Rows whose DisplayType is missing.
null_display_type = data.loc[data["DisplayType"].isna()]
null_display_type

Unnamed: 0,Company,TypeName,Inches,panel_type_IPS,DisplayType,IsTouchscreen,Resolution_Width,Resolution_Height,CPU_brand,CPU_series_model_gen_type,CPU_speed,Ram_GB,Total_Memory_Capacity_GB,Type,GPU_Brand,GPU_series,GPU_model_no,OpSys,Weight_kg,Price
1,Apple,Ultrabook,13.3,0.0,,0.0,1440.0,900.0,Intel,Core i5,1.8,8.0,128.0,Flash Storage,Intel,Hd Graphics,6000,macOS,1.34,47895.5232
5,Acer,Notebook,15.6,0.0,,0.0,1366.0,768.0,Amd,A9-Series 9420,3.0,4.0,500.0,HDD,Amd,Radeon,R5,Windows 10,2.10,21312.0000
7,Apple,Ultrabook,13.3,0.0,,0.0,1440.0,900.0,Intel,Core i5,1.8,8.0,256.0,Flash Storage,Intel,Hd Graphics,6000,macOS,1.34,61735.5360
10,HP,Notebook,15.6,0.0,,0.0,1366.0,768.0,Intel,Core i5 7200U,2.5,4.0,500.0,HDD,Intel,Hd Graphics,620,No OS,1.86,20986.9920
21,HP,Notebook,15.6,0.0,,0.0,1366.0,768.0,Amd,E-Series E2-9000e,1.5,4.0,500.0,HDD,Amd,Radeon,R2,No OS,1.86,13746.2400
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1266,HP,Netbook,11.6,0.0,,0.0,1366.0,768.0,Intel,Celeron Dual Core N3060,1.6,2.0,32.0,Flash Storage,Intel,Hd Graphics,400,Windows 10,1.17,11135.5200
1267,Asus,Notebook,15.6,0.0,,0.0,1366.0,768.0,Intel,Core i7 6500U,2.5,4.0,500.0,HDD,Nvidia,Geforce,920M,Windows 10,2.20,38378.6496
1270,Lenovo,Notebook,14.0,0.0,,0.0,1366.0,768.0,Intel,Celeron Dual Core N3050,1.6,2.0,64.0,Flash Storage,Intel,Hd Graphics,,Windows 10,1.50,12201.1200
1271,HP,Notebook,15.6,0.0,,0.0,1366.0,768.0,Intel,Core i7 6500U,2.5,6.0,1024.0,HDD,Amd,Radeon R5,M330,Windows 10,2.19,40705.9200


# Check Unique Values per Column

In [7]:
# Count the distinct values in each column; with the default arguments
# missing values are not counted as a distinct value.
data.nunique()

Company                       19
TypeName                       6
Inches                        24
panel_type_IPS                 2
DisplayType                    7
IsTouchscreen                  2
Resolution_Width              13
Resolution_Height             10
CPU_brand                      3
CPU_series_model_gen_type     93
CPU_speed                     25
Ram_GB                        10
Total_Memory_Capacity_GB      22
Type                           7
GPU_Brand                      4
GPU_series                    25
GPU_model_no                  82
OpSys                          9
Weight_kg                    180
Price                        777
dtype: int64

In [27]:
# DisplayType and TouchScreen: count the rows that have a DisplayType value
# AND are flagged as touchscreen (IsTouchscreen == 1).
# NOTE: despite the `null_` prefix, this frame holds rows where DisplayType
# is present (not null); the name is kept as-is for any later cells using it.
null_display_touch = data.loc[
    data["DisplayType"].notna() & data["IsTouchscreen"].eq(1)
]
len(null_display_touch)


142

In [28]:
# Frequency of each DisplayType label, most common first.
data.loc[:, "DisplayType"].value_counts()

DisplayType
Full HD           823
4K Ultra HD        39
Quad HD+           28
Retina Display     17
/ 4K Ultra HD       3
/ Quad HD+          1
/ Full HD           1
Name: count, dtype: int64

In [None]:
# The frame has no single "GPU" column (data["GPU"] raises KeyError);
# GPU information is split across GPU_Brand, GPU_series and GPU_model_no,
# so preview those three columns instead.
data[["GPU_Brand", "GPU_series", "GPU_model_no"]].head()

# Separate Numerical and Categorical Features


In [11]:
# Split the column names by dtype: float columns are treated as numerical,
# object/category columns as categorical.
numerical_cols = list(data.select_dtypes(include=["float", "float64"]).columns)
categorical_cols = list(data.select_dtypes(include=["object", "category"]).columns)

# Show both lists, numerical first.
print(f"{numerical_cols} \n {categorical_cols}")

['Inches', 'panel_type_IPS', 'IsTouchscreen', 'Resolution_Width', 'Resolution_Height', 'CPU_speed', 'Ram_GB', 'Total_Memory_Capacity_GB', 'Weight_kg', 'Price'] 
 ['Company', 'TypeName', 'DisplayType', 'CPU_brand', 'CPU_series_model_gen_type', 'Type', 'GPU_Brand', 'GPU_series', 'GPU_model_no', 'OpSys']


# Exploratory Data Analysis (EDA)

In [9]:
# Print a structural summary of the frame: index range, column names,
# non-null counts and dtypes.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1273 entries, 0 to 1272
Data columns (total 20 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   Company                    1273 non-null   object 
 1   TypeName                   1273 non-null   object 
 2   Inches                     1273 non-null   float64
 3   panel_type_IPS             1273 non-null   float64
 4   DisplayType                912 non-null    object 
 5   IsTouchscreen              1273 non-null   float64
 6   Resolution_Width           1273 non-null   float64
 7   Resolution_Height          1273 non-null   float64
 8   CPU_brand                  1273 non-null   object 
 9   CPU_series_model_gen_type  1273 non-null   object 
 10  CPU_speed                  1273 non-null   float64
 11  Ram_GB                     1273 non-null   float64
 12  Total_Memory_Capacity_GB   1272 non-null   float64
 13  Type                       1272 non-null   objec

In [10]:
# Summary statistics (count, mean, std, min, quartiles, max) for the
# numeric columns.
data.describe()

Unnamed: 0,Inches,panel_type_IPS,IsTouchscreen,Resolution_Width,Resolution_Height,CPU_speed,Ram_GB,Total_Memory_Capacity_GB,Weight_kg,Price
count,1273.0,1273.0,1273.0,1273.0,1273.0,1273.0,1273.0,1272.0,1272.0,1273.0
mean,15.129929,0.282011,0.145326,1894.219953,1070.538885,2.298916,8.462687,621.525157,2.077618,59955.814073
std,1.953925,0.450156,0.352568,494.195581,284.400487,0.507118,5.564408,481.133282,0.807808,37332.251005
min,10.1,0.0,0.0,1366.0,768.0,0.9,1.0,8.0,0.0002,9270.72
25%,14.0,0.0,0.0,1600.0,900.0,2.0,4.0,256.0,1.5,31914.72
50%,15.6,0.0,0.0,1920.0,1080.0,2.5,8.0,500.0,2.04,52161.12
75%,15.6,1.0,0.0,1920.0,1080.0,2.7,8.0,1024.0,2.32,79333.3872
max,35.6,1.0,1.0,3840.0,2160.0,3.6,64.0,2560.0,11.1,324954.72
