In [1]:
#importing necessary libraries
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

In [2]:
#without encoding, getting utf-8 error.
laptop_data = pd.read_csv('laptops.csv', encoding='latin-1')
laptop_data.head()

Unnamed: 0.1,Unnamed: 0,Company,Product,TypeName,Inches,ScreenResolution,Cpu,Ram,Memory,Gpu,OpSys,Weight,Price_euros
0,1,Apple,MacBook Pro,Ultrabook,13.3,IPS Panel Retina Display 2560x1600,Intel Core i5 2.3GHz,8GB,128GB SSD,Intel Iris Plus Graphics 640,macOS,1.37kg,1339.69
1,2,Apple,Macbook Air,Ultrabook,13.3,1440x900,Intel Core i5 1.8GHz,8GB,128GB Flash Storage,Intel HD Graphics 6000,macOS,1.34kg,898.94
2,3,HP,250 G6,Notebook,15.6,Full HD 1920x1080,Intel Core i5 7200U 2.5GHz,8GB,256GB SSD,Intel HD Graphics 620,No OS,1.86kg,575.0
3,4,Apple,MacBook Pro,Ultrabook,15.4,IPS Panel Retina Display 2880x1800,Intel Core i7 2.7GHz,16GB,512GB SSD,AMD Radeon Pro 455,macOS,1.83kg,2537.45
4,5,Apple,MacBook Pro,Ultrabook,13.3,IPS Panel Retina Display 2560x1600,Intel Core i5 3.1GHz,8GB,256GB SSD,Intel Iris Plus Graphics 650,macOS,1.37kg,1803.6


Name of the dataframe itself incates that the data is of laptops. Our data contains specifications of the laptops: Company name, Product name, Type of the product, Screen size in inches, Screen Resolution, CPU, System Memory (RAM), Storage Type (Memory), Graphics Processing Unit(GPU), Operating System(opSys), Weight of the product, Product Weight (in kg) and price (in euros).

In [3]:
laptop_data.shape

(1303, 13)

We don't have much more data; we have just 1303 rows and 13 columns. More data can make our prediction better.

In [4]:
print(laptop_data['Company'].unique())

['Apple' 'HP' 'Acer' 'Asus' 'Dell' 'Lenovo' 'Chuwi' 'MSI' 'Microsoft'
 'Toshiba' 'Huawei' 'Xiaomi' 'Vero' 'Razer' 'Mediacom' 'Samsung' 'Google'
 'Fujitsu' 'LG']


In [5]:
print(laptop_data['Company'].value_counts())

Dell         297
Lenovo       297
HP           274
Asus         158
Acer         103
MSI           54
Toshiba       48
Apple         21
Samsung        9
Razer          7
Mediacom       7
Microsoft      6
Xiaomi         4
Vero           4
Chuwi          3
Google         3
Fujitsu        3
LG             3
Huawei         2
Name: Company, dtype: int64


We have different 19 company's data.

In [6]:
laptop_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1303 entries, 0 to 1302
Data columns (total 13 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   Unnamed: 0        1303 non-null   int64  
 1   Company           1303 non-null   object 
 2   Product           1303 non-null   object 
 3   TypeName          1303 non-null   object 
 4   Inches            1303 non-null   float64
 5   ScreenResolution  1303 non-null   object 
 6   Cpu               1303 non-null   object 
 7   Ram               1303 non-null   object 
 8   Memory            1303 non-null   object 
 9   Gpu               1303 non-null   object 
 10  OpSys             1303 non-null   object 
 11  Weight            1303 non-null   object 
 12  Price_euros       1303 non-null   float64
dtypes: float64(2), int64(1), object(10)
memory usage: 132.5+ KB


We don't have any missing value. Unnamed is not useful, Ram and weight data type shows object, we need to remove 'GB' and 'kg' to make the datatype float. 

In [7]:
laptop_data.duplicated().sum()

0

we have no duplicate value. If we have duplicate value, we need to use drop_duplicates() to drop the duplicate value.

In [8]:
laptop_data.isnull().sum()

Unnamed: 0          0
Company             0
Product             0
TypeName            0
Inches              0
ScreenResolution    0
Cpu                 0
Ram                 0
Memory              0
Gpu                 0
OpSys               0
Weight              0
Price_euros         0
dtype: int64

No Missing Value

# Removing Unnecessary Columns

In [9]:
laptop_data.drop(columns=['Unnamed: 0'],inplace=True)
laptop_data.sample(10)

Unnamed: 0,Company,Product,TypeName,Inches,ScreenResolution,Cpu,Ram,Memory,Gpu,OpSys,Weight,Price_euros
1246,Dell,Latitude 5480,Notebook,14.0,1366x768,Intel Core i5 7200U 2.5GHz,4GB,500GB HDD,Intel HD Graphics 620,Windows 10,1.6kg,875.0
693,Lenovo,ThinkPad X1,2 in 1 Convertible,14.0,Touchscreen 2560x1440,Intel Core i7 7500U 2.7GHz,8GB,256GB SSD,Intel HD Graphics 620,Windows 10,1.42kg,2590.0
322,Asus,ZenBook UX410UA-GV183T,Notebook,14.0,Full HD 1920x1080,Intel Core i7 7500U 2.7GHz,8GB,256GB SSD,Intel HD Graphics 620,Windows 10,2kg,1094.0
663,HP,15-ay047nv (i3-6006U/6GB/1TB/Radeon,Notebook,15.6,1920x1080,Intel Core i3 6006U 2GHz,6GB,1TB HDD,AMD Radeon R5 M430,Windows 10,2.04kg,539.0
772,Toshiba,Tecra A40-C-1E5,Notebook,14.0,IPS Panel 1366x768,Intel Core i5 6200U 2.3GHz,4GB,128GB SSD,Intel HD Graphics 520,Windows 10,1.75kg,1020.0
1088,HP,Chromebook 13,Notebook,13.3,Quad HD+ 3200x1800,Intel Pentium Dual Core 4405Y 1.5GHz,4GB,32GB Flash Storage,Intel HD Graphics 515,Chrome OS,1.29kg,615.0
867,Lenovo,ThinkPad Yoga,2 in 1 Convertible,13.3,IPS Panel Full HD / Touchscreen 1920x1080,Intel Core i7 7500U 2.7GHz,8GB,256GB SSD,Intel HD Graphics 620,Windows 10,1.37kg,1950.0
19,Dell,XPS 13,Ultrabook,13.3,IPS Panel Full HD / Touchscreen 1920x1080,Intel Core i5 8250U 1.6GHz,8GB,128GB SSD,Intel UHD Graphics 620,Windows 10,1.22kg,979.0
762,Google,Pixelbook (Core,Ultrabook,12.3,Touchscreen 2400x1600,Intel Core i5 7Y57 1.2GHz,8GB,256GB SSD,Intel HD Graphics 615,Chrome OS,1.1kg,1559.0
632,Lenovo,V330-15IKB (i5-8250U/4GB/256GB/FHD/W10),Notebook,15.6,Full HD 1920x1080,Intel Core i5 8250U 1.6GHz,4GB,256GB SSD,Intel UHD Graphics 620,Windows 10,1.8kg,829.0


We removed first column 'Unnamed: 0' permanently by using inplace=True.

# Cleaning 'Ram' and 'Weight' columns

In [10]:
laptop_data['Ram'] = laptop_data['Ram'].str.replace('GB', '')
laptop_data['Weight'] = laptop_data['Weight'].str.replace('kg', ' ')

In [11]:
laptop_data.head()

Unnamed: 0,Company,Product,TypeName,Inches,ScreenResolution,Cpu,Ram,Memory,Gpu,OpSys,Weight,Price_euros
0,Apple,MacBook Pro,Ultrabook,13.3,IPS Panel Retina Display 2560x1600,Intel Core i5 2.3GHz,8,128GB SSD,Intel Iris Plus Graphics 640,macOS,1.37,1339.69
1,Apple,Macbook Air,Ultrabook,13.3,1440x900,Intel Core i5 1.8GHz,8,128GB Flash Storage,Intel HD Graphics 6000,macOS,1.34,898.94
2,HP,250 G6,Notebook,15.6,Full HD 1920x1080,Intel Core i5 7200U 2.5GHz,8,256GB SSD,Intel HD Graphics 620,No OS,1.86,575.0
3,Apple,MacBook Pro,Ultrabook,15.4,IPS Panel Retina Display 2880x1800,Intel Core i7 2.7GHz,16,512GB SSD,AMD Radeon Pro 455,macOS,1.83,2537.45
4,Apple,MacBook Pro,Ultrabook,13.3,IPS Panel Retina Display 2560x1600,Intel Core i5 3.1GHz,8,256GB SSD,Intel Iris Plus Graphics 650,macOS,1.37,1803.6


In [12]:
laptop_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1303 entries, 0 to 1302
Data columns (total 12 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   Company           1303 non-null   object 
 1   Product           1303 non-null   object 
 2   TypeName          1303 non-null   object 
 3   Inches            1303 non-null   float64
 4   ScreenResolution  1303 non-null   object 
 5   Cpu               1303 non-null   object 
 6   Ram               1303 non-null   object 
 7   Memory            1303 non-null   object 
 8   Gpu               1303 non-null   object 
 9   OpSys             1303 non-null   object 
 10  Weight            1303 non-null   object 
 11  Price_euros       1303 non-null   float64
dtypes: float64(2), object(10)
memory usage: 122.3+ KB


For better prediction, we need to change data in numeric form. We removed 'GB' and 'kg' from Ram and weight. But still the datatype of the columns is object. In order to change the datatype of this two column, we will use astype() and pass int and float respectively.

In [13]:
laptop_data['Ram'] = laptop_data['Ram'].astype('int32')
laptop_data['Weight'] = laptop_data['Weight'].astype('float32')

In [14]:
laptop_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1303 entries, 0 to 1302
Data columns (total 12 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   Company           1303 non-null   object 
 1   Product           1303 non-null   object 
 2   TypeName          1303 non-null   object 
 3   Inches            1303 non-null   float64
 4   ScreenResolution  1303 non-null   object 
 5   Cpu               1303 non-null   object 
 6   Ram               1303 non-null   int32  
 7   Memory            1303 non-null   object 
 8   Gpu               1303 non-null   object 
 9   OpSys             1303 non-null   object 
 10  Weight            1303 non-null   float32
 11  Price_euros       1303 non-null   float64
dtypes: float32(1), float64(2), int32(1), object(8)
memory usage: 112.1+ KB


Now, the Ram and Weight column changed to int and float respectively.

# Converting 'weight' and 'price' in desirable unit

In [15]:
laptop_data ['Weight_lb'] = laptop_data ['Weight'] * 2.205
laptop_data['$Price'] = round(laptop_data['Price_euros'] * 1.14,2)
laptop_data

Unnamed: 0,Company,Product,TypeName,Inches,ScreenResolution,Cpu,Ram,Memory,Gpu,OpSys,Weight,Price_euros,Weight_lb,$Price
0,Apple,MacBook Pro,Ultrabook,13.3,IPS Panel Retina Display 2560x1600,Intel Core i5 2.3GHz,8,128GB SSD,Intel Iris Plus Graphics 640,macOS,1.37,1339.69,3.02085,1527.25
1,Apple,Macbook Air,Ultrabook,13.3,1440x900,Intel Core i5 1.8GHz,8,128GB Flash Storage,Intel HD Graphics 6000,macOS,1.34,898.94,2.95470,1024.79
2,HP,250 G6,Notebook,15.6,Full HD 1920x1080,Intel Core i5 7200U 2.5GHz,8,256GB SSD,Intel HD Graphics 620,No OS,1.86,575.00,4.10130,655.50
3,Apple,MacBook Pro,Ultrabook,15.4,IPS Panel Retina Display 2880x1800,Intel Core i7 2.7GHz,16,512GB SSD,AMD Radeon Pro 455,macOS,1.83,2537.45,4.03515,2892.69
4,Apple,MacBook Pro,Ultrabook,13.3,IPS Panel Retina Display 2560x1600,Intel Core i5 3.1GHz,8,256GB SSD,Intel Iris Plus Graphics 650,macOS,1.37,1803.60,3.02085,2056.10
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1298,Lenovo,Yoga 500-14ISK,2 in 1 Convertible,14.0,IPS Panel Full HD / Touchscreen 1920x1080,Intel Core i7 6500U 2.5GHz,4,128GB SSD,Intel HD Graphics 520,Windows 10,1.80,638.00,3.96900,727.32
1299,Lenovo,Yoga 900-13ISK,2 in 1 Convertible,13.3,IPS Panel Quad HD+ / Touchscreen 3200x1800,Intel Core i7 6500U 2.5GHz,16,512GB SSD,Intel HD Graphics 520,Windows 10,1.30,1499.00,2.86650,1708.86
1300,Lenovo,IdeaPad 100S-14IBR,Notebook,14.0,1366x768,Intel Celeron Dual Core N3050 1.6GHz,2,64GB Flash Storage,Intel HD Graphics,Windows 10,1.50,229.00,3.30750,261.06
1301,HP,15-AC110nv (i7-6500U/6GB/1TB/Radeon,Notebook,15.6,1366x768,Intel Core i7 6500U 2.5GHz,6,1TB HDD,AMD Radeon R5 M330,Windows 10,2.19,764.00,4.82895,870.96


We are making this model for USA. The pound is a unit of mass used in United States customary systems of measurement. We changed the weight to lb from kg, price to dollar from euro. Saved these columns permanently.

In [16]:
laptop_data.drop(['Weight', 'Price_euros'], axis=1, inplace=True)

# Fetching the information from 'ScreenResolution' column

In [17]:
laptop_data['TouchScreen'] = laptop_data['ScreenResolution'].apply(lambda x:1 if 'Touchscreen' in x else 0)
laptop_data.head()

Unnamed: 0,Company,Product,TypeName,Inches,ScreenResolution,Cpu,Ram,Memory,Gpu,OpSys,Weight_lb,$Price,TouchScreen
0,Apple,MacBook Pro,Ultrabook,13.3,IPS Panel Retina Display 2560x1600,Intel Core i5 2.3GHz,8,128GB SSD,Intel Iris Plus Graphics 640,macOS,3.02085,1527.25,0
1,Apple,Macbook Air,Ultrabook,13.3,1440x900,Intel Core i5 1.8GHz,8,128GB Flash Storage,Intel HD Graphics 6000,macOS,2.9547,1024.79,0
2,HP,250 G6,Notebook,15.6,Full HD 1920x1080,Intel Core i5 7200U 2.5GHz,8,256GB SSD,Intel HD Graphics 620,No OS,4.1013,655.5,0
3,Apple,MacBook Pro,Ultrabook,15.4,IPS Panel Retina Display 2880x1800,Intel Core i7 2.7GHz,16,512GB SSD,AMD Radeon Pro 455,macOS,4.03515,2892.69,0
4,Apple,MacBook Pro,Ultrabook,13.3,IPS Panel Retina Display 2560x1600,Intel Core i5 3.1GHz,8,256GB SSD,Intel Iris Plus Graphics 650,macOS,3.02085,2056.1,0


In [18]:
laptop_data['IPS'] = laptop_data['ScreenResolution'].apply(lambda x:1 if 'IPS' in x else 0)
laptop_data.head()

Unnamed: 0,Company,Product,TypeName,Inches,ScreenResolution,Cpu,Ram,Memory,Gpu,OpSys,Weight_lb,$Price,TouchScreen,IPS
0,Apple,MacBook Pro,Ultrabook,13.3,IPS Panel Retina Display 2560x1600,Intel Core i5 2.3GHz,8,128GB SSD,Intel Iris Plus Graphics 640,macOS,3.02085,1527.25,0,1
1,Apple,Macbook Air,Ultrabook,13.3,1440x900,Intel Core i5 1.8GHz,8,128GB Flash Storage,Intel HD Graphics 6000,macOS,2.9547,1024.79,0,0
2,HP,250 G6,Notebook,15.6,Full HD 1920x1080,Intel Core i5 7200U 2.5GHz,8,256GB SSD,Intel HD Graphics 620,No OS,4.1013,655.5,0,0
3,Apple,MacBook Pro,Ultrabook,15.4,IPS Panel Retina Display 2880x1800,Intel Core i7 2.7GHz,16,512GB SSD,AMD Radeon Pro 455,macOS,4.03515,2892.69,0,1
4,Apple,MacBook Pro,Ultrabook,13.3,IPS Panel Retina Display 2560x1600,Intel Core i5 3.1GHz,8,256GB SSD,Intel Iris Plus Graphics 650,macOS,3.02085,2056.1,0,1


In [19]:
laptop_data['ScreenResolution']=laptop_data['ScreenResolution'].apply(lambda x: x.split()[-1])
laptop_data.head()

Unnamed: 0,Company,Product,TypeName,Inches,ScreenResolution,Cpu,Ram,Memory,Gpu,OpSys,Weight_lb,$Price,TouchScreen,IPS
0,Apple,MacBook Pro,Ultrabook,13.3,2560x1600,Intel Core i5 2.3GHz,8,128GB SSD,Intel Iris Plus Graphics 640,macOS,3.02085,1527.25,0,1
1,Apple,Macbook Air,Ultrabook,13.3,1440x900,Intel Core i5 1.8GHz,8,128GB Flash Storage,Intel HD Graphics 6000,macOS,2.9547,1024.79,0,0
2,HP,250 G6,Notebook,15.6,1920x1080,Intel Core i5 7200U 2.5GHz,8,256GB SSD,Intel HD Graphics 620,No OS,4.1013,655.5,0,0
3,Apple,MacBook Pro,Ultrabook,15.4,2880x1800,Intel Core i7 2.7GHz,16,512GB SSD,AMD Radeon Pro 455,macOS,4.03515,2892.69,0,1
4,Apple,MacBook Pro,Ultrabook,13.3,2560x1600,Intel Core i5 3.1GHz,8,256GB SSD,Intel Iris Plus Graphics 650,macOS,3.02085,2056.1,0,1


In [20]:
#laptop_data['ScreenResolution']=laptop_data['ScreenResolution'].str.extract('(\d\d\d\dx\d\d\d\d)')
#laptop_data.head()

ScreenResolution column contains informations like Touchscreen, IPS and resolution of the screen. By using individual lambda function, we fetch the info and saved in diffeent columns. Further, we by using split function, we split x resolution and y resolution.

In [21]:
laptop_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1303 entries, 0 to 1302
Data columns (total 14 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   Company           1303 non-null   object 
 1   Product           1303 non-null   object 
 2   TypeName          1303 non-null   object 
 3   Inches            1303 non-null   float64
 4   ScreenResolution  1303 non-null   object 
 5   Cpu               1303 non-null   object 
 6   Ram               1303 non-null   int32  
 7   Memory            1303 non-null   object 
 8   Gpu               1303 non-null   object 
 9   OpSys             1303 non-null   object 
 10  Weight_lb         1303 non-null   float32
 11  $Price            1303 non-null   float64
 12  TouchScreen       1303 non-null   int64  
 13  IPS               1303 non-null   int64  
dtypes: float32(1), float64(2), int32(1), int64(2), object(8)
memory usage: 132.5+ KB


In [22]:
new_df = laptop_data['ScreenResolution'].str.split('x', expand=True)
new_df

Unnamed: 0,0,1
0,2560,1600
1,1440,900
2,1920,1080
3,2880,1800
4,2560,1600
...,...,...
1298,1920,1080
1299,3200,1800
1300,1366,768
1301,1366,768


In [23]:
laptop_data['X_res'] = new_df[0]
laptop_data['Y_res'] = new_df[1]

In [24]:
laptop_data.head()

Unnamed: 0,Company,Product,TypeName,Inches,ScreenResolution,Cpu,Ram,Memory,Gpu,OpSys,Weight_lb,$Price,TouchScreen,IPS,X_res,Y_res
0,Apple,MacBook Pro,Ultrabook,13.3,2560x1600,Intel Core i5 2.3GHz,8,128GB SSD,Intel Iris Plus Graphics 640,macOS,3.02085,1527.25,0,1,2560,1600
1,Apple,Macbook Air,Ultrabook,13.3,1440x900,Intel Core i5 1.8GHz,8,128GB Flash Storage,Intel HD Graphics 6000,macOS,2.9547,1024.79,0,0,1440,900
2,HP,250 G6,Notebook,15.6,1920x1080,Intel Core i5 7200U 2.5GHz,8,256GB SSD,Intel HD Graphics 620,No OS,4.1013,655.5,0,0,1920,1080
3,Apple,MacBook Pro,Ultrabook,15.4,2880x1800,Intel Core i7 2.7GHz,16,512GB SSD,AMD Radeon Pro 455,macOS,4.03515,2892.69,0,1,2880,1800
4,Apple,MacBook Pro,Ultrabook,13.3,2560x1600,Intel Core i5 3.1GHz,8,256GB SSD,Intel Iris Plus Graphics 650,macOS,3.02085,2056.1,0,1,2560,1600


In [25]:
laptop_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1303 entries, 0 to 1302
Data columns (total 16 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   Company           1303 non-null   object 
 1   Product           1303 non-null   object 
 2   TypeName          1303 non-null   object 
 3   Inches            1303 non-null   float64
 4   ScreenResolution  1303 non-null   object 
 5   Cpu               1303 non-null   object 
 6   Ram               1303 non-null   int32  
 7   Memory            1303 non-null   object 
 8   Gpu               1303 non-null   object 
 9   OpSys             1303 non-null   object 
 10  Weight_lb         1303 non-null   float32
 11  $Price            1303 non-null   float64
 12  TouchScreen       1303 non-null   int64  
 13  IPS               1303 non-null   int64  
 14  X_res             1303 non-null   object 
 15  Y_res             1303 non-null   object 
dtypes: float32(1), float64(2), int32(1), int64

In [26]:
laptop_data['X_res']=laptop_data['X_res'].astype('int')
laptop_data['Y_res']=laptop_data['Y_res'].astype('int')
laptop_data.head()

Unnamed: 0,Company,Product,TypeName,Inches,ScreenResolution,Cpu,Ram,Memory,Gpu,OpSys,Weight_lb,$Price,TouchScreen,IPS,X_res,Y_res
0,Apple,MacBook Pro,Ultrabook,13.3,2560x1600,Intel Core i5 2.3GHz,8,128GB SSD,Intel Iris Plus Graphics 640,macOS,3.02085,1527.25,0,1,2560,1600
1,Apple,Macbook Air,Ultrabook,13.3,1440x900,Intel Core i5 1.8GHz,8,128GB Flash Storage,Intel HD Graphics 6000,macOS,2.9547,1024.79,0,0,1440,900
2,HP,250 G6,Notebook,15.6,1920x1080,Intel Core i5 7200U 2.5GHz,8,256GB SSD,Intel HD Graphics 620,No OS,4.1013,655.5,0,0,1920,1080
3,Apple,MacBook Pro,Ultrabook,15.4,2880x1800,Intel Core i7 2.7GHz,16,512GB SSD,AMD Radeon Pro 455,macOS,4.03515,2892.69,0,1,2880,1800
4,Apple,MacBook Pro,Ultrabook,13.3,2560x1600,Intel Core i5 3.1GHz,8,256GB SSD,Intel Iris Plus Graphics 650,macOS,3.02085,2056.1,0,1,2560,1600


In [27]:
laptop_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1303 entries, 0 to 1302
Data columns (total 16 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   Company           1303 non-null   object 
 1   Product           1303 non-null   object 
 2   TypeName          1303 non-null   object 
 3   Inches            1303 non-null   float64
 4   ScreenResolution  1303 non-null   object 
 5   Cpu               1303 non-null   object 
 6   Ram               1303 non-null   int32  
 7   Memory            1303 non-null   object 
 8   Gpu               1303 non-null   object 
 9   OpSys             1303 non-null   object 
 10  Weight_lb         1303 non-null   float32
 11  $Price            1303 non-null   float64
 12  TouchScreen       1303 non-null   int64  
 13  IPS               1303 non-null   int64  
 14  X_res             1303 non-null   int32  
 15  Y_res             1303 non-null   int32  
dtypes: float32(1), float64(2), int32(3), int64

# Fetching the information from 'Cpu' column

In [28]:
def processor_type(text):
    if text.split()[0] == 'Intel':
        return 'Intel Processor'
    else:
        return 'AMD Processor'

In [29]:
laptop_data['Cpu processor'] = laptop_data['Cpu'].apply(processor_type)

'Cpu' column contains type of the processor and speed of the processor. By creating a function called process_type, we fetch processor type and creating lambda function we fetch speed of the processor.

In [30]:
laptop_data['Processor_speed_GHz'] = laptop_data['Cpu'].apply(lambda x: x.split()[-1])
laptop_data

Unnamed: 0,Company,Product,TypeName,Inches,ScreenResolution,Cpu,Ram,Memory,Gpu,OpSys,Weight_lb,$Price,TouchScreen,IPS,X_res,Y_res,Cpu processor,Processor_speed_GHz
0,Apple,MacBook Pro,Ultrabook,13.3,2560x1600,Intel Core i5 2.3GHz,8,128GB SSD,Intel Iris Plus Graphics 640,macOS,3.02085,1527.25,0,1,2560,1600,Intel Processor,2.3GHz
1,Apple,Macbook Air,Ultrabook,13.3,1440x900,Intel Core i5 1.8GHz,8,128GB Flash Storage,Intel HD Graphics 6000,macOS,2.95470,1024.79,0,0,1440,900,Intel Processor,1.8GHz
2,HP,250 G6,Notebook,15.6,1920x1080,Intel Core i5 7200U 2.5GHz,8,256GB SSD,Intel HD Graphics 620,No OS,4.10130,655.50,0,0,1920,1080,Intel Processor,2.5GHz
3,Apple,MacBook Pro,Ultrabook,15.4,2880x1800,Intel Core i7 2.7GHz,16,512GB SSD,AMD Radeon Pro 455,macOS,4.03515,2892.69,0,1,2880,1800,Intel Processor,2.7GHz
4,Apple,MacBook Pro,Ultrabook,13.3,2560x1600,Intel Core i5 3.1GHz,8,256GB SSD,Intel Iris Plus Graphics 650,macOS,3.02085,2056.10,0,1,2560,1600,Intel Processor,3.1GHz
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1298,Lenovo,Yoga 500-14ISK,2 in 1 Convertible,14.0,1920x1080,Intel Core i7 6500U 2.5GHz,4,128GB SSD,Intel HD Graphics 520,Windows 10,3.96900,727.32,1,1,1920,1080,Intel Processor,2.5GHz
1299,Lenovo,Yoga 900-13ISK,2 in 1 Convertible,13.3,3200x1800,Intel Core i7 6500U 2.5GHz,16,512GB SSD,Intel HD Graphics 520,Windows 10,2.86650,1708.86,1,1,3200,1800,Intel Processor,2.5GHz
1300,Lenovo,IdeaPad 100S-14IBR,Notebook,14.0,1366x768,Intel Celeron Dual Core N3050 1.6GHz,2,64GB Flash Storage,Intel HD Graphics,Windows 10,3.30750,261.06,0,0,1366,768,Intel Processor,1.6GHz
1301,HP,15-AC110nv (i7-6500U/6GB/1TB/Radeon,Notebook,15.6,1366x768,Intel Core i7 6500U 2.5GHz,6,1TB HDD,AMD Radeon R5 M330,Windows 10,4.82895,870.96,0,0,1366,768,Intel Processor,2.5GHz


In [31]:
laptop_data['Processor_speed_GHz']=laptop_data['Processor_speed_GHz'].str.replace('GHz', '')
laptop_data

Unnamed: 0,Company,Product,TypeName,Inches,ScreenResolution,Cpu,Ram,Memory,Gpu,OpSys,Weight_lb,$Price,TouchScreen,IPS,X_res,Y_res,Cpu processor,Processor_speed_GHz
0,Apple,MacBook Pro,Ultrabook,13.3,2560x1600,Intel Core i5 2.3GHz,8,128GB SSD,Intel Iris Plus Graphics 640,macOS,3.02085,1527.25,0,1,2560,1600,Intel Processor,2.3
1,Apple,Macbook Air,Ultrabook,13.3,1440x900,Intel Core i5 1.8GHz,8,128GB Flash Storage,Intel HD Graphics 6000,macOS,2.95470,1024.79,0,0,1440,900,Intel Processor,1.8
2,HP,250 G6,Notebook,15.6,1920x1080,Intel Core i5 7200U 2.5GHz,8,256GB SSD,Intel HD Graphics 620,No OS,4.10130,655.50,0,0,1920,1080,Intel Processor,2.5
3,Apple,MacBook Pro,Ultrabook,15.4,2880x1800,Intel Core i7 2.7GHz,16,512GB SSD,AMD Radeon Pro 455,macOS,4.03515,2892.69,0,1,2880,1800,Intel Processor,2.7
4,Apple,MacBook Pro,Ultrabook,13.3,2560x1600,Intel Core i5 3.1GHz,8,256GB SSD,Intel Iris Plus Graphics 650,macOS,3.02085,2056.10,0,1,2560,1600,Intel Processor,3.1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1298,Lenovo,Yoga 500-14ISK,2 in 1 Convertible,14.0,1920x1080,Intel Core i7 6500U 2.5GHz,4,128GB SSD,Intel HD Graphics 520,Windows 10,3.96900,727.32,1,1,1920,1080,Intel Processor,2.5
1299,Lenovo,Yoga 900-13ISK,2 in 1 Convertible,13.3,3200x1800,Intel Core i7 6500U 2.5GHz,16,512GB SSD,Intel HD Graphics 520,Windows 10,2.86650,1708.86,1,1,3200,1800,Intel Processor,2.5
1300,Lenovo,IdeaPad 100S-14IBR,Notebook,14.0,1366x768,Intel Celeron Dual Core N3050 1.6GHz,2,64GB Flash Storage,Intel HD Graphics,Windows 10,3.30750,261.06,0,0,1366,768,Intel Processor,1.6
1301,HP,15-AC110nv (i7-6500U/6GB/1TB/Radeon,Notebook,15.6,1366x768,Intel Core i7 6500U 2.5GHz,6,1TB HDD,AMD Radeon R5 M330,Windows 10,4.82895,870.96,0,0,1366,768,Intel Processor,2.5


In [32]:
laptop_data.drop(['Cpu'], axis=1, inplace=True)
laptop_data.head()

Unnamed: 0,Company,Product,TypeName,Inches,ScreenResolution,Ram,Memory,Gpu,OpSys,Weight_lb,$Price,TouchScreen,IPS,X_res,Y_res,Cpu processor,Processor_speed_GHz
0,Apple,MacBook Pro,Ultrabook,13.3,2560x1600,8,128GB SSD,Intel Iris Plus Graphics 640,macOS,3.02085,1527.25,0,1,2560,1600,Intel Processor,2.3
1,Apple,Macbook Air,Ultrabook,13.3,1440x900,8,128GB Flash Storage,Intel HD Graphics 6000,macOS,2.9547,1024.79,0,0,1440,900,Intel Processor,1.8
2,HP,250 G6,Notebook,15.6,1920x1080,8,256GB SSD,Intel HD Graphics 620,No OS,4.1013,655.5,0,0,1920,1080,Intel Processor,2.5
3,Apple,MacBook Pro,Ultrabook,15.4,2880x1800,16,512GB SSD,AMD Radeon Pro 455,macOS,4.03515,2892.69,0,1,2880,1800,Intel Processor,2.7
4,Apple,MacBook Pro,Ultrabook,13.3,2560x1600,8,256GB SSD,Intel Iris Plus Graphics 650,macOS,3.02085,2056.1,0,1,2560,1600,Intel Processor,3.1


Momory column contains type of the memory: SSD, HDD, Flash Storage and Hybrid. Also, it contains the data storage capacity in GB and TB.

# Fetching the information from 'Memory' column

In [33]:
laptop_data['Memory'].value_counts()

256GB SSD                        412
1TB HDD                          223
500GB HDD                        132
512GB SSD                        118
128GB SSD +  1TB HDD              94
128GB SSD                         76
256GB SSD +  1TB HDD              73
32GB Flash Storage                38
2TB HDD                           16
64GB Flash Storage                15
512GB SSD +  1TB HDD              14
1TB SSD                           14
256GB SSD +  2TB HDD              10
1.0TB Hybrid                       9
256GB Flash Storage                8
16GB Flash Storage                 7
32GB SSD                           6
180GB SSD                          5
128GB Flash Storage                4
512GB SSD +  2TB HDD               3
16GB SSD                           3
512GB Flash Storage                2
1TB SSD +  1TB HDD                 2
256GB SSD +  500GB HDD             2
128GB SSD +  2TB HDD               2
256GB SSD +  256GB SSD             2
512GB SSD +  256GB SSD             1
5

In [34]:
#we removed 0 after the decimal point
#we removed 'GB' in memory column, and replaced 'TB' with '000' (1TB = 1000GB)
#some cell contains more than 1 memory info, they have '+' sign between two piece of info. we split those cell and save in new_df2. 
laptop_data['Memory'] = laptop_data['Memory'].astype(str).replace('\.0', '')
laptop_data['Memory'] = laptop_data['Memory'].str.replace('GB', '')
laptop_data['Memory'] = laptop_data['Memory'].str.replace('TB', '000')
new_df2 = laptop_data['Memory'].str.split('+', expand=True)

#we are joing 0th index column of new_df2 with our original dataframe, creating new column 'para1'
#we are joing 1st index column of new_df2 with our original dataframe, creating new column 'para2'
laptop_data['para1'] = new_df2[0]
laptop_data['para2'] = new_df2[1]

laptop_data

Unnamed: 0,Company,Product,TypeName,Inches,ScreenResolution,Ram,Memory,Gpu,OpSys,Weight_lb,$Price,TouchScreen,IPS,X_res,Y_res,Cpu processor,Processor_speed_GHz,para1,para2
0,Apple,MacBook Pro,Ultrabook,13.3,2560x1600,8,128 SSD,Intel Iris Plus Graphics 640,macOS,3.02085,1527.25,0,1,2560,1600,Intel Processor,2.3,128 SSD,
1,Apple,Macbook Air,Ultrabook,13.3,1440x900,8,128 Flash Storage,Intel HD Graphics 6000,macOS,2.95470,1024.79,0,0,1440,900,Intel Processor,1.8,128 Flash Storage,
2,HP,250 G6,Notebook,15.6,1920x1080,8,256 SSD,Intel HD Graphics 620,No OS,4.10130,655.50,0,0,1920,1080,Intel Processor,2.5,256 SSD,
3,Apple,MacBook Pro,Ultrabook,15.4,2880x1800,16,512 SSD,AMD Radeon Pro 455,macOS,4.03515,2892.69,0,1,2880,1800,Intel Processor,2.7,512 SSD,
4,Apple,MacBook Pro,Ultrabook,13.3,2560x1600,8,256 SSD,Intel Iris Plus Graphics 650,macOS,3.02085,2056.10,0,1,2560,1600,Intel Processor,3.1,256 SSD,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1298,Lenovo,Yoga 500-14ISK,2 in 1 Convertible,14.0,1920x1080,4,128 SSD,Intel HD Graphics 520,Windows 10,3.96900,727.32,1,1,1920,1080,Intel Processor,2.5,128 SSD,
1299,Lenovo,Yoga 900-13ISK,2 in 1 Convertible,13.3,3200x1800,16,512 SSD,Intel HD Graphics 520,Windows 10,2.86650,1708.86,1,1,3200,1800,Intel Processor,2.5,512 SSD,
1300,Lenovo,IdeaPad 100S-14IBR,Notebook,14.0,1366x768,2,64 Flash Storage,Intel HD Graphics,Windows 10,3.30750,261.06,0,0,1366,768,Intel Processor,1.6,64 Flash Storage,
1301,HP,15-AC110nv (i7-6500U/6GB/1TB/Radeon,Notebook,15.6,1366x768,6,1000 HDD,AMD Radeon R5 M330,Windows 10,4.82895,870.96,0,0,1366,768,Intel Processor,2.5,1000 HDD,


In [35]:
#We create four columns Layer1HDD, Layer1SSD, Layer1Hybrid, Layer1Flash_Storage.
#If para1 contains HDD, then 1 will store in Layer1HDD column; otherwise 0. We did same with all four column.
laptop_data['Layer1HDD'] = laptop_data['para1'].apply(lambda x: 1 if 'HDD' in x else 0)
laptop_data['Layer1SSD'] = laptop_data['para1'].apply(lambda x: 1 if 'SSD' in x else 0)
laptop_data['Layer1Hybrid'] = laptop_data['para1'].apply(lambda x: 1 if 'Hybrid' in x else 0)
laptop_data['Layer1Flash_Storage'] = laptop_data['para1'].apply(lambda x: 1 if 'Flash Storage' in x else 0)

#We create another four columns Layer2HDD, Layer2SSD, Layer2Hybrid, Layer2Flash_Storage.
#If para2 contains HDD, then 1 will store in Layer2HDD column; otherwise 0. We did same with all four column.
laptop_data['para2'].fillna("0", inplace = True)
laptop_data['Layer2HDD'] = laptop_data['para2'].apply(lambda x: 1 if 'HDD' in x else 0)
laptop_data['Layer2SSD'] = laptop_data['para2'].apply(lambda x: 1 if 'SSD' in x else 0)
laptop_data['Layer2Hybrid'] = laptop_data['para2'].apply(lambda x: 1 if 'Hybrid' in x else 0)
laptop_data['Layer2Flash_Storage'] = laptop_data['para2'].apply(lambda x: 1 if 'Flash Storage' in x else 0)

#We removed string from para1 and para2
laptop_data['para1'] = laptop_data['para1'].str.replace('HDD', '')
laptop_data['para1'] = laptop_data['para1'].str.replace('SSD', '')
laptop_data['para1'] = laptop_data['para1'].str.replace('Hybrid', '')
laptop_data['para1'] = laptop_data['para1'].str.replace('Flash Storage', '')

laptop_data['para2'] = laptop_data['para2'].str.replace('HDD', '')
laptop_data['para2'] = laptop_data['para2'].str.replace('SSD', '')
laptop_data['para2'] = laptop_data['para2'].str.replace('Hybrid', '')
laptop_data['para2'] = laptop_data['para2'].str.replace('Flash Storage', '')

laptop_data['para1'] = laptop_data['para1'].astype('float64')
laptop_data['para2'] = laptop_data['para2'].astype('float64')

#Now we have eight columns, with 1 and 0 values. Those shows the type of the memory.
#But we want the actual storage capacity. so we multiplied all 8 columns with para1 and para2.
laptop_data['HDD'] = laptop_data['para1']*laptop_data['Layer1HDD'] + laptop_data['para2']*laptop_data['Layer2HDD']
laptop_data['SSD'] = laptop_data['para1']*laptop_data['Layer1SSD'] + laptop_data['para2']*laptop_data['Layer2SSD']
laptop_data['Hybrid'] = laptop_data['para1']*laptop_data['Layer1Hybrid'] + laptop_data['para2']*laptop_data['Layer2Hybrid']
laptop_data['Flash_storage'] = laptop_data['para1']*laptop_data['Layer1Flash_Storage'] + laptop_data['para2']*laptop_data['Layer2Flash_Storage']

#laptop_data['HDD'] = laptop_data['HDD'].round()
#laptop_data['SSD'] = laptop_data['SSD'].round(decimals=0)
#laptop_data['Hybrid'] = laptop_data['Hybrid'].round(decimals=0)
#laptop_data['Flash_storage'] = laptop_data['Flash_storage'].round(decimals=0)


In [36]:
laptop_data

Unnamed: 0,Company,Product,TypeName,Inches,ScreenResolution,Ram,Memory,Gpu,OpSys,Weight_lb,...,Layer1Hybrid,Layer1Flash_Storage,Layer2HDD,Layer2SSD,Layer2Hybrid,Layer2Flash_Storage,HDD,SSD,Hybrid,Flash_storage
0,Apple,MacBook Pro,Ultrabook,13.3,2560x1600,8,128 SSD,Intel Iris Plus Graphics 640,macOS,3.02085,...,0,0,0,0,0,0,0.0,128.0,0.0,0.0
1,Apple,Macbook Air,Ultrabook,13.3,1440x900,8,128 Flash Storage,Intel HD Graphics 6000,macOS,2.95470,...,0,1,0,0,0,0,0.0,0.0,0.0,128.0
2,HP,250 G6,Notebook,15.6,1920x1080,8,256 SSD,Intel HD Graphics 620,No OS,4.10130,...,0,0,0,0,0,0,0.0,256.0,0.0,0.0
3,Apple,MacBook Pro,Ultrabook,15.4,2880x1800,16,512 SSD,AMD Radeon Pro 455,macOS,4.03515,...,0,0,0,0,0,0,0.0,512.0,0.0,0.0
4,Apple,MacBook Pro,Ultrabook,13.3,2560x1600,8,256 SSD,Intel Iris Plus Graphics 650,macOS,3.02085,...,0,0,0,0,0,0,0.0,256.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1298,Lenovo,Yoga 500-14ISK,2 in 1 Convertible,14.0,1920x1080,4,128 SSD,Intel HD Graphics 520,Windows 10,3.96900,...,0,0,0,0,0,0,0.0,128.0,0.0,0.0
1299,Lenovo,Yoga 900-13ISK,2 in 1 Convertible,13.3,3200x1800,16,512 SSD,Intel HD Graphics 520,Windows 10,2.86650,...,0,0,0,0,0,0,0.0,512.0,0.0,0.0
1300,Lenovo,IdeaPad 100S-14IBR,Notebook,14.0,1366x768,2,64 Flash Storage,Intel HD Graphics,Windows 10,3.30750,...,0,1,0,0,0,0,0.0,0.0,0.0,64.0
1301,HP,15-AC110nv (i7-6500U/6GB/1TB/Radeon,Notebook,15.6,1366x768,6,1000 HDD,AMD Radeon R5 M330,Windows 10,4.82895,...,0,0,0,0,0,0,1000.0,0.0,0.0,0.0


In [37]:
#Drop unnecessory columns
laptop_data.drop(columns=['Memory', 'para1', 'para2', 'Layer1HDD', 'Layer1SSD', 'Layer1Hybrid', 'Layer1Flash_Storage', 'Layer2HDD', 'Layer2SSD', 'Layer2Hybrid', 'Layer2Flash_Storage'], inplace=True)
laptop_data.sample(10)

Unnamed: 0,Company,Product,TypeName,Inches,ScreenResolution,Ram,Gpu,OpSys,Weight_lb,$Price,TouchScreen,IPS,X_res,Y_res,Cpu processor,Processor_speed_GHz,HDD,SSD,Hybrid,Flash_storage
367,Lenovo,Legion Y520-15IKBN,Gaming,15.6,1920x1080,8,Nvidia GeForce GTX 1050,No OS,5.292,922.26,0,1,1920,1080,Intel Processor,2.5,1000.0,128.0,0.0,0.0
1234,Apple,MacBook Air,Ultrabook,11.6,1366x768,4,Intel HD Graphics 6000,Mac OS X,2.3814,1093.26,0,0,1366,768,Intel Processor,1.6,0.0,0.0,0.0,256.0
801,Asus,Pro P2540UA-XS51,Notebook,15.6,1920x1080,8,Intel HD Graphics 620,Windows 10,5.22585,1252.86,0,0,1920,1080,Intel Processor,2.5,0.0,256.0,0.0,0.0
986,HP,Probook 450,Notebook,15.6,1920x1080,8,Intel HD Graphics 620,Windows 10,4.4982,918.84,0,0,1920,1080,Intel Processor,2.5,1000.0,0.0,0.0,0.0
306,HP,EliteBook x360,2 in 1 Convertible,13.3,1920x1080,16,Intel HD Graphics 620,Windows 10,2.8224,2251.5,1,0,1920,1080,Intel Processor,2.8,0.0,256.0,0.0,0.0
465,Asus,X541NA-PD1003Y (N4200/4GB/500GB/W10),Notebook,15.6,1366x768,4,Intel HD Graphics 500,Windows 10,4.41,346.56,0,0,1366,768,Intel Processor,1.1,500.0,0.0,0.0,0.0
413,Acer,Aspire R7,2 in 1 Convertible,13.3,1920x1080,8,Intel HD Graphics 520,Windows 10,3.528,899.47,1,1,1920,1080,Intel Processor,2.5,0.0,256.0,0.0,0.0
281,Dell,Inspiron 3567,Notebook,15.6,1920x1080,4,AMD Radeon R5 M430,Linux,4.96125,487.92,0,0,1920,1080,Intel Processor,2.0,1000.0,0.0,0.0,0.0
1250,Dell,Inspiron 3552,Notebook,15.6,1366x768,4,Intel HD Graphics,Linux,4.851,369.36,0,0,1366,768,Intel Processor,1.6,500.0,0.0,0.0,0.0
1031,Acer,Aspire 3,Notebook,15.6,1366x768,6,AMD Radeon R5,Windows 10,4.6305,466.26,0,0,1366,768,AMD Processor,3.0,1000.0,0.0,0.0,0.0


In [38]:
laptop_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1303 entries, 0 to 1302
Data columns (total 20 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   Company              1303 non-null   object 
 1   Product              1303 non-null   object 
 2   TypeName             1303 non-null   object 
 3   Inches               1303 non-null   float64
 4   ScreenResolution     1303 non-null   object 
 5   Ram                  1303 non-null   int32  
 6   Gpu                  1303 non-null   object 
 7   OpSys                1303 non-null   object 
 8   Weight_lb            1303 non-null   float32
 9   $Price               1303 non-null   float64
 10  TouchScreen          1303 non-null   int64  
 11  IPS                  1303 non-null   int64  
 12  X_res                1303 non-null   int32  
 13  Y_res                1303 non-null   int32  
 14  Cpu processor        1303 non-null   object 
 15  Processor_speed_GHz  1303 non-null   o

# Fetching the information from 'Gpu' column

In [39]:
laptop_data['Gpu'].value_counts()

Intel HD Graphics 620      281
Intel HD Graphics 520      185
Intel UHD Graphics 620      68
Nvidia GeForce GTX 1050     66
Nvidia GeForce GTX 1060     48
                          ... 
AMD Radeon R5 520            1
AMD Radeon R7                1
Intel HD Graphics 540        1
AMD Radeon 540               1
ARM Mali T860 MP4            1
Name: Gpu, Length: 110, dtype: int64

In [40]:
def gpu(text):
    if text[0] == 'AMD':
        return 'Good'
    elif text[0] == 'MSI':
        return 'Good'
    elif text[0] == 'Nvidia':
        return 'Good'
    else:
        return 'Average'

In [41]:
laptop_data['Gpu'] = laptop_data['Gpu'].apply(gpu)
laptop_data.sample(10)

Unnamed: 0,Company,Product,TypeName,Inches,ScreenResolution,Ram,Gpu,OpSys,Weight_lb,$Price,TouchScreen,IPS,X_res,Y_res,Cpu processor,Processor_speed_GHz,HDD,SSD,Hybrid,Flash_storage
545,HP,ProBook 450,Notebook,15.6,1920x1080,4,Average,Windows 10,4.6305,804.27,0,0,1920,1080,Intel Processor,2.4,0.0,128.0,0.0,0.0
1173,Lenovo,V110-15ISK (i5-6200U/4GB/500GB/No,Notebook,15.6,1366x768,4,Average,No OS,4.6305,453.72,0,0,1366,768,Intel Processor,2.3,500.0,0.0,0.0,0.0
1221,MSI,GP62MVR 6RF,Gaming,15.6,1920x1080,8,Average,Windows 10,4.851,1682.77,0,0,1920,1080,Intel Processor,2.6,1000.0,128.0,0.0,0.0
47,Asus,Rog Strix,Gaming,17.3,1920x1080,8,Average,Windows 10,7.056,1480.86,0,0,1920,1080,AMD Processor,3.0,1000.0,256.0,0.0,0.0
1134,Acer,Aspire E5-576G,Notebook,15.6,1920x1080,4,Average,Windows 10,4.91715,702.24,0,0,1920,1080,Intel Processor,2.5,1000.0,0.0,0.0,0.0
436,HP,15-ra044nv (N3060/4GB/500GB/W10),Notebook,15.6,1366x768,4,Average,Windows 10,4.6305,395.58,0,0,1366,768,Intel Processor,1.6,500.0,0.0,0.0,0.0
922,HP,Envy 13-AB077cl,Ultrabook,13.3,3200x1800,8,Average,Windows 10,3.06495,1309.86,1,0,3200,1800,Intel Processor,2.7,0.0,256.0,0.0,0.0
748,Dell,Latitude 5480,Notebook,14.0,1366x768,8,Average,Windows 10,3.528,1284.45,0,0,1366,768,Intel Processor,2.5,0.0,256.0,0.0,0.0
754,Asus,Pro P2540UA-XO0192R,Notebook,15.6,1366x768,4,Average,Windows 10,5.22585,968.89,0,0,1366,768,Intel Processor,2.7,0.0,256.0,0.0,0.0
559,Dell,Vostro 3568,Notebook,15.6,1366x768,4,Average,Windows 10,4.8069,704.41,0,0,1366,768,Intel Processor,2.0,1000.0,0.0,0.0,0.0


When you’re looking for a Graphics Card,  The best graphics cards are MSI, AMD, and Nvidia. So I considered these cards as good and others as average.

# Fetching the information from 'OpSys' column

In [42]:
laptop_data['OpSys'].value_counts()

Windows 10      1072
No OS             66
Linux             62
Windows 7         45
Chrome OS         27
macOS             13
Mac OS X           8
Windows 10 S       8
Android            2
Name: OpSys, dtype: int64

In [43]:
def operating_system(text):
    if text.split()[0] == 'Windows':
        return 'Windows'
    elif text.split()[0] == 'Linux':
        return 'Linux'
    elif text.split()[0] == 'Mac' and 'macOs':
        return 'Mac'
    elif text.split()[0] == 'Android':
        return 'Android'
    elif text.split()[0] == 'Chrome':
        return 'Chrome'
    else:
        return 'No OS'

In [44]:
laptop_data['OpSys'] = laptop_data['OpSys'].apply(operating_system)
laptop_data

Unnamed: 0,Company,Product,TypeName,Inches,ScreenResolution,Ram,Gpu,OpSys,Weight_lb,$Price,TouchScreen,IPS,X_res,Y_res,Cpu processor,Processor_speed_GHz,HDD,SSD,Hybrid,Flash_storage
0,Apple,MacBook Pro,Ultrabook,13.3,2560x1600,8,Average,No OS,3.02085,1527.25,0,1,2560,1600,Intel Processor,2.3,0.0,128.0,0.0,0.0
1,Apple,Macbook Air,Ultrabook,13.3,1440x900,8,Average,No OS,2.95470,1024.79,0,0,1440,900,Intel Processor,1.8,0.0,0.0,0.0,128.0
2,HP,250 G6,Notebook,15.6,1920x1080,8,Average,No OS,4.10130,655.50,0,0,1920,1080,Intel Processor,2.5,0.0,256.0,0.0,0.0
3,Apple,MacBook Pro,Ultrabook,15.4,2880x1800,16,Average,No OS,4.03515,2892.69,0,1,2880,1800,Intel Processor,2.7,0.0,512.0,0.0,0.0
4,Apple,MacBook Pro,Ultrabook,13.3,2560x1600,8,Average,No OS,3.02085,2056.10,0,1,2560,1600,Intel Processor,3.1,0.0,256.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1298,Lenovo,Yoga 500-14ISK,2 in 1 Convertible,14.0,1920x1080,4,Average,Windows,3.96900,727.32,1,1,1920,1080,Intel Processor,2.5,0.0,128.0,0.0,0.0
1299,Lenovo,Yoga 900-13ISK,2 in 1 Convertible,13.3,3200x1800,16,Average,Windows,2.86650,1708.86,1,1,3200,1800,Intel Processor,2.5,0.0,512.0,0.0,0.0
1300,Lenovo,IdeaPad 100S-14IBR,Notebook,14.0,1366x768,2,Average,Windows,3.30750,261.06,0,0,1366,768,Intel Processor,1.6,0.0,0.0,0.0,64.0
1301,HP,15-AC110nv (i7-6500U/6GB/1TB/Radeon,Notebook,15.6,1366x768,6,Average,Windows,4.82895,870.96,0,0,1366,768,Intel Processor,2.5,1000.0,0.0,0.0,0.0


and lastly, I assigned idividual operating system

In [None]:
#done