In [1]:
import pandas as pd
import numpy as np

In [2]:
# load the laptop dataset; the relative path is resolved against the kernel's working directory
df_laptops = pd.read_csv(filepath_or_buffer='laptop_price.csv')

In [3]:
# preview the first three rows to see which columns were loaded
df_laptops.head(n=3)

Unnamed: 0,laptop_ID,Company,Product,TypeName,Inches,ScreenResolution,Cpu,Ram,Memory,Gpu,OpSys,Weight,Price_euros
0,1,Apple,MacBook Pro,Ultrabook,13.3,IPS Panel Retina Display 2560x1600,Intel Core i5 2.3GHz,8GB,128GB SSD,Intel Iris Plus Graphics 640,macOS,1.37kg,1339.69
1,2,Apple,Macbook Air,Ultrabook,13.3,1440x900,Intel Core i5 1.8GHz,8GB,128GB Flash Storage,Intel HD Graphics 6000,macOS,1.34kg,898.94
2,3,HP,250 G6,Notebook,15.6,Full HD 1920x1080,Intel Core i5 7200U 2.5GHz,8GB,256GB SSD,Intel HD Graphics 620,No OS,1.86kg,575.0


## Creating a Conditional Column from 2 Choices: np.where()

In [5]:
# build a two-label array from the price column:
# rows priced above 2000 get 'Expensive', every other row gets 'Inexpensive'
price_tier = np.where(
    df_laptops['Price_euros'].gt(2000),  # boolean mask, True where price > 2000
    'Expensive',                         # label used where the mask is True
    'Inexpensive',                       # label used everywhere else
)

In [6]:
# attach the label array to the frame as the 'Price_tier' column (mutates df_laptops in place)
df_laptops['Price_tier'] = price_tier

In [7]:
# show the first five rows to confirm the Price_tier column was added
df_laptops.head(n=5)

Unnamed: 0,laptop_ID,Company,Product,TypeName,Inches,ScreenResolution,Cpu,Ram,Memory,Gpu,OpSys,Weight,Price_euros,Price_tier
0,1,Apple,MacBook Pro,Ultrabook,13.3,IPS Panel Retina Display 2560x1600,Intel Core i5 2.3GHz,8GB,128GB SSD,Intel Iris Plus Graphics 640,macOS,1.37kg,1339.69,Inexpensive
1,2,Apple,Macbook Air,Ultrabook,13.3,1440x900,Intel Core i5 1.8GHz,8GB,128GB Flash Storage,Intel HD Graphics 6000,macOS,1.34kg,898.94,Inexpensive
2,3,HP,250 G6,Notebook,15.6,Full HD 1920x1080,Intel Core i5 7200U 2.5GHz,8GB,256GB SSD,Intel HD Graphics 620,No OS,1.86kg,575.0,Inexpensive
3,4,Apple,MacBook Pro,Ultrabook,15.4,IPS Panel Retina Display 2880x1800,Intel Core i7 2.7GHz,16GB,512GB SSD,AMD Radeon Pro 455,macOS,1.83kg,2537.45,Expensive
4,5,Apple,MacBook Pro,Ultrabook,13.3,IPS Panel Retina Display 2560x1600,Intel Core i5 3.1GHz,8GB,256GB SSD,Intel Iris Plus Graphics 650,macOS,1.37kg,1803.6,Inexpensive


In [8]:
# count how many rows carry each label in the 'Price_tier' column
df_laptops['Price_tier'].value_counts()

Price_tier
Inexpensive    1119
Expensive       137
Name: count, dtype: int64

### Add a Screen Size column: Big (> 15 inches) or Small (≤ 15 inches)

In [10]:
# build a two-label array from the screen diagonal:
# rows above 15 inches get 'Big', every other row gets 'Small'
screen_size = np.where(
    df_laptops['Inches'].gt(15),  # boolean mask, True where Inches > 15
    'Big',                        # label used where the mask is True
    'Small',                      # label used everywhere else
)

In [11]:
# attach the label array to the frame as the 'Screen Size' column (mutates df_laptops in place)
df_laptops['Screen Size'] = screen_size

In [12]:
# show the first five rows to confirm the Screen Size column was added
df_laptops.head(n=5)

Unnamed: 0,laptop_ID,Company,Product,TypeName,Inches,ScreenResolution,Cpu,Ram,Memory,Gpu,OpSys,Weight,Price_euros,Price_tier,Screen Size
0,1,Apple,MacBook Pro,Ultrabook,13.3,IPS Panel Retina Display 2560x1600,Intel Core i5 2.3GHz,8GB,128GB SSD,Intel Iris Plus Graphics 640,macOS,1.37kg,1339.69,Inexpensive,Small
1,2,Apple,Macbook Air,Ultrabook,13.3,1440x900,Intel Core i5 1.8GHz,8GB,128GB Flash Storage,Intel HD Graphics 6000,macOS,1.34kg,898.94,Inexpensive,Small
2,3,HP,250 G6,Notebook,15.6,Full HD 1920x1080,Intel Core i5 7200U 2.5GHz,8GB,256GB SSD,Intel HD Graphics 620,No OS,1.86kg,575.0,Inexpensive,Big
3,4,Apple,MacBook Pro,Ultrabook,15.4,IPS Panel Retina Display 2880x1800,Intel Core i7 2.7GHz,16GB,512GB SSD,AMD Radeon Pro 455,macOS,1.83kg,2537.45,Expensive,Big
4,5,Apple,MacBook Pro,Ultrabook,13.3,IPS Panel Retina Display 2560x1600,Intel Core i5 3.1GHz,8GB,256GB SSD,Intel Iris Plus Graphics 650,macOS,1.37kg,1803.6,Inexpensive,Small


In [13]:
# count how many rows carry each label in the 'Screen Size' column
df_laptops['Screen Size'].value_counts()

Screen Size
Big      804
Small    452
Name: count, dtype: int64

## Creating a Conditional Column from More Than 2 Choices: np.select()

In [15]:
# labels for the four price tiers (more than 2 choices),
# listed in the same order as the conditions below
choices = ['Too Expensive', 'Expensive', 'Affordable', 'Cheap']

# one boolean mask per label: conditions[i] is the rule for choices[i];
# the ranges are half-open (lower bound exclusive, upper bound inclusive) so they do not overlap
conditions = [
    df_laptops['Price_euros'].gt(3000),
    df_laptops['Price_euros'].gt(2000) & df_laptops['Price_euros'].le(3000),
    df_laptops['Price_euros'].gt(800) & df_laptops['Price_euros'].le(2000),
    df_laptops['Price_euros'].le(800),
]

In [16]:
# set it to a new column
df_laptops['Price_tier'] = np.select(conditions, choices)

In [17]:
# display the frame to inspect the updated Price_tier labels
df_laptops

Unnamed: 0,laptop_ID,Company,Product,TypeName,Inches,ScreenResolution,Cpu,Ram,Memory,Gpu,OpSys,Weight,Price_euros,Price_tier,Screen Size
0,1,Apple,MacBook Pro,Ultrabook,13.3,IPS Panel Retina Display 2560x1600,Intel Core i5 2.3GHz,8GB,128GB SSD,Intel Iris Plus Graphics 640,macOS,1.37kg,1339.69,Affordable,Small
1,2,Apple,Macbook Air,Ultrabook,13.3,1440x900,Intel Core i5 1.8GHz,8GB,128GB Flash Storage,Intel HD Graphics 6000,macOS,1.34kg,898.94,Affordable,Small
2,3,HP,250 G6,Notebook,15.6,Full HD 1920x1080,Intel Core i5 7200U 2.5GHz,8GB,256GB SSD,Intel HD Graphics 620,No OS,1.86kg,575.00,Cheap,Big
3,4,Apple,MacBook Pro,Ultrabook,15.4,IPS Panel Retina Display 2880x1800,Intel Core i7 2.7GHz,16GB,512GB SSD,AMD Radeon Pro 455,macOS,1.83kg,2537.45,Expensive,Big
4,5,Apple,MacBook Pro,Ultrabook,13.3,IPS Panel Retina Display 2560x1600,Intel Core i5 3.1GHz,8GB,256GB SSD,Intel Iris Plus Graphics 650,macOS,1.37kg,1803.60,Affordable,Small
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1251,1269,HP,255 G6,Notebook,15.6,1366x768,AMD A6-Series 9220 2.5GHz,4GB,500GB HDD,AMD Radeon R4 Graphics,Windows 10,1.86kg,399.00,Cheap,Big
1252,1270,Lenovo,IdeaPad 310-15ISK,Notebook,15.6,Full HD 1920x1080,Intel Core i3 6100U 2.3GHz,6GB,128GB SSD,Nvidia GeForce 920MX,Windows 10,2.4kg,569.00,Cheap,Big
1253,1271,Lenovo,ThinkPad L460,Notebook,14.0,IPS Panel Full HD 1920x1080,Intel Core i5 6200U 2.3GHz,8GB,256GB SSD,Intel HD Graphics 520,Windows 10,1.9kg,1072.00,Affordable,Small
1254,1272,Dell,Inspiron 3552,Notebook,15.6,1366x768,Intel Pentium Quad Core N3700 1.6GHz,4GB,500GB HDD,Intel HD Graphics,Windows 10,2.2kg,443.99,Cheap,Big


In [18]:
# count how many rows carry each label in the 'Price_tier' column
df_laptops['Price_tier'].value_counts()

Price_tier
Affordable       642
Cheap            477
Expensive        118
Too Expensive     19
Name: count, dtype: int64

### Screen Size column with the following conditions (in inches)
- Too Big: > 16
- Big: > 14 and ≤ 16
- Small: > 12 and ≤ 14
- Too Small: ≤ 12

In [20]:
# labels for the four screen-size classes, listed in the same order as the conditions below
choices = ['Too Big', 'Big', 'Small', 'Too Small']

# one boolean mask per label: conditions[i] is the rule for choices[i];
# the ranges are half-open (lower bound exclusive, upper bound inclusive) so they do not overlap
conditions = [
    df_laptops['Inches'].gt(16),
    df_laptops['Inches'].gt(14) & df_laptops['Inches'].le(16),
    df_laptops['Inches'].gt(12) & df_laptops['Inches'].le(14),
    df_laptops['Inches'].le(12),
]

In [21]:
# match the choices with conditions with np.select() in Screen Size Column
df_laptops['Screen Size'] = np.select(conditions, choices)

In [22]:
# display the frame to inspect the updated Screen Size labels
df_laptops

Unnamed: 0,laptop_ID,Company,Product,TypeName,Inches,ScreenResolution,Cpu,Ram,Memory,Gpu,OpSys,Weight,Price_euros,Price_tier,Screen Size
0,1,Apple,MacBook Pro,Ultrabook,13.3,IPS Panel Retina Display 2560x1600,Intel Core i5 2.3GHz,8GB,128GB SSD,Intel Iris Plus Graphics 640,macOS,1.37kg,1339.69,Affordable,Small
1,2,Apple,Macbook Air,Ultrabook,13.3,1440x900,Intel Core i5 1.8GHz,8GB,128GB Flash Storage,Intel HD Graphics 6000,macOS,1.34kg,898.94,Affordable,Small
2,3,HP,250 G6,Notebook,15.6,Full HD 1920x1080,Intel Core i5 7200U 2.5GHz,8GB,256GB SSD,Intel HD Graphics 620,No OS,1.86kg,575.00,Cheap,Big
3,4,Apple,MacBook Pro,Ultrabook,15.4,IPS Panel Retina Display 2880x1800,Intel Core i7 2.7GHz,16GB,512GB SSD,AMD Radeon Pro 455,macOS,1.83kg,2537.45,Expensive,Big
4,5,Apple,MacBook Pro,Ultrabook,13.3,IPS Panel Retina Display 2560x1600,Intel Core i5 3.1GHz,8GB,256GB SSD,Intel Iris Plus Graphics 650,macOS,1.37kg,1803.60,Affordable,Small
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1251,1269,HP,255 G6,Notebook,15.6,1366x768,AMD A6-Series 9220 2.5GHz,4GB,500GB HDD,AMD Radeon R4 Graphics,Windows 10,1.86kg,399.00,Cheap,Big
1252,1270,Lenovo,IdeaPad 310-15ISK,Notebook,15.6,Full HD 1920x1080,Intel Core i3 6100U 2.3GHz,6GB,128GB SSD,Nvidia GeForce 920MX,Windows 10,2.4kg,569.00,Cheap,Big
1253,1271,Lenovo,ThinkPad L460,Notebook,14.0,IPS Panel Full HD 1920x1080,Intel Core i5 6200U 2.3GHz,8GB,256GB SSD,Intel HD Graphics 520,Windows 10,1.9kg,1072.00,Affordable,Small
1254,1272,Dell,Inspiron 3552,Notebook,15.6,1366x768,Intel Pentium Quad Core N3700 1.6GHz,4GB,500GB HDD,Intel HD Graphics,Windows 10,2.2kg,443.99,Cheap,Big


In [42]:
# count how many rows carry each label in the 'Screen Size' column
df_laptops['Screen Size'].value_counts()

Screen Size
Big          644
Small        406
Too Big      165
Too Small     41
Name: count, dtype: int64