In [1]:
import pandas as pd

In [2]:
# Load the laptop listing. The file is decoded as Latin-1 rather than
# pandas' default UTF-8.
df_laptops = pd.read_csv(
    'laptop_price.csv',
    encoding='latin1',
)

In [3]:
# Peek at the first five rows to sanity-check the load.
df_laptops.head(n=5)

Unnamed: 0,laptop_ID,Company,Product,TypeName,Inches,ScreenResolution,Cpu,Ram,Memory,Gpu,OpSys,Weight,Price_euros
0,1,Apple,MacBook Pro,Ultrabook,13.3,IPS Panel Retina Display 2560x1600,Intel Core i5 2.3GHz,8GB,128GB SSD,Intel Iris Plus Graphics 640,macOS,1.37kg,1339.69
1,2,Apple,Macbook Air,Ultrabook,13.3,1440x900,Intel Core i5 1.8GHz,8GB,128GB Flash Storage,Intel HD Graphics 6000,macOS,1.34kg,898.94
2,3,HP,250 G6,Notebook,15.6,Full HD 1920x1080,Intel Core i5 7200U 2.5GHz,8GB,256GB SSD,Intel HD Graphics 620,No OS,1.86kg,575.0
3,4,Apple,MacBook Pro,Ultrabook,15.4,IPS Panel Retina Display 2880x1800,Intel Core i7 2.7GHz,16GB,512GB SSD,AMD Radeon Pro 455,macOS,1.83kg,2537.45
4,5,Apple,MacBook Pro,Ultrabook,13.3,IPS Panel Retina Display 2560x1600,Intel Core i5 3.1GHz,8GB,256GB SSD,Intel Iris Plus Graphics 650,macOS,1.37kg,1803.6


## 1. Creating a Conditional Column from More Than 2 Choices: np.select()

In [9]:
import numpy as np

In [12]:
# Price tiers derived from Price_euros (more than two choices):
#   Too Expensive : price > 3000
#   Expensive     : 2000 < price <= 3000
#   Affordable    : 800 < price <= 2000
#   Cheap         : price <= 800
# Each entry in `conditions` is a boolean Series; the label at the same
# position in `values` is the tier it stands for.
conditions = [
    df_laptops['Price_euros'].gt(3000),
    df_laptops['Price_euros'].between(2000, 3000, inclusive='right'),
    df_laptops['Price_euros'].between(800, 2000, inclusive='right'),
    df_laptops['Price_euros'].le(800),
]

values = [
    "Too Expensive",
    "Expensive",
    "Affordable",
    "Cheap",
]

In [18]:
# Store the tier label of each row in a new column. Rows that satisfy none
# of the conditions fall back to "Unknown".
df_laptops['Price_tiers'] = np.select(
    condlist=conditions,
    choicelist=values,
    default="Unknown",
)

In [19]:
# Preview the first five rows; Price_tiers is the last column.
df_laptops.head(n=5)

Unnamed: 0,laptop_ID,Company,Product,TypeName,Inches,ScreenResolution,Cpu,Ram,Memory,Gpu,OpSys,Weight,Price_euros,Price_tiers
0,1,Apple,MacBook Pro,Ultrabook,13.3,IPS Panel Retina Display 2560x1600,Intel Core i5 2.3GHz,8GB,128GB SSD,Intel Iris Plus Graphics 640,macOS,1.37kg,1339.69,Affordable
1,2,Apple,Macbook Air,Ultrabook,13.3,1440x900,Intel Core i5 1.8GHz,8GB,128GB Flash Storage,Intel HD Graphics 6000,macOS,1.34kg,898.94,Affordable
2,3,HP,250 G6,Notebook,15.6,Full HD 1920x1080,Intel Core i5 7200U 2.5GHz,8GB,256GB SSD,Intel HD Graphics 620,No OS,1.86kg,575.0,Cheap
3,4,Apple,MacBook Pro,Ultrabook,15.4,IPS Panel Retina Display 2880x1800,Intel Core i7 2.7GHz,16GB,512GB SSD,AMD Radeon Pro 455,macOS,1.83kg,2537.45,Expensive
4,5,Apple,MacBook Pro,Ultrabook,13.3,IPS Panel Retina Display 2560x1600,Intel Core i5 3.1GHz,8GB,256GB SSD,Intel Iris Plus Graphics 650,macOS,1.37kg,1803.6,Affordable


In [24]:
# Number of laptops in each tier of the "Price_tiers" column
df_laptops.value_counts(subset="Price_tiers")

Price_tiers
Affordable       655
Cheap            511
Expensive        118
Too Expensive     19
Name: count, dtype: int64

## 2. Exercise

In [25]:
# Screen-size tiers derived from Inches (more than two choices):
#   Too Big   : inches > 16
#   Big       : 14 < inches <= 16
#   Small     : 12 < inches <= 14
#   Too Small : inches <= 12   (a screen of exactly 12 inches lands here)
# Each entry in `conditions` is a boolean Series; the label at the same
# position in `values` is the tier it stands for.
conditions = [
    df_laptops['Inches'].gt(16),
    df_laptops['Inches'].between(14, 16, inclusive='right'),
    df_laptops['Inches'].between(12, 14, inclusive='right'),
    df_laptops['Inches'].le(12),
]

values = [
    'Too Big',
    'Big',
    'Small',
    'Too Small',
]

In [26]:
# Store the size label of each row in a new column. Rows that satisfy none
# of the conditions fall back to 'Unknown'.
df_laptops['screen_size'] = np.select(
    condlist=conditions,
    choicelist=values,
    default='Unknown',
)

In [28]:
# Preview the first five rows; screen_size is the last column.
df_laptops.head(n=5)

Unnamed: 0,laptop_ID,Company,Product,TypeName,Inches,ScreenResolution,Cpu,Ram,Memory,Gpu,OpSys,Weight,Price_euros,Price_tiers,screen_size
0,1,Apple,MacBook Pro,Ultrabook,13.3,IPS Panel Retina Display 2560x1600,Intel Core i5 2.3GHz,8GB,128GB SSD,Intel Iris Plus Graphics 640,macOS,1.37kg,1339.69,Affordable,Small
1,2,Apple,Macbook Air,Ultrabook,13.3,1440x900,Intel Core i5 1.8GHz,8GB,128GB Flash Storage,Intel HD Graphics 6000,macOS,1.34kg,898.94,Affordable,Small
2,3,HP,250 G6,Notebook,15.6,Full HD 1920x1080,Intel Core i5 7200U 2.5GHz,8GB,256GB SSD,Intel HD Graphics 620,No OS,1.86kg,575.0,Cheap,Big
3,4,Apple,MacBook Pro,Ultrabook,15.4,IPS Panel Retina Display 2880x1800,Intel Core i7 2.7GHz,16GB,512GB SSD,AMD Radeon Pro 455,macOS,1.83kg,2537.45,Expensive,Big
4,5,Apple,MacBook Pro,Ultrabook,13.3,IPS Panel Retina Display 2560x1600,Intel Core i5 3.1GHz,8GB,256GB SSD,Intel Iris Plus Graphics 650,macOS,1.37kg,1803.6,Affordable,Small


In [29]:
# Number of laptops in each tier of the "screen_size" column
df_laptops.value_counts(subset='screen_size')

screen_size
Big          674
Small        419
Too Big      166
Too Small     44
Name: count, dtype: int64