In [1]:
import pandas as pd
import numpy as np

In [2]:
# Load the raw laptop listings into a DataFrame. The file is decoded as Latin-1
# (presumably because it is not valid UTF-8 — TODO confirm against the source file).
laptops = pd.read_csv('laptops.csv', encoding = 'Latin-1')

In [3]:
laptops.head()

Unnamed: 0,Manufacturer,Model Name,Category,Screen Size,Screen,CPU,RAM,Storage,GPU,Operating System,Operating System Version,Weight,Price (Euros)
0,Apple,MacBook Pro,Ultrabook,"13.3""",IPS Panel Retina Display 2560x1600,Intel Core i5 2.3GHz,8GB,128GB SSD,Intel Iris Plus Graphics 640,macOS,,1.37kg,133969
1,Apple,Macbook Air,Ultrabook,"13.3""",1440x900,Intel Core i5 1.8GHz,8GB,128GB Flash Storage,Intel HD Graphics 6000,macOS,,1.34kg,89894
2,HP,250 G6,Notebook,"15.6""",Full HD 1920x1080,Intel Core i5 7200U 2.5GHz,8GB,256GB SSD,Intel HD Graphics 620,No OS,,1.86kg,57500
3,Apple,MacBook Pro,Ultrabook,"15.4""",IPS Panel Retina Display 2880x1800,Intel Core i7 2.7GHz,16GB,512GB SSD,AMD Radeon Pro 455,macOS,,1.83kg,253745
4,Apple,MacBook Pro,Ultrabook,"13.3""",IPS Panel Retina Display 2560x1600,Intel Core i5 3.1GHz,8GB,256GB SSD,Intel Iris Plus Graphics 650,macOS,,1.37kg,180360


In [4]:
def clean_col(col):
    """Turn a raw column label into a lowercase, underscore-separated name.

    Steps, in order: trim surrounding whitespace, shorten the phrase
    "Operating System" to "os", turn spaces into underscores, drop
    parentheses, and lowercase the result.

    Parameters
    ----------
    col : str
        The original column label.

    Returns
    -------
    str
        The cleaned label.
    """
    # The "Operating System" substitution is case-sensitive, so it has to
    # happen before the final lowercasing.
    label = col.strip().replace("Operating System", "os")
    for old, new in ((" ", "_"), ("(", ""), (")", "")):
        label = label.replace(old, new)
    return label.lower()

# Pass every existing column label through clean_col, then install the
# cleaned labels on the frame in the same order.
new_columns = [clean_col(label) for label in laptops.columns]
laptops.columns = new_columns

In [5]:
laptops.head()

Unnamed: 0,manufacturer,model_name,category,screen_size,screen,cpu,ram,storage,gpu,os,os_version,weight,price_euros
0,Apple,MacBook Pro,Ultrabook,"13.3""",IPS Panel Retina Display 2560x1600,Intel Core i5 2.3GHz,8GB,128GB SSD,Intel Iris Plus Graphics 640,macOS,,1.37kg,133969
1,Apple,Macbook Air,Ultrabook,"13.3""",1440x900,Intel Core i5 1.8GHz,8GB,128GB Flash Storage,Intel HD Graphics 6000,macOS,,1.34kg,89894
2,HP,250 G6,Notebook,"15.6""",Full HD 1920x1080,Intel Core i5 7200U 2.5GHz,8GB,256GB SSD,Intel HD Graphics 620,No OS,,1.86kg,57500
3,Apple,MacBook Pro,Ultrabook,"15.4""",IPS Panel Retina Display 2880x1800,Intel Core i7 2.7GHz,16GB,512GB SSD,AMD Radeon Pro 455,macOS,,1.83kg,253745
4,Apple,MacBook Pro,Ultrabook,"13.3""",IPS Panel Retina Display 2560x1600,Intel Core i5 3.1GHz,8GB,256GB SSD,Intel Iris Plus Graphics 650,macOS,,1.37kg,180360


In [6]:
# Distinct raw values of the `ram` column, kept for inspection.
unique_ram = laptops['ram'].unique()

In [7]:
unique_ram

array(['8GB', '16GB', '4GB', '2GB', '12GB', '6GB', '32GB', '24GB', '64GB'],
      dtype=object)

In [8]:
laptops.dtypes

manufacturer    object
model_name      object
category        object
screen_size     object
screen          object
cpu             object
ram             object
storage         object
gpu             object
os              object
os_version      object
weight          object
price_euros     object
dtype: object

In [9]:
# Strip the "GB" suffix so RAM can be stored as an integer, then move the
# unit into the column name.
laptops["ram"] = (
    laptops["ram"]
    .str.replace("GB", "")
    .astype(int)
)
laptops.rename(columns={"ram": "ram_gb"}, inplace=True)

In [10]:
laptops.dtypes

manufacturer    object
model_name      object
category        object
screen_size     object
screen          object
cpu             object
ram_gb           int64
storage         object
gpu             object
os              object
os_version      object
weight          object
price_euros     object
dtype: object

In [11]:
# The manufacturer is the first whitespace-separated token of the
# component description.

# GPU
laptops["gpu_manufacturer"] = laptops["gpu"].str.split().str.get(0)
gpu_manufacturer_counts = laptops["gpu_manufacturer"].value_counts()

# CPU
laptops["cpu_manufacturer"] = laptops["cpu"].str.split().str.get(0)
cpu_manufacturer_counts = laptops["cpu_manufacturer"].value_counts()

print(gpu_manufacturer_counts)
print(cpu_manufacturer_counts)

Intel     722
Nvidia    400
AMD       180
ARM         1
Name: gpu_manufacturer, dtype: int64
Intel      1240
AMD          62
Samsung       1
Name: cpu_manufacturer, dtype: int64


In [12]:
laptops["os"].value_counts()

Windows      1125
No OS          66
Linux          62
Chrome OS      27
macOS          13
Mac OS          8
Android         2
Name: os, dtype: int64

In [13]:
# Canonical spelling for each OS label; the only entry that changes a
# value is 'Mac OS' -> 'macOS', the rest map to themselves.
os_dict = {
    'Android': 'Android',
    'Chrome OS': 'Chrome OS',
    'Linux': 'Linux',
    'Mac OS': 'macOS',
    'No OS': 'No OS',
    'Windows': 'Windows',
    'macOS': 'macOS'
}

# Series.map with a dict turns every label that is not a key into NaN.
# Fall back to the original value for anything os_dict does not list, so an
# unexpected OS label is kept instead of being silently erased.
laptops["os"] = laptops["os"].map(os_dict).fillna(laptops["os"])

In [14]:
laptops["os"].value_counts()

Windows      1125
No OS          66
Linux          62
Chrome OS      27
macOS          21
Android         2
Name: os, dtype: int64

In [32]:
# Number of missing values in each column.
# isnull().sum() is already 0 for a column without nulls, so no separate
# "has any nulls" branch is needed; int() keeps every value a plain Python int.
nulls_by_col_bef = {c: int(laptops[c].isnull().sum()) for c in laptops.columns}

nulls_by_col_bef

{'manufacturer': 0,
 'model_name': 0,
 'category': 0,
 'screen_size': 0,
 'screen': 0,
 'cpu': 0,
 'ram_gb': 0,
 'storage': 0,
 'gpu': 0,
 'os': 0,
 'os_version': 170,
 'weight': 0,
 'price_euros': 0,
 'gpu_manufacturer': 0,
 'cpu_manufacturer': 0}

In [36]:
# For the rows whose os_version is missing, count how many fall under each os value.
laptops.loc[laptops["os_version"].isnull(), "os"].value_counts()

No OS        66
Linux        62
Chrome OS    27
macOS        13
Android       2
Name: os, dtype: int64

In [38]:
# Fill os_version only where it is actually missing, so a version that is
# already recorded for a macOS / No OS row is never overwritten.
version_missing = laptops["os_version"].isnull()
laptops.loc[version_missing & (laptops["os"] == "macOS"), "os_version"] = "X"
laptops.loc[version_missing & (laptops["os"] == "No OS"), "os_version"] = "Version Unknown"

# Rows still lacking a version, broken down by os.
laptops.loc[laptops["os_version"].isnull(), "os"].value_counts()

Linux        62
Chrome OS    27
Android       2
Name: os, dtype: int64

In [41]:
# Number of missing values in each column.
# isnull().sum() is already 0 for a column without nulls, so no separate
# "has any nulls" branch is needed; int() keeps every value a plain Python int.
nulls_by_col_aft = {c: int(laptops[c].isnull().sum()) for c in laptops.columns}

nulls_by_col_aft

{'manufacturer': 0,
 'model_name': 0,
 'category': 0,
 'screen_size': 0,
 'screen': 0,
 'cpu': 0,
 'ram_gb': 0,
 'storage': 0,
 'gpu': 0,
 'os': 0,
 'os_version': 91,
 'weight': 0,
 'price_euros': 0,
 'gpu_manufacturer': 0,
 'cpu_manufacturer': 0}

In [42]:
# Remove the unit suffix ("kgs" is handled first so the "kg" pass cannot
# leave a stray "s" behind), convert to float, and record the unit in the
# column name instead.
laptops["weight"] = (
    laptops["weight"]
    .str.replace("kgs", "")
    .str.replace("kg", "")
    .astype(float)
)
laptops.rename(columns={"weight": "weight_kg"}, inplace=True)

In [44]:
# Swap commas for decimal points so the price strings can be parsed as floats.
laptops["price_euros"] = (
    laptops["price_euros"]
    .str.replace(",", ".")
    .astype(float)
)

In [51]:
# The resolution is taken as the last whitespace-separated token of the
# screen description.
laptops["screen_res"] = laptops["screen"].str.split().str.get(-1)

screen_res_counts = laptops["screen_res"].value_counts()
print(screen_res_counts)

1920x1080    841
1366x768     308
3840x2160     43
3200x1800     27
1600x900      23
2560x1440     23
2256x1504      6
2560x1600      6
2304x1440      6
1920x1200      5
1440x900       4
2880x1800      4
2400x1600      4
2160x1440      2
2736x1824      1
Name: screen_res, dtype: int64


In [55]:
# The clock speed is taken as the last whitespace-separated token of the
# CPU description; it is kept as a string (e.g. with its "GHz" suffix).
laptops["cpu_speed"] = laptops["cpu"].str.split().str.get(-1)

cpu_speed_counts = laptops["cpu_speed"].value_counts()
print(cpu_speed_counts)

2.5GHz     290
2.8GHz     165
2.7GHz     165
1.6GHz     133
2.3GHz      86
1.8GHz      78
2.6GHz      76
2GHz        67
1.1GHz      53
2.4GHz      52
2.9GHz      21
2.0GHz      19
3GHz        19
1.2GHz      15
1.44GHz     12
2.2GHz      11
1.5GHz      10
1.3GHz       6
3.6GHz       5
0.9GHz       4
2.50GHz      3
3.1GHz       3
2.1GHz       3
1.9GHz       2
1.92GHz      1
1.60GHz      1
1.0GHz       1
2.70GHz      1
3.2GHz       1
Name: cpu_speed, dtype: int64


In [73]:
# Drop the inch mark (") so the size parses as a float, then move the unit
# into the column name.
laptops["screen_size"] = (
    laptops["screen_size"]
    .str.replace('"', "")
    .astype(float)
)
laptops.rename(columns={"screen_size": "screen_size_in"}, inplace=True)

In [127]:
laptops.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1303 entries, 0 to 1302
Data columns (total 17 columns):
manufacturer        1303 non-null object
model_name          1303 non-null object
category            1303 non-null object
screen_size_in      1303 non-null float64
screen              1303 non-null object
cpu                 1303 non-null object
ram_gb              1303 non-null int64
storage             1303 non-null object
gpu                 1303 non-null object
os                  1303 non-null object
os_version          1212 non-null object
weight_kg           1303 non-null float64
price_euros         1303 non-null float64
gpu_manufacturer    1303 non-null object
cpu_manufacturer    1303 non-null object
screen_res          1303 non-null object
cpu_speed           1303 non-null object
dtypes: float64(3), int64(1), object(13)
memory usage: 173.2+ KB


In [75]:
# Write the cleaned frame to disk; index = False leaves the row index out of the file.
laptops.to_csv('laptops_cleaned.csv', index = False)

# High-level Questions on the Dataset

## Are laptops made by Apple more expensive than those made by other manufacturers?

In [76]:
laptops.head()

Unnamed: 0,manufacturer,model_name,category,screen_size_in,screen,cpu,ram_gb,storage,gpu,os,os_version,weight_kg,price_euros,gpu_manufacturer,cpu_manufacturer,screen_res,cpu_speed
0,Apple,MacBook Pro,Ultrabook,13.3,IPS Panel Retina Display 2560x1600,Intel Core i5 2.3GHz,8,128GB SSD,Intel Iris Plus Graphics 640,macOS,X,1.37,1339.69,Intel,Intel,2560x1600,2.3GHz
1,Apple,Macbook Air,Ultrabook,13.3,1440x900,Intel Core i5 1.8GHz,8,128GB Flash Storage,Intel HD Graphics 6000,macOS,X,1.34,898.94,Intel,Intel,1440x900,1.8GHz
2,HP,250 G6,Notebook,15.6,Full HD 1920x1080,Intel Core i5 7200U 2.5GHz,8,256GB SSD,Intel HD Graphics 620,No OS,Version Unknown,1.86,575.0,Intel,Intel,1920x1080,2.5GHz
3,Apple,MacBook Pro,Ultrabook,15.4,IPS Panel Retina Display 2880x1800,Intel Core i7 2.7GHz,16,512GB SSD,AMD Radeon Pro 455,macOS,X,1.83,2537.45,AMD,Intel,2880x1800,2.7GHz
4,Apple,MacBook Pro,Ultrabook,13.3,IPS Panel Retina Display 2560x1600,Intel Core i5 3.1GHz,8,256GB SSD,Intel Iris Plus Graphics 650,macOS,X,1.37,1803.6,Intel,Intel,2560x1600,3.1GHz


In [60]:
# Prices of the rows whose manufacturer is exactly "Apple".
apple_prices = laptops.loc[laptops["manufacturer"].eq("Apple"), "price_euros"]

In [61]:
apple_prices.mean()

1564.1985714285713

In [62]:
apple_prices.median()

1339.69

In [63]:
# Prices of every row whose manufacturer is anything other than "Apple".
non_apple_prices = laptops.loc[laptops["manufacturer"].ne("Apple"), "price_euros"]

In [64]:
non_apple_prices.mean()

1116.4711232449301

In [65]:
non_apple_prices.median()

959.5

## What is the best value laptop with a screen size of 15" or more?

In [116]:
# The question asks for screens of 15 inches "or more", so the bound is
# inclusive (>=); a strict > would drop laptops that are exactly 15 inches.
greater_15 = laptops.loc[laptops["screen_size_in"] >= 15]

In [117]:
# "Best value" is measured here purely by price: order the candidates from
# cheapest to most expensive and keep the first row.
best_value = greater_15.sort_values(by="price_euros").head(1)

In [118]:
best_value

Unnamed: 0,manufacturer,model_name,category,screen_size_in,screen,cpu,ram_gb,storage,gpu,os,os_version,weight_kg,price_euros,gpu_manufacturer,cpu_manufacturer,screen_res,cpu_speed
290,Acer,Chromebook C910-C2ST,Notebook,15.6,1366x768,Intel Celeron Dual Core 3205U 1.5GHz,2,16GB SSD,Intel HD Graphics,Chrome OS,,2.19,199.0,Intel,Intel,1366x768,1.5GHz


## Which laptop has the most storage space?

In [131]:
laptops["storage"].value_counts()

256GB SSD                        412
1TB HDD                          224
500GB HDD                        132
512GB SSD                        118
128GB SSD +  1TB HDD              94
128GB SSD                         76
256GB SSD +  1TB HDD              73
32GB Flash Storage                38
2TB HDD                           16
64GB Flash Storage                15
512GB SSD +  1TB HDD              14
1TB SSD                           14
256GB SSD +  2TB HDD              10
1TB Hybrid                         9
256GB Flash Storage                8
16GB Flash Storage                 7
32GB SSD                           6
1GB SSD                            5
128GB Flash Storage                4
16GB SSD                           3
512GB SSD +  2TB HDD               3
1TB SSD +  1TB HDD                 2
128GB SSD +  2TB HDD               2
256GB SSD +  500GB HDD             2
512GB Flash Storage                2
256GB SSD +  256GB SSD             2
256GB SSD +  1TB Hybrid            1
1

In [134]:
# Storage configuration treated as the largest. It is a hard-coded literal,
# not computed from the data.
# NOTE(review): presumably picked by eye from the storage value counts —
# confirm no larger configuration exists. The double space after "+" is
# how the raw values are written, so it must be kept for the match to work.
largest_stor = "512GB SSD +  2TB HDD"

In [135]:
laptops.loc[laptops["storage"] == largest_stor]

Unnamed: 0,manufacturer,model_name,category,screen_size_in,screen,cpu,ram_gb,storage,gpu,os,os_version,weight_kg,price_euros,gpu_manufacturer,cpu_manufacturer,screen_res,cpu_speed
370,Asus,ZenBook Flip,2 in 1 Convertible,15.6,Full HD / Touchscreen 1920x1080,Intel Core i7 7500U 2.7GHz,12,512GB SSD + 2TB HDD,Nvidia GeForce GT 940MX,Windows,10,2.26,1099.0,Nvidia,Intel,1920x1080,2.7GHz
894,MSI,GS73VR Stealth,Gaming,17.3,IPS Panel 4K Ultra HD 3840x2160,Intel Core i7 6700HQ 2.6GHz,16,512GB SSD + 2TB HDD,Nvidia GeForce GTX 1060,Windows,10,2.43,2649.0,Nvidia,Intel,3840x2160,2.6GHz
977,Asus,Q534UX-BHI7T19 (i7-7500U/16GB/2TB,2 in 1 Convertible,15.6,4K Ultra HD / Touchscreen 3840x2160,Intel Core i7 7500U 2.7GHz,16,512GB SSD + 2TB HDD,Nvidia GeForce GTX 950M,Windows,10,2.3,1799.0,Nvidia,Intel,3840x2160,2.7GHz
