# Laptop_Price_Analysis
[Dataset](https://www.kaggle.com/datasets/aemyjutt/laptop-price-analysis)

In [193]:
import pickle
import os
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score

In [194]:
df = pd.read_csv('laptops_train.csv')

In [195]:
df.head()

Unnamed: 0,Manufacturer,Model Name,Category,Screen Size,Screen,CPU,RAM,Storage,GPU,Operating System,Operating System Version,Weight,Price
0,Apple,MacBook Pro,Ultrabook,"13.3""",IPS Panel Retina Display 2560x1600,Intel Core i5 2.3GHz,8GB,128GB SSD,Intel Iris Plus Graphics 640,macOS,,1.37kg,11912523.48
1,Apple,Macbook Air,Ultrabook,"13.3""",1440x900,Intel Core i5 1.8GHz,8GB,128GB Flash Storage,Intel HD Graphics 6000,macOS,,1.34kg,7993374.48
2,HP,250 G6,Notebook,"15.6""",Full HD 1920x1080,Intel Core i5 7200U 2.5GHz,8GB,256GB SSD,Intel HD Graphics 620,No OS,,1.86kg,5112900.0
3,Apple,MacBook Pro,Ultrabook,"15.4""",IPS Panel Retina Display 2880x1800,Intel Core i7 2.7GHz,16GB,512GB SSD,AMD Radeon Pro 455,macOS,,1.83kg,22563005.4
4,Apple,MacBook Pro,Ultrabook,"13.3""",IPS Panel Retina Display 2560x1600,Intel Core i5 3.1GHz,8GB,256GB SSD,Intel Iris Plus Graphics 650,macOS,,1.37kg,16037611.2


In [196]:
df.Manufacturer.unique()

array(['Apple', 'HP', 'Acer', 'Asus', 'Dell', 'Lenovo', 'Chuwi', 'MSI',
       'Microsoft', 'Toshiba', 'Huawei', 'Xiaomi', 'Vero', 'Razer',
       'Mediacom', 'Samsung', 'Google', 'Fujitsu', 'LG'], dtype=object)

In [197]:
df['Screen Size'] = df['Screen Size'].apply(lambda x: x.replace('"', '')).astype(float)

In [198]:
df.Category.unique()

array(['Ultrabook', 'Notebook', 'Netbook', 'Gaming', '2 in 1 Convertible',
       'Workstation'], dtype=object)

In [199]:
df.Screen.unique()

array(['IPS Panel Retina Display 2560x1600', '1440x900',
       'Full HD 1920x1080', 'IPS Panel Retina Display 2880x1800',
       '1366x768', 'IPS Panel Full HD 1920x1080',
       'IPS Panel Retina Display 2304x1440',
       'IPS Panel Full HD / Touchscreen 1920x1080',
       'Full HD / Touchscreen 1920x1080',
       'Touchscreen / Quad HD+ 3200x1800',
       'IPS Panel Touchscreen 1920x1200', 'Touchscreen 2256x1504',
       'Quad HD+ / Touchscreen 3200x1800', 'IPS Panel 1366x768',
       'IPS Panel 4K Ultra HD / Touchscreen 3840x2160',
       'IPS Panel Full HD 2160x1440',
       '4K Ultra HD / Touchscreen 3840x2160', 'Touchscreen 2560x1440',
       '1600x900', 'IPS Panel 4K Ultra HD 3840x2160',
       '4K Ultra HD 3840x2160', 'Touchscreen 1366x768',
       'IPS Panel Full HD 1366x768', 'IPS Panel 2560x1440',
       'IPS Panel Full HD 2560x1440',
       'IPS Panel Retina Display 2736x1824', 'Touchscreen 2400x1600',
       '2560x1440', 'IPS Panel Quad HD+ 2560x1440',
       'IPS Panel 

In [200]:
df['Screen_resolution'] = df.Screen.str.findall('\d+x\d+').apply(lambda x: x[0])

In [201]:
for i in ['IPS Panel', 'Retina Display', 'Touchscreen', 'Quad HD+']:
    df[i] = df.Screen.apply(lambda x: i in x).astype(int)

In [202]:
df.CPU.unique()

array(['Intel Core i5 2.3GHz', 'Intel Core i5 1.8GHz',
       'Intel Core i5 7200U 2.5GHz', 'Intel Core i7 2.7GHz',
       'Intel Core i5 3.1GHz', 'AMD A9-Series 9420 3GHz',
       'Intel Core i7 2.2GHz', 'Intel Core i7 8550U 1.8GHz',
       'Intel Core i5 8250U 1.6GHz', 'Intel Core i3 6006U 2GHz',
       'Intel Core i7 2.8GHz', 'Intel Core M m3 1.2GHz',
       'Intel Core i7 7500U 2.7GHz', 'Intel Core i7 2.9GHz',
       'Intel Core i3 7100U 2.4GHz', 'Intel Atom x5-Z8350 1.44GHz',
       'Intel Core i5 7300HQ 2.5GHz', 'AMD E-Series E2-9000e 1.5GHz',
       'Intel Core i5 1.6GHz', 'Intel Core i7 8650U 1.9GHz',
       'Intel Atom x5-Z8300 1.44GHz', 'AMD E-Series E2-6110 1.5GHz',
       'AMD A6-Series 9220 2.5GHz',
       'Intel Celeron Dual Core N3350 1.1GHz',
       'Intel Core i3 7130U 2.7GHz', 'Intel Core i7 7700HQ 2.8GHz',
       'Intel Core i5 2.0GHz', 'AMD Ryzen 1700 3GHz',
       'Intel Pentium Quad Core N4200 1.1GHz',
       'Intel Atom x5-Z8550 1.44GHz',
       'Intel Celeron Du

In [203]:
df['GHz'] = df.CPU.str.findall('([\.\d]+)GHz').apply(lambda x: x[0]).astype(float)

In [204]:
df['GHz'].unique()

array([2.3 , 1.8 , 2.5 , 2.7 , 3.1 , 3.  , 2.2 , 1.6 , 2.  , 2.8 , 1.2 ,
       2.9 , 2.4 , 1.44, 1.5 , 1.9 , 1.1 , 1.3 , 2.6 , 3.6 , 3.2 , 1.  ,
       2.1 , 0.9 , 1.92])

In [205]:
for i in ['Intel', 'AMD']:
    df[i] = df.CPU.str.contains(i).astype(int)

In [206]:
df.head(3)

Unnamed: 0,Manufacturer,Model Name,Category,Screen Size,Screen,CPU,RAM,Storage,GPU,Operating System,...,Weight,Price,Screen_resolution,IPS Panel,Retina Display,Touchscreen,Quad HD+,GHz,Intel,AMD
0,Apple,MacBook Pro,Ultrabook,13.3,IPS Panel Retina Display 2560x1600,Intel Core i5 2.3GHz,8GB,128GB SSD,Intel Iris Plus Graphics 640,macOS,...,1.37kg,11912523.48,2560x1600,1,1,0,0,2.3,1,0
1,Apple,Macbook Air,Ultrabook,13.3,1440x900,Intel Core i5 1.8GHz,8GB,128GB Flash Storage,Intel HD Graphics 6000,macOS,...,1.34kg,7993374.48,1440x900,0,0,0,0,1.8,1,0
2,HP,250 G6,Notebook,15.6,Full HD 1920x1080,Intel Core i5 7200U 2.5GHz,8GB,256GB SSD,Intel HD Graphics 620,No OS,...,1.86kg,5112900.0,1920x1080,0,0,0,0,2.5,1,0


In [207]:
df['RAM'] = df.RAM.apply(lambda x: x.replace('GB', '')).astype(int)

In [208]:
df.Storage.unique()

array(['128GB SSD', '128GB Flash Storage', '256GB SSD', '512GB SSD',
       '500GB HDD', '256GB Flash Storage', '1TB HDD',
       '32GB Flash Storage', '128GB SSD +  1TB HDD',
       '256GB SSD +  256GB SSD', '64GB Flash Storage',
       '256GB SSD +  1TB HDD', '256GB SSD +  2TB HDD', '32GB SSD',
       '2TB HDD', '64GB SSD', '1TB Hybrid', '512GB SSD +  1TB HDD',
       '1TB SSD', '256GB SSD +  500GB HDD', '128GB SSD +  2TB HDD',
       '512GB SSD +  512GB SSD', '16GB SSD', '16GB Flash Storage',
       '512GB SSD +  256GB SSD', '512GB SSD +  2TB HDD',
       '64GB Flash Storage +  1TB HDD', '1GB SSD', '1TB HDD +  1TB HDD',
       '32GB HDD', '1TB SSD +  1TB HDD', '512GB Flash Storage',
       '128GB HDD', '240GB SSD', '8GB SSD', '508GB Hybrid'], dtype=object)

In [209]:
def func(x):
    if len(x) == 1:
        x = x + ['NaN']
        return x
    return x

df[['Storage_Size', 'Storage_Size2']] = df.Storage.str.findall('\d+[TG]B').apply(func).to_list()

In [210]:
df.head(3)

Unnamed: 0,Manufacturer,Model Name,Category,Screen Size,Screen,CPU,RAM,Storage,GPU,Operating System,...,Screen_resolution,IPS Panel,Retina Display,Touchscreen,Quad HD+,GHz,Intel,AMD,Storage_Size,Storage_Size2
0,Apple,MacBook Pro,Ultrabook,13.3,IPS Panel Retina Display 2560x1600,Intel Core i5 2.3GHz,8,128GB SSD,Intel Iris Plus Graphics 640,macOS,...,2560x1600,1,1,0,0,2.3,1,0,128GB,
1,Apple,Macbook Air,Ultrabook,13.3,1440x900,Intel Core i5 1.8GHz,8,128GB Flash Storage,Intel HD Graphics 6000,macOS,...,1440x900,0,0,0,0,1.8,1,0,128GB,
2,HP,250 G6,Notebook,15.6,Full HD 1920x1080,Intel Core i5 7200U 2.5GHz,8,256GB SSD,Intel HD Graphics 620,No OS,...,1920x1080,0,0,0,0,2.5,1,0,256GB,


In [211]:
df[['Storage_Type', 'Storage_Type2']] = df.Storage.str.findall('\d+[GT]B ([\w\s]+)').apply(func).to_list()

In [212]:
df['Price'] = df['Price'].astype(str).str[:6].astype(float)*0.3

In [213]:
df.columns

Index(['Manufacturer', 'Model Name', 'Category', 'Screen Size', 'Screen',
       'CPU', 'RAM', 'Storage', 'GPU', 'Operating System',
       'Operating System Version', 'Weight', 'Price', 'Screen_resolution',
       'IPS Panel', 'Retina Display', 'Touchscreen', 'Quad HD+', 'GHz',
       'Intel', 'AMD', 'Storage_Size', 'Storage_Size2', 'Storage_Type',
       'Storage_Type2'],
      dtype='object')

In [214]:
df.head(3)

Unnamed: 0,Manufacturer,Model Name,Category,Screen Size,Screen,CPU,RAM,Storage,GPU,Operating System,...,Retina Display,Touchscreen,Quad HD+,GHz,Intel,AMD,Storage_Size,Storage_Size2,Storage_Type,Storage_Type2
0,Apple,MacBook Pro,Ultrabook,13.3,IPS Panel Retina Display 2560x1600,Intel Core i5 2.3GHz,8,128GB SSD,Intel Iris Plus Graphics 640,macOS,...,1,0,0,2.3,1,0,128GB,,SSD,
1,Apple,Macbook Air,Ultrabook,13.3,1440x900,Intel Core i5 1.8GHz,8,128GB Flash Storage,Intel HD Graphics 6000,macOS,...,0,0,0,1.8,1,0,128GB,,Flash Storage,
2,HP,250 G6,Notebook,15.6,Full HD 1920x1080,Intel Core i5 7200U 2.5GHz,8,256GB SSD,Intel HD Graphics 620,No OS,...,0,0,0,2.5,1,0,256GB,,SSD,


In [215]:
df.GPU.unique()

array(['Intel Iris Plus Graphics 640', 'Intel HD Graphics 6000',
       'Intel HD Graphics 620', 'AMD Radeon Pro 455',
       'Intel Iris Plus Graphics 650', 'AMD Radeon R5',
       'Intel Iris Pro Graphics', 'Nvidia GeForce MX150',
       'Intel UHD Graphics 620', 'Intel HD Graphics 520',
       'AMD Radeon Pro 555', 'AMD Radeon R5 M430',
       'Intel HD Graphics 615', 'AMD Radeon Pro 560',
       'Nvidia GeForce 940MX', 'Intel HD Graphics 400',
       'Nvidia GeForce GTX 1050', 'AMD Radeon R2', 'AMD Radeon 530',
       'Nvidia GeForce 930MX', 'Intel HD Graphics',
       'Intel HD Graphics 500', 'Nvidia GeForce 930MX ',
       'Nvidia GeForce GTX 1060', 'Nvidia GeForce 150MX',
       'Intel Iris Graphics 540', 'AMD Radeon RX 580',
       'Nvidia GeForce 920MX', 'AMD Radeon R4 Graphics', 'AMD Radeon 520',
       'Nvidia GeForce GTX 1070', 'Nvidia GeForce GTX 1050 Ti',
       'Nvidia GeForce MX130', 'AMD R4 Graphics',
       'Nvidia GeForce GTX 940MX', 'AMD Radeon RX 560',
       'Nvid

In [216]:
df['Operating System'].unique()

array(['macOS', 'No OS', 'Windows', 'Mac OS', 'Linux', 'Android',
       'Chrome OS'], dtype=object)

In [217]:
df['Operating System Version'].unique()

array([nan, '10', 'X', '10 S', '7'], dtype=object)

In [218]:
Os = []
for i in df[['Operating System', 'Operating System Version']].values:
    version = '' if type(i[1]) == float else ' - '+ i[1]
    Os.append(f'{i[0].strip()}{version}')
df['Os'] = Os

In [219]:
df['Weight_KG'] = df.Weight.apply(lambda x: x.replace('kg', '')).astype(float)

In [220]:
df.Price = df.Price.astype(float)

In [221]:
df.columns

Index(['Manufacturer', 'Model Name', 'Category', 'Screen Size', 'Screen',
       'CPU', 'RAM', 'Storage', 'GPU', 'Operating System',
       'Operating System Version', 'Weight', 'Price', 'Screen_resolution',
       'IPS Panel', 'Retina Display', 'Touchscreen', 'Quad HD+', 'GHz',
       'Intel', 'AMD', 'Storage_Size', 'Storage_Size2', 'Storage_Type',
       'Storage_Type2', 'Os', 'Weight_KG'],
      dtype='object')

In [222]:
df.columns = ['Manufacturer', 'Model_Name', 'Category', 'Screen_Size', 'Screen',
       'CPU', 'RAM', 'Storage', 'GPU', 'Operating_System',
       'Operating_System_Version', 'Weight', 'Price', 'Screen_resolution',
       'IPS_Panel', 'Retina_Display', 'Touchscreen', 'Quad_HD_plus', 'GHz',
       'Intel', 'AMD', 'Storage_Size', 'Storage_Size2', 'Storage_Type',
       'Storage_Type2', 'os', 'Weight_KG']

In [223]:
df.head(2)

Unnamed: 0,Manufacturer,Model_Name,Category,Screen_Size,Screen,CPU,RAM,Storage,GPU,Operating_System,...,Quad_HD_plus,GHz,Intel,AMD,Storage_Size,Storage_Size2,Storage_Type,Storage_Type2,os,Weight_KG
0,Apple,MacBook Pro,Ultrabook,13.3,IPS Panel Retina Display 2560x1600,Intel Core i5 2.3GHz,8,128GB SSD,Intel Iris Plus Graphics 640,macOS,...,0,2.3,1,0,128GB,,SSD,,macOS,1.37
1,Apple,Macbook Air,Ultrabook,13.3,1440x900,Intel Core i5 1.8GHz,8,128GB Flash Storage,Intel HD Graphics 6000,macOS,...,0,1.8,1,0,128GB,,Flash Storage,,macOS,1.34


In [224]:
df1 = df[['Manufacturer', 'Category', 'Screen_Size',
        'RAM', 'GPU', 'Weight', 'Screen_resolution',
       'IPS_Panel', 'Retina_Display', 'Touchscreen', 'Quad_HD_plus', 'GHz',
       'Intel', 'AMD', 'Storage_Size', 'Storage_Size2', 'Storage_Type',
       'Storage_Type2', 'os', 'Weight_KG', 'Price']]

In [225]:
df1.to_csv('clean_dataset.csv', index=False)

In [226]:
df1.head()

Unnamed: 0,Manufacturer,Category,Screen_Size,RAM,GPU,Weight,Screen_resolution,IPS_Panel,Retina_Display,Touchscreen,...,GHz,Intel,AMD,Storage_Size,Storage_Size2,Storage_Type,Storage_Type2,os,Weight_KG,Price
0,Apple,Ultrabook,13.3,8,Intel Iris Plus Graphics 640,1.37kg,2560x1600,1,1,0,...,2.3,1,0,128GB,,SSD,,macOS,1.37,35737.5
1,Apple,Ultrabook,13.3,8,Intel HD Graphics 6000,1.34kg,1440x900,0,0,0,...,1.8,1,0,128GB,,Flash Storage,,macOS,1.34,239801.1
2,HP,Notebook,15.6,8,Intel HD Graphics 620,1.86kg,1920x1080,0,0,0,...,2.5,1,0,256GB,,SSD,,No OS,1.86,153387.0
3,Apple,Ultrabook,15.4,16,AMD Radeon Pro 455,1.83kg,2880x1800,1,1,0,...,2.7,1,0,512GB,,SSD,,macOS,1.83,67689.0
4,Apple,Ultrabook,13.3,8,Intel Iris Plus Graphics 650,1.37kg,2560x1600,1,1,0,...,3.1,1,0,256GB,,SSD,,macOS,1.37,48112.8


In [227]:
def train_preprocessing(df):
    y = df['Price']
    
    df = df[['Manufacturer', 'Category', 'Screen_Size',
             'RAM', 'GPU', 'Weight', 'Screen_resolution',
             'IPS_Panel', 'Retina_Display', 'Touchscreen', 
             'Quad_HD_plus', 'GHz', 'Intel', 'AMD', 'Storage_Size', 
             'Storage_Size2', 'Storage_Type', 'Storage_Type2', 'os', 
             'Weight_KG']]
    
    
    
    df1 = df.select_dtypes('object')
    df2 = df.select_dtypes(['int', 'float'])

    encoders = {}
    
    for i in df1.columns:
        encoder = LabelEncoder()
        df1[i] = encoder.fit_transform(df1[i])
        encoders[i] = encoder

    x = pd.concat([df1, df2], axis=1)[['Manufacturer', 'Category', 'Screen_Size',
             'RAM', 'GPU', 'Weight', 'Screen_resolution',
             'IPS_Panel', 'Retina_Display', 'Touchscreen', 
             'Quad_HD_plus', 'GHz', 'Intel', 'AMD', 'Storage_Size', 
             'Storage_Size2', 'Storage_Type', 'Storage_Type2', 'os', 
             'Weight_KG']]
    
    return x, y, encoders

In [228]:
df1.columns

Index(['Manufacturer', 'Category', 'Screen_Size', 'RAM', 'GPU', 'Weight',
       'Screen_resolution', 'IPS_Panel', 'Retina_Display', 'Touchscreen',
       'Quad_HD_plus', 'GHz', 'Intel', 'AMD', 'Storage_Size', 'Storage_Size2',
       'Storage_Type', 'Storage_Type2', 'os', 'Weight_KG', 'Price'],
      dtype='object')

In [229]:
x, y, encoders = train_preprocessing(df1)

In [230]:
x

Unnamed: 0,Manufacturer,Category,Screen_Size,RAM,GPU,Weight,Screen_resolution,IPS_Panel,Retina_Display,Touchscreen,Quad_HD_plus,GHz,Intel,AMD,Storage_Size,Storage_Size2,Storage_Type,Storage_Type2,os,Weight_KG
0,1,4,13.3,8,51,36,10,1,1,0,0,2.3,1,0,0,5,5,1,8,1.37
1,1,4,13.3,8,45,33,1,0,0,0,0,1.8,1,0,0,5,0,1,8,1.34
2,7,3,15.6,8,47,72,3,0,0,0,0,2.5,1,0,5,5,5,1,4,1.86
3,1,4,15.4,16,7,69,12,1,1,0,0,2.7,1,0,10,5,5,1,8,1.83
4,1,4,13.3,8,52,36,10,1,1,0,0,3.1,1,0,5,5,5,1,8,1.37
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
972,4,1,17.3,32,72,161,3,0,0,0,0,2.6,1,0,5,0,6,0,5,4.42
973,16,3,14.0,8,41,81,3,0,0,0,0,2.3,1,0,5,5,5,1,5,1.95
974,2,1,17.3,16,71,131,3,0,0,0,0,2.8,1,0,5,0,6,0,5,2.73
975,7,3,15.6,8,62,88,3,1,0,0,0,2.7,1,0,0,0,6,0,5,2.04


In [231]:
y

0       35737.5
1      239801.1
2      153387.0
3       67689.0
4       48112.8
         ...   
972     74692.8
973     31477.5
974     54683.1
975    261157.8
976    267293.4
Name: Price, Length: 977, dtype: float64

In [232]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2)

In [233]:
model = DecisionTreeRegressor()
model.fit(x_train, y_train)
model.score(x_test, y_test)

-0.1261573034436856

In [234]:
model = RandomForestRegressor(n_estimators=200)
model.fit(x_train, y_train)
model.score(x_test, y_test)

0.32141287584783196

In [235]:
if not os.path.exists('pickle_files'):
    print('pickle_files not exist')
    os.mkdir('pickle_files')
else:
    print('pickle_files exist')

pickle_files exist


In [236]:
with open('pickle_files/encoders.pkl', 'wb') as f:
    pickle.dump(encoders, f)

with open('pickle_files/model.pkl', 'wb') as f:
    pickle.dump(model, f)

In [237]:
with open('pickle_files/encoders.pkl', 'rb') as f:
    encoders = pickle.load(f)

with open('pickle_files/model.pkl', 'rb') as f:
    model = pickle.load(f)

In [238]:
def preprocessing(df, encoders):
    df = df[['Manufacturer', 'Category', 'Screen_Size',
             'RAM', 'GPU', 'Weight', 'Screen_resolution',
             'IPS_Panel', 'Retina_Display', 'Touchscreen', 
             'Quad_HD_plus', 'GHz', 'Intel', 'AMD', 'Storage_Size', 
             'Storage_Size2', 'Storage_Type', 'Storage_Type2', 'os', 
             'Weight_KG']]
    
    df1 = df.select_dtypes('object')
    df2 = df.select_dtypes(['int', 'float'])

    for i in df1.columns:
        df1[i] = encoders[i].transform(df1[i])

    x = pd.concat([df1, df2], axis=1)[['Manufacturer', 'Category', 'Screen_Size',
             'RAM', 'GPU', 'Weight', 'Screen_resolution',
             'IPS_Panel', 'Retina_Display', 'Touchscreen', 
             'Quad_HD_plus', 'GHz', 'Intel', 'AMD', 'Storage_Size', 
             'Storage_Size2', 'Storage_Type', 'Storage_Type2', 'os', 
             'Weight_KG']]

    return x

In [239]:
x = preprocessing(df1, encoders)

In [240]:
prediction = model.predict(x)

In [241]:
r2_score(df1['Price'], prediction)

0.7906002443022808

In [242]:
d = {}
for i in df1.columns:
    d[i] = str(df1[i].dtype)
d

{'Manufacturer': 'object',
 'Category': 'object',
 'Screen_Size': 'float64',
 'RAM': 'int64',
 'GPU': 'object',
 'Weight': 'object',
 'Screen_resolution': 'object',
 'IPS_Panel': 'int64',
 'Retina_Display': 'int64',
 'Touchscreen': 'int64',
 'Quad_HD_plus': 'int64',
 'GHz': 'float64',
 'Intel': 'int64',
 'AMD': 'int64',
 'Storage_Size': 'object',
 'Storage_Size2': 'object',
 'Storage_Type': 'object',
 'Storage_Type2': 'object',
 'os': 'object',
 'Weight_KG': 'float64',
 'Price': 'float64'}

In [243]:
uniques = {}
for i in df1.columns:
    if df1[i].dtype == 'O':
        uniques[i] = list(df1[i].unique())
uniques

{'Manufacturer': ['Apple',
  'HP',
  'Acer',
  'Asus',
  'Dell',
  'Lenovo',
  'Chuwi',
  'MSI',
  'Microsoft',
  'Toshiba',
  'Huawei',
  'Xiaomi',
  'Vero',
  'Razer',
  'Mediacom',
  'Samsung',
  'Google',
  'Fujitsu',
  'LG'],
 'Category': ['Ultrabook',
  'Notebook',
  'Netbook',
  'Gaming',
  '2 in 1 Convertible',
  'Workstation'],
 'GPU': ['Intel Iris Plus Graphics 640',
  'Intel HD Graphics 6000',
  'Intel HD Graphics 620',
  'AMD Radeon Pro 455',
  'Intel Iris Plus Graphics 650',
  'AMD Radeon R5',
  'Intel Iris Pro Graphics',
  'Nvidia GeForce MX150',
  'Intel UHD Graphics 620',
  'Intel HD Graphics 520',
  'AMD Radeon Pro 555',
  'AMD Radeon R5 M430',
  'Intel HD Graphics 615',
  'AMD Radeon Pro 560',
  'Nvidia GeForce 940MX',
  'Intel HD Graphics 400',
  'Nvidia GeForce GTX 1050',
  'AMD Radeon R2',
  'AMD Radeon 530',
  'Nvidia GeForce 930MX',
  'Intel HD Graphics',
  'Intel HD Graphics 500',
  'Nvidia GeForce 930MX ',
  'Nvidia GeForce GTX 1060',
  'Nvidia GeForce 150MX',


In [244]:
import json

In [245]:
with open('uniques.json', 'w') as f:
    json.dump(uniques, f, indent=4)

In [246]:
with open('uniques.json', 'r') as f:
    print(json.load(f))

{'Manufacturer': ['Apple', 'HP', 'Acer', 'Asus', 'Dell', 'Lenovo', 'Chuwi', 'MSI', 'Microsoft', 'Toshiba', 'Huawei', 'Xiaomi', 'Vero', 'Razer', 'Mediacom', 'Samsung', 'Google', 'Fujitsu', 'LG'], 'Category': ['Ultrabook', 'Notebook', 'Netbook', 'Gaming', '2 in 1 Convertible', 'Workstation'], 'GPU': ['Intel Iris Plus Graphics 640', 'Intel HD Graphics 6000', 'Intel HD Graphics 620', 'AMD Radeon Pro 455', 'Intel Iris Plus Graphics 650', 'AMD Radeon R5', 'Intel Iris Pro Graphics', 'Nvidia GeForce MX150', 'Intel UHD Graphics 620', 'Intel HD Graphics 520', 'AMD Radeon Pro 555', 'AMD Radeon R5 M430', 'Intel HD Graphics 615', 'AMD Radeon Pro 560', 'Nvidia GeForce 940MX', 'Intel HD Graphics 400', 'Nvidia GeForce GTX 1050', 'AMD Radeon R2', 'AMD Radeon 530', 'Nvidia GeForce 930MX', 'Intel HD Graphics', 'Intel HD Graphics 500', 'Nvidia GeForce 930MX ', 'Nvidia GeForce GTX 1060', 'Nvidia GeForce 150MX', 'Intel Iris Graphics 540', 'AMD Radeon RX 580', 'Nvidia GeForce 920MX', 'AMD Radeon R4 Graphics'

In [247]:
df.iloc[0]

Manufacturer                                             Apple
Model_Name                                         MacBook Pro
Category                                             Ultrabook
Screen_Size                                               13.3
Screen                      IPS Panel Retina Display 2560x1600
CPU                                       Intel Core i5 2.3GHz
RAM                                                          8
Storage                                              128GB SSD
GPU                               Intel Iris Plus Graphics 640
Operating_System                                         macOS
Operating_System_Version                                   NaN
Weight                                                  1.37kg
Price                                                  35737.5
Screen_resolution                                    2560x1600
IPS_Panel                                                    1
Retina_Display                                         

In [248]:
type(df1.iloc[0].to_list()[15])

str

In [250]:
kg = [
        "1.37kg",
        "1.34kg",
        "1.86kg",
        "1.83kg",
        "2.1kg",
        "2.04kg",
        "1.3kg",
        "1.6kg",
        "2.2kg",
        "0.92kg",
        "1.22kg",
        "0.98kg",
        "2.5kg",
        "1.62kg",
        "1.91kg",
        "2.3kg",
        "1.35kg",
        "1.88kg",
        "1.89kg",
        "1.65kg",
        "2.71kg",
        "1.2kg",
        "1.44kg",
        "2.8kg",
        "2kg",
        "2.65kg",
        "2.77kg",
        "3.2kg",
        "0.69kg",
        "1.49kg",
        "2.4kg",
        "2.13kg",
        "2.43kg",
        "1.7kg",
        "1.4kg",
        "1.8kg",
        "1.9kg",
        "3kg",
        "1.252kg",
        "2.7kg",
        "2.02kg",
        "1.63kg",
        "1.96kg",
        "1.21kg",
        "2.45kg",
        "1.25kg",
        "1.5kg",
        "2.62kg",
        "1.38kg",
        "1.58kg",
        "1.85kg",
        "1.23kg",
        "1.26kg",
        "2.16kg",
        "2.36kg",
        "2.05kg",
        "1.32kg",
        "1.75kg",
        "0.97kg",
        "2.9kg",
        "2.56kg",
        "1.48kg",
        "1.74kg",
        "1.1kg",
        "1.56kg",
        "2.03kg",
        "1.05kg",
        "4.4kg",
        "1.90kg",
        "1.29kg",
        "2.0kg",
        "1.95kg",
        "2.06kg",
        "1.12kg",
        "1.42kg",
        "3.49kg",
        "3.35kg",
        "2.23kg",
        "4.42kg",
        "2.69kg",
        "2.37kg",
        "4.7kg",
        "3.6kg",
        "2.08kg",
        "4.3kg",
        "1.68kg",
        "1.41kg",
        "4.14kg",
        "2.18kg",
        "2.24kg",
        "2.67kg",
        "2.14kg",
        "1.36kg",
        "2.25kg",
        "2.15kg",
        "2.19kg",
        "2.54kg",
        "3.42kg",
        "1.28kg",
        "2.33kg",
        "1.45kg",
        "2.79kg",
        "1.84kg",
        "2.6kg",
        "2.26kg",
        "3.25kg",
        "1.59kg",
        "1.13kg",
        "1.78kg",
        "1.10kg",
        "1.15kg",
        "1.27kg",
        "1.43kg",
        "2.31kg",
        "1.16kg",
        "1.64kg",
        "2.17kg",
        "1.47kg",
        "3.78kg",
        "1.79kg",
        "0.91kg",
        "1.99kg",
        "4.33kg",
        "1.93kg",
        "1.87kg",
        "2.63kg",
        "3.4kg",
        "3.14kg",
        "1.94kg",
        "1.24kg",
        "4.6kg",
        "4.5kg",
        "2.73kg",
        "1.39kg",
        "2.29kg",
        "2.59kg",
        "2.94kg",
        "1.14kg",
        "3.8kg",
        "3.31kg",
        "1.09kg",
        "3.21kg",
        "1.19kg",
        "1.98kg",
        "1.17kg",
        "4.36kg",
        "1.71kg",
        "2.32kg",
        "4.2kg",
        "1.55kg",
        "0.81kg",
        "1.18kg",
        "2.72kg",
        "1.31kg",
        "0.920kg",
        "3.74kg",
        "1.76kg",
        "1.54kg",
        "2.83kg",
        "2.07kg",
        "2.38kg",
        "3.58kg",
        "1.08kg",
        "2.20kg",
        "2.75kg",
        "1.70kg"]

In [253]:
temp = []
for i in kg:
    temp.append([i, float(i.replace('kg', ''))])
temp

[['1.37kg', 1.37],
 ['1.34kg', 1.34],
 ['1.86kg', 1.86],
 ['1.83kg', 1.83],
 ['2.1kg', 2.1],
 ['2.04kg', 2.04],
 ['1.3kg', 1.3],
 ['1.6kg', 1.6],
 ['2.2kg', 2.2],
 ['0.92kg', 0.92],
 ['1.22kg', 1.22],
 ['0.98kg', 0.98],
 ['2.5kg', 2.5],
 ['1.62kg', 1.62],
 ['1.91kg', 1.91],
 ['2.3kg', 2.3],
 ['1.35kg', 1.35],
 ['1.88kg', 1.88],
 ['1.89kg', 1.89],
 ['1.65kg', 1.65],
 ['2.71kg', 2.71],
 ['1.2kg', 1.2],
 ['1.44kg', 1.44],
 ['2.8kg', 2.8],
 ['2kg', 2.0],
 ['2.65kg', 2.65],
 ['2.77kg', 2.77],
 ['3.2kg', 3.2],
 ['0.69kg', 0.69],
 ['1.49kg', 1.49],
 ['2.4kg', 2.4],
 ['2.13kg', 2.13],
 ['2.43kg', 2.43],
 ['1.7kg', 1.7],
 ['1.4kg', 1.4],
 ['1.8kg', 1.8],
 ['1.9kg', 1.9],
 ['3kg', 3.0],
 ['1.252kg', 1.252],
 ['2.7kg', 2.7],
 ['2.02kg', 2.02],
 ['1.63kg', 1.63],
 ['1.96kg', 1.96],
 ['1.21kg', 1.21],
 ['2.45kg', 2.45],
 ['1.25kg', 1.25],
 ['1.5kg', 1.5],
 ['2.62kg', 2.62],
 ['1.38kg', 1.38],
 ['1.58kg', 1.58],
 ['1.85kg', 1.85],
 ['1.23kg', 1.23],
 ['1.26kg', 1.26],
 ['2.16kg', 2.16],
 ['2.36kg', 

In [256]:
kg = sorted(temp, key=lambda x:x[1])

In [257]:
[i[0] for i in kg]

['0.69kg',
 '0.81kg',
 '0.91kg',
 '0.92kg',
 '0.920kg',
 '0.97kg',
 '0.98kg',
 '1.05kg',
 '1.08kg',
 '1.09kg',
 '1.1kg',
 '1.10kg',
 '1.12kg',
 '1.13kg',
 '1.14kg',
 '1.15kg',
 '1.16kg',
 '1.17kg',
 '1.18kg',
 '1.19kg',
 '1.2kg',
 '1.21kg',
 '1.22kg',
 '1.23kg',
 '1.24kg',
 '1.25kg',
 '1.252kg',
 '1.26kg',
 '1.27kg',
 '1.28kg',
 '1.29kg',
 '1.3kg',
 '1.31kg',
 '1.32kg',
 '1.34kg',
 '1.35kg',
 '1.36kg',
 '1.37kg',
 '1.38kg',
 '1.39kg',
 '1.4kg',
 '1.41kg',
 '1.42kg',
 '1.43kg',
 '1.44kg',
 '1.45kg',
 '1.47kg',
 '1.48kg',
 '1.49kg',
 '1.5kg',
 '1.54kg',
 '1.55kg',
 '1.56kg',
 '1.58kg',
 '1.59kg',
 '1.6kg',
 '1.62kg',
 '1.63kg',
 '1.64kg',
 '1.65kg',
 '1.68kg',
 '1.7kg',
 '1.70kg',
 '1.71kg',
 '1.74kg',
 '1.75kg',
 '1.76kg',
 '1.78kg',
 '1.79kg',
 '1.8kg',
 '1.83kg',
 '1.84kg',
 '1.85kg',
 '1.86kg',
 '1.87kg',
 '1.88kg',
 '1.89kg',
 '1.9kg',
 '1.90kg',
 '1.91kg',
 '1.93kg',
 '1.94kg',
 '1.95kg',
 '1.96kg',
 '1.98kg',
 '1.99kg',
 '2kg',
 '2.0kg',
 '2.02kg',
 '2.03kg',
 '2.04kg',
 '2.05kg',