In [1]:
import pandas as pd
import os
import numpy as np
import random
from tqdm import tqdm
from src.preprocessing import remove_stop_words
from src.common import create_final_data, Common

In [67]:
class LaptopAttributes():
    '''
    Class-level pools of laptop attribute values (CPUs, RAM sizes, drive sizes,
    video cards, brand lines, screen resolutions and screen sizes).

    Per the original author's note, the spec data was gathered from PCPartPicker
    and is used to create more laptop data. Everything is stored on the class
    itself, so the containers are shared and no instance is required.
    '''

    # Seed values for the set-based pools.
    video_card = {'GeForce RTX 2070'}
    screen = {'1440x900'}
    inches = {'13.3'}

    # Starts empty; keyed by CPU name.
    cpu = {}

    # '2 GB' ... '128 GB' in steps of 2.
    ram = [f'{size} GB' for size in range(2, 130, 2)]

    # '120 GB' ... '512 GB' in steps of 8, followed by '1 TB' ... '7 TB'.
    hard_drive = [f'{size} GB' for size in range(120, 513, 8)]
    hard_drive += [f'{size} TB' for size in range(1, 8)]

    laptop_brands = [
        # Lenovo
        'Lenovo ThinkPad', 'Lenovo ThinkBook', 'Lenovo IdeaPad', 'Lenovo Yoga', 'Lenovo Legion',
        # HP
        'HP Envy', 'HP Chromebook', 'HP Spectre', 'HP ZBook', 'HP Probook', 'HP Elitebook',
        'HP Pavilion', 'HP Omen',
        # Dell
        'Dell Alienware', 'Dell Vostro', 'Dell Inspiron', 'Dell Latitude', 'Dell XPS',
        'Dell G Series', 'Dell Precision',
        # Apple
        'Apple Macbook', 'Apple Macbook Air', 'Apple Mac',
        # Acer
        'Acer Aspire', 'Acer Swift', 'Acer Spin', 'Acer Switch', 'Acer Extensa',
        'Acer Travelmate', 'Acer Nitro', 'Acer Enduro', 'Acer Predator',
        # Asus
        'Asus ZenBook', 'Asus Vivobook', 'Asus Republic of Gamers', 'Asus ROG', 'Asus TUF GAMING',
    ]

    @staticmethod
    def get_all_data():
        '''
        Return a dict with the keys 'cpu', 'ram', 'hard_drive', 'video_card' and
        'laptop_brands'.

        'cpu' is the live key view of the cpu dict; the other values are the class
        containers themselves (not copies). 'screen' and 'inches' are not included.
        '''
        attrs = LaptopAttributes
        return dict(
            cpu=attrs.cpu.keys(),
            ram=attrs.ram,
            hard_drive=attrs.hard_drive,
            video_card=attrs.video_card,
            laptop_brands=attrs.laptop_brands,
        )

In [68]:
def populate_spec(cpu_path='data/train/cpu_data.csv',
                  video_card_path='data/train/video-cards-data.csv',
                  laptops_path='data/train/laptops.csv'):
    '''
    Fills the class-level pools on LaptopAttributes from three CSV files.

    Parameters (defaults are the paths this notebook has always used):
        cpu_path: CSV with columns 'name', 'cores' and 'core_clock'.
            Each row becomes LaptopAttributes.cpu[name] = [cores, core_clock].
        video_card_path: CSV with a 'chipset' column; every value is added to
            LaptopAttributes.video_card.
        laptops_path: latin-1 encoded CSV with columns 'Inches',
            'ScreenResolution', 'Gpu', 'Company' and 'Cpu'.

    Side effects:
        Mutates LaptopAttributes.cpu, .video_card, .inches and .screen in place.
        Returns None. Calling it again with the same files re-applies the same
        set/dict updates.

    Missing (NaN) cells are skipped instead of being inserted as NaN / 'nan'
    entries or crashing on a missing 'Cpu' string.
    '''

    # Getting the CPU data into LaptopAttributes: name -> [cores, core_clock].
    # Iterate the columns directly rather than building a Series per row via iloc.
    cpu_df = pd.read_csv(cpu_path).dropna(subset=['name'])
    for name, cores, core_clock in zip(cpu_df['name'], cpu_df['cores'], cpu_df['core_clock']):
        LaptopAttributes.cpu[name] = [cores, core_clock]

    # Getting the video card data into LaptopAttributes.
    video_card_df = pd.read_csv(video_card_path)
    LaptopAttributes.video_card.update(video_card_df['chipset'].dropna())

    laptops_df = pd.read_csv(laptops_path, encoding='latin-1')
    # Screen sizes are stored as strings, matching the seeded '13.3' entry.
    LaptopAttributes.inches.update(str(size) for size in laptops_df['Inches'].dropna())
    LaptopAttributes.screen.update(laptops_df['ScreenResolution'].dropna())
    LaptopAttributes.video_card.update(laptops_df['Gpu'].dropna())

    # Laptop CPU strings: everything before the last space is the CPU name and the
    # last token is stored in the clock slot (presumably the clock speed, e.g.
    # '2.3GHz' -- confirm against the dataset). The core count is unknown here, so
    # it is None. Apple rows are excluded, as before. An entry whose name matches
    # one loaded from cpu_path overwrites it.
    not_apple = laptops_df['Company'] != 'Apple'
    for cpu_text in laptops_df.loc[not_apple, 'Cpu'].dropna():
        # rpartition(' ') yields the same name/last-token split as
        # ' '.join(s.split(' ')[:-1]) and s.split(' ')[-1].
        cpu_name, _, clock = cpu_text.rpartition(' ')
        LaptopAttributes.cpu[cpu_name] = [None, clock]
    
    

In [69]:
# Load the CSV-backed values into LaptopAttributes' class-level containers (mutates shared class state).
populate_spec()

In [70]:
# Inspect the populated mapping: CPU name -> [cores, core clock]; entries taken from laptops.csv have cores=None.
LaptopAttributes.cpu

{'AMD Ryzen 5 3600': [6, '3.6 GHz'],
 'AMD Ryzen 7 3700X': [8, '3.6 GHz'],
 'AMD Ryzen 5 2600': [6, '3.4 GHz'],
 'AMD Ryzen 9 3900X': [12, '3.8 GHz'],
 'AMD Ryzen 3 3200G': [4, '3.6 GHz'],
 'AMD Ryzen 5 3600X': [6, '3.8 GHz'],
 'Intel Core i7-9700K': [8, '3.6 GHz'],
 'Intel Core i5-9600K': [6, '3.7 GHz'],
 'Intel Core i9-10900K': [10, '3.7 GHz'],
 'Intel Core i9-9900K': [8, '3.6 GHz'],
 'AMD Ryzen 7 3800X': [8, '3.9 GHz'],
 'AMD Ryzen 5 1600 (12nm)': [6, '3.2 GHz'],
 'AMD Ryzen 9 3950X': [16, '3.5 GHz'],
 'Intel Core i7-10700K': [8, '3.8 GHz'],
 'AMD Ryzen 7 2700X': [8, '3.7 GHz'],
 'Intel Core i3-9100F': [4, '3.6 GHz'],
 'AMD Ryzen 3 3100': [4, '3.6 GHz'],
 'AMD Ryzen 5 3400G': [4, '3.7 GHz'],
 'Intel Core i5-9400F': [6, '2.9 GHz'],
 'Intel Core i5-10600K': [6, '4.1 GHz'],
 'AMD Ryzen 3 3300X': [4, '3.8 GHz'],
 'AMD Threadripper 3990X': [64, '2.9 GHz'],
 'AMD Ryzen 5 2600X': [6, '3.6 GHz'],
 'Intel Core i7-8700K': [6, '3.7 GHz'],
 'Intel Core i5-9400': [6, '2.9 GHz'],
 'Intel Core i5-